diff options
author | gilad_ilsar <gandismidas1> | 2016-09-22 14:13:48 +0300 |
---|---|---|
committer | gilad_ilsar <gandismidas1> | 2016-09-22 14:13:48 +0300 |
commit | e24a4199fc75f9939c488c46aea3d8ff745a6ba8 (patch) | |
tree | 689fb682e26da9c25de272b33c23493c060f3193 | |
parent | cc9c0e57d5a23be30a9d0ad5a97acaa01019b573 (diff) |
updates
-rw-r--r-- | .idea/workspace.xml | 155 | ||||
-rw-r--r-- | entities/location.py | 3 | ||||
-rw-r--r-- | entities/person.py | 29 | ||||
-rw-r--r-- | factories/INL_factory.py | 10 | ||||
-rw-r--r-- | parsers/INL_xml_parser.py | 6 | ||||
-rw-r--r-- | readers/xml_reader.py | 12 | ||||
-rw-r--r-- | testers/factorytester.py | 2 |
7 files changed, 119 insertions, 98 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 292b4fc..966bd42 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,6 +3,10 @@ <component name="ChangeListManager"> <list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment=""> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/location.py" afterPath="$PROJECT_DIR$/entities/location.py" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/person.py" afterPath="$PROJECT_DIR$/entities/person.py" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/parsers/INL_xml_parser.py" afterPath="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/testers/factorytester.py" afterPath="$PROJECT_DIR$/testers/factorytester.py" /> </list> @@ -17,7 +21,7 @@ <option name="LAST_RESOLUTION" value="IGNORE" /> </component> <component name="CoverageDataManager"> - <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474538841285" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> + <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474542426173" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> </component> <component name="CreatePatchCommitExecutor"> <option name="PATCH_PATH" value="" /> @@ -28,11 +32,21 @@ </component> <component name="FileEditorManager"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> + <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true"> + <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="289"> + <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" /> + <folding /> + </state> + </provider> + </entry> + </file> <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="340"> - <caret line="20" column="46" selection-start-line="20" selection-start-column="46" selection-end-line="20" selection-end-column="46" /> + <state relative-caret-position="7"> + <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" /> <folding /> </state> </provider> @@ -51,32 +65,21 @@ <file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="313"> - <caret line="77" column="20" selection-start-line="77" selection-start-column="20" selection-end-line="77" selection-end-column="47" /> + <state relative-caret-position="828"> + <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1654#2071#0" expanded="false" /> - <element signature="e#2866#4151#0" expanded="false" /> + <element signature="e#1688#2105#0" expanded="false" /> </folding> </state> </provider> </entry> </file> - <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true"> - <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="301"> - <caret line="20" column="34" selection-start-line="20" selection-start-column="34" selection-end-line="20" selection-end-column="34" /> - <folding /> - </state> - </provider> - </entry> - </file> <file leaf-file-name="person.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="0" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" /> + <state relative-caret-position="930"> + <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" /> <folding /> </state> </provider> @@ -95,8 +98,8 @@ <file leaf-file-name="location.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/entities/location.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="68"> - <caret line="4" column="60" selection-start-line="4" selection-start-column="60" selection-end-line="4" selection-end-column="60" /> + <state relative-caret-position="170"> + <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> <folding /> </state> </provider> @@ -105,8 +108,8 @@ <file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="323"> - <caret line="19" column="16" selection-start-line="19" selection-start-column="16" selection-end-line="19" selection-end-column="16" /> + <state relative-caret-position="170"> + <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> @@ -114,26 +117,6 @@ </provider> </entry> </file> - <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/entities/__init__.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="34"> - <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="json_tools.py" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="51"> - <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" /> - <folding /> - </state> - </provider> - </entry> - </file> </leaf> </component> <component name="FileTemplateManagerImpl"> @@ -160,11 +143,11 @@ <option value="$PROJECT_DIR$/libs/__init__.py" /> <option value="$PROJECT_DIR$/entities/testers/factorytester.py" /> <option value="$PROJECT_DIR$/parsers/__init__.py" /> - <option value="$PROJECT_DIR$/entities/person.py" /> + <option value="$PROJECT_DIR$/testers/factorytester.py" /> <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> - <option value="$PROJECT_DIR$/entities/location.py" /> + <option value="$PROJECT_DIR$/entities/person.py" /> <option value="$PROJECT_DIR$/factories/INL_factory.py" /> - <option value="$PROJECT_DIR$/testers/factorytester.py" /> + <option value="$PROJECT_DIR$/entities/location.py" /> <option value="$PROJECT_DIR$/readers/xml_reader.py" /> </list> </option> @@ -551,7 +534,14 @@ <option name="project" value="LOCAL" /> <updated>1474537703873</updated> </task> - <option name="localTasksCounter" value="7" /> + <task id="LOCAL-00007" summary="update the loctaion entity"> + <created>1474539772357</created> + <option name="number" value="00007" /> + <option name="presentableId" value="LOCAL-00007" /> + <option name="project" value="LOCAL" /> + <updated>1474539772357</updated> + </task> + <option name="localTasksCounter" value="8" /> <servers /> </component> <component name="ToolWindowManager"> @@ -632,8 +622,7 @@ <caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1654#2071#0" expanded="false" /> - <element signature="e#2866#4151#0" expanded="false" /> + <element signature="e#1688#2105#0" expanded="false" /> </folding> </state> </provider> @@ -694,8 +683,7 @@ <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1654#2071#0" expanded="false" /> - <element signature="e#2866#4151#0" expanded="false" /> + <element signature="e#1688#2105#0" expanded="false" /> </folding> </state> </provider> @@ -762,22 +750,6 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/location.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="68"> - <caret line="4" column="60" selection-start-line="4" selection-start-column="60" selection-end-line="4" selection-end-column="60" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="340"> - <caret line="20" column="46" selection-start-line="20" selection-start-column="46" selection-end-line="20" selection-end-column="46" /> - <folding /> - </state> - </provider> - </entry> <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="17"> @@ -786,18 +758,18 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/__init__.py"> + <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="34"> - <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" /> + <state relative-caret-position="51"> + <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> + <entry file="file://$PROJECT_DIR$/entities/__init__.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="51"> - <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" /> + <state relative-caret-position="34"> + <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" /> <folding /> </state> </provider> @@ -810,30 +782,45 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/person.py"> + <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="0" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" /> + <state relative-caret-position="7"> + <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" /> <folding /> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="313"> - <caret line="77" column="20" selection-start-line="77" selection-start-column="20" selection-end-line="77" selection-end-column="47" /> + <state relative-caret-position="828"> + <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1654#2071#0" expanded="false" /> - <element signature="e#2866#4151#0" expanded="false" /> + <element signature="e#1688#2105#0" expanded="false" /> </folding> </state> </provider> </entry> + <entry file="file://$PROJECT_DIR$/entities/person.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="930"> + <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" /> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/entities/location.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="170"> + <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> + <folding /> + </state> + </provider> + </entry> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="323"> - <caret line="19" column="16" selection-start-line="19" selection-start-column="16" selection-end-line="19" selection-end-column="16" /> + <state relative-caret-position="170"> + <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> @@ -842,8 +829,8 @@ </entry> <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="301"> - <caret line="20" column="34" selection-start-line="20" selection-start-column="34" selection-end-line="20" selection-end-column="34" /> + <state relative-caret-position="289"> + <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" /> <folding /> </state> </provider> diff --git a/entities/location.py b/entities/location.py index cd1ca01..07ef7ff 100644 --- a/entities/location.py +++ b/entities/location.py @@ -2,10 +2,11 @@ from entities.basic_entity import BasicEntity class Location(BasicEntity): - def __init__(self, name, types_of_place, name_in_langs): + def __init__(self, name, types_of_place, name_in_langs, comments_list): self.name = name self.types_of_place = types_of_place self.name_in_langs = name_in_langs + self.comments_list = comments_list def print_entity(self): print("Name = " + self.name) diff --git a/entities/person.py b/entities/person.py index d541bb4..fa04566 100644 --- a/entities/person.py +++ b/entities/person.py @@ -2,7 +2,7 @@ from entities.basic_entity import BasicEntity class Person(BasicEntity): - def __init__(self, name, date_of_birth, name_in_langs, bio_data): + def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list): """ :param name: @@ -22,6 +22,7 @@ class Person(BasicEntity): self.birth_year = date_of_birth.strip() self.death_year = '' self.name_in_langs = name_in_langs + ''' place_of_birth = list() place_of_death = list() profession = list() @@ -37,12 +38,26 @@ class Person(BasicEntity): self.place_of_birth = place_of_birth self.place_of_death = place_of_death self.profession = profession + ''' + bio_data_dict = dict() + for elem in bio_data: + elem_splitted = elem.split(":") + if len(elem_splitted) == 2: + bio_data_key = elem_splitted[0] + bio_data_value = elem_splitted[1] + if bio_data_key in bio_data_dict: + bio_data_dict.get(bio_data_key).append(bio_data_value) + else: + bio_data_dict.update( + {bio_data_key: [bio_data_value]} + ) + else: + bio_data_dict.update({elem: ''}) + self.bio_data = bio_data_dict + self.comments_list = comments_list + def print_entity(self): print("Name = " + self.name) - print("Birth year = " + self.birth_year) - print("Death year = " + self.death_year) - print("Names in langs" + str(self.name_in_langs)) - print("Places of birth = " + str(self.place_of_birth)) - print("Places of death = " + str(self.place_of_death)) - print("profession = " + str(self.profession)) + print("Names in langs = " + str(self.name_in_langs)) + print("Bio Data = " + str(self.bio_data)) diff --git a/factories/INL_factory.py b/factories/INL_factory.py index 6b75f28..e52257f 100644 --- a/factories/INL_factory.py +++ b/factories/INL_factory.py @@ -47,6 +47,7 @@ class INLFactory(BasicFactory): name = '' name_in_langs = dict() bio_data = list() + comment_list = list() eng_name = '' date_of_birth = '' #get the names and date of birth and bio data @@ -76,7 +77,9 @@ class INLFactory(BasicFactory): name_in_langs.update({field.text: [name_diff]}) elif tag == 'bio_data': bio_data.append(field.text) - return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data) + elif tag == 'comment': + comment_list.append(field.text) + return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list) #110 is institue elif record_key == '110': return entities.Institution() @@ -84,6 +87,7 @@ class INLFactory(BasicFactory): elif record_key == '151': name_in_langs = dict() types_of_place = list() + comment_list = list() for field in raw_object.getroot(): key = field.attrib.get('tag') tag = entity_keys.get(key) @@ -108,7 +112,9 @@ class INLFactory(BasicFactory): name_in_langs.get(field.text).append(name_diff) else: name_in_langs.update({field.text: [name_diff]}) - return entities.Location(eng_name, types_of_place , name_in_langs) + elif tag == 'comment': + comment_list.append(field.text) + return entities.Location(eng_name, types_of_place , name_in_langs, comment_list) else: raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self))) diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 968bf55..879dad7 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -5,11 +5,13 @@ except ImportError: KNOWN_FIELD_TAGS = ['100', '110', '151'] +TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451'] class INLXmlParser: - def __init__(self, reader, whitelist=None): + def __init__(self, reader, whitelist=TAG_WHITELIST): self.reader = reader - self.whitelist = whitelist or KNOWN_FIELD_TAGS + #self.whitelist = whitelist or KNOWN_FIELD_TAGS + self.whitelist = whitelist def clearxml(self): newTreeRoot = ET.Element('data') diff --git a/readers/xml_reader.py b/readers/xml_reader.py index bd7821b..0ed07d5 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -1,5 +1,5 @@ # from __future__ import absolute_import -import parsers +import parsers, factories try: import xml.etree.cElementTree as ET @@ -18,11 +18,21 @@ def read_file(path, element_key): # get the root element event, root = context.__next__() + #the factory + inl_factory = factories.INLFactory() + for event, element in context: if 'end' in event: if element_key in element.tag: + #enter the processing here record_counter += 1 + #cleaned element is a tree cleaned_element = parsers.INLXmlParser(element).clearxml() + entity = inl_factory.get_entity(cleaned_element) + + #test print the entity + entity.print_entity() + # import pdb; pdb.set_trace() print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text) element.clear() diff --git a/testers/factorytester.py b/testers/factorytester.py index 121e068..1fb6154 100644 --- a/testers/factorytester.py +++ b/testers/factorytester.py @@ -17,5 +17,5 @@ for record in xmltree.getroot(): entities.append(inl_factory.get_entity(clean_record)) for entity in entities: - print(entity) + entity. print_entity() |