diff options
author | gilad_ilsar <gandismidas1> | 2016-09-22 14:28:05 +0300 |
---|---|---|
committer | gilad_ilsar <gandismidas1> | 2016-09-22 14:28:05 +0300 |
commit | 9fe2b6c8bf631a265dec44b5f474b97ad9d277c2 (patch) | |
tree | f8399baa768f7f67a9c338c9ac7b774b3f18ee2f | |
parent | e24a4199fc75f9939c488c46aea3d8ff745a6ba8 (diff) |
updates
-rw-r--r-- | .idea/workspace.xml | 114 | ||||
-rw-r--r-- | entities/location.py | 1 | ||||
-rw-r--r-- | entities/person.py | 7 | ||||
-rw-r--r-- | factories/INL_factory.py | 8 | ||||
-rw-r--r-- | parsers/INL_xml_parser.py | 2 | ||||
-rw-r--r-- | readers/xml_reader.py | 4 | ||||
-rw-r--r-- | testers/factorytester.py | 4 |
7 files changed, 80 insertions, 60 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 966bd42..d837dbc 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -21,7 +21,7 @@ <option name="LAST_RESOLUTION" value="IGNORE" /> </component> <component name="CoverageDataManager"> - <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474542426173" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> + <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474543643085" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> </component> <component name="CreatePatchCommitExecutor"> <option name="PATCH_PATH" value="" /> @@ -32,11 +32,11 @@ </component> <component name="FileEditorManager"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> - <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true"> + <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="289"> - <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" /> + <state relative-caret-position="170"> + <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" /> <folding /> </state> </provider> @@ -45,8 +45,8 @@ <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="7"> - <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" /> + <state relative-caret-position="17"> + <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" /> <folding /> </state> </provider> @@ -65,21 +65,21 @@ <file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="828"> - <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" /> + <state relative-caret-position="211"> + <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1688#2105#0" expanded="false" /> + <element signature="e#1747#2164#0" expanded="false" /> </folding> </state> </provider> </entry> </file> - <file leaf-file-name="person.py" pinned="false" current-in-tab="false"> + <file leaf-file-name="person.py" pinned="false" current-in-tab="true"> <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="930"> - <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" /> + <state relative-caret-position="296"> + <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> <folding /> </state> </provider> @@ -108,8 +108,8 @@ <file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" /> + <state relative-caret-position="272"> + <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> @@ -143,12 +143,12 @@ <option value="$PROJECT_DIR$/libs/__init__.py" /> <option value="$PROJECT_DIR$/entities/testers/factorytester.py" /> <option value="$PROJECT_DIR$/parsers/__init__.py" /> + <option value="$PROJECT_DIR$/entities/location.py" /> + <option value="$PROJECT_DIR$/readers/xml_reader.py" /> <option value="$PROJECT_DIR$/testers/factorytester.py" /> <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> - <option value="$PROJECT_DIR$/entities/person.py" /> <option value="$PROJECT_DIR$/factories/INL_factory.py" /> - <option value="$PROJECT_DIR$/entities/location.py" /> - <option value="$PROJECT_DIR$/readers/xml_reader.py" /> + <option value="$PROJECT_DIR$/entities/person.py" /> </list> </option> </component> @@ -541,12 +541,19 @@ <option name="project" value="LOCAL" /> <updated>1474539772357</updated> </task> - <option name="localTasksCounter" value="8" /> + <task id="LOCAL-00008" summary="updates"> + <created>1474542828452</created> + <option name="number" value="00008" /> + <option name="presentableId" value="LOCAL-00008" /> + <option name="project" value="LOCAL" /> + <updated>1474542828452</updated> + </task> + <option name="localTasksCounter" value="9" /> <servers /> </component> <component name="ToolWindowManager"> <frame x="-8" y="-8" width="1382" height="744" extended-state="6" /> - <editor active="true" /> + <editor active="false" /> <layout> <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25549048" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> @@ -554,7 +561,7 @@ <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3298969" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" /> <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" /> - <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> + <window_info id="Run" active="true" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" /> <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" /> @@ -583,7 +590,8 @@ <MESSAGE value="implemented the factory" /> <MESSAGE value="tester and person entity" /> <MESSAGE value="update the loctaion entity" /> - <option name="LAST_COMMIT_MESSAGE" value="update the loctaion entity" /> + <MESSAGE value="updates" /> + <option name="LAST_COMMIT_MESSAGE" value="updates" /> </component> <component name="XDebuggerManager"> <breakpoint-manager> @@ -622,7 +630,7 @@ <caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1688#2105#0" expanded="false" /> + <element signature="e#1747#2164#0" expanded="false" /> </folding> </state> </provider> @@ -683,7 +691,7 @@ <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1688#2105#0" expanded="false" /> + <element signature="e#1747#2164#0" expanded="false" /> </folding> </state> </provider> @@ -750,14 +758,6 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> - <folding /> - </state> - </provider> - </entry> <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="51"> @@ -774,6 +774,14 @@ </state> </provider> </entry> + <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="17"> + <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> + <folding /> + </state> + </provider> + </entry> <entry file="file://$PROJECT_DIR$/entities/basic_entity.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="51"> @@ -782,55 +790,55 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> + <entry file="file://$PROJECT_DIR$/entities/location.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="7"> - <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" /> + <state relative-caret-position="170"> + <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> + <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="828"> - <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" /> - <folding> - <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1688#2105#0" expanded="false" /> - </folding> + <state relative-caret-position="170"> + <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" /> + <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/person.py"> + <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="930"> - <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" /> + <state relative-caret-position="17"> + <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/location.py"> + <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> - <folding /> + <state relative-caret-position="211"> + <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" /> + <folding> + <element signature="e#0#15#0" expanded="true" /> + <element signature="e#1747#2164#0" expanded="false" /> + </folding> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" /> + <state relative-caret-position="272"> + <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> + <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="289"> - <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" /> + <state relative-caret-position="296"> + <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> <folding /> </state> </provider> diff --git a/entities/location.py b/entities/location.py index 07ef7ff..064b193 100644 --- a/entities/location.py +++ b/entities/location.py @@ -12,3 +12,4 @@ class Location(BasicEntity): print("Name = " + self.name) print("Name in langs = " + str(self.name_in_langs)) print("Types = " + str(self.types_of_place)) + print("Comments = " + str(self.comments_list)) diff --git a/entities/person.py b/entities/person.py index fa04566..c6db584 100644 --- a/entities/person.py +++ b/entities/person.py @@ -2,7 +2,7 @@ from entities.basic_entity import BasicEntity class Person(BasicEntity): - def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list): + def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list, profession): """ :param name: @@ -55,9 +55,14 @@ class Person(BasicEntity): bio_data_dict.update({elem: ''}) self.bio_data = bio_data_dict self.comments_list = comments_list + self.profession = profession def print_entity(self): print("Name = " + self.name) + print("Birth year = " + self.birth_year) + print("Death year = " + self.death_year) print("Names in langs = " + str(self.name_in_langs)) print("Bio Data = " + str(self.bio_data)) + print("Comments = " + str(self.comments_list)) + print("Profession = " + str(self.profession)) diff --git a/factories/INL_factory.py b/factories/INL_factory.py index e52257f..e9838f4 100644 --- a/factories/INL_factory.py +++ b/factories/INL_factory.py @@ -21,7 +21,8 @@ ENTITY_KEYS = { '451:a': 'name_in_langs', '451:9': 'langs_langindic', '550.a': 'type_of_place', - '667.a': 'comment' + '667.a': 'comment', + '374.a': 'profession' } @@ -50,6 +51,7 @@ class INLFactory(BasicFactory): comment_list = list() eng_name = '' date_of_birth = '' + profession = list() #get the names and date of birth and bio data for field in raw_object.getroot(): key = field.attrib.get('tag') @@ -79,7 +81,9 @@ class INLFactory(BasicFactory): bio_data.append(field.text) elif tag == 'comment': comment_list.append(field.text) - return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list) + elif tag == 'profession': + profession.append(field.text) + return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list, profession) #110 is institue elif record_key == '110': return entities.Institution() diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 879dad7..3d9b1b7 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -5,7 +5,7 @@ except ImportError: KNOWN_FIELD_TAGS = ['100', '110', '151'] -TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451'] +TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374'] class INLXmlParser: def __init__(self, reader, whitelist=TAG_WHITELIST): diff --git a/readers/xml_reader.py b/readers/xml_reader.py index 0ed07d5..3e630cb 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -33,6 +33,10 @@ def read_file(path, element_key): #test print the entity entity.print_entity() + + #TODO analys and upload the entity + + # import pdb; pdb.set_trace() print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text) element.clear() diff --git a/testers/factorytester.py b/testers/factorytester.py index 1fb6154..b6029ca 100644 --- a/testers/factorytester.py +++ b/testers/factorytester.py @@ -4,15 +4,13 @@ import factories import xml.etree.cElementTree as ET xmlpath = 'C:/Users/Ilsar/Documents/datahack/xml_example.xml' -whitelist = ['100', '374', '400', '151', '451', '550', '551', '678'] - xmltree = ET.parse(xmlpath) entities = list() inl_factory = factories.INLFactory() for record in xmltree.getroot(): - inl_parser = parsers.INLXmlParser(record, whitelist) + inl_parser = parsers.INLXmlParser(record) clean_record = inl_parser.clearxml() entities.append(inl_factory.get_entity(clean_record)) |