summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgilad_ilsar <gandismidas1>2016-09-22 14:28:05 +0300
committergilad_ilsar <gandismidas1>2016-09-22 14:28:05 +0300
commit9fe2b6c8bf631a265dec44b5f474b97ad9d277c2 (patch)
treef8399baa768f7f67a9c338c9ac7b774b3f18ee2f
parente24a4199fc75f9939c488c46aea3d8ff745a6ba8 (diff)
updates
-rw-r--r--.idea/workspace.xml114
-rw-r--r--entities/location.py1
-rw-r--r--entities/person.py7
-rw-r--r--factories/INL_factory.py8
-rw-r--r--parsers/INL_xml_parser.py2
-rw-r--r--readers/xml_reader.py4
-rw-r--r--testers/factorytester.py4
7 files changed, 80 insertions, 60 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 966bd42..d837dbc 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -21,7 +21,7 @@
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CoverageDataManager">
- <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474542426173" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
+ <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474543643085" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
@@ -32,11 +32,11 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
- <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true">
+ <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="289">
- <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" />
+ <state relative-caret-position="170">
+ <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" />
<folding />
</state>
</provider>
@@ -45,8 +45,8 @@
<file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="7">
- <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" />
+ <state relative-caret-position="17">
+ <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" />
<folding />
</state>
</provider>
@@ -65,21 +65,21 @@
<file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="828">
- <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" />
+ <state relative-caret-position="211">
+ <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1688#2105#0" expanded="false" />
+ <element signature="e#1747#2164#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
</file>
- <file leaf-file-name="person.py" pinned="false" current-in-tab="false">
+ <file leaf-file-name="person.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="930">
- <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" />
+ <state relative-caret-position="296">
+ <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
<folding />
</state>
</provider>
@@ -108,8 +108,8 @@
<file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" />
+ <state relative-caret-position="272">
+ <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
@@ -143,12 +143,12 @@
<option value="$PROJECT_DIR$/libs/__init__.py" />
<option value="$PROJECT_DIR$/entities/testers/factorytester.py" />
<option value="$PROJECT_DIR$/parsers/__init__.py" />
+ <option value="$PROJECT_DIR$/entities/location.py" />
+ <option value="$PROJECT_DIR$/readers/xml_reader.py" />
<option value="$PROJECT_DIR$/testers/factorytester.py" />
<option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
- <option value="$PROJECT_DIR$/entities/person.py" />
<option value="$PROJECT_DIR$/factories/INL_factory.py" />
- <option value="$PROJECT_DIR$/entities/location.py" />
- <option value="$PROJECT_DIR$/readers/xml_reader.py" />
+ <option value="$PROJECT_DIR$/entities/person.py" />
</list>
</option>
</component>
@@ -541,12 +541,19 @@
<option name="project" value="LOCAL" />
<updated>1474539772357</updated>
</task>
- <option name="localTasksCounter" value="8" />
+ <task id="LOCAL-00008" summary="updates">
+ <created>1474542828452</created>
+ <option name="number" value="00008" />
+ <option name="presentableId" value="LOCAL-00008" />
+ <option name="project" value="LOCAL" />
+ <updated>1474542828452</updated>
+ </task>
+ <option name="localTasksCounter" value="9" />
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1382" height="744" extended-state="6" />
- <editor active="true" />
+ <editor active="false" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25549048" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
@@ -554,7 +561,7 @@
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3298969" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" />
- <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+ <window_info id="Run" active="true" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
@@ -583,7 +590,8 @@
<MESSAGE value="implemented the factory" />
<MESSAGE value="tester and person entity" />
<MESSAGE value="update the loctaion entity" />
- <option name="LAST_COMMIT_MESSAGE" value="update the loctaion entity" />
+ <MESSAGE value="updates" />
+ <option name="LAST_COMMIT_MESSAGE" value="updates" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
@@ -622,7 +630,7 @@
<caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1688#2105#0" expanded="false" />
+ <element signature="e#1747#2164#0" expanded="false" />
</folding>
</state>
</provider>
@@ -683,7 +691,7 @@
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1688#2105#0" expanded="false" />
+ <element signature="e#1747#2164#0" expanded="false" />
</folding>
</state>
</provider>
@@ -750,14 +758,6 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
- <folding />
- </state>
- </provider>
- </entry>
<entry file="file://$PROJECT_DIR$/libs/json_tools.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="51">
@@ -774,6 +774,14 @@
</state>
</provider>
</entry>
+ <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="17">
+ <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
<entry file="file://$PROJECT_DIR$/entities/basic_entity.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="51">
@@ -782,55 +790,55 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
+ <entry file="file://$PROJECT_DIR$/entities/location.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="7">
- <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" />
+ <state relative-caret-position="170">
+ <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
+ <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="828">
- <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" />
- <folding>
- <element signature="e#0#15#0" expanded="true" />
- <element signature="e#1688#2105#0" expanded="false" />
- </folding>
+ <state relative-caret-position="170">
+ <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" />
+ <folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/person.py">
+ <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="930">
- <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" />
+ <state relative-caret-position="17">
+ <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/location.py">
+ <entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
- <folding />
+ <state relative-caret-position="211">
+ <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" />
+ <folding>
+ <element signature="e#0#15#0" expanded="true" />
+ <element signature="e#1747#2164#0" expanded="false" />
+ </folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" />
+ <state relative-caret-position="272">
+ <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
+ <entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="289">
- <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" />
+ <state relative-caret-position="296">
+ <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
<folding />
</state>
</provider>
diff --git a/entities/location.py b/entities/location.py
index 07ef7ff..064b193 100644
--- a/entities/location.py
+++ b/entities/location.py
@@ -12,3 +12,4 @@ class Location(BasicEntity):
print("Name = " + self.name)
print("Name in langs = " + str(self.name_in_langs))
print("Types = " + str(self.types_of_place))
+ print("Comments = " + str(self.comments_list))
diff --git a/entities/person.py b/entities/person.py
index fa04566..c6db584 100644
--- a/entities/person.py
+++ b/entities/person.py
@@ -2,7 +2,7 @@ from entities.basic_entity import BasicEntity
class Person(BasicEntity):
- def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list):
+ def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list, profession):
"""
:param name:
@@ -55,9 +55,14 @@ class Person(BasicEntity):
bio_data_dict.update({elem: ''})
self.bio_data = bio_data_dict
self.comments_list = comments_list
+ self.profession = profession
def print_entity(self):
print("Name = " + self.name)
+ print("Birth year = " + self.birth_year)
+ print("Death year = " + self.death_year)
print("Names in langs = " + str(self.name_in_langs))
print("Bio Data = " + str(self.bio_data))
+ print("Comments = " + str(self.comments_list))
+ print("Profession = " + str(self.profession))
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index e52257f..e9838f4 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -21,7 +21,8 @@ ENTITY_KEYS = {
'451:a': 'name_in_langs',
'451:9': 'langs_langindic',
'550.a': 'type_of_place',
- '667.a': 'comment'
+ '667.a': 'comment',
+ '374.a': 'profession'
}
@@ -50,6 +51,7 @@ class INLFactory(BasicFactory):
comment_list = list()
eng_name = ''
date_of_birth = ''
+ profession = list()
#get the names and date of birth and bio data
for field in raw_object.getroot():
key = field.attrib.get('tag')
@@ -79,7 +81,9 @@ class INLFactory(BasicFactory):
bio_data.append(field.text)
elif tag == 'comment':
comment_list.append(field.text)
- return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list)
+ elif tag == 'profession':
+ profession.append(field.text)
+ return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list, profession)
#110 is institution
elif record_key == '110':
return entities.Institution()
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 879dad7..3d9b1b7 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -5,7 +5,7 @@ except ImportError:
KNOWN_FIELD_TAGS = ['100', '110', '151']
-TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451']
+TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
class INLXmlParser:
def __init__(self, reader, whitelist=TAG_WHITELIST):
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index 0ed07d5..3e630cb 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -33,6 +33,10 @@ def read_file(path, element_key):
#test print the entity
entity.print_entity()
+
+ #TODO analyze and upload the entity
+
+
# import pdb; pdb.set_trace()
print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text)
element.clear()
diff --git a/testers/factorytester.py b/testers/factorytester.py
index 1fb6154..b6029ca 100644
--- a/testers/factorytester.py
+++ b/testers/factorytester.py
@@ -4,15 +4,13 @@ import factories
import xml.etree.cElementTree as ET
xmlpath = 'C:/Users/Ilsar/Documents/datahack/xml_example.xml'
-whitelist = ['100', '374', '400', '151', '451', '550', '551', '678']
-
xmltree = ET.parse(xmlpath)
entities = list()
inl_factory = factories.INLFactory()
for record in xmltree.getroot():
- inl_parser = parsers.INLXmlParser(record, whitelist)
+ inl_parser = parsers.INLXmlParser(record)
clean_record = inl_parser.clearxml()
entities.append(inl_factory.get_entity(clean_record))