summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgilad_ilsar <gandismidas1>2016-09-22 14:13:48 +0300
committergilad_ilsar <gandismidas1>2016-09-22 14:13:48 +0300
commite24a4199fc75f9939c488c46aea3d8ff745a6ba8 (patch)
tree689fb682e26da9c25de272b33c23493c060f3193
parentcc9c0e57d5a23be30a9d0ad5a97acaa01019b573 (diff)
updates
-rw-r--r--.idea/workspace.xml155
-rw-r--r--entities/location.py3
-rw-r--r--entities/person.py29
-rw-r--r--factories/INL_factory.py10
-rw-r--r--parsers/INL_xml_parser.py6
-rw-r--r--readers/xml_reader.py12
-rw-r--r--testers/factorytester.py2
7 files changed, 119 insertions, 98 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 292b4fc..966bd42 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -3,6 +3,10 @@
<component name="ChangeListManager">
<list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/location.py" afterPath="$PROJECT_DIR$/entities/location.py" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/person.py" afterPath="$PROJECT_DIR$/entities/person.py" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/parsers/INL_xml_parser.py" afterPath="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/testers/factorytester.py" afterPath="$PROJECT_DIR$/testers/factorytester.py" />
</list>
@@ -17,7 +21,7 @@
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CoverageDataManager">
- <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474538841285" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
+ <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474542426173" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
@@ -28,11 +32,21 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+ <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true">
+ <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="289">
+ <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
+ </file>
<file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="340">
- <caret line="20" column="46" selection-start-line="20" selection-start-column="46" selection-end-line="20" selection-end-column="46" />
+ <state relative-caret-position="7">
+ <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" />
<folding />
</state>
</provider>
@@ -51,32 +65,21 @@
<file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="313">
- <caret line="77" column="20" selection-start-line="77" selection-start-column="20" selection-end-line="77" selection-end-column="47" />
+ <state relative-caret-position="828">
+ <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1654#2071#0" expanded="false" />
- <element signature="e#2866#4151#0" expanded="false" />
+ <element signature="e#1688#2105#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
</file>
- <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true">
- <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="301">
- <caret line="20" column="34" selection-start-line="20" selection-start-column="34" selection-end-line="20" selection-end-column="34" />
- <folding />
- </state>
- </provider>
- </entry>
- </file>
<file leaf-file-name="person.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="0" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" />
+ <state relative-caret-position="930">
+ <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" />
<folding />
</state>
</provider>
@@ -95,8 +98,8 @@
<file leaf-file-name="location.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/entities/location.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="68">
- <caret line="4" column="60" selection-start-line="4" selection-start-column="60" selection-end-line="4" selection-end-column="60" />
+ <state relative-caret-position="170">
+ <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
<folding />
</state>
</provider>
@@ -105,8 +108,8 @@
<file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="323">
- <caret line="19" column="16" selection-start-line="19" selection-start-column="16" selection-end-line="19" selection-end-column="16" />
+ <state relative-caret-position="170">
+ <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
@@ -114,26 +117,6 @@
</provider>
</entry>
</file>
- <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
- <entry file="file://$PROJECT_DIR$/entities/__init__.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="34">
- <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" />
- <folding />
- </state>
- </provider>
- </entry>
- </file>
- <file leaf-file-name="json_tools.py" pinned="false" current-in-tab="false">
- <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="51">
- <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
- <folding />
- </state>
- </provider>
- </entry>
- </file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
@@ -160,11 +143,11 @@
<option value="$PROJECT_DIR$/libs/__init__.py" />
<option value="$PROJECT_DIR$/entities/testers/factorytester.py" />
<option value="$PROJECT_DIR$/parsers/__init__.py" />
- <option value="$PROJECT_DIR$/entities/person.py" />
+ <option value="$PROJECT_DIR$/testers/factorytester.py" />
<option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
- <option value="$PROJECT_DIR$/entities/location.py" />
+ <option value="$PROJECT_DIR$/entities/person.py" />
<option value="$PROJECT_DIR$/factories/INL_factory.py" />
- <option value="$PROJECT_DIR$/testers/factorytester.py" />
+ <option value="$PROJECT_DIR$/entities/location.py" />
<option value="$PROJECT_DIR$/readers/xml_reader.py" />
</list>
</option>
@@ -551,7 +534,14 @@
<option name="project" value="LOCAL" />
<updated>1474537703873</updated>
</task>
- <option name="localTasksCounter" value="7" />
+ <task id="LOCAL-00007" summary="update the loctaion entity">
+ <created>1474539772357</created>
+ <option name="number" value="00007" />
+ <option name="presentableId" value="LOCAL-00007" />
+ <option name="project" value="LOCAL" />
+ <updated>1474539772357</updated>
+ </task>
+ <option name="localTasksCounter" value="8" />
<servers />
</component>
<component name="ToolWindowManager">
@@ -632,8 +622,7 @@
<caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1654#2071#0" expanded="false" />
- <element signature="e#2866#4151#0" expanded="false" />
+ <element signature="e#1688#2105#0" expanded="false" />
</folding>
</state>
</provider>
@@ -694,8 +683,7 @@
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1654#2071#0" expanded="false" />
- <element signature="e#2866#4151#0" expanded="false" />
+ <element signature="e#1688#2105#0" expanded="false" />
</folding>
</state>
</provider>
@@ -762,22 +750,6 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/location.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="68">
- <caret line="4" column="60" selection-start-line="4" selection-start-column="60" selection-end-line="4" selection-end-column="60" />
- <folding />
- </state>
- </provider>
- </entry>
- <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="340">
- <caret line="20" column="46" selection-start-line="20" selection-start-column="46" selection-end-line="20" selection-end-column="46" />
- <folding />
- </state>
- </provider>
- </entry>
<entry file="file://$PROJECT_DIR$/parsers/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="17">
@@ -786,18 +758,18 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/__init__.py">
+ <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="34">
- <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" />
+ <state relative-caret-position="51">
+ <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
+ <entry file="file://$PROJECT_DIR$/entities/__init__.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="51">
- <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
+ <state relative-caret-position="34">
+ <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" />
<folding />
</state>
</provider>
@@ -810,30 +782,45 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/person.py">
+ <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="0" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" />
+ <state relative-caret-position="7">
+ <caret line="11" column="28" selection-start-line="11" selection-start-column="28" selection-end-line="11" selection-end-column="28" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="313">
- <caret line="77" column="20" selection-start-line="77" selection-start-column="20" selection-end-line="77" selection-end-column="47" />
+ <state relative-caret-position="828">
+ <caret line="86" column="33" selection-start-line="86" selection-start-column="33" selection-end-line="86" selection-end-column="33" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1654#2071#0" expanded="false" />
- <element signature="e#2866#4151#0" expanded="false" />
+ <element signature="e#1688#2105#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
+ <entry file="file://$PROJECT_DIR$/entities/person.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="930">
+ <caret line="55" column="37" selection-start-line="55" selection-start-column="37" selection-end-line="55" selection-end-column="37" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
+ <entry file="file://$PROJECT_DIR$/entities/location.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="170">
+ <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="323">
- <caret line="19" column="16" selection-start-line="19" selection-start-column="16" selection-end-line="19" selection-end-column="16" />
+ <state relative-caret-position="170">
+ <caret line="10" column="0" selection-start-line="10" selection-start-column="0" selection-end-line="11" selection-end-column="35" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
@@ -842,8 +829,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="301">
- <caret line="20" column="34" selection-start-line="20" selection-start-column="34" selection-end-line="20" selection-end-column="34" />
+ <state relative-caret-position="289">
+ <caret line="23" column="34" selection-start-line="23" selection-start-column="34" selection-end-line="23" selection-end-column="34" />
<folding />
</state>
</provider>
diff --git a/entities/location.py b/entities/location.py
index cd1ca01..07ef7ff 100644
--- a/entities/location.py
+++ b/entities/location.py
@@ -2,10 +2,11 @@ from entities.basic_entity import BasicEntity
class Location(BasicEntity):
- def __init__(self, name, types_of_place, name_in_langs):
+ def __init__(self, name, types_of_place, name_in_langs, comments_list):
self.name = name
self.types_of_place = types_of_place
self.name_in_langs = name_in_langs
+ self.comments_list = comments_list
def print_entity(self):
print("Name = " + self.name)
diff --git a/entities/person.py b/entities/person.py
index d541bb4..fa04566 100644
--- a/entities/person.py
+++ b/entities/person.py
@@ -2,7 +2,7 @@ from entities.basic_entity import BasicEntity
class Person(BasicEntity):
- def __init__(self, name, date_of_birth, name_in_langs, bio_data):
+ def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list):
"""
:param name:
@@ -22,6 +22,7 @@ class Person(BasicEntity):
self.birth_year = date_of_birth.strip()
self.death_year = ''
self.name_in_langs = name_in_langs
+ '''
place_of_birth = list()
place_of_death = list()
profession = list()
@@ -37,12 +38,26 @@ class Person(BasicEntity):
self.place_of_birth = place_of_birth
self.place_of_death = place_of_death
self.profession = profession
+ '''
+ bio_data_dict = dict()
+ for elem in bio_data:
+ elem_splitted = elem.split(":")
+ if len(elem_splitted) == 2:
+ bio_data_key = elem_splitted[0]
+ bio_data_value = elem_splitted[1]
+ if bio_data_key in bio_data_dict:
+ bio_data_dict.get(bio_data_key).append(bio_data_value)
+ else:
+ bio_data_dict.update(
+ {bio_data_key: [bio_data_value]}
+ )
+ else:
+ bio_data_dict.update({elem: ''})
+ self.bio_data = bio_data_dict
+ self.comments_list = comments_list
+
def print_entity(self):
print("Name = " + self.name)
- print("Birth year = " + self.birth_year)
- print("Death year = " + self.death_year)
- print("Names in langs" + str(self.name_in_langs))
- print("Places of birth = " + str(self.place_of_birth))
- print("Places of death = " + str(self.place_of_death))
- print("profession = " + str(self.profession))
+ print("Names in langs = " + str(self.name_in_langs))
+ print("Bio Data = " + str(self.bio_data))
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index 6b75f28..e52257f 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -47,6 +47,7 @@ class INLFactory(BasicFactory):
name = ''
name_in_langs = dict()
bio_data = list()
+ comment_list = list()
eng_name = ''
date_of_birth = ''
#get the names and date of birth and bio data
@@ -76,7 +77,9 @@ class INLFactory(BasicFactory):
name_in_langs.update({field.text: [name_diff]})
elif tag == 'bio_data':
bio_data.append(field.text)
- return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data)
+ elif tag == 'comment':
+ comment_list.append(field.text)
+ return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list)
#110 is institue
elif record_key == '110':
return entities.Institution()
@@ -84,6 +87,7 @@ class INLFactory(BasicFactory):
elif record_key == '151':
name_in_langs = dict()
types_of_place = list()
+ comment_list = list()
for field in raw_object.getroot():
key = field.attrib.get('tag')
tag = entity_keys.get(key)
@@ -108,7 +112,9 @@ class INLFactory(BasicFactory):
name_in_langs.get(field.text).append(name_diff)
else:
name_in_langs.update({field.text: [name_diff]})
- return entities.Location(eng_name, types_of_place , name_in_langs)
+ elif tag == 'comment':
+ comment_list.append(field.text)
+ return entities.Location(eng_name, types_of_place , name_in_langs, comment_list)
else:
raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 968bf55..879dad7 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -5,11 +5,13 @@ except ImportError:
KNOWN_FIELD_TAGS = ['100', '110', '151']
+TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451']
class INLXmlParser:
- def __init__(self, reader, whitelist=None):
+ def __init__(self, reader, whitelist=TAG_WHITELIST):
self.reader = reader
- self.whitelist = whitelist or KNOWN_FIELD_TAGS
+ #self.whitelist = whitelist or KNOWN_FIELD_TAGS
+ self.whitelist = whitelist
def clearxml(self):
newTreeRoot = ET.Element('data')
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index bd7821b..0ed07d5 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -1,5 +1,5 @@
# from __future__ import absolute_import
-import parsers
+import parsers, factories
try:
import xml.etree.cElementTree as ET
@@ -18,11 +18,21 @@ def read_file(path, element_key):
# get the root element
event, root = context.__next__()
+ #the factory
+ inl_factory = factories.INLFactory()
+
for event, element in context:
if 'end' in event:
if element_key in element.tag:
+ #enter the processing here
record_counter += 1
+ #cleaned element is a tree
cleaned_element = parsers.INLXmlParser(element).clearxml()
+ entity = inl_factory.get_entity(cleaned_element)
+
+ #test print the entity
+ entity.print_entity()
+
# import pdb; pdb.set_trace()
print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text)
element.clear()
diff --git a/testers/factorytester.py b/testers/factorytester.py
index 121e068..1fb6154 100644
--- a/testers/factorytester.py
+++ b/testers/factorytester.py
@@ -17,5 +17,5 @@ for record in xmltree.getroot():
entities.append(inl_factory.get_entity(clean_record))
for entity in entities:
- print(entity)
+ entity. print_entity()