summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgilad_ilsar <gandismidas1>2016-09-22 14:53:42 +0300
committergilad_ilsar <gandismidas1>2016-09-22 14:53:42 +0300
commit6405185cd4136b04b45b3b9d756fdd5d38405f07 (patch)
tree863942a90ff00ab68d87fc8f41f6b000d46a55e8
parent9fe2b6c8bf631a265dec44b5f474b97ad9d277c2 (diff)
updates
-rw-r--r--.idea/workspace.xml111
-rw-r--r--factories/INL_factory.py3
-rw-r--r--parsers/INL_xml_parser.py5
-rw-r--r--readers/xml_reader.py12
4 files changed, 81 insertions, 50 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index d837dbc..bf6f1e4 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -3,12 +3,9 @@
<component name="ChangeListManager">
<list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
- <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/location.py" afterPath="$PROJECT_DIR$/entities/location.py" />
- <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/person.py" afterPath="$PROJECT_DIR$/entities/person.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/parsers/INL_xml_parser.py" afterPath="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" />
- <change type="MODIFICATION" beforePath="$PROJECT_DIR$/testers/factorytester.py" afterPath="$PROJECT_DIR$/testers/factorytester.py" />
</list>
<ignored path="lib2wiki.iws" />
<ignored path=".idea/workspace.xml" />
@@ -21,7 +18,8 @@
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CoverageDataManager">
- <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474543643085" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
+ <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474544553528" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
+ <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474545128115" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
@@ -32,11 +30,11 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
- <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="false">
+ <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" />
+ <state relative-caret-position="146">
+ <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" />
<folding />
</state>
</provider>
@@ -45,8 +43,8 @@
<file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" />
+ <state relative-caret-position="330">
+ <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" />
<folding />
</state>
</provider>
@@ -65,8 +63,8 @@
<file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="211">
- <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" />
+ <state relative-caret-position="1384">
+ <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" />
<folding>
<element signature="e#0#15#0" expanded="true" />
<element signature="e#1747#2164#0" expanded="false" />
@@ -75,7 +73,7 @@
</provider>
</entry>
</file>
- <file leaf-file-name="person.py" pinned="false" current-in-tab="true">
+ <file leaf-file-name="person.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="296">
@@ -108,8 +106,8 @@
<file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="272">
- <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
+ <state relative-caret-position="204">
+ <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
@@ -144,11 +142,11 @@
<option value="$PROJECT_DIR$/entities/testers/factorytester.py" />
<option value="$PROJECT_DIR$/parsers/__init__.py" />
<option value="$PROJECT_DIR$/entities/location.py" />
- <option value="$PROJECT_DIR$/readers/xml_reader.py" />
<option value="$PROJECT_DIR$/testers/factorytester.py" />
- <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
- <option value="$PROJECT_DIR$/factories/INL_factory.py" />
<option value="$PROJECT_DIR$/entities/person.py" />
+ <option value="$PROJECT_DIR$/factories/INL_factory.py" />
+ <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
+ <option value="$PROJECT_DIR$/readers/xml_reader.py" />
</list>
</option>
</component>
@@ -245,7 +243,7 @@
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="WebServerToolWindowFactoryState" value="false" />
</component>
- <component name="RunManager" selected="Python.factorytester">
+ <component name="RunManager" selected="Python.xml_reader">
<configuration default="false" name="factorytester" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
@@ -264,6 +262,24 @@
<option name="SHOW_COMMAND_LINE" value="false" />
<method />
</configuration>
+ <configuration default="false" name="xml_reader" type="PythonConfigurationType" factoryName="Python" temporary="true">
+ <option name="INTERPRETER_OPTIONS" value="" />
+ <option name="PARENT_ENVS" value="true" />
+ <envs>
+ <env name="PYTHONUNBUFFERED" value="1" />
+ </envs>
+ <option name="SDK_HOME" value="" />
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/readers" />
+ <option name="IS_MODULE_SDK" value="true" />
+ <option name="ADD_CONTENT_ROOTS" value="true" />
+ <option name="ADD_SOURCE_ROOTS" value="true" />
+ <module name="parser" />
+ <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/readers/xml_reader.py" />
+ <option name="PARAMETERS" value="" />
+ <option name="SHOW_COMMAND_LINE" value="false" />
+ <method />
+ </configuration>
<configuration default="true" type="DjangoTestsConfigurationType" factoryName="Django tests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
@@ -472,12 +488,14 @@
<option name="USE_KEYWORD" value="false" />
<method />
</configuration>
- <list size="1">
+ <list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.factorytester" />
+ <item index="1" class="java.lang.String" itemvalue="Python.xml_reader" />
</list>
<recent_temporary>
- <list size="1">
- <item index="0" class="java.lang.String" itemvalue="Python.factorytester" />
+ <list size="2">
+ <item index="0" class="java.lang.String" itemvalue="Python.xml_reader" />
+ <item index="1" class="java.lang.String" itemvalue="Python.factorytester" />
</list>
</recent_temporary>
</component>
@@ -548,12 +566,19 @@
<option name="project" value="LOCAL" />
<updated>1474542828452</updated>
</task>
- <option name="localTasksCounter" value="9" />
+ <task id="LOCAL-00009" summary="updates">
+ <created>1474543685903</created>
+ <option name="number" value="00009" />
+ <option name="presentableId" value="LOCAL-00009" />
+ <option name="project" value="LOCAL" />
+ <updated>1474543685903</updated>
+ </task>
+ <option name="localTasksCounter" value="10" />
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1382" height="744" extended-state="6" />
- <editor active="false" />
+ <editor active="true" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25549048" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
@@ -774,14 +799,6 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
- <provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
- <folding />
- </state>
- </provider>
- </entry>
<entry file="file://$PROJECT_DIR$/entities/basic_entity.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="51">
@@ -798,26 +815,34 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
+ <entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" />
+ <state relative-caret-position="296">
+ <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
+ <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="17">
- <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" />
+ <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
+ <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="330">
+ <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="211">
- <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" />
+ <state relative-caret-position="1384">
+ <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" />
<folding>
<element signature="e#0#15#0" expanded="true" />
<element signature="e#1747#2164#0" expanded="false" />
@@ -827,18 +852,18 @@
</entry>
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="272">
- <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
+ <state relative-caret-position="204">
+ <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/person.py">
+ <entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="296">
- <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
+ <state relative-caret-position="146">
+ <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" />
<folding />
</state>
</provider>
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index e9838f4..8bf2348 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -120,6 +120,7 @@ class INLFactory(BasicFactory):
comment_list.append(field.text)
return entities.Location(eng_name, types_of_place , name_in_langs, comment_list)
else:
- raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))
+ return None
+ # raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 3d9b1b7..1a06f6b 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -14,11 +14,10 @@ class INLXmlParser:
self.whitelist = whitelist
def clearxml(self):
- newTreeRoot = ET.Element('data')
# # scan the datafields in the records and copy to the new one only the tags in the whitelist
# for record in root: # create new record
- newRecord = ET.SubElement(newTreeRoot, 'record')
+ newRecord = ET.Element('record')
for field in self.reader:
fieldtag = field.attrib.get('tag')
if fieldtag in self.whitelist:
@@ -34,4 +33,4 @@ class INLXmlParser:
newTag.text = data.text
newRecordTree = ET.ElementTree(newRecord)
- return newRecordTree
+ return ET.ElementTree(newRecord)
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index 3e630cb..ec2c696 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -26,12 +26,18 @@ def read_file(path, element_key):
if element_key in element.tag:
#enter the processing here
record_counter += 1
+
+ for field in element:
+ print(field.tag, field.attrib)
+
#cleaned element is a tree
- cleaned_element = parsers.INLXmlParser(element).clearxml()
+ inl_parser = parsers.INLXmlParser(element)
+ cleaned_element = inl_parser.clearxml()
entity = inl_factory.get_entity(cleaned_element)
#test print the entity
- entity.print_entity()
+ if entity != None:
+ entity.print_entity()
#TODO analys and upload the entity
@@ -43,4 +49,4 @@ def read_file(path, element_key):
if __name__ == '__main__':
- read_file(r"../../NLI-nnl10.xml", 'record')
+ read_file(r"C:/Users/Ilsar/Documents/datahack/NLI-nnl10.xml", 'record')