diff options
author | gilad_ilsar <gandismidas1> | 2016-09-22 14:53:42 +0300 |
---|---|---|
committer | gilad_ilsar <gandismidas1> | 2016-09-22 14:53:42 +0300 |
commit | 6405185cd4136b04b45b3b9d756fdd5d38405f07 (patch) | |
tree | 863942a90ff00ab68d87fc8f41f6b000d46a55e8 | |
parent | 9fe2b6c8bf631a265dec44b5f474b97ad9d277c2 (diff) |
updates
-rw-r--r-- | .idea/workspace.xml | 111 | ||||
-rw-r--r-- | factories/INL_factory.py | 3 | ||||
-rw-r--r-- | parsers/INL_xml_parser.py | 5 | ||||
-rw-r--r-- | readers/xml_reader.py | 12 |
4 files changed, 81 insertions, 50 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml index d837dbc..bf6f1e4 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,12 +3,9 @@ <component name="ChangeListManager"> <list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment=""> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> - <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/location.py" afterPath="$PROJECT_DIR$/entities/location.py" /> - <change type="MODIFICATION" beforePath="$PROJECT_DIR$/entities/person.py" afterPath="$PROJECT_DIR$/entities/person.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/parsers/INL_xml_parser.py" afterPath="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" /> - <change type="MODIFICATION" beforePath="$PROJECT_DIR$/testers/factorytester.py" afterPath="$PROJECT_DIR$/testers/factorytester.py" /> </list> <ignored path="lib2wiki.iws" /> <ignored path=".idea/workspace.xml" /> @@ -21,7 +18,8 @@ <option name="LAST_RESOLUTION" value="IGNORE" /> </component> <component name="CoverageDataManager"> - <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474543643085" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> + <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474544553528" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> + <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474545128115" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" /> </component> <component name="CreatePatchCommitExecutor"> <option name="PATCH_PATH" value="" /> @@ -32,11 +30,11 @@ </component> <component name="FileEditorManager"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> - <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="false"> + <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true"> <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" /> + <state relative-caret-position="146"> + <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" /> <folding /> </state> </provider> @@ -45,8 +43,8 @@ <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" /> + <state relative-caret-position="330"> + <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" /> <folding /> </state> </provider> @@ -65,8 +63,8 @@ <file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="211"> - <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" /> + <state relative-caret-position="1384"> + <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" /> <folding> <element signature="e#0#15#0" expanded="true" /> <element signature="e#1747#2164#0" expanded="false" /> @@ -75,7 +73,7 @@ </provider> </entry> </file> - <file leaf-file-name="person.py" pinned="false" current-in-tab="true"> + <file leaf-file-name="person.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="296"> @@ -108,8 +106,8 @@ <file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="272"> - <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" /> + <state relative-caret-position="204"> + <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> @@ -144,11 +142,11 @@ <option value="$PROJECT_DIR$/entities/testers/factorytester.py" /> <option value="$PROJECT_DIR$/parsers/__init__.py" /> <option value="$PROJECT_DIR$/entities/location.py" /> - <option value="$PROJECT_DIR$/readers/xml_reader.py" /> <option value="$PROJECT_DIR$/testers/factorytester.py" /> - <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> - <option value="$PROJECT_DIR$/factories/INL_factory.py" /> <option value="$PROJECT_DIR$/entities/person.py" /> + <option value="$PROJECT_DIR$/factories/INL_factory.py" /> + <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> + <option value="$PROJECT_DIR$/readers/xml_reader.py" /> </list> </option> </component> @@ -245,7 +243,7 @@ <property name="last_opened_file_path" value="$PROJECT_DIR$" /> <property name="WebServerToolWindowFactoryState" value="false" /> </component> - <component name="RunManager" selected="Python.factorytester"> + <component name="RunManager" selected="Python.xml_reader"> <configuration default="false" name="factorytester" type="PythonConfigurationType" factoryName="Python" temporary="true"> <option name="INTERPRETER_OPTIONS" value="" /> <option name="PARENT_ENVS" value="true" /> @@ -264,6 +262,24 @@ <option name="SHOW_COMMAND_LINE" value="false" /> <method /> </configuration> + <configuration default="false" name="xml_reader" type="PythonConfigurationType" factoryName="Python" temporary="true"> + <option name="INTERPRETER_OPTIONS" value="" /> + <option name="PARENT_ENVS" value="true" /> + <envs> + <env name="PYTHONUNBUFFERED" value="1" /> + </envs> + <option name="SDK_HOME" value="" /> + <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/readers" /> + <option name="IS_MODULE_SDK" value="true" /> + <option name="ADD_CONTENT_ROOTS" value="true" /> + <option name="ADD_SOURCE_ROOTS" value="true" /> + <module name="parser" /> + <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/readers/xml_reader.py" /> + <option name="PARAMETERS" value="" /> + <option name="SHOW_COMMAND_LINE" value="false" /> + <method /> + </configuration> <configuration default="true" type="DjangoTestsConfigurationType" factoryName="Django tests"> <option name="INTERPRETER_OPTIONS" value="" /> <option name="PARENT_ENVS" value="true" /> @@ -472,12 +488,14 @@ <option name="USE_KEYWORD" value="false" /> <method /> </configuration> - <list size="1"> + <list size="2"> <item index="0" class="java.lang.String" itemvalue="Python.factorytester" /> + <item index="1" class="java.lang.String" itemvalue="Python.xml_reader" /> </list> <recent_temporary> - <list size="1"> - <item index="0" class="java.lang.String" itemvalue="Python.factorytester" /> + <list size="2"> + <item index="0" class="java.lang.String" itemvalue="Python.xml_reader" /> + <item index="1" class="java.lang.String" itemvalue="Python.factorytester" /> </list> </recent_temporary> </component> @@ -548,12 +566,19 @@ <option name="project" value="LOCAL" /> <updated>1474542828452</updated> </task> - <option name="localTasksCounter" value="9" /> + <task id="LOCAL-00009" summary="updates"> + <created>1474543685903</created> + <option name="number" value="00009" /> + <option name="presentableId" value="LOCAL-00009" /> + <option name="project" value="LOCAL" /> + <updated>1474543685903</updated> + </task> + <option name="localTasksCounter" value="10" /> <servers /> </component> <component name="ToolWindowManager"> <frame x="-8" y="-8" width="1382" height="744" extended-state="6" /> - <editor active="false" /> + <editor active="true" /> <layout> <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25549048" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> @@ -774,14 +799,6 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> - <folding /> - </state> - </provider> - </entry> <entry file="file://$PROJECT_DIR$/entities/basic_entity.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="51"> @@ -798,26 +815,34 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> + <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="21" selection-start-line="10" selection-start-column="21" selection-end-line="10" selection-end-column="21" /> + <state relative-caret-position="296"> + <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> + <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="17"> - <caret line="7" column="79" selection-start-line="7" selection-start-column="79" selection-end-line="7" selection-end-column="79" /> + <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> + <folding /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="330"> + <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" /> <folding /> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="211"> - <caret line="78" column="32" selection-start-line="78" selection-start-column="32" selection-end-line="78" selection-end-column="32" /> + <state relative-caret-position="1384"> + <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" /> <folding> <element signature="e#0#15#0" expanded="true" /> <element signature="e#1747#2164#0" expanded="false" /> @@ -827,18 +852,18 @@ </entry> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="272"> - <caret line="16" column="23" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" /> + <state relative-caret-position="204"> + <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/person.py"> + <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="296"> - <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> + <state relative-caret-position="146"> + <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" /> <folding /> </state> </provider> diff --git a/factories/INL_factory.py b/factories/INL_factory.py index e9838f4..8bf2348 100644 --- a/factories/INL_factory.py +++ b/factories/INL_factory.py @@ -120,6 +120,7 @@ class INLFactory(BasicFactory): comment_list.append(field.text) return entities.Location(eng_name, types_of_place , name_in_langs, comment_list) else: - raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self))) + return None + # raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self))) diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 3d9b1b7..1a06f6b 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -14,11 +14,10 @@ class INLXmlParser: self.whitelist = whitelist def clearxml(self): - newTreeRoot = ET.Element('data') # # scan the datafields in the records and copy to the new one only the tags in the whitelist # for record in root: # create new record - newRecord = ET.SubElement(newTreeRoot, 'record') + newRecord = ET.Element('record') for field in self.reader: fieldtag = field.attrib.get('tag') if fieldtag in self.whitelist: @@ -34,4 +33,4 @@ class INLXmlParser: newTag.text = data.text newRecordTree = ET.ElementTree(newRecord) - return newRecordTree + return ET.ElementTree(newRecord) diff --git a/readers/xml_reader.py b/readers/xml_reader.py index 3e630cb..ec2c696 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -26,12 +26,18 @@ def read_file(path, element_key): if element_key in element.tag: #enter the processing here record_counter += 1 + + for field in element: + print(field.tag, field.attrib) + #cleaned element is a tree - cleaned_element = parsers.INLXmlParser(element).clearxml() + inl_parser = parsers.INLXmlParser(element) + cleaned_element = inl_parser.clearxml() entity = inl_factory.get_entity(cleaned_element) #test print the entity - entity.print_entity() + if entity != None: + entity.print_entity() #TODO analys and upload the entity @@ -43,4 +49,4 @@ def read_file(path, element_key): if __name__ == '__main__': - read_file(r"../../NLI-nnl10.xml", 'record') + read_file(r"C:/Users/Ilsar/Documents/datahack/NLI-nnl10.xml", 'record') |