diff options
-rw-r--r-- | .idea/workspace.xml | 164 | ||||
-rw-r--r-- | factories/INL_factory.py | 4 | ||||
-rw-r--r-- | libs/json_tools.py | 5 | ||||
-rw-r--r-- | readers/xml_reader.py | 67 |
4 files changed, 149 insertions, 91 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 26a7a39..f527370 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,6 +3,9 @@ <component name="ChangeListManager"> <list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment=""> <change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/libs/json_tools.py" afterPath="$PROJECT_DIR$/libs/json_tools.py" /> + <change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" /> </list> <ignored path="lib2wiki.iws" /> <ignored path=".idea/workspace.xml" /> @@ -16,7 +19,7 @@ </component> <component name="CoverageDataManager"> <SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474544553528" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" /> - <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474545128115" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" /> + <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474551147724" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" /> </component> <component name="CreatePatchCommitExecutor"> <option name="PATCH_PATH" value="" /> @@ -30,28 +33,30 @@ <file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true"> <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="-7"> - <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" /> - <folding /> + <state relative-caret-position="136"> + <caret line="26" column="38" selection-start-line="26" selection-start-column="38" selection-end-line="26" selection-end-column="38" /> + <folding> + <element signature="e#42#53#0" expanded="true" /> + </folding> </state> </provider> </entry> </file> - <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> + <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false"> + <entry file="file://$USER_HOME$/Anaconda3/Lib/json/__init__.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="330"> - <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" /> + <state relative-caret-position="132"> + <caret line="118" column="12" selection-start-line="118" selection-start-column="9" selection-end-line="118" selection-end-column="12" /> <folding /> </state> </provider> </entry> </file> - <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> + <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> + <state relative-caret-position="160"> + <caret line="19" column="15" selection-start-line="19" selection-start-column="15" selection-end-line="19" selection-end-column="15" /> <folding /> </state> </provider> @@ -60,11 +65,11 @@ <file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1384"> - <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" /> + <state relative-caret-position="263"> + <caret line="113" column="44" selection-start-line="113" selection-start-column="44" selection-end-line="113" selection-end-column="44" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1747#2164#0" expanded="false" /> + <element signature="e#1774#2191#0" expanded="false" /> </folding> </state> </provider> @@ -73,8 +78,8 @@ <file leaf-file-name="person.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="296"> - <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> + <state relative-caret-position="234"> + <caret line="55" column="13" selection-start-line="55" selection-start-column="13" selection-end-line="55" selection-end-column="13" /> <folding /> </state> </provider> @@ -84,7 +89,7 @@ <entry file="file://$PROJECT_DIR$/entities/basic_entity.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="51"> - <caret line="3" column="22" selection-start-line="3" selection-start-column="22" selection-end-line="3" selection-end-column="22" /> + <caret line="3" column="27" selection-start-line="3" selection-start-column="27" selection-end-line="3" selection-end-column="27" /> <folding /> </state> </provider> @@ -103,8 +108,8 @@ <file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="204"> - <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" /> + <state relative-caret-position="136"> + <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" /> <folding> <element signature="e#0#38#0" expanded="true" /> </folding> @@ -112,6 +117,19 @@ </provider> </entry> </file> + <file leaf-file-name="json_tools.py" pinned="false" current-in-tab="false"> + <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="85"> + <caret line="5" column="34" selection-start-line="5" selection-start-column="34" selection-end-line="5" selection-end-column="34" /> + <folding> + <marker date="1474549999557" expanded="true" signature="69:104" ph="..." /> + <marker date="1474549999557" expanded="true" signature="128:189" ph="..." /> + </folding> + </state> + </provider> + </entry> + </file> </leaf> </component> <component name="FileTemplateManagerImpl"> @@ -128,7 +146,6 @@ <option name="CHANGED_PATHS"> <list> <option value="$PROJECT_DIR$/parsers/basic_parser.py" /> - <option value="$PROJECT_DIR$/libs/json_tools.py" /> <option value="$PROJECT_DIR$/factories/basic_factory.py" /> <option value="$PROJECT_DIR$/entities/basic_entity.py" /> <option value="$PROJECT_DIR$/entities/institution.py" /> @@ -141,9 +158,10 @@ <option value="$PROJECT_DIR$/entities/location.py" /> <option value="$PROJECT_DIR$/testers/factorytester.py" /> <option value="$PROJECT_DIR$/entities/person.py" /> - <option value="$PROJECT_DIR$/factories/INL_factory.py" /> <option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" /> + <option value="$PROJECT_DIR$/libs/json_tools.py" /> <option value="$PROJECT_DIR$/readers/xml_reader.py" /> + <option value="$PROJECT_DIR$/factories/INL_factory.py" /> </list> </option> </component> @@ -577,7 +595,14 @@ <option name="project" value="LOCAL" /> <updated>1474545222845</updated> </task> - <option name="localTasksCounter" value="11" /> + <task id="LOCAL-00011" summary="updates"> + <created>1474545328764</created> + <option name="number" value="00011" /> + <option name="presentableId" value="LOCAL-00011" /> + <option name="project" value="LOCAL" /> + <updated>1474545328764</updated> + </task> + <option name="localTasksCounter" value="12" /> <servers /> </component> <component name="ToolWindowManager"> @@ -588,9 +613,10 @@ <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" /> <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> + <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3298969" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" /> <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" /> - <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> + <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" /> <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" /> @@ -600,7 +626,6 @@ <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" /> <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" /> - <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> </layout> </component> @@ -624,7 +649,7 @@ </component> <component name="XDebuggerManager"> <breakpoint-manager> - <option name="time" value="7" /> + <option name="time" value="8" /> </breakpoint-manager> <watches-manager /> </component> @@ -659,7 +684,7 @@ <caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1747#2164#0" expanded="false" /> + <element signature="e#1774#2191#0" expanded="false" /> </folding> </state> </provider> @@ -720,7 +745,7 @@ <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> <folding> <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1747#2164#0" expanded="false" /> + <element signature="e#1774#2191#0" expanded="false" /> </folding> </state> </provider> @@ -753,6 +778,7 @@ <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="255"> <caret line="15" column="5" selection-start-line="15" selection-start-column="5" selection-end-line="15" selection-end-column="5" /> + <folding /> </state> </provider> </entry> @@ -768,6 +794,7 @@ <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="0"> <caret line="0" column="10" selection-start-line="0" selection-start-column="10" selection-end-line="0" selection-end-column="10" /> + <folding /> </state> </provider> </entry> @@ -787,88 +814,101 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> + <entry file="file://$PROJECT_DIR$/entities/__init__.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="51"> - <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" /> + <state relative-caret-position="34"> + <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/__init__.py"> + <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="34"> - <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" /> - <folding /> + <state relative-caret-position="136"> + <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" /> + <folding> + <element signature="e#0#38#0" expanded="true" /> + </folding> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/entities/basic_entity.py"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="51"> - <caret line="3" column="22" selection-start-line="3" selection-start-column="22" selection-end-line="3" selection-end-column="22" /> + <caret line="3" column="27" selection-start-line="3" selection-start-column="27" selection-end-line="3" selection-end-column="27" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/location.py"> + <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="170"> - <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> + <state relative-caret-position="17"> + <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/entities/person.py"> + <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="296"> - <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" /> + <state relative-caret-position="160"> + <caret line="19" column="15" selection-start-line="19" selection-start-column="15" selection-end-line="19" selection-end-column="15" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/__init__.py"> + <entry file="file://$PROJECT_DIR$/entities/person.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="17"> - <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" /> + <state relative-caret-position="234"> + <caret line="55" column="13" selection-start-line="55" selection-start-column="13" selection-end-line="55" selection-end-column="13" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py"> + <entry file="file://$PROJECT_DIR$/libs/json_tools.py"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="85"> + <caret line="5" column="34" selection-start-line="5" selection-start-column="34" selection-end-line="5" selection-end-column="34" /> + <folding> + <marker date="1474549999557" expanded="true" signature="69:104" ph="..." /> + <marker date="1474549999557" expanded="true" signature="128:189" ph="..." /> + </folding> + </state> + </provider> + </entry> + <entry file="file://$USER_HOME$/Anaconda3/Lib/json/__init__.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="330"> - <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" /> + <state relative-caret-position="132"> + <caret line="118" column="12" selection-start-line="118" selection-start-column="9" selection-end-line="118" selection-end-column="12" /> <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> + <entry file="file://$PROJECT_DIR$/entities/location.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="1384"> - <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" /> - <folding> - <element signature="e#0#15#0" expanded="true" /> - <element signature="e#1747#2164#0" expanded="false" /> - </folding> + <state relative-caret-position="170"> + <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" /> + <folding /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/testers/factorytester.py"> + <entry file="file://$PROJECT_DIR$/factories/INL_factory.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="204"> - <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" /> + <state relative-caret-position="263"> + <caret line="113" column="44" selection-start-line="113" selection-start-column="44" selection-end-line="113" selection-end-column="44" /> <folding> - <element signature="e#0#38#0" expanded="true" /> + <element signature="e#0#15#0" expanded="true" /> + <element signature="e#1774#2191#0" expanded="false" /> </folding> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/readers/xml_reader.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="-7"> - <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" /> - <folding /> + <state relative-caret-position="136"> + <caret line="26" column="38" selection-start-line="26" selection-start-column="38" selection-end-line="26" selection-end-column="38" /> + <folding> + <element signature="e#42#53#0" expanded="true" /> + </folding> </state> </provider> </entry> diff --git a/factories/INL_factory.py b/factories/INL_factory.py index 8bf2348..f4e494f 100644 --- a/factories/INL_factory.py +++ b/factories/INL_factory.py @@ -52,6 +52,7 @@ class INLFactory(BasicFactory): eng_name = '' date_of_birth = '' profession = list() + name_diff = '' #get the names and date of birth and bio data for field in raw_object.getroot(): key = field.attrib.get('tag') @@ -92,6 +93,9 @@ class INLFactory(BasicFactory): name_in_langs = dict() types_of_place = list() comment_list = list() + eng_name = '' + name_diff = '' + for field in raw_object.getroot(): key = field.attrib.get('tag') tag = entity_keys.get(key) diff --git a/libs/json_tools.py b/libs/json_tools.py index 6354531..5e78d23 100644 --- a/libs/json_tools.py +++ b/libs/json_tools.py @@ -3,4 +3,7 @@ import json class JsonSerializable(object): def __repr__(self): - return json.dumps(self.__dict__) + return str(self.to_json()) + + def to_json(self): + return json.dumps(self.__dict__, ensure_ascii=False) diff --git a/readers/xml_reader.py b/readers/xml_reader.py index ec2c696..2aaf8c6 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -1,4 +1,6 @@ # from __future__ import absolute_import +import json +import csv import parsers, factories try: @@ -6,6 +8,7 @@ try: except ImportError: import xml.etree.ElementTree as ET +CSV_FIELDS = ["name", "biodata", "comments"] def read_file(path, element_key): # get an iterable @@ -20,33 +23,41 @@ def read_file(path, element_key): #the factory inl_factory = factories.INLFactory() - - for event, element in context: - if 'end' in event: - if element_key in element.tag: - #enter the processing here - record_counter += 1 - - for field in element: - print(field.tag, field.attrib) - - #cleaned element is a tree - inl_parser = parsers.INLXmlParser(element) - cleaned_element = inl_parser.clearxml() - entity = inl_factory.get_entity(cleaned_element) - - #test print the entity - if entity != None: - entity.print_entity() - - - #TODO analys and upload the entity - - - # import pdb; pdb.set_trace() - print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text) - element.clear() - - + with open('out.csv', 'w', encoding='utf8') as f: + writer = csv.DictWriter(f, CSV_FIELDS) + writer.writeheader() + f667 = open("667.txt", 'w', encoding="utf8") + f678 = open("678.txt", 'w', encoding="utf8") + for event, element in context: + if 'end' in event: + if element_key in element.tag: + #enter the processing here + record_counter += 1 + + #cleaned element is a tree + inl_parser = parsers.INLXmlParser(element) + cleaned_element = inl_parser.clearxml() + entity = inl_factory.get_entity(cleaned_element) + + + #test print the entity + if entity != None: + json_entity = entity.to_json() + print(json_entity) + writer.writerow({'name': entity.name, 'biodata': entity.bio_data, 'comments': json.dumps(entity.comments_list, ensure_ascii=False)}) + # json.dump(entity.comments_list, f667, indent=2, ensure_ascii=False) + # json.dump(entity.bio_data, f678, indent=2, ensure_ascii=False) + + # entity.print_entity() + + + #TODO analys and upload the entity + + + # import pdb; pdb.set_trace() + #print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text) + element.clear() + f667.close() + f678.close() if __name__ == '__main__': read_file(r"C:/Users/Ilsar/Documents/datahack/NLI-nnl10.xml", 'record') |