summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgilad_ilsar <gandismidas1>2016-09-22 16:34:57 +0300
committergilad_ilsar <gandismidas1>2016-09-22 16:34:57 +0300
commit3615e3968baa5a464a2725e280a7e0f89e3428cf (patch)
tree3300933e3c766c11f773287f76a4c9b4e4a31c87
parentf1dbf8f8085b3471d6db6183819ec7806d28f239 (diff)
parser into csv
-rw-r--r--.idea/workspace.xml164
-rw-r--r--factories/INL_factory.py4
-rw-r--r--libs/json_tools.py5
-rw-r--r--readers/xml_reader.py67
4 files changed, 149 insertions, 91 deletions
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 26a7a39..f527370 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -3,6 +3,9 @@
<component name="ChangeListManager">
<list default="true" id="1d9b5e9b-4282-4345-b663-d1b92a287a32" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/factories/INL_factory.py" afterPath="$PROJECT_DIR$/factories/INL_factory.py" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/libs/json_tools.py" afterPath="$PROJECT_DIR$/libs/json_tools.py" />
+ <change type="MODIFICATION" beforePath="$PROJECT_DIR$/readers/xml_reader.py" afterPath="$PROJECT_DIR$/readers/xml_reader.py" />
</list>
<ignored path="lib2wiki.iws" />
<ignored path=".idea/workspace.xml" />
@@ -16,7 +19,7 @@
</component>
<component name="CoverageDataManager">
<SUITE FILE_PATH="coverage/parser$factorytester.coverage" NAME="factorytester Coverage Results" MODIFIED="1474544553528" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/testers" />
- <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474545128115" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" />
+ <SUITE FILE_PATH="coverage/parser$xml_reader.coverage" NAME="xml_reader Coverage Results" MODIFIED="1474551147724" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/readers" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
@@ -30,28 +33,30 @@
<file leaf-file-name="xml_reader.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="-7">
- <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" />
- <folding />
+ <state relative-caret-position="136">
+ <caret line="26" column="38" selection-start-line="26" selection-start-column="38" selection-end-line="26" selection-end-column="38" />
+ <folding>
+ <element signature="e#42#53#0" expanded="true" />
+ </folding>
</state>
</provider>
</entry>
</file>
- <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false">
- <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
+ <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
+ <entry file="file://$USER_HOME$/Anaconda3/Lib/json/__init__.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="330">
- <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" />
+ <state relative-caret-position="132">
+ <caret line="118" column="12" selection-start-line="118" selection-start-column="9" selection-end-line="118" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
</file>
- <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
- <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
+ <file leaf-file-name="INL_xml_parser.py" pinned="false" current-in-tab="false">
+ <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
+ <state relative-caret-position="160">
+ <caret line="19" column="15" selection-start-line="19" selection-start-column="15" selection-end-line="19" selection-end-column="15" />
<folding />
</state>
</provider>
@@ -60,11 +65,11 @@
<file leaf-file-name="INL_factory.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="1384">
- <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" />
+ <state relative-caret-position="263">
+ <caret line="113" column="44" selection-start-line="113" selection-start-column="44" selection-end-line="113" selection-end-column="44" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1747#2164#0" expanded="false" />
+ <element signature="e#1774#2191#0" expanded="false" />
</folding>
</state>
</provider>
@@ -73,8 +78,8 @@
<file leaf-file-name="person.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="296">
- <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
+ <state relative-caret-position="234">
+ <caret line="55" column="13" selection-start-line="55" selection-start-column="13" selection-end-line="55" selection-end-column="13" />
<folding />
</state>
</provider>
@@ -84,7 +89,7 @@
<entry file="file://$PROJECT_DIR$/entities/basic_entity.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="51">
- <caret line="3" column="22" selection-start-line="3" selection-start-column="22" selection-end-line="3" selection-end-column="22" />
+ <caret line="3" column="27" selection-start-line="3" selection-start-column="27" selection-end-line="3" selection-end-column="27" />
<folding />
</state>
</provider>
@@ -103,8 +108,8 @@
<file leaf-file-name="factorytester.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="204">
- <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" />
+ <state relative-caret-position="136">
+ <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" />
<folding>
<element signature="e#0#38#0" expanded="true" />
</folding>
@@ -112,6 +117,19 @@
</provider>
</entry>
</file>
+ <file leaf-file-name="json_tools.py" pinned="false" current-in-tab="false">
+ <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="85">
+ <caret line="5" column="34" selection-start-line="5" selection-start-column="34" selection-end-line="5" selection-end-column="34" />
+ <folding>
+ <marker date="1474549999557" expanded="true" signature="69:104" ph="..." />
+ <marker date="1474549999557" expanded="true" signature="128:189" ph="..." />
+ </folding>
+ </state>
+ </provider>
+ </entry>
+ </file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
@@ -128,7 +146,6 @@
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/parsers/basic_parser.py" />
- <option value="$PROJECT_DIR$/libs/json_tools.py" />
<option value="$PROJECT_DIR$/factories/basic_factory.py" />
<option value="$PROJECT_DIR$/entities/basic_entity.py" />
<option value="$PROJECT_DIR$/entities/institution.py" />
@@ -141,9 +158,10 @@
<option value="$PROJECT_DIR$/entities/location.py" />
<option value="$PROJECT_DIR$/testers/factorytester.py" />
<option value="$PROJECT_DIR$/entities/person.py" />
- <option value="$PROJECT_DIR$/factories/INL_factory.py" />
<option value="$PROJECT_DIR$/parsers/INL_xml_parser.py" />
+ <option value="$PROJECT_DIR$/libs/json_tools.py" />
<option value="$PROJECT_DIR$/readers/xml_reader.py" />
+ <option value="$PROJECT_DIR$/factories/INL_factory.py" />
</list>
</option>
</component>
@@ -577,7 +595,14 @@
<option name="project" value="LOCAL" />
<updated>1474545222845</updated>
</task>
- <option name="localTasksCounter" value="11" />
+ <task id="LOCAL-00011" summary="updates">
+ <created>1474545328764</created>
+ <option name="number" value="00011" />
+ <option name="presentableId" value="LOCAL-00011" />
+ <option name="project" value="LOCAL" />
+ <updated>1474545328764</updated>
+ </task>
+ <option name="localTasksCounter" value="12" />
<servers />
</component>
<component name="ToolWindowManager">
@@ -588,9 +613,10 @@
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+ <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3298969" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" />
- <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+ <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
@@ -600,7 +626,6 @@
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
- <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
@@ -624,7 +649,7 @@
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
- <option name="time" value="7" />
+ <option name="time" value="8" />
</breakpoint-manager>
<watches-manager />
</component>
@@ -659,7 +684,7 @@
<caret line="37" column="31" selection-start-line="37" selection-start-column="31" selection-end-line="37" selection-end-column="31" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1747#2164#0" expanded="false" />
+ <element signature="e#1774#2191#0" expanded="false" />
</folding>
</state>
</provider>
@@ -720,7 +745,7 @@
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#0#15#0" expanded="true" />
- <element signature="e#1747#2164#0" expanded="false" />
+ <element signature="e#1774#2191#0" expanded="false" />
</folding>
</state>
</provider>
@@ -753,6 +778,7 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="15" column="5" selection-start-line="15" selection-start-column="5" selection-end-line="15" selection-end-column="5" />
+ <folding />
</state>
</provider>
</entry>
@@ -768,6 +794,7 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="10" selection-start-line="0" selection-start-column="10" selection-end-line="0" selection-end-column="10" />
+ <folding />
</state>
</provider>
</entry>
@@ -787,88 +814,101 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
+ <entry file="file://$PROJECT_DIR$/entities/__init__.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="51">
- <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
+ <state relative-caret-position="34">
+ <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/__init__.py">
+ <entry file="file://$PROJECT_DIR$/testers/factorytester.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="34">
- <caret line="2" column="14" selection-start-line="2" selection-start-column="14" selection-end-line="2" selection-end-column="14" />
- <folding />
+ <state relative-caret-position="136">
+ <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" />
+ <folding>
+ <element signature="e#0#38#0" expanded="true" />
+ </folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/entities/basic_entity.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="51">
- <caret line="3" column="22" selection-start-line="3" selection-start-column="22" selection-end-line="3" selection-end-column="22" />
+ <caret line="3" column="27" selection-start-line="3" selection-start-column="27" selection-end-line="3" selection-end-column="27" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/location.py">
+ <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="170">
- <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
+ <state relative-caret-position="17">
+ <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/entities/person.py">
+ <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="296">
- <caret line="68" column="0" selection-start-line="68" selection-start-column="0" selection-end-line="68" selection-end-column="0" />
+ <state relative-caret-position="160">
+ <caret line="19" column="15" selection-start-line="19" selection-start-column="15" selection-end-line="19" selection-end-column="15" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/__init__.py">
+ <entry file="file://$PROJECT_DIR$/entities/person.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="17">
- <caret line="1" column="40" selection-start-line="1" selection-start-column="40" selection-end-line="1" selection-end-column="40" />
+ <state relative-caret-position="234">
+ <caret line="55" column="13" selection-start-line="55" selection-start-column="13" selection-end-line="55" selection-end-column="13" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/parsers/INL_xml_parser.py">
+ <entry file="file://$PROJECT_DIR$/libs/json_tools.py">
+ <provider selected="true" editor-type-id="text-editor">
+ <state relative-caret-position="85">
+ <caret line="5" column="34" selection-start-line="5" selection-start-column="34" selection-end-line="5" selection-end-column="34" />
+ <folding>
+ <marker date="1474549999557" expanded="true" signature="69:104" ph="..." />
+ <marker date="1474549999557" expanded="true" signature="128:189" ph="..." />
+ </folding>
+ </state>
+ </provider>
+ </entry>
+ <entry file="file://$USER_HOME$/Anaconda3/Lib/json/__init__.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="330">
- <caret line="29" column="38" selection-start-line="29" selection-start-column="38" selection-end-line="29" selection-end-column="38" />
+ <state relative-caret-position="132">
+ <caret line="118" column="12" selection-start-line="118" selection-start-column="9" selection-end-line="118" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
+ <entry file="file://$PROJECT_DIR$/entities/location.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="1384">
- <caret line="121" column="13" selection-start-line="121" selection-start-column="13" selection-end-line="121" selection-end-column="13" />
- <folding>
- <element signature="e#0#15#0" expanded="true" />
- <element signature="e#1747#2164#0" expanded="false" />
- </folding>
+ <state relative-caret-position="170">
+ <caret line="10" column="27" selection-start-line="10" selection-start-column="27" selection-end-line="10" selection-end-column="27" />
+ <folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/testers/factorytester.py">
+ <entry file="file://$PROJECT_DIR$/factories/INL_factory.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="204">
- <caret line="12" column="4" selection-start-line="12" selection-start-column="4" selection-end-line="12" selection-end-column="45" />
+ <state relative-caret-position="263">
+ <caret line="113" column="44" selection-start-line="113" selection-start-column="44" selection-end-line="113" selection-end-column="44" />
<folding>
- <element signature="e#0#38#0" expanded="true" />
+ <element signature="e#0#15#0" expanded="true" />
+ <element signature="e#1774#2191#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/readers/xml_reader.py">
<provider selected="true" editor-type-id="text-editor">
- <state relative-caret-position="-7">
- <caret line="26" column="42" selection-start-line="26" selection-start-column="42" selection-end-line="26" selection-end-column="42" />
- <folding />
+ <state relative-caret-position="136">
+ <caret line="26" column="38" selection-start-line="26" selection-start-column="38" selection-end-line="26" selection-end-column="38" />
+ <folding>
+ <element signature="e#42#53#0" expanded="true" />
+ </folding>
</state>
</provider>
</entry>
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index 8bf2348..f4e494f 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -52,6 +52,7 @@ class INLFactory(BasicFactory):
eng_name = ''
date_of_birth = ''
profession = list()
+ name_diff = ''
#get the names and date of birth and bio data
for field in raw_object.getroot():
key = field.attrib.get('tag')
@@ -92,6 +93,9 @@ class INLFactory(BasicFactory):
name_in_langs = dict()
types_of_place = list()
comment_list = list()
+ eng_name = ''
+ name_diff = ''
+
for field in raw_object.getroot():
key = field.attrib.get('tag')
tag = entity_keys.get(key)
diff --git a/libs/json_tools.py b/libs/json_tools.py
index 6354531..5e78d23 100644
--- a/libs/json_tools.py
+++ b/libs/json_tools.py
@@ -3,4 +3,7 @@ import json
class JsonSerializable(object):
def __repr__(self):
- return json.dumps(self.__dict__)
+ return str(self.to_json())
+
+ def to_json(self):
+ return json.dumps(self.__dict__, ensure_ascii=False)
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index ec2c696..2aaf8c6 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -1,4 +1,6 @@
# from __future__ import absolute_import
+import json
+import csv
import parsers, factories
try:
@@ -6,6 +8,7 @@ try:
except ImportError:
import xml.etree.ElementTree as ET
+CSV_FIELDS = ["name", "biodata", "comments"]
def read_file(path, element_key):
# get an iterable
@@ -20,33 +23,41 @@ def read_file(path, element_key):
#the factory
inl_factory = factories.INLFactory()
-
- for event, element in context:
- if 'end' in event:
- if element_key in element.tag:
- #enter the processing here
- record_counter += 1
-
- for field in element:
- print(field.tag, field.attrib)
-
- #cleaned element is a tree
- inl_parser = parsers.INLXmlParser(element)
- cleaned_element = inl_parser.clearxml()
- entity = inl_factory.get_entity(cleaned_element)
-
- #test print the entity
- if entity != None:
- entity.print_entity()
-
-
- #TODO analys and upload the entity
-
-
- # import pdb; pdb.set_trace()
- print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text)
- element.clear()
-
-
+ with open('out.csv', 'w', encoding='utf8') as f:
+ writer = csv.DictWriter(f, CSV_FIELDS)
+ writer.writeheader()
+ f667 = open("667.txt", 'w', encoding="utf8")
+ f678 = open("678.txt", 'w', encoding="utf8")
+ for event, element in context:
+ if 'end' in event:
+ if element_key in element.tag:
+ #enter the processing here
+ record_counter += 1
+
+ #cleaned element is a tree
+ inl_parser = parsers.INLXmlParser(element)
+ cleaned_element = inl_parser.clearxml()
+ entity = inl_factory.get_entity(cleaned_element)
+
+
+ #test print the entity
+ if entity != None:
+ json_entity = entity.to_json()
+ print(json_entity)
+ writer.writerow({'name': entity.name, 'biodata': entity.bio_data, 'comments': json.dumps(entity.comments_list, ensure_ascii=False)})
+ # json.dump(entity.comments_list, f667, indent=2, ensure_ascii=False)
+ # json.dump(entity.bio_data, f678, indent=2, ensure_ascii=False)
+
+ # entity.print_entity()
+
+
+ #TODO analys and upload the entity
+
+
+ # import pdb; pdb.set_trace()
+ #print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text)
+ element.clear()
+ f667.close()
+ f678.close()
if __name__ == '__main__':
read_file(r"C:/Users/Ilsar/Documents/datahack/NLI-nnl10.xml", 'record')