Diffstat (limited to 'readers/xml_reader.py')
-rw-r--r--  readers/xml_reader.py  122
1 file changed, 61 insertions, 61 deletions
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index 5b2d1fd..710899d 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -1,61 +1,61 @@
-# from __future__ import absolute_import
-import json
-import csv
-import parsers, factories
-from entities import Person
-
-try:
- import xml.etree.cElementTree as ET
-except ImportError:
- import xml.etree.ElementTree as ET
-
-def read_file(path, element_key):
- # get an iterable
- record_counter = 0
- context = ET.iterparse(path, events=("start", "end"))
-
- # turn it into an iterator
- context = iter(context)
-
- # get the root element
- event, root = context.__next__()
-
- # the factory
- inl_factory = factories.INLFactory()
- files = {}
- for event, element in context:
- if 'end' in event:
- if element_key in element.tag:
- # enter the processing here
- record_counter += 1
-
- #cleaned element is a tree
- inl_parser = parsers.INLXmlParser(element)
- cleaned_element = inl_parser.clearxml()
- entity = inl_factory.get_entity(cleaned_element)
-
- # test print the entity
- if entity != None:
- if entity.TYPE not in files:
- files[entity.TYPE] = open("../out/{}.csv".format(entity.TYPE), 'w+', encoding='utf8')
- json_entity = entity.to_json()
- print(json_entity)
- writer = csv.DictWriter(files[entity.TYPE], entity.CSV_FIELDS)
- writer.writerow(entity.to_csv_dict())
- # json.dump(entity.comments_list, f667, indent=2, ensure_ascii=False)
- # json.dump(entity.bio_data, f678, indent=2, ensure_ascii=False)
-
- # entity.print_entity()
-
- # TODO analys and upload the entity
-
-
- # import pdb; pdb.set_trace()
- print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@',
- cleaned_element.getroot().text)
- element.clear()
- print(record_counter)
-
-
-if __name__ == '__main__':
- read_file(r"../../NLI-nnl10.xml", 'record')
+import csv
+import parsers, factories
+
+# cElementTree was removed in Python 3.9; ElementTree has used the C
+# accelerator automatically since Python 3.3
+import xml.etree.ElementTree as ET
+
+
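+# Streaming reader: pulls each matching element out of a large XML
+# export, turns it into an entity, and appends it to a per-type CSV.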
+def read_file(path, element_key):
+    record_counter = 0
+
+    # iterparse streams the document instead of loading it whole,
+    # so memory stays flat on large dumps
+    context = iter(ET.iterparse(path, events=("start", "end")))
+
+    # advance past the root's start event; keep root for reference
+    event, root = next(context)
+
+    inl_factory = factories.INLFactory()
+    files = {}  # entity TYPE -> (file handle, csv.DictWriter)
+    for event, element in context:
+        # substring match so namespaced tags like '{ns}record' still hit
+        if event == 'end' and element_key in element.tag:
+            record_counter += 1
+
+            # clearxml() returns a cleaned ElementTree for this record
+            inl_parser = parsers.INLXmlParser(element)
+            cleaned_element = inl_parser.clearxml()
+            entity = inl_factory.get_entity(cleaned_element)
+
+            if entity is not None:
+                # test-print the entity as JSON
+                print(entity.to_json())
+
+                # one CSV per entity type; write the header on first open
+                if entity.TYPE not in files:
+                    handle = open("../out/{}.csv".format(entity.TYPE), 'w', newline='', encoding='utf8')
+                    writer = csv.DictWriter(handle, entity.CSV_FIELDS)
+                    writer.writeheader()
+                    files[entity.TYPE] = (handle, writer)
+                files[entity.TYPE][1].writerow(entity.to_csv_dict())
+
+                # TODO: analyse and upload the entity
+
+            print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@',
+                  cleaned_element.getroot().text)
+
+            # drop the processed element so parsed nodes do not accumulate
+            element.clear()
+
+    # close every per-type CSV handle once the stream is exhausted
+    for handle, _ in files.values():
+        handle.close()
+    print(record_counter)
+
+
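+# Entry point: run the reader over the NLI dump, record by record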
+if __name__ == '__main__':
+ read_file(r"../../NLI-nnl10.xml", 'record')