From 41125eb195324d18d9c2c12aa12ecbf66dc5d495 Mon Sep 17 00:00:00 2001 From: roy lewin Date: Sun, 25 Sep 2016 19:15:46 +0300 Subject: WIP: merge changes not previously merged (Commit done by Tzafrir) --- readers/xml_reader.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'readers/xml_reader.py') diff --git a/readers/xml_reader.py b/readers/xml_reader.py index af80e25..5b2d1fd 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -20,13 +20,13 @@ def read_file(path, element_key): # get the root element event, root = context.__next__() - #the factory + # the factory inl_factory = factories.INLFactory() files = {} for event, element in context: if 'end' in event: if element_key in element.tag: - #enter the processing here + # enter the processing here record_counter += 1 #cleaned element is a tree @@ -34,7 +34,7 @@ def read_file(path, element_key): cleaned_element = inl_parser.clearxml() entity = inl_factory.get_entity(cleaned_element) - #test print the entity + # test print the entity if entity != None: if entity.TYPE not in files: files[entity.TYPE] = open("../out/{}.csv".format(entity.TYPE), 'w+', encoding='utf8') @@ -47,12 +47,15 @@ def read_file(path, element_key): # entity.print_entity() - - #TODO analys and upload the entity + # TODO analys and upload the entity # import pdb; pdb.set_trace() - #print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text) + print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', + cleaned_element.getroot().text) element.clear() + print(record_counter) + + if __name__ == '__main__': - read_file(r"C:/Users/Ilsar/Documents/datahack/NLI-nnl10.xml", 'record') + read_file(r"../../NLI-nnl10.xml", 'record') -- cgit v1.2.3