From bd3956dc019d7f56bfd2cb8b667e8cacf9e80f59 Mon Sep 17 00:00:00 2001 From: roy lewin Date: Thu, 22 Sep 2016 10:00:44 +0300 Subject: Fixed conflicts in xml reader --- .idea/workspace.xml | 89 ++++++++++++++++++++++++++++++++++++++++++----- parsers/INL_xml_parser.py | 29 ++------------- readers/xml_reader.py | 5 +-- 3 files changed, 85 insertions(+), 38 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 5299b52..f2b732c 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,9 +3,11 @@ + + + + + @@ -21,11 +26,40 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + @@ -158,11 +192,11 @@ - + - + + + + + + - + - + - + + + - - @@ -441,5 +496,21 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index c23cf20..4cd04ef 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -11,7 +11,6 @@ class INLXmlParser: self.reader = reader self.whitelist = whitelist or KNOWN_FIELD_TAGS -<<<<<<< HEAD def clearxml(self): newTreeRoot = ET.Element('data') @@ -29,30 +28,6 @@ class INLXmlParser: for data in field: subData = ET.SubElement(newTag, data.tag, data.attrib) subData.text = data.text -======= - #expects to get a record as ElementTree - def clearxml(self, record): - # root == list of tags - root = record.getroot() - - # scan the datafields in the records and copy to the new one only the tags in the whitelist - # create new record - newRecord = ET.Element('record') - for field in root: - fieldtag = field.attrib.get('tag') - if fieldtag in self.whitelist: - tempTag = fieldtag - # tag 700 and 400 are the same - if tempTag == '700': - tempTag = '400' - for data in field: - newFieldTag = tempTag - newFieldTag +='.' - newFieldTag += data.attrib.get('code') - newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) - newTag.text = data.text ->>>>>>> 081eac29a20ab8485f2b8180654a6d4b808e2df7 - - # newRecordTree = ET.ElementTree(newRecord) - return newRecord + newRecordTree = ET.ElementTree(newRecord) + return newRecordTree diff --git a/readers/xml_reader.py b/readers/xml_reader.py index 8a819b0..55a0750 100644 --- a/readers/xml_reader.py +++ b/readers/xml_reader.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +# from __future__ import absolute_import import parsers try: @@ -23,7 +23,8 @@ def read_file(path, element_key): if element_key in element.tag: record_counter += 1 cleaned_element = parsers.INLXmlParser(element).clearxml() - print record_counter, cleaned_element.getroot().attrib + # import pdb; pdb.set_trace() + print record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@', cleaned_element.getroot().text element.clear() -- cgit v1.2.3