diff options
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r-- | parsers/INL_xml_parser.py | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 2ea9a9b..671d2d9 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -6,29 +6,28 @@ class INLXmlParser: self.whitelist = lst self.xmlpath = path - def clearxml(self): - xmltree = ET.parse(self.xmlpath) - # root == list of records - root = xmltree.getroot() - # create new data - newTreeRoot = ET.Element('data') + #expects to get a record as ElementTree + def clearxml(self, record): + # root == list of tags + root = record.getroot() # scan the datafields in the records and copy to the new one only the tags in the whitelist - for record in root: - # create new record - newRecord = ET.SubElement(newTreeRoot, 'record') - for field in record: - fieldtag = field.attrib.get('tag') - if fieldtag in self.whitelist: - newFieldTag = fieldtag - # tag 700 and 400 are the same - if newFieldTag == '700': - newFieldTag = '400' + # create new record + newRecord = ET.Element('record') + for field in root: + fieldtag = field.attrib.get('tag') + if fieldtag in self.whitelist: + tempTag = fieldtag + # tag 700 and 400 are the same + if tempTag == '700': + tempTag = '400' + for data in field: + newFieldTag = tempTag + newFieldTag +='.' + newFieldTag += data.attrib.get('code') newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) - for data in field: - subData = ET.SubElement(newTag, data.tag, data.attrib) - subData.text = data.text + newTag.text = data.text - newTree = ET.ElementTree(newTreeRoot) - return newTree + newRecordTree = ET.ElementTree(newRecord) + return newRecordTree |