try:
    # Legacy C-accelerated module; it was removed in Python 3.9.
    import xml.etree.cElementTree as ET
except ImportError:
    # Modern interpreters: ElementTree picks the C accelerator on its own.
    import xml.etree.ElementTree as ET


class INLXmlParser:
    """Reduce an XML record to a flat list of whitelisted sub-fields.

    Attributes:
        whitelist: container of ``tag`` attribute values to keep.
        xmlpath: path given at construction; stored but not read by the
            methods of this class.
    """

    def __init__(self, lst, path):
        """Store the tag whitelist *lst* and the XML path *path*."""
        self.whitelist = lst
        self.xmlpath = path

    def clearxml(self, record):
        """Return a new tree holding only the whitelisted fields of *record*.

        Every child of a whitelisted field becomes one
        ``<datafield tag="TAG.CODE">`` element under a fresh ``<record>``
        root, where ``TAG`` is the parent's ``tag`` attribute and ``CODE``
        is the child's ``code`` attribute. The child's text is copied over.

        Args:
            record: the source record as an ``ElementTree`` (it must offer
                ``getroot()``).

        Returns:
            An ``ElementTree`` whose root is the new ``<record>`` element.
        """
        # root == list of tags
        root = record.getroot()
        new_record = ET.Element('record')

        for field in root:
            field_tag = field.attrib.get('tag')
            if field_tag not in self.whitelist:
                continue

            # tag 700 and 400 are the same, so 700 is emitted as 400
            out_tag = '400' if field_tag == '700' else field_tag

            for data in field:
                code = data.attrib.get('code')
                if code is None:
                    # Without a code there is no TAG.CODE name to build;
                    # skip the child instead of failing on None.
                    continue
                new_field = ET.SubElement(
                    new_record, 'datafield', {'tag': out_tag + '.' + code}
                )
                new_field.text = data.text

        return ET.ElementTree(new_record)