try:
    # Older interpreters: explicitly request the C-accelerated parser.
    import xml.etree.cElementTree as ET
except ImportError:
    # The cElementTree module was removed in Python 3.9; the plain
    # ElementTree module picks up the C accelerator on its own there.
    import xml.etree.ElementTree as ET


class INLXmlParser:
    """Build a reduced copy of an XML file of records, keeping whitelisted fields.

    Attributes:
        whitelist: Container of ``tag`` attribute values that should be kept.
        xmlpath: Path (or file object) passed to ``ET.parse``.
    """

    def __init__(self, lst, path):
        """Store the tag whitelist and the location of the XML source.

        Args:
            lst: Container supporting ``in`` holding the ``tag`` values to keep.
            path: Source handed to ``ET.parse`` when ``clearxml`` is called.
        """
        self.whitelist = lst
        self.xmlpath = path

    def clearxml(self):
        """Parse the XML source and return a filtered copy as a new tree.

        The root of the parsed document is treated as a list of records.
        For every child of the root a ``record`` element is created under
        a new ``data`` root (even if nothing inside it is kept). Each child
        of a record whose ``tag`` attribute is in ``self.whitelist`` is
        copied as a ``datafield`` element carrying only its ``tag``
        attribute; the field's direct children are copied with their tag,
        attributes and text. Tag ``700`` is written out as ``400``.

        Returns:
            An ``ElementTree`` whose root is the new ``data`` element.

        Raises:
            Whatever ``ET.parse`` raises for a missing or malformed source.
        """
        source_root = ET.parse(self.xmlpath).getroot()
        new_root = ET.Element('data')

        for record in source_root:
            new_record = ET.SubElement(new_root, 'record')
            for field in record:
                field_tag = field.attrib.get('tag')
                if field_tag not in self.whitelist:
                    continue

                # Tag 700 and 400 are treated as the same field, so 700 is
                # normalised to 400 in the output.
                out_tag = '400' if field_tag == '700' else field_tag
                new_field = ET.SubElement(
                    new_record, 'datafield', {'tag': out_tag}
                )

                # Copy only the direct children: tag, attributes and text.
                for data in field:
                    new_data = ET.SubElement(new_field, data.tag, data.attrib)
                    new_data.text = data.text

        return ET.ElementTree(new_root)