try:
    # Use the cElementTree alias when the interpreter provides it; otherwise
    # fall back to the regular ElementTree module.
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# Default set of datafield tags that are kept when no whitelist is supplied.
KNOWN_FIELD_TAGS = ['100', '110', '151']


class INLXmlParser:
    """Filter the fields yielded by a reader down to a whitelist of tags.

    The reader is only iterated; each item it yields is expected to expose
    ``attrib`` (a mapping that may contain ``'tag'``) and to be iterable over
    child elements that have ``tag``, ``attrib`` and ``text``.
    NOTE(review): presumably these are ``datafield`` elements of a single
    record -- confirm against the caller.
    """

    def __init__(self, reader, whitelist=None):
        """Store the reader and the tag whitelist.

        Args:
            reader: Iterable of field elements to filter.
            whitelist: Container of tag strings to keep. When omitted or
                empty, a copy of ``KNOWN_FIELD_TAGS`` is used.
        """
        self.reader = reader
        # Copy the module-level default so that mutating ``self.whitelist``
        # on one parser can never change the default seen by other parsers.
        self.whitelist = whitelist or list(KNOWN_FIELD_TAGS)

    def clearxml(self):
        """Build a new tree holding only the whitelisted fields.

        The result has a ``data`` root with one ``record`` child. Every field
        from the reader whose ``tag`` attribute is in the whitelist is copied
        under that record as a ``datafield`` element, together with a shallow
        copy (tag, attributes, text) of each of its direct children.

        Returns:
            An ``ElementTree`` wrapping the newly built ``data`` root.
        """
        new_root = ET.Element('data')
        # All copied fields are gathered under a single output record.
        new_record = ET.SubElement(new_root, 'record')

        for field in self.reader:
            field_tag = field.attrib.get('tag')
            if field_tag not in self.whitelist:
                continue

            # tag 700 and 400 are the same, so 700 is emitted as 400
            if field_tag == '700':
                field_tag = '400'

            new_field = ET.SubElement(new_record, 'datafield', {'tag': field_tag})
            for child in field:
                copied = ET.SubElement(new_field, child.tag, child.attrib)
                copied.text = child.text

        return ET.ElementTree(new_root)