try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# Datafield tags copied by default when no explicit whitelist is supplied.
KNOWN_FIELD_TAGS = ['100', '110', '151']


class INLXmlParser:
    """Filter the datafields of an XML record down to a whitelist of tags.

    NOTE(review): this class was reconstructed from an unresolved merge
    conflict. Both sides of the conflict are preserved: calling
    ``clearxml()`` with no argument behaves like the HEAD side, calling
    ``clearxml(record)`` behaves like the incoming side. Confirm which
    behaviour callers actually rely on and drop the other if unused.
    """

    def __init__(self, reader, whitelist=None):
        """Store the field source and the set of tags to keep.

        :param reader: iterable of datafield elements, used by ``clearxml``
            when it is called without a record.
        :param whitelist: tags to keep; a falsy value (``None`` or an empty
            list) selects ``KNOWN_FIELD_TAGS``.
        """
        self.reader = reader
        self.whitelist = whitelist or KNOWN_FIELD_TAGS

    def clearxml(self, record=None):
        """Build a new ``record`` tree holding only whitelisted datafields.

        :param record: optional ``ElementTree`` (or bare root ``Element``)
            whose children are datafield elements. When given, every
            sub-element of a whitelisted field becomes one flat
            ``<datafield tag="TAG.CODE">`` element carrying the sub-element's
            text. When omitted, the fields are read from ``self.reader`` and
            copied with their nested sub-elements intact.
        :returns: an ``ElementTree`` whose root is a new ``record`` element.
        """
        if record is None:
            return self._copy_fields(self.reader)
        # Accept either a tree or its root element.
        root = record.getroot() if hasattr(record, 'getroot') else record
        return self._flatten_fields(root)

    def _whitelisted(self, fields):
        """Yield ``(field, output_tag)`` for every field in the whitelist."""
        for field in fields:
            tag = field.attrib.get('tag')
            if tag in self.whitelist:
                # tag 700 and 400 are the same, so 700 is emitted as 400
                yield field, ('400' if tag == '700' else tag)

    def _copy_fields(self, fields):
        """Copy whitelisted fields, keeping their sub-element structure."""
        new_record = ET.Element('record')
        for field, tag in self._whitelisted(fields):
            new_field = ET.SubElement(new_record, 'datafield', {'tag': tag})
            for data in field:
                sub_data = ET.SubElement(new_field, data.tag, dict(data.attrib))
                sub_data.text = data.text
        return ET.ElementTree(new_record)

    def _flatten_fields(self, fields):
        """Emit one ``TAG.CODE`` datafield per sub-element of each kept field."""
        new_record = ET.Element('record')
        for field, tag in self._whitelisted(fields):
            for data in field:
                code = data.attrib.get('code')
                # A sub-element without a ``code`` attribute keeps the bare
                # tag instead of failing on ``str + None``.
                flat_tag = tag if code is None else tag + '.' + code
                new_field = ET.SubElement(new_record, 'datafield', {'tag': flat_tag})
                new_field.text = data.text
        return ET.ElementTree(new_record)