diff options
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r-- | parsers/INL_xml_parser.py | 72 |
1 files changed, 36 insertions, 36 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index cdde5a8..1a06f6b 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -1,36 +1,36 @@
-try:
- import xml.etree.cElementTree as ET
-except ImportError:
- import xml.etree.ElementTree as ET
-
-KNOWN_FIELD_TAGS = ['100', '110', '151']
-
-TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
-
-class INLXmlParser:
- def __init__(self, reader, whitelist=TAG_WHITELIST):
- self.reader = reader
- #self.whitelist = whitelist or KNOWN_FIELD_TAGS
- self.whitelist = whitelist
-
- def clearxml(self):
-
- # # scan the datafields in the records and copy to the new one only the tags in the whitelist
- # for record in root: # create new record
- newRecord = ET.Element('record')
- for field in self.reader:
- fieldtag = field.attrib.get('tag')
- if fieldtag in self.whitelist:
- temptag = fieldtag
- # tag 700 and 400 are the same
- if temptag == '700':
- temptag = '400'
- for data in field:
- newFieldTag = temptag
- newFieldTag += '.'
- newFieldTag += data.attrib.get('code')
- newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
- newTag.text = data.text
-
- newRecordTree = ET.ElementTree(newRecord)
- return ET.ElementTree(newRecord)
+try:
+ import xml.etree.cElementTree as ET
+except ImportError:
+ import xml.etree.ElementTree as ET
+
+KNOWN_FIELD_TAGS = ['100', '110', '151']
+
+TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
+
+class INLXmlParser:
+ def __init__(self, reader, whitelist=TAG_WHITELIST):
+ self.reader = reader
+ #self.whitelist = whitelist or KNOWN_FIELD_TAGS
+ self.whitelist = whitelist
+
+ def clearxml(self):
+
+ # # scan the datafields in the records and copy to the new one only the tags in the whitelist
+ # for record in root: # create new record
+ newRecord = ET.Element('record')
+ for field in self.reader:
+ fieldtag = field.attrib.get('tag')
+ if fieldtag in self.whitelist:
+ temptag = fieldtag
+ # tag 700 and 400 are the same
+ if temptag == '700':
+ temptag = '400'
+ for data in field:
+ newFieldTag = temptag
+ newFieldTag += '.'
+ newFieldTag += data.attrib.get('code')
+ newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+ newTag.text = data.text
+
+ newRecordTree = ET.ElementTree(newRecord)
+ return ET.ElementTree(newRecord) |