summary | refs | log | tree | commit | diff
path: root/parsers/INL_xml_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r-- parsers/INL_xml_parser.py 47
1 files changed, 23 insertions, 24 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 2ea9a9b..512d46e 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -1,34 +1,33 @@
-import xml.etree.cElementTree as ET
+try:
+ import xml.etree.cElementTree as ET
+except ImportError:
+ import xml.etree.ElementTree as ET
+
+KNOWN_FIELD_TAGS = ['100', '110', '151']
class INLXmlParser:
- def __init__(self, lst, path):
- self.whitelist = lst
- self.xmlpath = path
+ def __init__(self, reader, whitelist=None):
+ self.reader = reader
+ self.whitelist = whitelist or KNOWN_FIELD_TAGS
def clearxml(self):
- xmltree = ET.parse(self.xmlpath)
- # root == list of records
- root = xmltree.getroot()
-
- # create new data
newTreeRoot = ET.Element('data')
- # scan the datafields in the records and copy to the new one only the tags in the whitelist
- for record in root:
- # create new record
- newRecord = ET.SubElement(newTreeRoot, 'record')
- for field in record:
- fieldtag = field.attrib.get('tag')
- if fieldtag in self.whitelist:
- newFieldTag = fieldtag
- # tag 700 and 400 are the same
- if newFieldTag == '700':
- newFieldTag = '400'
- newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
- for data in field:
- subData = ET.SubElement(newTag, data.tag, data.attrib)
- subData.text = data.text
+ # # scan the datafields in the records and copy to the new one only the tags in the whitelist
+ # for record in root: # create new record
+ newRecord = ET.SubElement(newTreeRoot, 'record')
+ for field in self.reader:
+ fieldtag = field.attrib.get('tag')
+ if fieldtag in self.whitelist:
+ newFieldTag = fieldtag
+ # tag 700 and 400 are the same
+ if newFieldTag == '700':
+ newFieldTag = '400'
+ newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+ for data in field:
+ subData = ET.SubElement(newTag, data.tag, data.attrib)
+ subData.text = data.text
newTree = ET.ElementTree(newTreeRoot)
return newTree