diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/INL_xml_parser.py | 33 | ||||
-rw-r--r-- | parsers/__init__.py | 1 |
2 files changed, 30 insertions, 4 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 660d44b..c23cf20 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -1,11 +1,35 @@ -import xml.etree.cElementTree as ET +try: + import xml.etree.cElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET + +KNOWN_FIELD_TAGS = ['100', '110', '151'] class INLXmlParser: - def __init__(self, lst, path): - self.whitelist = lst - self.xmlpath = path + def __init__(self, reader, whitelist=None): + self.reader = reader + self.whitelist = whitelist or KNOWN_FIELD_TAGS + +<<<<<<< HEAD + def clearxml(self): + newTreeRoot = ET.Element('data') + # # scan the datafields in the records and copy to the new one only the tags in the whitelist + # for record in root: # create new record + newRecord = ET.SubElement(newTreeRoot, 'record') + for field in self.reader: + fieldtag = field.attrib.get('tag') + if fieldtag in self.whitelist: + newFieldTag = fieldtag + # tag 700 and 400 are the same + if newFieldTag == '700': + newFieldTag = '400' + newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) + for data in field: + subData = ET.SubElement(newTag, data.tag, data.attrib) + subData.text = data.text +======= #expects to get a record as ElementTree def clearxml(self, record): @@ -28,6 +52,7 @@ class INLXmlParser: newFieldTag += data.attrib.get('code') newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) newTag.text = data.text +>>>>>>> 081eac29a20ab8485f2b8180654a6d4b808e2df7 # newRecordTree = ET.ElementTree(newRecord) return newRecord diff --git a/parsers/__init__.py b/parsers/__init__.py index e69de29..e3a246d 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -0,0 +1 @@ +from INL_xml_parser import INLXmlParser
\ No newline at end of file |