From d646c9a42273e98c85602f5618598125007bbfaa Mon Sep 17 00:00:00 2001
From: Tzafrir Cohen
Date: Sun, 25 Sep 2016 20:28:16 +0300
Subject: WIP: commit all files that were changed

---
 parsers/INL_xml_parser.py | 72 +++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'parsers/INL_xml_parser.py')

diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 1a06f6b..cdde5a8 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -1,36 +1,36 @@
-try:
-    import xml.etree.cElementTree as ET
-except ImportError:
-    import xml.etree.ElementTree as ET
-
-KNOWN_FIELD_TAGS = ['100', '110', '151']
-
-TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
-
-class INLXmlParser:
-    def __init__(self, reader, whitelist=TAG_WHITELIST):
-        self.reader = reader
-        #self.whitelist = whitelist or KNOWN_FIELD_TAGS
-        self.whitelist = whitelist
-
-    def clearxml(self):
-
-        # # scan the datafields in the records and copy to the new one only the tags in the whitelist
-        # for record in root: # create new record
-        newRecord = ET.Element('record')
-        for field in self.reader:
-            fieldtag = field.attrib.get('tag')
-            if fieldtag in self.whitelist:
-                temptag = fieldtag
-                # tag 700 and 400 are the same
-                if temptag == '700':
-                    temptag = '400'
-                for data in field:
-                    newFieldTag = temptag
-                    newFieldTag += '.'
-                    newFieldTag += data.attrib.get('code')
-                    newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
-                    newTag.text = data.text
-
-        newRecordTree = ET.ElementTree(newRecord)
-        return ET.ElementTree(newRecord)
+try:
+    import xml.etree.cElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+KNOWN_FIELD_TAGS = ['100', '110', '151']
+
+TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
+
+class INLXmlParser:
+    def __init__(self, reader, whitelist=TAG_WHITELIST):
+        self.reader = reader
+        #self.whitelist = whitelist or KNOWN_FIELD_TAGS
+        self.whitelist = whitelist
+
+    def clearxml(self):
+
+        # # scan the datafields in the records and copy to the new one only the tags in the whitelist
+        # for record in root: # create new record
+        newRecord = ET.Element('record')
+        for field in self.reader:
+            fieldtag = field.attrib.get('tag')
+            if fieldtag in self.whitelist:
+                temptag = fieldtag
+                # tag 700 and 400 are the same
+                if temptag == '700':
+                    temptag = '400'
+                for data in field:
+                    newFieldTag = temptag
+                    newFieldTag += '.'
+                    newFieldTag += data.attrib.get('code')
+                    newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+                    newTag.text = data.text
+
+        newRecordTree = ET.ElementTree(newRecord)
+        return ET.ElementTree(newRecord)
--
cgit v1.2.3