diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/INL_xml_parser.py | 72 | ||||
-rw-r--r-- | parsers/__init__.py | 2 | ||||
-rw-r--r-- | parsers/basic_parser.py | 12 |
3 files changed, 43 insertions, 43 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 1a06f6b..cdde5a8 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -1,36 +1,36 @@ -try: - import xml.etree.cElementTree as ET -except ImportError: - import xml.etree.ElementTree as ET - -KNOWN_FIELD_TAGS = ['100', '110', '151'] - -TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374'] - -class INLXmlParser: - def __init__(self, reader, whitelist=TAG_WHITELIST): - self.reader = reader - #self.whitelist = whitelist or KNOWN_FIELD_TAGS - self.whitelist = whitelist - - def clearxml(self): - - # # scan the datafields in the records and copy to the new one only the tags in the whitelist - # for record in root: # create new record - newRecord = ET.Element('record') - for field in self.reader: - fieldtag = field.attrib.get('tag') - if fieldtag in self.whitelist: - temptag = fieldtag - # tag 700 and 400 are the same - if temptag == '700': - temptag = '400' - for data in field: - newFieldTag = temptag - newFieldTag += '.' - newFieldTag += data.attrib.get('code') - newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) - newTag.text = data.text - - newRecordTree = ET.ElementTree(newRecord) - return ET.ElementTree(newRecord) +try:
+ import xml.etree.cElementTree as ET
+except ImportError:
+ import xml.etree.ElementTree as ET
+
# Short list of field tags. Not referenced by INLXmlParser itself; retained as
# a module-level constant.
KNOWN_FIELD_TAGS = ['100', '110', '151']

# Field tags that INLXmlParser.clearxml() copies to its output by default.
TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']


class INLXmlParser:
    """Reduce the fields of a record to a flat list of whitelisted datafields.

    NOTE(review): the tag/code layout looks like MARCXML datafields and
    subfields -- confirm against the caller that supplies ``reader``.
    """

    def __init__(self, reader, whitelist=TAG_WHITELIST):
        """Store the field source and the tags that should be kept.

        :param reader: iterable of field elements; each element exposes
            ``attrib`` (with an optional ``'tag'`` entry) and iterates over
            its sub-elements.
        :param whitelist: container of tag strings to keep; defaults to
            ``TAG_WHITELIST``.
        """
        self.reader = reader
        self.whitelist = whitelist

    def clearxml(self):
        """Build a new ``record`` tree holding only the whitelisted fields.

        Every sub-element of a whitelisted field becomes one ``datafield``
        element whose ``tag`` attribute is ``"<field tag>.<subfield code>"``
        and whose text is the sub-element's text. Fields tagged ``700`` are
        emitted under tag ``400``.

        :return: an ``ElementTree`` whose root is the new ``record`` element.
        """
        record = ET.Element('record')
        for field in self.reader:
            tag = field.attrib.get('tag')
            if tag not in self.whitelist:
                continue
            # Tag 700 and 400 are the same, so 700 is folded into 400.
            if tag == '700':
                tag = '400'
            for subfield in field:
                code = subfield.attrib.get('code')
                if code is None:
                    # No "<tag>.<code>" name can be formed without a code;
                    # skip the sub-element instead of failing on str + None.
                    continue
                entry = ET.SubElement(
                    record, 'datafield', {'tag': tag + '.' + code})
                entry.text = subfield.text

        return ET.ElementTree(record)
diff --git a/parsers/__init__.py b/parsers/__init__.py index d32c917..07907f9 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -1,2 +1,2 @@ - +
from .INL_xml_parser import INLXmlParser
# File: parsers/basic_parser.py
class BasicParser(object):
    """Base class for parsers; subclasses are expected to override parse()."""

    def __init__(self):
        """Create the parser; the base class keeps no state."""

    def parse(self, data):
        """Parse ``data``; the base implementation always raises.

        :param data: input to parse (unused by the base class).
        :raises NotImplementedError: always, naming the concrete class.
        """
        template = "parse() method must be implemented class {}"
        raise NotImplementedError(template.format(type(self)))
|