summary | refs | log | tree | commit | diff
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/INL_xml_parser.py 33
-rw-r--r-- parsers/__init__.py 1
2 files changed, 30 insertions, 4 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 660d44b..c23cf20 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -1,11 +1,35 @@
-import xml.etree.cElementTree as ET
+try:
+ import xml.etree.cElementTree as ET
+except ImportError:
+ import xml.etree.ElementTree as ET
+
+KNOWN_FIELD_TAGS = ['100', '110', '151']
class INLXmlParser:
- def __init__(self, lst, path):
- self.whitelist = lst
- self.xmlpath = path
+ def __init__(self, reader, whitelist=None):
+ self.reader = reader
+ self.whitelist = whitelist or KNOWN_FIELD_TAGS
+
+<<<<<<< HEAD
+ def clearxml(self):
+ newTreeRoot = ET.Element('data')
+ # # scan the datafields in the records and copy to the new one only the tags in the whitelist
+ # for record in root: # create new record
+ newRecord = ET.SubElement(newTreeRoot, 'record')
+ for field in self.reader:
+ fieldtag = field.attrib.get('tag')
+ if fieldtag in self.whitelist:
+ newFieldTag = fieldtag
+ # tag 700 and 400 are the same
+ if newFieldTag == '700':
+ newFieldTag = '400'
+ newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+ for data in field:
+ subData = ET.SubElement(newTag, data.tag, data.attrib)
+ subData.text = data.text
+=======
#expects to get a record as ElementTree
def clearxml(self, record):
@@ -28,6 +52,7 @@ class INLXmlParser:
newFieldTag += data.attrib.get('code')
newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
newTag.text = data.text
+>>>>>>> 081eac29a20ab8485f2b8180654a6d4b808e2df7
# newRecordTree = ET.ElementTree(newRecord)
return newRecord
diff --git a/parsers/__init__.py b/parsers/__init__.py
index e69de29..e3a246d 100644
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -0,0 +1 @@
+from INL_xml_parser import INLXmlParser
\ No newline at end of file