summaryrefslogtreecommitdiff
path: root/parsers/INL_xml_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r--parsers/INL_xml_parser.py29
1 files changed, 27 insertions, 2 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 512d46e..2ba96c9 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -11,6 +11,7 @@ class INLXmlParser:
self.reader = reader
self.whitelist = whitelist or KNOWN_FIELD_TAGS
+<<<<<<< HEAD
def clearxml(self):
newTreeRoot = ET.Element('data')
@@ -28,6 +29,30 @@ class INLXmlParser:
for data in field:
subData = ET.SubElement(newTag, data.tag, data.attrib)
subData.text = data.text
+=======
- newTree = ET.ElementTree(newTreeRoot)
- return newTree
+ #expects to get a record as ElementTree
+ def clearxml(self, record):
+ # root == list of tags
+ root = record.getroot()
+
+ # scan the datafields in the records and copy to the new one only the tags in the whitelist
+ # create new record
+ newRecord = ET.Element('record')
+ for field in root:
+ fieldtag = field.attrib.get('tag')
+ if fieldtag in self.whitelist:
+ tempTag = fieldtag
+ # tag 700 and 400 are the same
+ if tempTag == '700':
+ tempTag = '400'
+ for data in field:
+ newFieldTag = tempTag
+ newFieldTag +='.'
+ newFieldTag += data.attrib.get('code')
+ newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+ newTag.text = data.text
+>>>>>>> 081eac29a20ab8485f2b8180654a6d4b808e2df7
+
+ newRecordTree = ET.ElementTree(newRecord)
+ return newRecordTree