summary refs log tree commit diff
path: root/parsers/INL_xml_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r-- parsers/INL_xml_parser.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
new file mode 100644
index 0000000..57ceebd
--- /dev/null
+++ b/parsers/INL_xml_parser.py
@@ -0,0 +1,35 @@
try:
    import xml.etree.cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET
+
+
class INLXmlParser:
    """Reduce an XML file of records to a whitelist of datafield tags.

    Attributes:
        whitelist: container of ``tag`` attribute values to keep; membership
            is tested with ``in``.
        xmlpath: source XML location, handed unchanged to ``ET.parse``.
    """

    # Legacy output location. It stays the default so callers that relied on
    # the file being written there keep working; pass another path (or None)
    # to ``clearxml`` on machines where this directory does not exist.
    DEFAULT_OUTPUT_PATH = 'C:/Users/Ilsar/Documents/datahack/outTest.xml'

    # Source tags that are emitted under a different tag in the output
    # (tag 700 and 400 are the same).
    TAG_ALIASES = {'700': '400'}

    def __init__(self, lst, path):
        """Store the tag whitelist ``lst`` and the source XML ``path``."""
        self.whitelist = lst
        self.xmlpath = path

    def clearxml(self, output_path=DEFAULT_OUTPUT_PATH):
        """Build a filtered copy of the source XML and optionally save it.

        The root of the source document is treated as a list of records.
        Every record yields one ``<record>`` element under a new ``<data>``
        root, even when none of its fields survive the filter.  Each child
        of a record whose ``tag`` attribute is in the whitelist is copied as
        a ``<datafield>`` (with the tag remapped through ``TAG_ALIASES``),
        together with the tag, attributes and text of its direct children.

        Args:
            output_path: where to write the resulting tree.  Defaults to the
                legacy hard-coded location; pass ``None`` to skip writing.

        Returns:
            The new ``ElementTree`` rooted at ``<data>``.
        """
        # root == list of records
        source_root = ET.parse(self.xmlpath).getroot()
        new_root = ET.Element('data')

        for record in source_root:
            new_record = ET.SubElement(new_root, 'record')
            for field in record:
                tag = field.attrib.get('tag')
                if tag not in self.whitelist:
                    continue
                new_field = ET.SubElement(
                    new_record,
                    'datafield',
                    {'tag': self.TAG_ALIASES.get(tag, tag)},
                )
                # Only the direct children are copied (tag, attributes, text).
                for sub in field:
                    copied = ET.SubElement(new_field, sub.tag, sub.attrib)
                    copied.text = sub.text

        new_tree = ET.ElementTree(new_root)
        if output_path is not None:
            new_tree.write(output_path)
        return new_tree