summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gilad_ilsar <gandismidas1> 2016-09-21 20:50:13 +0300
committer gilad_ilsar <gandismidas1> 2016-09-21 20:50:13 +0300
commit 587c722cccc09659abc1bf046183207a482cc827 (patch)
tree 2caae1dbfbf527588a5cb1b41786fe6e9911f544
parent dc19d86545d7f9fab0092d0ca20214f0ea079988 (diff)
created the INL xml parser
-rw-r--r-- .idea/vcs.xml 6
-rw-r--r-- parsers/INL_xml_parser.py 35
2 files changed, 41 insertions, 0 deletions
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="VcsDirectoryMappings">
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
+ </component>
+</project>
\ No newline at end of file
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
new file mode 100644
index 0000000..57ceebd
--- /dev/null
+++ b/parsers/INL_xml_parser.py
@@ -0,0 +1,35 @@
+import xml.etree.cElementTree as ET
+
+
class INLXmlParser:
    """Copy whitelisted fields of an XML record list into a new, smaller tree.

    The source document is expected to have a root element whose children are
    records; each record's children are fields identified by a ``tag``
    attribute. Only fields whose ``tag`` value is in the whitelist are kept.

    NOTE(review): the module imports ``ET`` from ``xml.etree.cElementTree``,
    which was removed in Python 3.9 -- confirm the target interpreter version.
    """

    # Output location used when the caller does not supply one. Kept for
    # backward compatibility with the original hard-coded behaviour.
    DEFAULT_OUTPUT_PATH = 'C:/Users/Ilsar/Documents/datahack/outTest.xml'

    def __init__(self, lst, path):
        """Store the whitelist and the path of the XML file to filter.

        Args:
            lst: Container of ``tag`` attribute values (strings) to keep.
            path: Path (or file object) of the source XML, as accepted by
                ``ET.parse``.
        """
        self.whitelist = lst
        self.xmlpath = path

    def clearxml(self, outpath=DEFAULT_OUTPUT_PATH):
        """Build the filtered tree, optionally write it to disk, and return it.

        For every child of the source root a ``record`` element is created
        under a new ``data`` root (even if none of its fields survive the
        filter). Each whitelisted field becomes a ``datafield`` element
        carrying only a ``tag`` attribute; its direct children are copied
        with their tag, attributes and text (tails and deeper descendants are
        not copied).

        Args:
            outpath: Where to write the resulting XML. Defaults to
                ``DEFAULT_OUTPUT_PATH``; pass ``None`` to skip writing.

        Returns:
            The new ``ElementTree`` rooted at ``data``.
        """
        # The source root is the list of records.
        source_root = ET.parse(self.xmlpath).getroot()
        new_root = ET.Element('data')

        for record in source_root:
            new_record = ET.SubElement(new_root, 'record')
            for field in record:
                field_tag = field.attrib.get('tag')
                if field_tag not in self.whitelist:
                    continue
                # Tags 700 and 400 are treated as the same field, so 700 is
                # stored under 400 in the output.
                out_tag = '400' if field_tag == '700' else field_tag
                new_field = ET.SubElement(new_record, 'datafield', {'tag': out_tag})
                for data in field:
                    # SubElement copies the attribute dict, so the source
                    # element's attributes are not shared with the new tree.
                    new_data = ET.SubElement(new_field, data.tag, data.attrib)
                    new_data.text = data.text

        new_tree = ET.ElementTree(new_root)
        if outpath is not None:
            new_tree.write(outpath)
        return new_tree