diff options
author | gilad_ilsar <gandismidas1> | 2016-09-21 20:50:13 +0300 |
---|---|---|
committer | gilad_ilsar <gandismidas1> | 2016-09-21 20:50:13 +0300 |
commit | 587c722cccc09659abc1bf046183207a482cc827 (patch) | |
tree | 2caae1dbfbf527588a5cb1b41786fe6e9911f544 | |
parent | dc19d86545d7f9fab0092d0ca20214f0ea079988 (diff) |
created the INL xml parser
-rw-r--r-- | .idea/vcs.xml | 6 | ||||
-rw-r--r-- | parsers/INL_xml_parser.py | 35 |
2 files changed, 41 insertions, 0 deletions
diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="$PROJECT_DIR$" vcs="Git" /> + </component> +</project>
# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the
# C accelerator automatically when it is available.
import xml.etree.ElementTree as ET


class INLXmlParser:
    """Reduce an XML file of records to the datafields named in a whitelist.

    The input document's root is treated as a list of records; each child of
    a record is a field identified by its ``tag`` attribute.
    """

    # Output location used when the caller does not supply one (kept from the
    # original implementation for backward compatibility).
    DEFAULT_OUTPUT_PATH = 'C:/Users/Ilsar/Documents/datahack/outTest.xml'

    def __init__(self, lst, path):
        """Store the tag whitelist and the path of the XML file to filter.

        Args:
            lst: Container of ``tag`` attribute values (strings) to keep.
            path: Path (or file object) of the source XML document.
        """
        self.whitelist = lst
        self.xmlpath = path

    def clearxml(self, outpath=DEFAULT_OUTPUT_PATH):
        """Build a filtered copy of the source XML and optionally write it.

        Every record in the source produces a ``record`` element in the
        result, even when none of its fields are whitelisted. Whitelisted
        fields are copied as ``datafield`` elements carrying only the ``tag``
        attribute; their direct children are copied with tag, attributes and
        text (nested grandchildren and tail text are not copied).

        Args:
            outpath: Where to write the resulting tree. Defaults to the
                original hard-coded location. Pass ``None`` to skip writing.

        Returns:
            The new ``xml.etree.ElementTree.ElementTree`` rooted at ``data``.

        Raises:
            xml.etree.ElementTree.ParseError: If the source is not valid XML.
            OSError: If the source cannot be read or ``outpath`` not written.
        """
        # The root of the source document is the list of records.
        source_root = ET.parse(self.xmlpath).getroot()
        new_root = ET.Element('data')

        for record in source_root:
            new_record = ET.SubElement(new_root, 'record')
            for field in record:
                field_tag = field.attrib.get('tag')
                if field_tag not in self.whitelist:
                    continue
                # Tags 700 and 400 are treated as the same field, so 700 is
                # normalised to 400 in the output.
                if field_tag == '700':
                    field_tag = '400'
                new_field = ET.SubElement(
                    new_record, 'datafield', {'tag': field_tag})
                for data in field:
                    sub_data = ET.SubElement(
                        new_field, data.tag, dict(data.attrib))
                    sub_data.text = data.text

        new_tree = ET.ElementTree(new_root)
        if outpath is not None:
            new_tree.write(outpath)
        return new_tree