diff options
author | roy lewin <roy.lewin@gmail.com> | 2016-09-22 01:58:05 +0300 |
---|---|---|
committer | roy lewin <roy.lewin@gmail.com> | 2016-09-22 01:58:05 +0300 |
commit | 733afb1109a06f89af68e8660728f1b4bbbdab1a (patch) | |
tree | ec4c965bea61228c55a7c4c68e1be86446291620 /parsers/INL_xml_parser.py | |
parent | 652781137f3856fef98e3063766f9f3b1a984a2e (diff) | |
parent | 081eac29a20ab8485f2b8180654a6d4b808e2df7 (diff) |
Merge branch 'master' of https://bitbucket.org/lib_2_wiki/parser
# Conflicts:
# .idea/misc.xml
# .idea/workspace.xml
# parsers/INL_xml_parser.py
Diffstat (limited to 'parsers/INL_xml_parser.py')
-rw-r--r-- | parsers/INL_xml_parser.py | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 512d46e..2ba96c9 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -11,6 +11,7 @@ class INLXmlParser: self.reader = reader self.whitelist = whitelist or KNOWN_FIELD_TAGS +<<<<<<< HEAD def clearxml(self): newTreeRoot = ET.Element('data') @@ -28,6 +29,30 @@ class INLXmlParser: for data in field: subData = ET.SubElement(newTag, data.tag, data.attrib) subData.text = data.text +======= - newTree = ET.ElementTree(newTreeRoot) - return newTree + #expects to get a record as ElementTree + def clearxml(self, record): + # root == list of tags + root = record.getroot() + + # scan the datafields in the records and copy to the new one only the tags in the whitelist + # create new record + newRecord = ET.Element('record') + for field in root: + fieldtag = field.attrib.get('tag') + if fieldtag in self.whitelist: + tempTag = fieldtag + # tag 700 and 400 are the same + if tempTag == '700': + tempTag = '400' + for data in field: + newFieldTag = tempTag + newFieldTag +='.' + newFieldTag += data.attrib.get('code') + newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) + newTag.text = data.text +>>>>>>> 081eac29a20ab8485f2b8180654a6d4b808e2df7 + + newRecordTree = ET.ElementTree(newRecord) + return newRecordTree |