diff options
author | gilad_ilsar <gandismidas1> | 2016-09-21 22:49:44 +0300 |
---|---|---|
committer | gilad_ilsar <gandismidas1> | 2016-09-21 22:49:44 +0300 |
commit | 4d108fca731bbd152ad058d007e2de6440f2c98a (patch) | |
tree | 215697b96bb3cc4120931309561cc1d544537032 | |
parent | 985a0fcac8b89bfe3c5bdcea0889b921cc27c033 (diff) |
Change the XML cleaner to process a single record and emit datafield tags as ###.* (field tag, a dot, then the subfield code)
-rw-r--r-- | .gitignore | 15 | ||||
-rw-r--r-- | .idea/misc.xml | 12 | ||||
-rw-r--r-- | .idea/modules.xml | 2 | ||||
-rw-r--r-- | .idea/parser.iml (renamed from .idea/lib2wiki.iml) | 3 | ||||
-rw-r--r-- | parsers/INL_xml_parser.py | 41 |
5 files changed, 34 insertions, 39 deletions
@@ -1,16 +1 @@ -# User-specific stuff: -.idea/workspace.xml -.idea/tasks.xml -.idea/dictionaries -.idea/vcs.xml -.idea/jsLibraryMappings.xml - -# Sensitive or high-churn files: -.idea/dataSources.ids -.idea/dataSources.xml -.idea/dataSources.local.xml -.idea/sqlDataSources.xml -.idea/dynamic.xml -.idea/uiDesigner.xml - *.pyc
\ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index de9bbc8..8e8cee7 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,14 @@ <?xml version="1.0" encoding="UTF-8"?> <project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.10 (C:\Python27\python.exe)" project-jdk-type="Python SDK" /> + <component name="ProjectLevelVcsManager" settingsEditedManually="false"> + <OptionsSetting value="true" id="Add" /> + <OptionsSetting value="true" id="Remove" /> + <OptionsSetting value="true" id="Checkout" /> + <OptionsSetting value="true" id="Update" /> + <OptionsSetting value="true" id="Status" /> + <OptionsSetting value="true" id="Edit" /> + <ConfirmationsSetting value="0" id="Add" /> + <ConfirmationsSetting value="0" id="Remove" /> + </component> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (C:\Users\Ilsar\Anaconda3\python.exe)" project-jdk-type="Python SDK" /> </project>
\ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 9a7bd2d..405d108 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,7 +2,7 @@ <project version="4"> <component name="ProjectModuleManager"> <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/lib2wiki.iml" filepath="$PROJECT_DIR$/.idea/lib2wiki.iml" /> + <module fileurl="file://$PROJECT_DIR$/.idea/parser.iml" filepath="$PROJECT_DIR$/.idea/parser.iml" /> </modules> </component> </project>
\ No newline at end of file diff --git a/.idea/lib2wiki.iml b/.idea/parser.iml index 6711606..6f63a63 100644 --- a/.idea/lib2wiki.iml +++ b/.idea/parser.iml @@ -6,6 +6,7 @@ <orderEntry type="sourceFolder" forTests="false" /> </component> <component name="TestRunnerService"> - <option name="PROJECT_TEST_RUNNER" value="Unittests" /> + <option name="projectConfiguration" value="Nosetests" /> + <option name="PROJECT_TEST_RUNNER" value="Nosetests" /> </component> </module>
\ No newline at end of file diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py index 2ea9a9b..671d2d9 100644 --- a/parsers/INL_xml_parser.py +++ b/parsers/INL_xml_parser.py @@ -6,29 +6,28 @@ class INLXmlParser: self.whitelist = lst self.xmlpath = path - def clearxml(self): - xmltree = ET.parse(self.xmlpath) - # root == list of records - root = xmltree.getroot() - # create new data - newTreeRoot = ET.Element('data') + #expects to get a record as ElementTree + def clearxml(self, record): + # root == list of tags + root = record.getroot() # scan the datafields in the records and copy to the new one only the tags in the whitelist - for record in root: - # create new record - newRecord = ET.SubElement(newTreeRoot, 'record') - for field in record: - fieldtag = field.attrib.get('tag') - if fieldtag in self.whitelist: - newFieldTag = fieldtag - # tag 700 and 400 are the same - if newFieldTag == '700': - newFieldTag = '400' + # create new record + newRecord = ET.Element('record') + for field in root: + fieldtag = field.attrib.get('tag') + if fieldtag in self.whitelist: + tempTag = fieldtag + # tag 700 and 400 are the same + if tempTag == '700': + tempTag = '400' + for data in field: + newFieldTag = tempTag + newFieldTag +='.' + newFieldTag += data.attrib.get('code') newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag}) - for data in field: - subData = ET.SubElement(newTag, data.tag, data.attrib) - subData.text = data.text + newTag.text = data.text - newTree = ET.ElementTree(newTreeRoot) - return newTree + newRecordTree = ET.ElementTree(newRecord) + return newRecordTree |