summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: gilad_ilsar <gandismidas1> 2016-09-21 22:49:44 +0300
committer: gilad_ilsar <gandismidas1> 2016-09-21 22:49:44 +0300
commit: 4d108fca731bbd152ad058d007e2de6440f2c98a (patch)
tree: 215697b96bb3cc4120931309561cc1d544537032
parent: 985a0fcac8b89bfe3c5bdcea0889b921cc27c033 (diff)
Change the XML cleaner to set datafield tags as ###.* (the field tag, a dot, then the subfield code)
-rw-r--r-- .gitignore 15
-rw-r--r-- .idea/misc.xml 12
-rw-r--r-- .idea/modules.xml 2
-rw-r--r-- .idea/parser.iml (renamed from .idea/lib2wiki.iml) 3
-rw-r--r-- parsers/INL_xml_parser.py 41
5 files changed, 34 insertions, 39 deletions
diff --git a/.gitignore b/.gitignore
index 5e64fdb..7e99e36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,16 +1 @@
-# User-specific stuff:
-.idea/workspace.xml
-.idea/tasks.xml
-.idea/dictionaries
-.idea/vcs.xml
-.idea/jsLibraryMappings.xml
-
-# Sensitive or high-churn files:
-.idea/dataSources.ids
-.idea/dataSources.xml
-.idea/dataSources.local.xml
-.idea/sqlDataSources.xml
-.idea/dynamic.xml
-.idea/uiDesigner.xml
-
*.pyc \ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index de9bbc8..8e8cee7 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.10 (C:\Python27\python.exe)" project-jdk-type="Python SDK" />
+ <component name="ProjectLevelVcsManager" settingsEditedManually="false">
+ <OptionsSetting value="true" id="Add" />
+ <OptionsSetting value="true" id="Remove" />
+ <OptionsSetting value="true" id="Checkout" />
+ <OptionsSetting value="true" id="Update" />
+ <OptionsSetting value="true" id="Status" />
+ <OptionsSetting value="true" id="Edit" />
+ <ConfirmationsSetting value="0" id="Add" />
+ <ConfirmationsSetting value="0" id="Remove" />
+ </component>
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (C:\Users\Ilsar\Anaconda3\python.exe)" project-jdk-type="Python SDK" />
</project> \ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
index 9a7bd2d..405d108 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -2,7 +2,7 @@
<project version="4">
<component name="ProjectModuleManager">
<modules>
- <module fileurl="file://$PROJECT_DIR$/.idea/lib2wiki.iml" filepath="$PROJECT_DIR$/.idea/lib2wiki.iml" />
+ <module fileurl="file://$PROJECT_DIR$/.idea/parser.iml" filepath="$PROJECT_DIR$/.idea/parser.iml" />
</modules>
</component>
</project> \ No newline at end of file
diff --git a/.idea/lib2wiki.iml b/.idea/parser.iml
index 6711606..6f63a63 100644
--- a/.idea/lib2wiki.iml
+++ b/.idea/parser.iml
@@ -6,6 +6,7 @@
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
- <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+ <option name="projectConfiguration" value="Nosetests" />
+ <option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module> \ No newline at end of file
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 2ea9a9b..671d2d9 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -6,29 +6,28 @@ class INLXmlParser:
self.whitelist = lst
self.xmlpath = path
- def clearxml(self):
- xmltree = ET.parse(self.xmlpath)
- # root == list of records
- root = xmltree.getroot()
- # create new data
- newTreeRoot = ET.Element('data')
+ #expects to get a record as ElementTree
+ def clearxml(self, record):
+ # root == list of tags
+ root = record.getroot()
# scan the datafields in the records and copy to the new one only the tags in the whitelist
- for record in root:
- # create new record
- newRecord = ET.SubElement(newTreeRoot, 'record')
- for field in record:
- fieldtag = field.attrib.get('tag')
- if fieldtag in self.whitelist:
- newFieldTag = fieldtag
- # tag 700 and 400 are the same
- if newFieldTag == '700':
- newFieldTag = '400'
+ # create new record
+ newRecord = ET.Element('record')
+ for field in root:
+ fieldtag = field.attrib.get('tag')
+ if fieldtag in self.whitelist:
+ tempTag = fieldtag
+ # tag 700 and 400 are the same
+ if tempTag == '700':
+ tempTag = '400'
+ for data in field:
+ newFieldTag = tempTag
+ newFieldTag +='.'
+ newFieldTag += data.attrib.get('code')
newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
- for data in field:
- subData = ET.SubElement(newTag, data.tag, data.attrib)
- subData.text = data.text
+ newTag.text = data.text
- newTree = ET.ElementTree(newTreeRoot)
- return newTree
+ newRecordTree = ET.ElementTree(newRecord)
+ return newRecordTree