diff options
Diffstat (limited to 'readers/xml_reader.py')
-rw-r--r-- | readers/xml_reader.py | 31 |
1 files changed, 31 insertions, 0 deletions
# readers/xml_reader.py
from __future__ import absolute_import
from __future__ import print_function

import parsers

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def read_file(path, element_key):
    """Stream an XML file and print the cleaned attributes of each record.

    The file is parsed incrementally with ``iterparse``. Each time an element
    whose tag contains ``element_key`` is fully read, it is handed to
    ``parsers.INLXmlParser(...).clearxml()`` and a running record number plus
    the ``attrib`` of the cleaned result's root are printed to stdout.

    Args:
        path: Path of the XML file to read.
        element_key: Text looked for inside each element's tag. This is a
            substring match, so ``'record'`` also matches a namespaced tag
            such as ``'{ns}record'`` (and any other tag containing it).

    Returns:
        None. Output is written to stdout.
    """
    record_counter = 0
    context = iter(ET.iterparse(path, events=("start", "end")))

    # The first event is the "start" of the document root. Keep a handle on
    # it so that already-processed children can be detached later.
    _, root = next(context)

    for event, element in context:
        # Only fully parsed elements ("end" events) carry their content.
        if event != 'end':
            continue
        if element_key not in element.tag:
            continue

        record_counter += 1
        # NOTE(review): clearxml() presumably returns an ElementTree-like
        # object (it exposes getroot()); confirm against parsers.INLXmlParser.
        cleaned_element = parsers.INLXmlParser(element).clearxml()
        print(record_counter, cleaned_element.getroot().attrib)

        # Drop the record's content, then drop the root's references to
        # finished children; otherwise every cleared record stays attached
        # to the root and memory grows with the number of records.
        element.clear()
        root.clear()


if __name__ == '__main__':
    read_file(r"../../NLI-nnl10.xml", 'record')