# from __future__ import absolute_import
import parsers

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def read_file(path, element_key):
    """Stream an XML file and print one summary line per matching element.

    The file is parsed incrementally with ``ET.iterparse``. Every element
    whose tag contains ``element_key`` is, once fully read, handed to
    ``parsers.INLXmlParser(...).clearxml()``; the tag, attributes and text
    of the cleaned tree's root are printed, preceded by a running counter.

    Args:
        path: Path (or file object) accepted by ``ET.iterparse``.
        element_key: Substring looked for in each element's tag. A substring
            test is used rather than equality, so namespace-qualified tags
            such as ``{uri}record`` match ``'record'`` as well.

    Returns:
        None. Output is written to stdout.
    """
    record_counter = 0
    context = iter(ET.iterparse(path, events=("start", "end")))

    # Consume the first event (the "start" of the document root) so the loop
    # below begins after it. The builtin next() is used instead of calling
    # __next__() directly, which also keeps this working on interpreters
    # where the cElementTree fallback above is relevant.
    next(context)

    for event, element in context:
        # Only completed elements ("end" events) carry their full content.
        if event != 'end' or element_key not in element.tag:
            continue

        record_counter += 1
        cleaned_root = parsers.INLXmlParser(element).clearxml().getroot()
        print(record_counter, cleaned_root.tag, '@@@', cleaned_root.attrib, '@@@', cleaned_root.text)

        # Drop the processed element's children, text and attributes to keep
        # memory down. Only matched elements are cleared, so the children of
        # a record are still intact when the record itself is parsed.
        # NOTE(review): the cleared element stays attached to its parent, so
        # memory still grows slowly on very large files; consider pruning the
        # parent as well if that becomes a problem.
        element.clear()


if __name__ == '__main__':
    read_file(r"../../NLI-nnl10.xml", 'record')