blob: bd7821b6c313ac8eb1bd70ca1cd14b2b4f752660 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
# from __future__ import absolute_import
import parsers
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
def read_file(path, element_key):
    """Stream an XML file and print a summary line for each matching record.

    The document is parsed incrementally with ``ET.iterparse`` so it is
    never loaded into memory as a whole. Every element whose tag contains
    ``element_key`` is handed to ``parsers.INLXmlParser(...).clearxml()``
    once it has been read completely, and the running record number plus the
    tag, attributes and text of the cleaned result's root are printed.

    Args:
        path: Path of the XML file to read.
        element_key: Text looked for inside each element tag. This is a
            substring test, so a namespace-qualified tag such as
            ``{uri}record`` matches ``'record'`` -- and so does any other
            tag that merely contains the key.

    Returns:
        The number of matching elements that were processed.
    """
    record_counter = 0
    # Open the file ourselves so the handle is closed even when parsing or
    # the record parser raises part-way through the document.
    with open(path, "rb") as source:
        # Only "end" events are needed: an element is complete (children and
        # text included) when its end tag has been seen.
        for _event, element in ET.iterparse(source, events=("end",)):
            if element_key not in element.tag:
                continue
            record_counter += 1
            cleaned_element = parsers.INLXmlParser(element).clearxml()
            cleaned_root = cleaned_element.getroot()
            print(record_counter, cleaned_root.tag, '@@@',
                  cleaned_root.attrib, '@@@', cleaned_root.text)
            # Drop the record's children, text and attributes now that it
            # has been reported, keeping memory use low on large files.
            element.clear()
    return record_counter
if __name__ == '__main__':
    # Script entry point: scan the sample export and report every element
    # whose tag contains 'record'.
    read_file(path=r"../../NLI-nnl10.xml", element_key='record')
|