1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
import entities
from factories import BasicFactory
import xml.etree.cElementTree as ET
# Record-type key -> entity class associated with records of that type.
TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location,
}
# Maps the ``tag`` attribute of a record field to the logical attribute name
# that ``INLFactory.get_entity`` dispatches on.
# NOTE(review): keys appear to be '<field>.<subfield>' codes -- confirm against
# the input data.
ENTITY_KEYS = {
    # Fields handled for person records.
    '100.a': 'name',
    '100.9': 'name_langindic',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '400.9': 'langs_langindic',
    '678.a': 'bio_data',
    # Fields handled for location records.
    '151.a': 'name',
    '151.9': 'name_langindic',
    # Every other key uses a '.' separator; the dot form of the 451 keys was
    # missing, so such fields were silently ignored.
    '451.a': 'name_in_langs',
    '451.9': 'langs_langindic',
    # The original colon-separated spellings are kept so that any input that
    # really uses them keeps working.
    '451:a': 'name_in_langs',
    '451:9': 'langs_langindic',
    '550.a': 'type_of_place',
    # Shared fields.
    '667.a': 'comment',
    '374.a': 'profession',
}
def get_record_key(record):
    """Return the record-type key ('100', '151' or '110') of a parsed record.

    The fields under the record's root are scanned in document order and the
    first field whose ``tag`` attribute contains one of the known codes
    decides the result. Within a single tag the codes are tried in the order
    '100', '151', '110'.

    Args:
        record: An object exposing ``getroot()`` whose root iterates over
            elements carrying an ``attrib`` mapping (for example an
            ``xml.etree.ElementTree.ElementTree``).

    Returns:
        The matching key as a string, or ``None`` when no field matches.
    """
    for field in record.getroot():
        field_tag = field.attrib.get('tag')
        if not field_tag:
            # A field without a ``tag`` attribute cannot identify the record;
            # skipping it avoids a TypeError from ``'100' in None``.
            continue
        # Substring match (not equality): the code only needs to occur
        # somewhere inside the tag value.
        for record_key in ('100', '151', '110'):
            if record_key in field_tag:
                return record_key
    return None
class INLFactory(BasicFactory):
    """Factory that turns parsed records into ``entities`` objects.

    A record is classified with ``get_record_key`` and then converted into a
    ``Person`` ('100'), an ``Institution`` ('110') or a ``Location`` ('151').
    """

    def __init__(self, tag_to_entity_mapping=None):
        """Store the record-key -> entity-class mapping.

        Args:
            tag_to_entity_mapping: Optional mapping; when omitted (or falsy)
                the module-level ``TAG_TO_ENTITY_MAPPING`` is used.
        """
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    def get_entity(self, raw_object, entity_keys=ENTITY_KEYS):
        """Build the entity described by ``raw_object``.

        Args:
            raw_object: Parsed record exposing ``getroot()``; each child of
                the root is read through ``attrib['tag']`` and ``text``.
            entity_keys: Mapping from a field's ``tag`` attribute to the
                logical attribute name handled here. It is only read.

        Returns:
            An ``entities.Person``, ``entities.Institution`` or
            ``entities.Location``, or ``None`` for an unrecognised record.
        """
        record_key = get_record_key(raw_object)
        if record_key == '100':
            return self._build_person(raw_object, entity_keys)
        if record_key == '110':
            # Institutions are created empty; no fields are read for them.
            return entities.Institution()
        if record_key == '151':
            return self._build_location(raw_object, entity_keys)
        # NOTE: unrecognised record types yield None instead of raising.
        return None

    @staticmethod
    def _add_name(names_by_lang, lang, name):
        """Append ``name`` to the list stored under ``lang``, creating it if needed."""
        names_by_lang.setdefault(lang, []).append(name)

    def _build_person(self, raw_object, entity_keys):
        """Collect the fields of a '100' record and return an ``entities.Person``."""
        name = ''
        name_diff = ''
        eng_name = ''
        date_of_birth = ''
        name_in_langs = {}
        bio_data = []
        comment_list = []
        profession = []
        for field in raw_object.getroot():
            tag = entity_keys.get(field.attrib.get('tag'))
            if tag == 'name':
                name = field.text
            elif tag == 'name_langindic':
                # The indicator applies to the most recently seen name.
                # 'lat' selects it as the primary name (the English one,
                # according to the original author's comment).
                if field.text == 'lat':
                    eng_name = name
                else:
                    self._add_name(name_in_langs, field.text, name)
            elif tag == 'date_of_birth':
                date_of_birth = field.text
            elif tag == 'name_in_langs':
                # Remembered until its language indicator field arrives.
                name_diff = field.text
            elif tag == 'langs_langindic':
                self._add_name(name_in_langs, field.text, name_diff)
            elif tag == 'bio_data':
                bio_data.append(field.text)
            elif tag == 'comment':
                comment_list.append(field.text)
            elif tag == 'profession':
                profession.append(field.text)
        return entities.Person(eng_name, date_of_birth, name_in_langs,
                               bio_data, comment_list, profession)

    def _build_location(self, raw_object, entity_keys):
        """Collect the fields of a '151' record and return an ``entities.Location``."""
        # ``name`` must start out bound: a language indicator that precedes
        # any name field would otherwise raise UnboundLocalError.
        name = ''
        name_diff = ''
        eng_name = ''
        name_in_langs = {}
        types_of_place = []
        comment_list = []
        for field in raw_object.getroot():
            tag = entity_keys.get(field.attrib.get('tag'))
            if tag == 'name':
                name = field.text
            elif tag == 'name_langindic':
                # Same pairing rule as for persons: 'lat' selects the
                # primary name, anything else is filed under its language.
                if field.text == 'lat':
                    eng_name = name
                else:
                    self._add_name(name_in_langs, field.text, name)
            elif tag == 'type_of_place':
                types_of_place.append(field.text)
            elif tag == 'name_in_langs':
                name_diff = field.text
            elif tag == 'langs_langindic':
                self._add_name(name_in_langs, field.text, name_diff)
            elif tag == 'comment':
                comment_list.append(field.text)
        return entities.Location(eng_name, types_of_place, name_in_langs,
                                 comment_list)
|