summaryrefslogtreecommitdiff
path: root/factories/INL_factory.py
blob: f4e494fb7a1d2100c505ae274b20d00d937b858a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import entities
from factories import BasicFactory
import xml.etree.cElementTree as ET

# Maps a record-type key (the values returned by get_record_key) to the
# entity class used for that kind of record. INLFactory uses this as its
# default mapping when none is supplied.
TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location
}


# Maps the value of a field's ``tag`` attribute to the logical attribute name
# that INLFactory.get_entity dispatches on. Tags that are not listed here are
# ignored by the factory.
ENTITY_KEYS = {
    '100.a': 'name',
    '100.9': 'name_langindic',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '400.9': 'langs_langindic',
    '678.a': 'bio_data',
    '151.a': 'name',
    '151.9': 'name_langindic',
    '451.a': 'name_in_langs',
    '451.9': 'langs_langindic',
    # NOTE(review): the original table spelled the two 451 keys with ':'
    # while every other key uses '.'. That looks like a typo, so the dotted
    # spellings were added above; the colon spellings are kept so any data
    # that really uses them still resolves. Confirm and drop whichever form
    # is not produced by the record source.
    '451:a': 'name_in_langs',
    '451:9': 'langs_langindic',
    '550.a': 'type_of_place',
    '667.a': 'comment',
    '374.a': 'profession',
}


def get_record_key(record):
    """Return the record-type key of a parsed record.

    The fields under the record's root element are scanned in document
    order; the first field whose ``tag`` attribute contains one of the known
    keys decides the result.

    Args:
        record: An object exposing ``getroot()`` (e.g. an ElementTree) whose
            root element iterates over field elements.

    Returns:
        ``'100'``, ``'151'`` or ``'110'``, or ``None`` when no field carries
        one of those keys.
    """
    # Order matters: within a single tag the keys are tested in this
    # sequence, which is the priority the original if-chain used.
    known_keys = ('100', '151', '110')
    for field in record.getroot():
        field_tag = field.attrib.get('tag')
        if not field_tag:
            # A field without a ``tag`` attribute cannot identify the record;
            # skipping it avoids a TypeError from ``'100' in None``.
            continue
        for key in known_keys:
            if key in field_tag:
                return key
    return None

class INLFactory(BasicFactory):
    """Factory that builds ``entities`` objects from parsed XML records.

    The kind of entity is decided by ``get_record_key``: ``'100'`` yields a
    ``Person``, ``'110'`` an ``Institution`` and ``'151'`` a ``Location``.
    """

    def __init__(self, tag_to_entity_mapping=None):
        """Store the tag-to-entity mapping.

        Args:
            tag_to_entity_mapping: Optional mapping of record-type key to
                entity class. A falsy value selects TAG_TO_ENTITY_MAPPING.
        """
        # NOTE(review): the mapping is stored here but get_entity constructs
        # the entity classes directly and never reads self.mapping.
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    @staticmethod
    def _collect_fields(raw_object, entity_keys, scalar_tags, list_tags):
        """Walk a record's fields once and gather the values of interest.

        Args:
            raw_object: Parsed record exposing ``getroot()``.
            entity_keys: Mapping from a field's ``tag`` attribute to a
                logical attribute name.
            scalar_tags: Logical names whose last seen text is kept.
            list_tags: Logical names whose texts are accumulated in a list.

        Returns:
            A dict with ``'eng_name'`` (str), ``'name_in_langs'`` (dict of
            language indicator -> list of names) and one entry per name in
            ``scalar_tags`` (``''`` by default) and ``list_tags`` (``[]`` by
            default).
        """
        collected = {'eng_name': '', 'name_in_langs': {}}
        for scalar in scalar_tags:
            collected[scalar] = ''
        for multi in list_tags:
            collected[multi] = []

        # The language-indicator fields refer back to the most recent name
        # field, so both pending names start out empty rather than unbound.
        main_name = ''
        variant_name = ''
        names_by_lang = collected['name_in_langs']

        for field in raw_object.getroot():
            tag = entity_keys.get(field.attrib.get('tag'))
            text = field.text
            if tag == 'name':
                main_name = text
            elif tag == 'name_langindic':
                # 'lat' marks the English name; any other indicator files
                # the name under that language.
                if text == 'lat':
                    collected['eng_name'] = main_name
                else:
                    names_by_lang.setdefault(text, []).append(main_name)
            elif tag == 'name_in_langs':
                variant_name = text
            elif tag == 'langs_langindic':
                names_by_lang.setdefault(text, []).append(variant_name)
            elif tag in scalar_tags:
                collected[tag] = text
            elif tag in list_tags:
                collected[tag].append(text)
        return collected

    def get_entity(self,  raw_object, entity_keys=ENTITY_KEYS):
        """Build the entity described by a parsed record.

        Args:
            raw_object: Parsed record exposing ``getroot()``; each child of
                the root is a field with a ``tag`` attribute and text.
            entity_keys: Mapping from field ``tag`` to logical attribute
                name. Defaults to the module-level ENTITY_KEYS; it is only
                read, never modified.

        Returns:
            An ``entities.Person``, ``entities.Institution`` or
            ``entities.Location``, or ``None`` when the record type is not
            recognized.
        """
        record_key = get_record_key(raw_object)

        # 100 is a person
        if record_key == '100':
            data = self._collect_fields(
                raw_object,
                entity_keys,
                scalar_tags=('date_of_birth',),
                list_tags=('bio_data', 'comment', 'profession'),
            )
            return entities.Person(
                data['eng_name'],
                data['date_of_birth'],
                data['name_in_langs'],
                data['bio_data'],
                data['comment'],
                data['profession'],
            )

        # 110 is an institution; no fields are extracted for it
        if record_key == '110':
            return entities.Institution()

        # 151 is a location
        if record_key == '151':
            data = self._collect_fields(
                raw_object,
                entity_keys,
                scalar_tags=(),
                list_tags=('type_of_place', 'comment'),
            )
            return entities.Location(
                data['eng_name'],
                data['type_of_place'],
                data['name_in_langs'],
                data['comment'],
            )

        # Unrecognized record types yield None instead of raising, so callers
        # can skip records this factory does not handle.
        return None