summaryrefslogtreecommitdiff
path: root/factories/INL_factory.py
blob: 42feea6c67eea839d9a2fde370d39a70b2e3893f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import entities
from factories import BasicFactory
import xml.etree.cElementTree as ET

# Default mapping from a record-type key (the values returned by
# get_record_key) to the entity class associated with that record type.
# INLFactory stores it as ``self.mapping`` when no custom mapping is given.
TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location
}


# Maps a field's ``tag`` attribute to the logical attribute it carries.
# INLFactory.get_entity looks every field's tag up in this table and
# dispatches on the resulting attribute name.
ENTITY_KEYS = {
    '100.a': 'name',
    '100.9': 'name_langindic',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '400.9': 'langs_langindic',
    '678.a': 'bio_data',
    '151.a': 'name',
    '151.9': 'name_langindic',
    # Every other key uses the "<tag>.<subfield>" form; the dotted 451 keys
    # were missing, so location name variants were never matched.
    '451.a': 'name_in_langs',
    '451.9': 'langs_langindic',
    # Colon-separated spellings kept for backward compatibility with any
    # caller or data source that relied on the original keys.
    '451:a': 'name_in_langs',
    '451:9': 'langs_langindic',
    '550.a': 'type_of_place',
}


def get_record_key(record):
    """Return the record-type key of *record*.

    Fields are scanned in document order; the first field whose ``tag``
    attribute contains one of the known keys decides the result. Within a
    single field the keys are tried in the order '100', '151', '110'.

    Args:
        record: Object exposing ``getroot()`` whose result iterates over
            field elements carrying an ``attrib`` mapping (for example an
            ``ElementTree``).

    Returns:
        '100', '151' or '110', or ``None`` when no field matches.
    """
    for field in record.getroot():
        field_tag = field.attrib.get('tag')
        if field_tag is None:
            # A field without a ``tag`` attribute cannot identify the record;
            # a substring test against None would raise TypeError.
            continue
        for key in ('100', '151', '110'):
            if key in field_tag:
                return key
    return None

class INLFactory(BasicFactory):
    """Factory that builds an ``entities`` object from a parsed record.

    The record type is determined with ``get_record_key``: '100' produces an
    ``entities.Person``, '110' an ``entities.Institution`` and '151' an
    ``entities.Location``.
    """

    def __init__(self, tag_to_entity_mapping=None):
        """Store the tag-to-entity mapping, defaulting to the module-level one."""
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    @staticmethod
    def _collect_fields(raw_object, entity_keys):
        """Walk the record's fields once and gather every known attribute.

        Returns a dict with the keys ``eng_name``, ``date_of_birth``,
        ``name_in_langs``, ``bio_data`` and ``types_of_place``. Attributes
        that never occur in the record keep their empty defaults.
        """
        collected = {
            'eng_name': '',
            'date_of_birth': '',
            'name_in_langs': {},
            'bio_data': [],
            'types_of_place': [],
        }
        names_by_lang = collected['name_in_langs']
        name = ''            # most recent primary name seen
        variant_name = None  # most recent name variant seen, if any

        for field in raw_object.getroot():
            tag = entity_keys.get(field.attrib.get('tag'))
            text = field.text
            if tag == 'name':
                name = text
            elif tag == 'name_langindic':
                # The indicator describes the primary name that preceded it;
                # 'lat' marks the English name, anything else is a language.
                if text == 'lat':
                    collected['eng_name'] = name
                else:
                    names_by_lang.setdefault(text, []).append(name)
            elif tag == 'date_of_birth':
                collected['date_of_birth'] = text
            elif tag == 'name_in_langs':
                variant_name = text
            elif tag == 'langs_langindic':
                # Pair the indicator with the variant name that preceded it.
                # An indicator with no preceding variant has nothing to
                # attach to and is skipped.
                if variant_name is not None:
                    names_by_lang.setdefault(text, []).append(variant_name)
            elif tag == 'bio_data':
                collected['bio_data'].append(text)
            elif tag == 'type_of_place':
                collected['types_of_place'].append(text)
        return collected

    def get_entity(self,  raw_object, entity_keys=ENTITY_KEYS):
        """Build the entity described by *raw_object*.

        Args:
            raw_object: Record exposing ``getroot()``; its fields carry a
                ``tag`` attribute and a text value.
            entity_keys: Mapping from a field's ``tag`` attribute to the
                logical attribute name it holds.

        Returns:
            An ``entities.Person``, ``entities.Institution`` or
            ``entities.Location`` depending on the record type.

        Raises:
            KeyError: If the record type is not one of '100', '110', '151'.
        """
        record_key = get_record_key(raw_object)
        if record_key == '100':
            data = self._collect_fields(raw_object, entity_keys)
            return entities.Person(
                data['eng_name'],
                data['date_of_birth'],
                data['name_in_langs'],
                data['bio_data'],
            )
        if record_key == '110':
            return entities.Institution()
        if record_key == '151':
            data = self._collect_fields(raw_object, entity_keys)
            return entities.Location(
                data['eng_name'],
                data['types_of_place'],
                data['name_in_langs'],
            )
        # Report the offending record key rather than the whole lookup table.
        raise KeyError('Key {} was not recognized for factory {}'.format(record_key, type(self)))