summaryrefslogtreecommitdiff
path: root/factories/INL_factory.py
blob: b61c48723c8ffd41c526fe9a633302e768b7d89c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import entities
from factories import BasicFactory

# Maps a record key (the values get_record_key() can return) to the entity
# class for that kind of record. INLFactory uses this as its default mapping
# when the caller does not supply one.
TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location
}


# Maps a field's 'tag' attribute value to the name of the entity attribute
# that the field's text populates. Keys appear to follow a
# '<tag>.<subfield>' layout -- NOTE(review): confirm against the record feed.
ENTITY_KEYS = {
    '100.a': 'name',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '151.a': 'name',
    # Previously spelled '451:a'; every other key separates tag and subfield
    # with '.', so the colon variant would not match a '451.a' field.
    '451.a': 'name_in_langs',
    '550.a': 'type',
}


def get_record_key(self, record=None):
    """Return the key naming the kind of entity a parsed record describes.

    The record's fields are scanned in document order. The first field whose
    ``tag`` attribute contains ``'100'``, ``'151'`` or ``'110'`` (checked in
    that order for each field) decides the result.

    Args:
        self: Unused placeholder kept so existing two-argument calls keep
            working. When ``record`` is omitted, this argument is treated as
            the record, so ``get_record_key(tree)`` works as well.
        record: Object exposing ``getroot()`` whose result iterates over
            field elements carrying an ``attrib`` mapping (for example an
            ``xml.etree.ElementTree.ElementTree``).

    Returns:
        ``'100'``, ``'151'`` or ``'110'``, or ``None`` when no field matches.
    """
    if record is None:
        # Single-argument call: the only positional argument is the record.
        record = self

    for field in record.getroot():
        field_tag = field.attrib.get('tag')
        if field_tag is None:
            # Fields without a tag attribute cannot identify the record.
            continue
        for key in ('100', '151', '110'):
            if key in field_tag:
                return key
    return None

class INLFactory(BasicFactory):
    """Factory that builds entity objects from parsed INL records."""

    def __init__(self, tag_to_entity_mapping=None):
        """Store the record-key -> entity-class mapping.

        Args:
            tag_to_entity_mapping: Optional mapping to use instead of the
                module-level ``TAG_TO_ENTITY_MAPPING``. A falsy value (``None``
                or an empty mapping) selects the default.
        """
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    @staticmethod
    def _collect_fields(entity_keys, raw_object):
        """Group field texts by the attribute name their tag maps to.

        Returns a dict ``{attribute_name: [text, ...]}`` with texts kept in
        document order; fields whose tag is not in ``entity_keys`` are skipped.
        """
        collected = {}
        for field in raw_object.getroot():
            attribute = entity_keys.get(field.attrib.get('tag'))
            if attribute is not None:
                collected.setdefault(attribute, []).append(field.text)
        return collected

    def get_entity(self, entity_keys, raw_object):
        """Build the entity described by ``raw_object``.

        Args:
            entity_keys: Mapping from a field's ``tag`` attribute value to the
                entity attribute name it feeds (``'name'``,
                ``'date_of_birth'``, ``'name_in_langs'``, ``'type'``).
            raw_object: Parsed record exposing ``getroot()``; iterating the
                root yields field elements with ``attrib`` and ``text``.

        Returns:
            An ``entities.Person`` for record key ``'100'``, an
            ``entities.Institution`` for ``'110'``, or an
            ``entities.Location`` for ``'151'``. ``name`` and
            ``date_of_birth`` are ``None`` when the record has no such field.

        Raises:
            KeyError: If the record key is not one of the supported values.
        """
        # The module-level helper takes an unused leading placeholder
        # argument, so pass None for it and the record second.
        record_key = get_record_key(None, raw_object)

        if record_key == '110':
            return entities.Institution()

        if record_key not in ('100', '151'):
            raise KeyError(
                'Key {} was not recognized for factory {}'.format(
                    record_key, type(self)))

        collected = self._collect_fields(entity_keys, raw_object)
        names = collected.get('name', [])
        # Only the first name field is used; later ones are ignored.
        name = names[0] if names else None
        name_in_langs = collected.get('name_in_langs', [])

        if record_key == '100':
            dates = collected.get('date_of_birth', [])
            # When several date fields exist, the last one wins.
            date_of_birth = dates[-1] if dates else None
            return entities.Person(name, date_of_birth, name_in_langs)

        location_types = collected.get('type', [])
        return entities.Location(name, location_types, name_in_langs)