From 081eac29a20ab8485f2b8180654a6d4b808e2df7 Mon Sep 17 00:00:00 2001
From: gilad_ilsar
Date: Wed, 21 Sep 2016 23:38:53 +0300
Subject: implemented the factory

---
Review note: the added lines below were amended during review, so they no
longer correspond to the blob hashes recorded on the index line.

 factories/INL_factory.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 77 insertions(+), 7 deletions(-)

(limited to 'factories')

diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index 6607368..b61c487 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -8,16 +8,86 @@ TAG_TO_ENTITY_MAPPING = {
 }
 
 
+# Maps field keys of the form '<tag>.<code>' to the entity attribute they fill.
+ENTITY_KEYS = {
+    '100.a': 'name',
+    '100.d': 'date_of_birth',
+    '400.a': 'name_in_langs',
+    '151.a': 'name',
+    '451.a': 'name_in_langs',
+    '550.a': 'type',
+}
+
+# Record-type tags probed by get_record_key, in priority order.
+RECORD_KEYS = ('100', '151', '110')
+
+
+def get_record_key(record):
+    """Return the record-type key of a parsed record, or None.
+
+    For the first child of ``record.getroot()`` whose ``tag`` attribute
+    contains one of RECORD_KEYS, return that key (tried in RECORD_KEYS order).
+    A child without a ``tag`` attribute never matches.
+    """
+    for field in record.getroot():
+        field_tag = field.attrib.get('tag') or ''
+        for record_key in RECORD_KEYS:
+            if record_key in field_tag:
+                return record_key
+    return None
+
+
 class INLFactory(BasicFactory):
     def __init__(self, tag_to_entity_mapping=None):
         self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING
 
-    def get_entity(self, entity_key, raw_object):
-        if entity_key == '100':
-            return entities.Person('', '', '')
-        elif entity_key == '110':
+    @staticmethod
+    def _collect_fields(entity_keys, raw_object):
+        """Gather the attribute values found in the record's fields.
+
+        Each child of ``raw_object.getroot()`` is looked up in entity_keys
+        by its ``tag`` attribute. The first 'name' wins, the last
+        'date_of_birth' wins, and 'name_in_langs' / 'type' accumulate.
+        Attributes that never occur keep their '' or [] placeholder.
+        """
+        # NOTE(review): ENTITY_KEYS keys look like '100.a' while the lookup
+        # uses the bare ``tag`` attribute - confirm the two formats match.
+        values = {
+            'name': '',
+            'date_of_birth': '',
+            'name_in_langs': [],
+            'type': [],
+        }
+        has_name = False
+        for field in raw_object.getroot():
+            attribute = entity_keys.get(field.attrib.get('tag'))
+            if attribute == 'name':
+                if not has_name:
+                    values['name'] = field.text
+                    has_name = True
+            elif attribute == 'date_of_birth':
+                values['date_of_birth'] = field.text
+            elif attribute in ('name_in_langs', 'type'):
+                values[attribute].append(field.text)
+        return values
+
+    def get_entity(self, entity_keys, raw_object):
+        """Build the entity that matches the record's type.
+
+        entity_keys maps field keys to attribute names (see ENTITY_KEYS);
+        raw_object is a parsed record exposing ``getroot()``.
+        Raises KeyError when the record type is not '100', '110' or '151'.
+        """
+        record_key = get_record_key(raw_object)
+        if record_key == '100':
+            values = self._collect_fields(entity_keys, raw_object)
+            return entities.Person(values['name'], values['date_of_birth'],
+                                   values['name_in_langs'])
+        elif record_key == '110':
             return entities.Institution()
-        elif entity_key == '151':
-            return entities.Location('', '', '')
+        elif record_key == '151':
+            values = self._collect_fields(entity_keys, raw_object)
+            return entities.Location(values['name'], values['type'],
+                                     values['name_in_langs'])
         else:
-            raise KeyError('Key {} was not recognized for factory {}'.format(entity_key, type(self)))
+            raise KeyError('Key {} was not recognized for factory {}'.format(record_key, type(self)))
-- 
cgit v1.2.3