From e34be2e06f88032824beaec5173419c60602591f Mon Sep 17 00:00:00 2001 From: gilad_ilsar Date: Thu, 22 Sep 2016 11:51:49 +0300 Subject: tester and person entity --- factories/INL_factory.py | 73 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 17 deletions(-) (limited to 'factories/INL_factory.py') diff --git a/factories/INL_factory.py b/factories/INL_factory.py index b61c487..adc5b1a 100644 --- a/factories/INL_factory.py +++ b/factories/INL_factory.py @@ -1,5 +1,6 @@ import entities from factories import BasicFactory +import xml.etree.cElementTree as ET TAG_TO_ENTITY_MAPPING = { '100': entities.Person, @@ -10,15 +11,20 @@ TAG_TO_ENTITY_MAPPING = { ENTITY_KEYS = { '100.a': 'name', + '100.9': 'name_langindic', '100.d': 'date_of_birth', '400.a': 'name_in_langs', + '400.9': 'langs_langindic', + '678.a': 'bio_data', '151.a': 'name', + '151.9': 'name_langindic', '451:a': 'name_in_langs', + '451:9': 'langs_langindic', '550.a': 'type' } -def get_record_key(self, record): +def get_record_key(record): root = record.getroot() for field in root: field_tag = field.attrib.get('tag') @@ -33,40 +39,73 @@ class INLFactory(BasicFactory): def __init__(self, tag_to_entity_mapping=None): self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING - def get_entity(self, entity_keys, raw_object): + def get_entity(self, raw_object, entity_keys=ENTITY_KEYS): record_key = get_record_key(raw_object) if record_key == '100': - has_name = False - name_in_langs = [] + name = '' + name_in_langs = dict() + bio_data = list() + eng_name = '' + date_of_birth = '' + #get the names and date of birth and bio data for field in raw_object.getroot(): key = field.attrib.get('tag') tag = entity_keys.get(key) - if tag == 'name' and not has_name: + if tag == 'name': name = field.text - has_name = True + elif tag == 'name_langindic': + # chack if this english name + if field.text == 'lat': + eng_name = name + # else add it to name_in_langs + else: + if field.text in name_in_langs: + name_in_langs.get(field.text).append(name) + else: + name_in_langs.update({field.text: [name]}) elif tag == 'date_of_birth': date_of_birth = field.text elif tag == 'name_in_langs': - name_in_langs.append(field.text) - return entities.Person(name, date_of_birth, name_in_langs) + name_diff = field.text + elif tag == 'langs_langindic': + if field.text in name_in_langs: + name_in_langs.get(field.text).append(name_diff) + else: + name_in_langs.update({field.text: [name]}) + elif tag == 'bio_data': + bio_data.append(field.text) + return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data) elif record_key == '110': return entities.Institution() elif record_key == '151': - has_name = False - name_in_langs = [] - type = [] + name_in_langs = dict() + types = [] for field in raw_object.getroot(): key = field.attrib.get('tag') tag = entity_keys.get(key) - if tag == 'name' and not has_name: + if tag == 'name': name = field.text - has_name = True + elif tag == 'name_langindic': + # chack if this english name + if field.text == 'lat': + eng_name = name + # else add it to name_in_langs + else: + if field.text in name_in_langs: + name_in_langs.get(field.text).append(name) + else: + name_in_langs.update({field.text: [name]}) elif tag == 'type': - type.append(field.text) + types.append(field.text) elif tag == 'name_in_langs': - name_in_langs.append(field.text) - return entities.Location(name, type, name_in_langs) + name_diff = field.text + elif tag == 'langs_langindic': + if field.text in name_in_langs: + name_in_langs.get(field.text).append(name_diff) + else: + name_in_langs.update({field.text: [name]}) + return entities.Location(eng_name, types, name_in_langs) else: - raise KeyError('Key {} was not recognized for factory {}'.format(entity_key, type(self))) + raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self))) -- cgit v1.2.3