"""Build entity objects (Person, Institution, Location) from tagged records.

A record is any object exposing ``getroot()`` whose root iterates over
fields carrying an ``attrib`` mapping (with a ``'tag'`` entry) and a ``text``
value -- presumably an ``xml.etree.ElementTree.ElementTree``; confirm against
the caller.
"""
import entities
from factories import BasicFactory

# Record-type tag -> entity class built for that kind of record.
TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location
}

# Field tag -> name of the entity attribute the field's text feeds.
ENTITY_KEYS = {
    '100.a': 'name',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '151.a': 'name',
    '451.a': 'name_in_langs',
    # NOTE(review): the original table spelled this key with a colon, which
    # looks like a typo for '451.a'. Kept so lookups that relied on the old
    # spelling keep working -- remove once confirmed unused.
    '451:a': 'name_in_langs',
    '550.a': 'type'
}

# Record-type tags, in the order they are tested against each field.
_RECORD_KEYS = ('100', '151', '110')


def get_record_key(self, record):
    """Return the record-type tag ('100', '151' or '110') of ``record``.

    Fields are scanned in document order; the first field whose ``tag``
    attribute contains one of the known record-type tags decides the result.

    Args:
        self: Unused. Kept only so existing two-argument callers keep working.
        record: Object exposing ``getroot()``; the root iterates over fields.

    Returns:
        The matching tag string, or ``None`` when no field matches.
    """
    for field in record.getroot():
        # A field without a 'tag' attribute must not crash the substring test.
        field_tag = field.attrib.get('tag') or ''
        for record_key in _RECORD_KEYS:
            if record_key in field_tag:
                return record_key
    return None


def _collect_field_values(raw_object, entity_keys):
    """Group field texts of ``raw_object`` by the attribute name they map to.

    Returns a dict ``{attribute_name: [text, ...]}`` with texts in document
    order; fields whose tag is not in ``entity_keys`` are skipped.
    """
    values = {}
    for field in raw_object.getroot():
        attribute = entity_keys.get(field.attrib.get('tag'))
        if attribute is not None:
            values.setdefault(attribute, []).append(field.text)
    return values


class INLFactory(BasicFactory):
    """Factory turning a raw record into the matching ``entities`` object."""

    def __init__(self, tag_to_entity_mapping=None):
        """Store the tag-to-entity mapping, defaulting to the module table."""
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    def get_entity(self, entity_keys, raw_object):
        """Build the entity described by ``raw_object``.

        Args:
            entity_keys: Mapping of field tag -> attribute name
                (``'name'``, ``'date_of_birth'``, ``'name_in_langs'``,
                ``'type'``), e.g. ``ENTITY_KEYS``.
            raw_object: Record exposing ``getroot()``.

        Returns:
            ``entities.Person`` for a '100' record, ``entities.Institution``
            for '110', ``entities.Location`` for '151'. A missing name or
            date of birth is passed to the entity as ``None``.

        Raises:
            KeyError: If the record type cannot be determined.
        """
        # get_record_key takes an (unused) leading argument; pass it
        # explicitly -- the one-argument call used to raise TypeError.
        record_key = get_record_key(self, raw_object)

        if record_key == '110':
            return entities.Institution()
        if record_key not in ('100', '151'):
            raise KeyError('Key {} was not recognized for factory {}'.format(record_key, type(self)))

        values = _collect_field_values(raw_object, entity_keys)
        # Only the first name field counts; later ones are ignored.
        names = values.get('name', [])
        name = names[0] if names else None
        name_in_langs = values.get('name_in_langs', [])

        if record_key == '100':
            # When several date fields exist, the last one wins.
            birth_dates = values.get('date_of_birth', [])
            date_of_birth = birth_dates[-1] if birth_dates else None
            return entities.Person(name, date_of_birth, name_in_langs)

        return entities.Location(name, values.get('type', []), name_in_langs)