diff options
author | roy lewin <roy.lewin@gmail.com> | 2016-09-22 01:58:05 +0300 |
---|---|---|
committer | roy lewin <roy.lewin@gmail.com> | 2016-09-22 01:58:05 +0300 |
commit | 733afb1109a06f89af68e8660728f1b4bbbdab1a (patch) | |
tree | ec4c965bea61228c55a7c4c68e1be86446291620 /factories/INL_factory.py | |
parent | 652781137f3856fef98e3063766f9f3b1a984a2e (diff) | |
parent | 081eac29a20ab8485f2b8180654a6d4b808e2df7 (diff) |
Merge branch 'master' of https://bitbucket.org/lib_2_wiki/parser
# Conflicts:
# .idea/misc.xml
# .idea/workspace.xml
# parsers/INL_xml_parser.py
Diffstat (limited to 'factories/INL_factory.py')
-rw-r--r-- | factories/INL_factory.py | 61 |
1 files changed, 55 insertions, 6 deletions
```diff
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index 6607368..b61c487 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -8,16 +8,65 @@ TAG_TO_ENTITY_MAPPING = {
 }
 
 
+ENTITY_KEYS = {
+    '100.a': 'name',
+    '100.d': 'date_of_birth',
+    '400.a': 'name_in_langs',
+    '151.a': 'name',
+    '451:a': 'name_in_langs',
+    '550.a': 'type'
+}
+
+
+def get_record_key(self, record):
+    root = record.getroot()
+    for field in root:
+        field_tag = field.attrib.get('tag')
+        if '100' in field_tag:
+            return '100'
+        if '151' in field_tag:
+            return '151'
+        if '110' in field_tag:
+            return '110'
+
 class INLFactory(BasicFactory):
     def __init__(self, tag_to_entity_mapping=None):
         self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING
 
-    def get_entity(self, entity_key, raw_object):
-        if entity_key == '100':
-            return entities.Person('', '', '')
-        elif entity_key == '110':
+    def get_entity(self, entity_keys, raw_object):
+        record_key = get_record_key(raw_object)
+        if record_key == '100':
+            has_name = False
+            name_in_langs = []
+            for field in raw_object.getroot():
+                key = field.attrib.get('tag')
+                tag = entity_keys.get(key)
+                if tag == 'name' and not has_name:
+                    name = field.text
+                    has_name = True
+                elif tag == 'date_of_birth':
+                    date_of_birth = field.text
+                elif tag == 'name_in_langs':
+                    name_in_langs.append(field.text)
+            return entities.Person(name, date_of_birth, name_in_langs)
+        elif record_key == '110':
             return entities.Institution()
-        elif entity_key == '151':
-            return entities.Location('', '', '')
+        elif record_key == '151':
+            has_name = False
+            name_in_langs = []
+            type = []
+            for field in raw_object.getroot():
+                key = field.attrib.get('tag')
+                tag = entity_keys.get(key)
+                if tag == 'name' and not has_name:
+                    name = field.text
+                    has_name = True
+                elif tag == 'type':
+                    type.append(field.text)
+                elif tag == 'name_in_langs':
+                    name_in_langs.append(field.text)
+            return entities.Location(name, type, name_in_langs)
         else:
             raise KeyError('Key {} was not recognized for factory {}'.format(entity_key, type(self)))
+
+
```