# Reconstructed from the added side of git commit
# d646c9a42273e98c85602f5618598125007bbfaa (Tzafrir Cohen, 2016-09-25,
# "WIP: commit all files that were changed"), file factories/INL_factory.py.
# The removed and added sides of that patch are textually identical.
"""Build ``entities`` objects from ElementTree-style INL records.

A record is anything exposing ``getroot()`` whose child elements carry a
``tag`` attribute (for example ``'100.a'``) and a ``text`` payload.
"""
import entities
from factories import BasicFactory

try:
    import xml.etree.cElementTree as ET
except ImportError:
    # cElementTree was removed in Python 3.9; ElementTree offers the same API.
    import xml.etree.ElementTree as ET

TAG_TO_ENTITY_MAPPING = {
    '100': entities.Person,
    '110': entities.Institution,
    '151': entities.Location,
}


# Maps a field's ``tag`` attribute to the logical role it plays in a record.
ENTITY_KEYS = {
    '100.a': 'name',
    '100.9': 'name_langindic',
    '100.d': 'date_of_birth',
    '400.a': 'name_in_langs',
    '400.9': 'langs_langindic',
    '678.a': 'bio_data',
    '151.a': 'name',
    '151.9': 'name_langindic',
    '451:a': 'name_in_langs',
    '451:9': 'langs_langindic',
    # NOTE(review): every other key separates tag and subfield with a dot, so
    # the two colon keys above look like typos. The dotted spellings are added
    # and the colon ones kept for backward compatibility -- confirm against
    # real record data and drop whichever form never occurs.
    '451.a': 'name_in_langs',
    '451.9': 'langs_langindic',
    '550.a': 'type_of_place',
    '667.a': 'comment',
    '374.a': 'profession',
}

# Record-type markers, in the order they are tested against each field tag.
_RECORD_KEYS = ('100', '151', '110')

# Roles whose values are simply accumulated into a list of the same name.
_LIST_ROLES = ('bio_data', 'comment', 'profession', 'type_of_place')


def get_record_key(record):
    """Return the record-type marker of *record*.

    The fields under ``record.getroot()`` are scanned in order. The first
    field whose ``tag`` attribute contains ``'100'``, ``'151'`` or ``'110'``
    (tested in that order) decides the result.

    Returns:
        ``'100'``, ``'151'`` or ``'110'``, or ``None`` when no field matches.
    """
    for field in record.getroot():
        # A field without a ``tag`` attribute cannot identify the record;
        # treat it as an empty tag instead of failing on ``in None``.
        field_tag = field.attrib.get('tag') or ''
        for record_key in _RECORD_KEYS:
            if record_key in field_tag:
                return record_key
    return None


def _collect_fields(raw_object, entity_keys):
    """Walk the record once and gather every value the entities need.

    Returns a dict with the keys ``eng_name``, ``date_of_birth``,
    ``name_in_langs`` (language indicator -> list of names), ``bio_data``,
    ``comment``, ``profession`` and ``type_of_place``.
    """
    collected = {
        'eng_name': '',
        'date_of_birth': '',
        'name_in_langs': {},
        'bio_data': [],
        'comment': [],
        'profession': [],
        'type_of_place': [],
    }
    names_by_lang = collected['name_in_langs']
    # A language-indicator field refers to the most recent name field seen,
    # so the latest main name and latest variant name are remembered here.
    name = ''
    name_diff = ''

    for field in raw_object.getroot():
        role = entity_keys.get(field.attrib.get('tag'))
        text = field.text
        if role == 'name':
            name = text
        elif role == 'name_langindic':
            if text == 'lat':
                # The 'lat' indicator marks the name used as the English name.
                collected['eng_name'] = name
            else:
                names_by_lang.setdefault(text, []).append(name)
        elif role == 'date_of_birth':
            collected['date_of_birth'] = text
        elif role == 'name_in_langs':
            name_diff = text
        elif role == 'langs_langindic':
            names_by_lang.setdefault(text, []).append(name_diff)
        elif role in _LIST_ROLES:
            collected[role].append(text)
    return collected


class INLFactory(BasicFactory):
    """Factory producing Person / Institution / Location entities."""

    def __init__(self, tag_to_entity_mapping=None):
        """Store the tag-to-entity mapping, defaulting to the module table."""
        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING

    def get_entity(self, raw_object, entity_keys=ENTITY_KEYS):
        """Build the entity described by *raw_object*.

        Args:
            raw_object: record exposing ``getroot()``; see ``get_record_key``.
            entity_keys: mapping from field tag to logical role. It is only
                read, never modified.

        Returns:
            ``entities.Person`` for a ``'100'`` record, ``entities.Location``
            for ``'151'``, an empty ``entities.Institution`` for ``'110'``,
            and ``None`` when the record type is not recognised (no exception
            is raised in that case).
        """
        record_key = get_record_key(raw_object)

        # 110 is an institution; no fields are read for it.
        if record_key == '110':
            return entities.Institution()
        if record_key not in ('100', '151'):
            return None

        data = _collect_fields(raw_object, entity_keys)

        # 100 is a person.
        if record_key == '100':
            return entities.Person(
                data['eng_name'],
                data['date_of_birth'],
                data['name_in_langs'],
                data['bio_data'],
                data['comment'],
                data['profession'],
            )

        # 151 is a location.
        return entities.Location(
            data['eng_name'],
            data['type_of_place'],
            data['name_in_langs'],
            data['comment'],
        )