summary | refs | log | tree | commit | diff
path: root/factories/INL_factory.py
diff options
context:
space:
mode:
Diffstat (limited to 'factories/INL_factory.py')
-rw-r--r-- factories/INL_factory.py 73
1 files changed, 56 insertions, 17 deletions
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index b61c487..adc5b1a 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -1,5 +1,6 @@
import entities
from factories import BasicFactory
+import xml.etree.cElementTree as ET
TAG_TO_ENTITY_MAPPING = {
'100': entities.Person,
@@ -10,15 +11,20 @@ TAG_TO_ENTITY_MAPPING = {
ENTITY_KEYS = {
'100.a': 'name',
+ '100.9': 'name_langindic',
'100.d': 'date_of_birth',
'400.a': 'name_in_langs',
+ '400.9': 'langs_langindic',
+ '678.a': 'bio_data',
'151.a': 'name',
+ '151.9': 'name_langindic',
'451:a': 'name_in_langs',
+ '451:9': 'langs_langindic',
'550.a': 'type'
}
-def get_record_key(self, record):
+def get_record_key(record):
root = record.getroot()
for field in root:
field_tag = field.attrib.get('tag')
@@ -33,40 +39,73 @@ class INLFactory(BasicFactory):
def __init__(self, tag_to_entity_mapping=None):
self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING
- def get_entity(self, entity_keys, raw_object):
+ def get_entity(self, raw_object, entity_keys=ENTITY_KEYS):
record_key = get_record_key(raw_object)
if record_key == '100':
- has_name = False
- name_in_langs = []
+ name = ''
+ name_in_langs = dict()
+ bio_data = list()
+ eng_name = ''
+ date_of_birth = ''
+ #get the names and date of birth and bio data
for field in raw_object.getroot():
key = field.attrib.get('tag')
tag = entity_keys.get(key)
- if tag == 'name' and not has_name:
+ if tag == 'name':
name = field.text
- has_name = True
+ elif tag == 'name_langindic':
+ # check if this is the English name
+ if field.text == 'lat':
+ eng_name = name
+ # else add it to name_in_langs
+ else:
+ if field.text in name_in_langs:
+ name_in_langs.get(field.text).append(name)
+ else:
+ name_in_langs.update({field.text: [name]})
elif tag == 'date_of_birth':
date_of_birth = field.text
elif tag == 'name_in_langs':
- name_in_langs.append(field.text)
- return entities.Person(name, date_of_birth, name_in_langs)
+ name_diff = field.text
+ elif tag == 'langs_langindic':
+ if field.text in name_in_langs:
+ name_in_langs.get(field.text).append(name_diff)
+ else:
+ name_in_langs.update({field.text: [name]})
+ elif tag == 'bio_data':
+ bio_data.append(field.text)
+ return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data)
elif record_key == '110':
return entities.Institution()
elif record_key == '151':
- has_name = False
- name_in_langs = []
- type = []
+ name_in_langs = dict()
+ types = []
for field in raw_object.getroot():
key = field.attrib.get('tag')
tag = entity_keys.get(key)
- if tag == 'name' and not has_name:
+ if tag == 'name':
name = field.text
- has_name = True
+ elif tag == 'name_langindic':
+ # check if this is the English name
+ if field.text == 'lat':
+ eng_name = name
+ # else add it to name_in_langs
+ else:
+ if field.text in name_in_langs:
+ name_in_langs.get(field.text).append(name)
+ else:
+ name_in_langs.update({field.text: [name]})
elif tag == 'type':
- type.append(field.text)
+ types.append(field.text)
elif tag == 'name_in_langs':
- name_in_langs.append(field.text)
- return entities.Location(name, type, name_in_langs)
+ name_diff = field.text
+ elif tag == 'langs_langindic':
+ if field.text in name_in_langs:
+ name_in_langs.get(field.text).append(name_diff)
+ else:
+ name_in_langs.update({field.text: [name]})
+ return entities.Location(eng_name, types, name_in_langs)
else:
- raise KeyError('Key {} was not recognized for factory {}'.format(entity_key, type(self)))
+ raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))