From d646c9a42273e98c85602f5618598125007bbfaa Mon Sep 17 00:00:00 2001
From: Tzafrir Cohen
Date: Sun, 25 Sep 2016 20:28:16 +0300
Subject: WIP: commit all files that were changed

---
 .gitignore                 | 274 ++++++++++++++++++++++-----------------------
 .idea/misc.xml             |  10 ++
 entities/__init__.py       |   4 +-
 entities/basic_entity.py   |  10 +-
 entities/institution.py    |  12 +-
 entities/location.py       |  50 ++++-----
 entities/person.py         | 152 ++++++++++++-------------
 factories/INL_factory.py   | 260 +++++++++++++++++++++---------------------
 factories/__init__.py      |   2 +-
 factories/basic_factory.py |   6 +-
 libs/json_tools.py         |  18 +--
 parsers/INL_xml_parser.py  |  72 ++++++------
 parsers/__init__.py        |   2 +-
 parsers/basic_parser.py    |  12 +-
 readers/xml_reader.py      | 122 ++++++++++----------
 testers/factorytester.py   |  42 +++----
 writers/wd_writer.py       |  13 ++-
 17 files changed, 536 insertions(+), 525 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7ebdd82..7c59bc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,138 +1,138 @@
-# Created by .ignore support plugin (hsz.mobi)
-### JetBrains template
-# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
-# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
-
-# User-specific stuff:
-.idea/workspace.xml
-.idea/tasks.xml
-.idea/dictionaries
-.idea/vcs.xml
-.idea/jsLibraryMappings.xml
-
-# Sensitive or high-churn files:
-.idea/dataSources.ids
-.idea/dataSources.xml
-.idea/dataSources.local.xml
-.idea/sqlDataSources.xml
-.idea/dynamic.xml
-.idea/uiDesigner.xml
-
-# Gradle:
-.idea/gradle.xml
-.idea/libraries
-
-# Mongo Explorer plugin:
-.idea/mongoSettings.xml
-
-## File-based project format:
-*.iws
-
-## Plugin-specific files:
-
-# IntelliJ
-/out/
-
-# mpeltonen/sbt-idea plugin
-.idea_modules/
-
-# JIRA plugin
-atlassian-ide-plugin.xml
-
-# Crashlytics plugin (for Android Studio and IntelliJ)
-com_crashlytics_export_strings.xml
-crashlytics.properties
-crashlytics-build.properties
-fabric.properties
-### Python template
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*,cover
-.hypothesis/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# IPython Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# dotenv
-.env
-
-# virtualenv
-venv/
-ENV/
-
-# Spyder project settings
-.spyderproject
-
-# Rope project settings
-.ropeproject
-
+# Created by .ignore support plugin (hsz.mobi)
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+.idea/workspace.xml
+.idea/tasks.xml
+.idea/dictionaries
+.idea/vcs.xml
+.idea/jsLibraryMappings.xml
+
+# Sensitive or high-churn files:
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/dataSources.local.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+
+# Gradle:
+.idea/gradle.xml
+.idea/libraries
+
+# Mongo Explorer plugin:
+.idea/mongoSettings.xml
+
+## File-based project format:
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+.out/*
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index d3cc99c..84919a4 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,14 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/entities/__init__.py b/entities/__init__.py
index 907ef4d..701846e 100644
--- a/entities/__init__.py
+++ b/entities/__init__.py
@@ -1,3 +1,3 @@
-from entities.person import Person
-from entities.institution import Institution
+from entities.person import Person
+from entities.institution import Institution
 from entities.location import Location
\ No newline at end of file
diff --git a/entities/basic_entity.py b/entities/basic_entity.py
index 9181422..9e8f11b 100644
--- a/entities/basic_entity.py
+++ b/entities/basic_entity.py
@@ -1,5 +1,5 @@
-from libs import JsonSerializable
-
-
-class BasicEntity(JsonSerializable):
-    pass
+from libs import JsonSerializable
+
+
+class BasicEntity(JsonSerializable):
+    pass
diff --git a/entities/institution.py b/entities/institution.py
index 4538207..6be86fc 100644
--- a/entities/institution.py
+++ b/entities/institution.py
@@ -1,6 +1,6 @@
-from entities.basic_entity import BasicEntity
-
-
-class Institution(BasicEntity):
-    def __init__(self):
-        raise NotImplementedError()
+from entities.basic_entity import BasicEntity
+
+
+class Institution(BasicEntity):
+    def __init__(self):
+        raise NotImplementedError()
diff --git a/entities/location.py b/entities/location.py
index a43eb8d..f782e1f 100644
--- a/entities/location.py
+++ b/entities/location.py
@@ -1,25 +1,25 @@
-import json
-
-from entities.basic_entity import BasicEntity
-
-
-class Location(BasicEntity):
-    def __init__(self, name, types_of_place, name_in_langs, comments_list):
-        self.name = name
-        self.types_of_place = types_of_place
-        self.name_in_langs = name_in_langs
-        self.comments_list = comments_list
-
-    CSV_FIELDS = ["name", "comments"]
-    TYPE = "LOCATION"
-
-
-    def print_entity(self):
-        print("Name = " + self.name)
-        print("Name in langs = " + str(self.name_in_langs))
-        print("Types = " + str(self.types_of_place))
-        print("Comments = " + str(self.comments_list))
-
-    def to_csv_dict(self):
-        return {'name': self.name,
-                'comments': json.dumps(self.comments_list, ensure_ascii=False)}
+import json
+
+from entities.basic_entity import BasicEntity
+
+
+class Location(BasicEntity):
+    def __init__(self, name, types_of_place, name_in_langs, comments_list):
+        self.name = name
+        self.types_of_place = types_of_place
+        self.name_in_langs = name_in_langs
+        self.comments_list = comments_list
+
+    CSV_FIELDS = ["name", "comments"]
+    TYPE = "LOCATION"
+
+
+    def print_entity(self):
+        print("Name = " + self.name)
+        print("Name in langs = " + str(self.name_in_langs))
+        print("Types = " + str(self.types_of_place))
+        print("Comments = " + str(self.comments_list))
+
+    def to_csv_dict(self):
+        return {'name': self.name,
+                'comments': json.dumps(self.comments_list, ensure_ascii=False)}
diff --git a/entities/person.py b/entities/person.py
index b315aac..a5aa396 100644
--- a/entities/person.py
+++ b/entities/person.py
@@ -1,76 +1,76 @@
-import json
-
-from entities.basic_entity import BasicEntity
-
-
-class Person(BasicEntity):
-    def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list, profession):
-        """
-
-        :param name:
-        :param date_of_birth:
-        :param name_in_langs: Mapping of the persons's name in various languages, as a dictionary. For example:
-        {
-            "latin": "George"
-            "heb": "[george in hebrew]"
-        }
-        """
-        self.name = name
-        years_parts = date_of_birth.split('-')
-        if (len(years_parts) == 2):
-            self.birth_year = years_parts[0]
-            self.death_year = years_parts[1]
-        else:
-            self.birth_year = date_of_birth.strip()
-            self.death_year = ''
-        self.name_in_langs = name_in_langs
-        '''
-        place_of_birth = list()
-        place_of_death = list()
-        profession = list()
-        for comment in bio_data:
-            encoded_comment = ''.join(comment).strip()
-            if encoded_comment.startswith(u"מקום לידה: "):
-                place_of_birth.append(encoded_comment.partition(u"מקום לידה: ")[2])
-            if encoded_comment.startswith(u"מקום פטירה: "):
-                place_of_death.append(encoded_comment.partition(u"מקום פטירה: ")[2])
-            if encoded_comment.startswith(u"מקצוע: "):
-                profession.append(encoded_comment.partition(u"מקום פטירה: ")[2])
-
-        self.place_of_birth = place_of_birth
-        self.place_of_death = place_of_death
-        self.profession = profession
-        '''
-        bio_data_dict = dict()
-        for elem in bio_data:
-            elem_splitted = elem.split(":")
-            if len(elem_splitted) == 2:
-                bio_data_key = elem_splitted[0]
-                bio_data_value = elem_splitted[1]
-                if bio_data_key in bio_data_dict:
-                    bio_data_dict.get(bio_data_key).append(bio_data_value)
-                else:
-                    bio_data_dict.update(
-                        {bio_data_key: [bio_data_value]}
-                    )
-            else:
-                bio_data_dict.update({elem: ''})
-        self.bio_data = bio_data_dict
-        self.comments_list = comments_list
-        self.profession = profession
-
-    CSV_FIELDS = ["name", "biodata", "comments"]
-    TYPE = 'PERSON'
-
-    def print_entity(self):
-        print("Name = " + self.name)
-        print("Birth year = " + self.birth_year)
-        print("Death year = " + self.death_year)
-        print("Names in langs = " + str(self.name_in_langs))
-        print("Bio Data = " + str(self.bio_data))
-        print("Comments = " + str(self.comments_list))
-        print("Profession = " + str(self.profession))
-
-    def to_csv_dict(self):
-        return {'name': self.name, 'biodata': self.bio_data,
-                'comments': json.dumps(self.comments_list, ensure_ascii=False)}
+import json
+
+from entities.basic_entity import BasicEntity
+
+
+class Person(BasicEntity):
+    def __init__(self, name, date_of_birth, name_in_langs, bio_data, comments_list, profession):
+        """
+
+        :param name: the person's primary name, as it appears in the record
+        :param date_of_birth: either a 'birth-death' year range or a single year string
+        :param name_in_langs: Mapping of the person's name in various languages, as a dictionary. For example:
+        {
+            "latin": "George",
+            "heb": "[george in hebrew]"
+        }
+        """
+        self.name = name
+        years_parts = date_of_birth.split('-')
+        if len(years_parts) == 2:
+            self.birth_year = years_parts[0]
+            self.death_year = years_parts[1]
+        else:
+            self.birth_year = date_of_birth.strip()
+            self.death_year = ''
+        self.name_in_langs = name_in_langs
+        '''
+        place_of_birth = list()
+        place_of_death = list()
+        profession = list()
+        for comment in bio_data:
+            encoded_comment = ''.join(comment).strip()
+            if encoded_comment.startswith(u"מקום לידה: "):
+                place_of_birth.append(encoded_comment.partition(u"מקום לידה: ")[2])
+            if encoded_comment.startswith(u"מקום פטירה: "):
+                place_of_death.append(encoded_comment.partition(u"מקום פטירה: ")[2])
+            if encoded_comment.startswith(u"מקצוע: "):
+                profession.append(encoded_comment.partition(u"מקצוע: ")[2])
+
+        self.place_of_birth = place_of_birth
+        self.place_of_death = place_of_death
+        self.profession = profession
+        '''
+        bio_data_dict = dict()
+        for elem in bio_data:
+            elem_splitted = elem.split(":")
+            if len(elem_splitted) == 2:
+                bio_data_key = elem_splitted[0]
+                bio_data_value = elem_splitted[1]
+                if bio_data_key in bio_data_dict:
+                    bio_data_dict.get(bio_data_key).append(bio_data_value)
+                else:
+                    bio_data_dict.update(
+                        {bio_data_key: [bio_data_value]}
+                    )
+            else:
+                bio_data_dict.update({elem: ''})
+        self.bio_data = bio_data_dict
+        self.comments_list = comments_list
+        self.profession = profession
+
+    CSV_FIELDS = ["name", "biodata", "comments"]
+    TYPE = 'PERSON'
+
+    def print_entity(self):
+        print("Name = " + self.name)
+        print("Birth year = " + self.birth_year)
+        print("Death year = " + self.death_year)
+        print("Names in langs = " + str(self.name_in_langs))
+        print("Bio Data = " + str(self.bio_data))
+        print("Comments = " + str(self.comments_list))
+        print("Profession = " + str(self.profession))
+
+    def to_csv_dict(self):
+        return {'name': self.name, 'biodata': self.bio_data,
+                'comments': json.dumps(self.comments_list, ensure_ascii=False)}
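
A minimal sketch of what the Person constructor above does with its arguments (all
values invented; note that split(':') keeps the space after the colon in bio_data
values):

    from entities import Person

    p = Person(name="George",
               date_of_birth="1880-1950",
               name_in_langs={"heb": ["..."]},
               bio_data=["profession: writer", "free-form note"],
               comments_list=[],
               profession=["writer"])
    # p.birth_year == "1880", p.death_year == "1950"
    # p.bio_data == {"profession": [" writer"], "free-form note": ""}
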
diff --git a/factories/INL_factory.py b/factories/INL_factory.py
index f4e494f..286762a 100644
--- a/factories/INL_factory.py
+++ b/factories/INL_factory.py
@@ -1,130 +1,130 @@
-import entities
-from factories import BasicFactory
-import xml.etree.cElementTree as ET
-
-TAG_TO_ENTITY_MAPPING = {
-    '100': entities.Person,
-    '110': entities.Institution,
-    '151': entities.Location
-}
-
-
-ENTITY_KEYS = {
-    '100.a': 'name',
-    '100.9': 'name_langindic',
-    '100.d': 'date_of_birth',
-    '400.a': 'name_in_langs',
-    '400.9': 'langs_langindic',
-    '678.a': 'bio_data',
-    '151.a': 'name',
-    '151.9': 'name_langindic',
-    '451:a': 'name_in_langs',
-    '451:9': 'langs_langindic',
-    '550.a': 'type_of_place',
-    '667.a': 'comment',
-    '374.a': 'profession'
-}
-
-
-def get_record_key(record):
-    root = record.getroot()
-    for field in root:
-        field_tag = field.attrib.get('tag')
-        if '100' in field_tag:
-            return '100'
-        if '151' in field_tag:
-            return '151'
-        if '110' in field_tag:
-            return '110'
-
-class INLFactory(BasicFactory):
-    def __init__(self, tag_to_entity_mapping=None):
-        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING
-
-    def get_entity(self, raw_object, entity_keys=ENTITY_KEYS):
-        record_key = get_record_key(raw_object)
-        #100 is person
-        if record_key == '100':
-            name = ''
-            name_in_langs = dict()
-            bio_data = list()
-            comment_list = list()
-            eng_name = ''
-            date_of_birth = ''
-            profession = list()
-            name_diff = ''
-            #get the names and date of birth and bio data
-            for field in raw_object.getroot():
-                key = field.attrib.get('tag')
-                tag = entity_keys.get(key)
-                if tag == 'name':
-                    name = field.text
-                elif tag == 'name_langindic':
-                    # chack if this english name
-                    if field.text == 'lat':
-                        eng_name = name
-                    # else add it to name_in_langs
-                    else:
-                        if field.text in name_in_langs:
-                            name_in_langs.get(field.text).append(name)
-                        else:
-                            name_in_langs.update({field.text: [name]})
-                elif tag == 'date_of_birth':
-                    date_of_birth = field.text
-                elif tag == 'name_in_langs':
-                    name_diff = field.text
-                elif tag == 'langs_langindic':
-                    if field.text in name_in_langs:
-                        name_in_langs.get(field.text).append(name_diff)
-                    else:
-                        name_in_langs.update({field.text: [name_diff]})
-                elif tag == 'bio_data':
-                    bio_data.append(field.text)
-                elif tag == 'comment':
-                    comment_list.append(field.text)
-                elif tag == 'profession':
-                    profession.append(field.text)
-            return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list, profession)
-        #110 is institue
-        elif record_key == '110':
-            return entities.Institution()
-        #151 is location
-        elif record_key == '151':
-            name_in_langs = dict()
-            types_of_place = list()
-            comment_list = list()
-            eng_name = ''
-            name_diff = ''
-
-            for field in raw_object.getroot():
-                key = field.attrib.get('tag')
-                tag = entity_keys.get(key)
-                if tag == 'name':
-                    name = field.text
-                elif tag == 'name_langindic':
-                    # chack if this english name
-                    if field.text == 'lat':
-                        eng_name = name
-                    # else add it to name_in_langs
-                    else:
-                        if field.text in name_in_langs:
-                            name_in_langs.get(field.text).append(name)
-                        else:
-                            name_in_langs.update({field.text: [name]})
-                elif tag == 'type_of_place':
-                    types_of_place.append(field.text)
-                elif tag == 'name_in_langs':
-                    name_diff = field.text
-                elif tag == 'langs_langindic':
-                    if field.text in name_in_langs:
-                        name_in_langs.get(field.text).append(name_diff)
-                    else:
-                        name_in_langs.update({field.text: [name_diff]})
-                elif tag == 'comment':
-                    comment_list.append(field.text)
-            return entities.Location(eng_name, types_of_place , name_in_langs, comment_list)
-        else:
-            return None
-            # raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))
-
-
+import entities
+from factories import BasicFactory
+import xml.etree.cElementTree as ET
+
+TAG_TO_ENTITY_MAPPING = {
+    '100': entities.Person,
+    '110': entities.Institution,
+    '151': entities.Location
+}
+
+
+ENTITY_KEYS = {
+    '100.a': 'name',
+    '100.9': 'name_langindic',
+    '100.d': 'date_of_birth',
+    '400.a': 'name_in_langs',
+    '400.9': 'langs_langindic',
+    '678.a': 'bio_data',
+    '151.a': 'name',
+    '151.9': 'name_langindic',
+    '451.a': 'name_in_langs',
+    '451.9': 'langs_langindic',
+    '550.a': 'type_of_place',
+    '667.a': 'comment',
+    '374.a': 'profession'
+}
+
+
+def get_record_key(record):
+    root = record.getroot()
+    for field in root:
+        field_tag = field.attrib.get('tag')
+        if '100' in field_tag:
+            return '100'
+        if '151' in field_tag:
+            return '151'
+        if '110' in field_tag:
+            return '110'
+
+class INLFactory(BasicFactory):
+    def __init__(self, tag_to_entity_mapping=None):
+        self.mapping = tag_to_entity_mapping or TAG_TO_ENTITY_MAPPING
+
+    def get_entity(self, raw_object, entity_keys=ENTITY_KEYS):
+        record_key = get_record_key(raw_object)
+        # 100 is a person
+        if record_key == '100':
+            name = ''
+            name_in_langs = dict()
+            bio_data = list()
+            comment_list = list()
+            eng_name = ''
+            date_of_birth = ''
+            profession = list()
+            name_diff = ''
+            # get the names, date of birth and bio data
+            for field in raw_object.getroot():
+                key = field.attrib.get('tag')
+                tag = entity_keys.get(key)
+                if tag == 'name':
+                    name = field.text
+                elif tag == 'name_langindic':
+                    # check if this is the English name
+                    if field.text == 'lat':
+                        eng_name = name
+                    # else add it to name_in_langs
+                    else:
+                        if field.text in name_in_langs:
+                            name_in_langs.get(field.text).append(name)
+                        else:
+                            name_in_langs.update({field.text: [name]})
+                elif tag == 'date_of_birth':
+                    date_of_birth = field.text
+                elif tag == 'name_in_langs':
+                    name_diff = field.text
+                elif tag == 'langs_langindic':
+                    if field.text in name_in_langs:
+                        name_in_langs.get(field.text).append(name_diff)
+                    else:
+                        name_in_langs.update({field.text: [name_diff]})
+                elif tag == 'bio_data':
+                    bio_data.append(field.text)
+                elif tag == 'comment':
+                    comment_list.append(field.text)
+                elif tag == 'profession':
+                    profession.append(field.text)
+            return entities.Person(eng_name, date_of_birth, name_in_langs, bio_data, comment_list, profession)
+        # 110 is an institution
+        elif record_key == '110':
+            return entities.Institution()
+        # 151 is a location
+        elif record_key == '151':
+            name_in_langs = dict()
+            types_of_place = list()
+            comment_list = list()
+            eng_name = ''
+            name_diff = ''
+
+            for field in raw_object.getroot():
+                key = field.attrib.get('tag')
+                tag = entity_keys.get(key)
+                if tag == 'name':
+                    name = field.text
+                elif tag == 'name_langindic':
+                    # check if this is the English name
+                    if field.text == 'lat':
+                        eng_name = name
+                    # else add it to name_in_langs
+                    else:
+                        if field.text in name_in_langs:
+                            name_in_langs.get(field.text).append(name)
+                        else:
+                            name_in_langs.update({field.text: [name]})
+                elif tag == 'type_of_place':
+                    types_of_place.append(field.text)
+                elif tag == 'name_in_langs':
+                    name_diff = field.text
+                elif tag == 'langs_langindic':
+                    if field.text in name_in_langs:
+                        name_in_langs.get(field.text).append(name_diff)
+                    else:
+                        name_in_langs.update({field.text: [name_diff]})
+                elif tag == 'comment':
+                    comment_list.append(field.text)
+            return entities.Location(eng_name, types_of_place, name_in_langs, comment_list)
+        else:
+            return None
+            # raise KeyError('Key {} was not recognized for factory {}'.format(entity_keys, type(self)))
+
+
diff --git a/factories/__init__.py b/factories/__init__.py
index 86901f5..947845c 100644
--- a/factories/__init__.py
+++ b/factories/__init__.py
@@ -1,2 +1,2 @@
-from factories.basic_factory import BasicFactory
+from factories.basic_factory import BasicFactory
 from factories.INL_factory import INLFactory
\ No newline at end of file
diff --git a/factories/basic_factory.py b/factories/basic_factory.py
index 1715846..1974d65 100644
--- a/factories/basic_factory.py
+++ b/factories/basic_factory.py
@@ -1,3 +1,3 @@
-class BasicFactory(object):
-    def get_entity(self, entity_key, raw_object):
-        raise NotImplementedError("get_entity() method must be implemented class {}".format(type(self)))
+class BasicFactory(object):
+    def get_entity(self, entity_key, raw_object):
+        raise NotImplementedError("get_entity() method must be implemented in class {}".format(type(self)))
diff --git a/libs/json_tools.py b/libs/json_tools.py
index 5e78d23..9ce19b0 100644
--- a/libs/json_tools.py
+++ b/libs/json_tools.py
@@ -1,9 +1,9 @@
-import json
-
-
-class JsonSerializable(object):
-    def __repr__(self):
-        return str(self.to_json())
-
-    def to_json(self):
-        return json.dumps(self.__dict__, ensure_ascii=False)
+import json
+
+
+class JsonSerializable(object):
+    def __repr__(self):
+        return str(self.to_json())
+
+    def to_json(self):
+        return json.dumps(self.__dict__, ensure_ascii=False)
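
For reference, a sketch of the INLFactory contract: it expects a record whose
datafields already carry the flattened 'field.subfield' tags produced by
INLXmlParser.clearxml() (the record content below is invented):

    import xml.etree.cElementTree as ET
    from factories import INLFactory

    record = ET.Element('record')
    for tag, text in [('100.a', 'Some Name'), ('100.9', 'lat'), ('100.d', '1880-1950')]:
        ET.SubElement(record, 'datafield', {'tag': tag}).text = text

    entity = INLFactory().get_entity(ET.ElementTree(record))
    # Tag 100 marks a person record, so entity is an entities.Person whose
    # name is 'Some Name', with birth_year '1880' and death_year '1950'.
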
diff --git a/parsers/INL_xml_parser.py b/parsers/INL_xml_parser.py
index 1a06f6b..cdde5a8 100644
--- a/parsers/INL_xml_parser.py
+++ b/parsers/INL_xml_parser.py
@@ -1,36 +1,36 @@
-try:
-    import xml.etree.cElementTree as ET
-except ImportError:
-    import xml.etree.ElementTree as ET
-
-KNOWN_FIELD_TAGS = ['100', '110', '151']
-
-TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
-
-class INLXmlParser:
-    def __init__(self, reader, whitelist=TAG_WHITELIST):
-        self.reader = reader
-        #self.whitelist = whitelist or KNOWN_FIELD_TAGS
-        self.whitelist = whitelist
-
-    def clearxml(self):
-
-        # # scan the datafields in the records and copy to the new one only the tags in the whitelist
-        # for record in root: # create new record
-        newRecord = ET.Element('record')
-        for field in self.reader:
-            fieldtag = field.attrib.get('tag')
-            if fieldtag in self.whitelist:
-                temptag = fieldtag
-                # tag 700 and 400 are the same
-                if temptag == '700':
-                    temptag = '400'
-                for data in field:
-                    newFieldTag = temptag
-                    newFieldTag += '.'
-                    newFieldTag += data.attrib.get('code')
-                    newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
-                    newTag.text = data.text
-
-        newRecordTree = ET.ElementTree(newRecord)
-        return ET.ElementTree(newRecord)
+try:
+    import xml.etree.cElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+KNOWN_FIELD_TAGS = ['100', '110', '151']
+
+TAG_WHITELIST = ['100', '400', '700', '678', '667', '151', '550', '451', '374']
+
+class INLXmlParser:
+    def __init__(self, reader, whitelist=TAG_WHITELIST):
+        self.reader = reader
+        #self.whitelist = whitelist or KNOWN_FIELD_TAGS
+        self.whitelist = whitelist
+
+    def clearxml(self):
+
+        # scan the datafields in the record and copy to the new one only the tags in the whitelist
+        # each kept subfield becomes a new datafield tagged '<field>.<subfield code>'
+        newRecord = ET.Element('record')
+        for field in self.reader:
+            fieldtag = field.attrib.get('tag')
+            if fieldtag in self.whitelist:
+                temptag = fieldtag
+                # tag 700 and 400 are the same
+                if temptag == '700':
+                    temptag = '400'
+                for data in field:
+                    newFieldTag = temptag
+                    newFieldTag += '.'
+                    newFieldTag += data.attrib.get('code')
+                    newTag = ET.SubElement(newRecord, 'datafield', {'tag': newFieldTag})
+                    newTag.text = data.text
+
+        newRecordTree = ET.ElementTree(newRecord)
+        return newRecordTree
diff --git a/parsers/__init__.py b/parsers/__init__.py
index d32c917..07907f9 100644
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -1,2 +1,2 @@
-
+
 from .INL_xml_parser import INLXmlParser
\ No newline at end of file
diff --git a/parsers/basic_parser.py b/parsers/basic_parser.py
index dae19cb..32c1b43 100644
--- a/parsers/basic_parser.py
+++ b/parsers/basic_parser.py
@@ -1,6 +1,6 @@
-class BasicParser(object):
-    def __init__(self):
-        pass
-
-    def parse(self, data):
-        raise NotImplementedError("parse() method must be implemented class {}".format(type(self)))
+class BasicParser(object):
+    def __init__(self):
+        pass
+
+    def parse(self, data):
+        raise NotImplementedError("parse() method must be implemented in class {}".format(type(self)))
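
To see what clearxml() produces, a sketch with an invented two-field record
(tag 100 is whitelisted, tag 999 is not):

    import xml.etree.cElementTree as ET
    from parsers import INLXmlParser

    record = ET.fromstring(
        '<record>'
        '<datafield tag="100"><subfield code="a">Some Name</subfield></datafield>'
        '<datafield tag="999"><subfield code="a">dropped</subfield></datafield>'
        '</record>')
    clean = INLXmlParser(record).clearxml()
    # clean.getroot() holds a single flattened field:
    # <datafield tag="100.a">Some Name</datafield>
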
diff --git a/readers/xml_reader.py b/readers/xml_reader.py
index 5b2d1fd..710899d 100644
--- a/readers/xml_reader.py
+++ b/readers/xml_reader.py
@@ -1,61 +1,61 @@
-# from __future__ import absolute_import
-import json
-import csv
-import parsers, factories
-from entities import Person
-
-try:
-    import xml.etree.cElementTree as ET
-except ImportError:
-    import xml.etree.ElementTree as ET
-
-def read_file(path, element_key):
-    # get an iterable
-    record_counter = 0
-    context = ET.iterparse(path, events=("start", "end"))
-
-    # turn it into an iterator
-    context = iter(context)
-
-    # get the root element
-    event, root = context.__next__()
-
-    # the factory
-    inl_factory = factories.INLFactory()
-    files = {}
-    for event, element in context:
-        if 'end' in event:
-            if element_key in element.tag:
-                # enter the processing here
-                record_counter += 1
-
-                #cleaned element is a tree
-                inl_parser = parsers.INLXmlParser(element)
-                cleaned_element = inl_parser.clearxml()
-                entity = inl_factory.get_entity(cleaned_element)
-
-                # test print the entity
-                if entity != None:
-                    if entity.TYPE not in files:
-                        files[entity.TYPE] = open("../out/{}.csv".format(entity.TYPE), 'w+', encoding='utf8')
-                    json_entity = entity.to_json()
-                    print(json_entity)
-                    writer = csv.DictWriter(files[entity.TYPE], entity.CSV_FIELDS)
-                    writer.writerow(entity.to_csv_dict())
-                    # json.dump(entity.comments_list, f667, indent=2, ensure_ascii=False)
-                    # json.dump(entity.bio_data, f678, indent=2, ensure_ascii=False)
-
-                # entity.print_entity()
-
-                # TODO analys and upload the entity
-
-
-                # import pdb; pdb.set_trace()
-                print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@',
-                      cleaned_element.getroot().text)
-                element.clear()
-    print(record_counter)
-
-
-if __name__ == '__main__':
-    read_file(r"../../NLI-nnl10.xml", 'record')
+# from __future__ import absolute_import
+import json
+import csv
+import parsers, factories
+from entities import Person
+
+try:
+    import xml.etree.cElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+def read_file(path, element_key):
+    # get an iterable
+    record_counter = 0
+    context = ET.iterparse(path, events=("start", "end"))
+
+    # turn it into an iterator
+    context = iter(context)
+
+    # get the root element
+    event, root = context.__next__()
+
+    # the factory
+    inl_factory = factories.INLFactory()
+    files = {}
+    for event, element in context:
+        if 'end' in event:
+            if element_key in element.tag:
+                # enter the processing here
+                record_counter += 1
+
+                # cleaned element is a tree
+                inl_parser = parsers.INLXmlParser(element)
+                cleaned_element = inl_parser.clearxml()
+                entity = inl_factory.get_entity(cleaned_element)
+
+                # test print the entity
+                if entity is not None:
+                    if entity.TYPE not in files:
+                        files[entity.TYPE] = open("../out/{}.csv".format(entity.TYPE), 'w+', encoding='utf8')
+                    json_entity = entity.to_json()
+                    print(json_entity)
+                    writer = csv.DictWriter(files[entity.TYPE], entity.CSV_FIELDS)
+                    writer.writerow(entity.to_csv_dict())
+                    # json.dump(entity.comments_list, f667, indent=2, ensure_ascii=False)
+                    # json.dump(entity.bio_data, f678, indent=2, ensure_ascii=False)
+
+                # entity.print_entity()
+
+                # TODO: analyze and upload the entity
+
+
+                # import pdb; pdb.set_trace()
+                print(record_counter, cleaned_element.getroot().tag, '@@@', cleaned_element.getroot().attrib, '@@@',
+                      cleaned_element.getroot().text)
+                element.clear()
+    print(record_counter)
+
+
+if __name__ == '__main__':
+    read_file(r"../../NLI-nnl10.xml", 'record')
diff --git a/testers/factorytester.py b/testers/factorytester.py
index 88e660d..55ebe7c 100644
--- a/testers/factorytester.py
+++ b/testers/factorytester.py
@@ -1,21 +1,21 @@
-from __future__ import absolute_import
-import parsers
-import factories
-import xml.etree.cElementTree as ET
-
-xmlpath = r"C:\roy\NLI-nnl10 - 1MB.xml"
-whitelist = ['100', '374', '400', '151', '451', '550', '551', '678']
-
-
-xmltree = ET.parse(xmlpath)
-entities = list()
-inl_factory = factories.INLFactory()
-
-for record in xmltree.getroot():
-    inl_parser = parsers.INLXmlParser(record, whitelist)
-    clean_record = inl_parser.clearxml()
-    entities.append(inl_factory.get_entity(clean_record))
-
-for entity in entities:
-    print(entity)
-
+from __future__ import absolute_import
+import parsers
+import factories
+import xml.etree.cElementTree as ET
+
+xmlpath = r"C:\roy\NLI-nnl10 - 1MB.xml"
+whitelist = ['100', '374', '400', '151', '451', '550', '551', '678']
+
+
+xmltree = ET.parse(xmlpath)
+entities = list()
+inl_factory = factories.INLFactory()
+
+for record in xmltree.getroot():
+    inl_parser = parsers.INLXmlParser(record, whitelist)
+    clean_record = inl_parser.clearxml()
+    entities.append(inl_factory.get_entity(clean_record))
+
+for entity in entities:
+    print(entity)
+
diff --git a/writers/wd_writer.py b/writers/wd_writer.py
index b88833f..4a456e5 100644
--- a/writers/wd_writer.py
+++ b/writers/wd_writer.py
@@ -1,6 +1,7 @@
-import pywikibot
-from pywikibot import pagegenerators, WikidataBot
-
-class WDWriter(object):
-    def __init__(self):
-        pass
\ No newline at end of file
+import pywikibot
+from pywikibot import pagegenerators, WikidataBot
+
+
+class WDWriter(object):
+    def __init__(self, source_path, reader, factory):
+        self.source_path = source_path
-- 
cgit v1.2.3
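
The whole pipeline can be exercised against any small MARC-XML extract (the path
below is a placeholder, not a file shipped with this change):

    from readers.xml_reader import read_file

    # Streams the file with iterparse, cleans each <record> with
    # INLXmlParser, builds entities via INLFactory, and appends one
    # CSV row per entity under ../out/.
    read_file('sample.xml', 'record')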