# HG changeset patch # User casties # Date 1441298901 -7200 # Node ID 4dfd832e9cd9cc6c9146c5da8b39856e3f10e16c # Parent de0a06eef13b7d0dfc1aa0347ebaf295aea181cf added automatic creation of inverse relations. added more attribute types. diff -r de0a06eef13b -r 4dfd832e9cd9 importFromOpenMind/importer/ismi2neo4j.py --- a/importFromOpenMind/importer/ismi2neo4j.py Fri Aug 28 17:24:45 2015 +0200 +++ b/importFromOpenMind/importer/ismi2neo4j.py Thu Sep 03 18:48:21 2015 +0200 @@ -2,11 +2,24 @@ import json from neo4jrestclient.client import GraphDatabase, Node -# In[111]: -ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"] +## configure behaviour + +# add inverse relations as "' + + if is_tar_rel: + name = '<' + name + + return name + def getNode(ismi_id=None): if ismi_id is not None: - res = gdb.query("match (n {ismi_id: %s}) return n"%40635, returns=(Node)) + res = gdb.query("match (n {ismi_id: %s}) return n"%ismi_id, returns=(Node)) if len(res) > 0: return res[0] return None + def nodeFromEnt(ent, etype): attrs = {} # go through all attributes for att in ent['atts']: ct = att.get('content_type', None) - if ct in ['text', 'arabic', 'bool', 'url']: - # normal text attribute + if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']: + # normal text attribute (assume no content_type is text too...) key = att['name'] val = att['ov'] @@ -62,6 +93,18 @@ # keep attribute attrs[key] = val + elif ct == 'num': + # number attribute + key = att['name'] + val = att['ov'] + + if key in ent_exclude_attrs: + # exclude attribute + continue + + # keep attribute, assume num is int + attrs[key] = int(val) + elif ct == 'date': # date attribute key = att['name'] @@ -73,7 +116,7 @@ continue else: - #print("WARN: attribute with unknown content_type: %s"%repr(att)) + print("WARN: attribute with unknown content_type: %s"%repr(att)) # ignore other content types continue @@ -83,7 +126,7 @@ print("ERROR: entity type doesn't match!") return null - attrs['type'] = oc + attrs['type'] = fixName(oc) ismi_id = ent['id'] # rename id to ismi_id @@ -97,7 +140,7 @@ # create node with attributes node = gdb.nodes.create(**attrs) # add labels - node.labels.add(['project_ismi', etype]) + node.labels.add([project_label, fixName(etype)]) return node @@ -144,7 +187,13 @@ print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) continue - n4j_rel = gdb.relationships.create(src, rel_name, tar) + if add_inverse_relations: + n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), + gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] + + else: + n4j_rel = gdb.relationships.create(src, fixName(rel_name), tar) + n4j_relations[rel_id] = n4j_rel return n4j_relations