Mercurial > hg > drupalISMI
changeset 17:4dfd832e9cd9
Added automatic creation of inverse relations.
Added more attribute types.
author | casties |
---|---|
date | Thu, 03 Sep 2015 18:48:21 +0200 |
parents | de0a06eef13b |
children | 0827156df210 |
files | importFromOpenMind/importer/ismi2neo4j.py |
diffstat | 1 files changed, 61 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/importFromOpenMind/importer/ismi2neo4j.py Fri Aug 28 17:24:45 2015 +0200 +++ b/importFromOpenMind/importer/ismi2neo4j.py Thu Sep 03 18:48:21 2015 +0200 @@ -2,11 +2,24 @@ import json from neo4jrestclient.client import GraphDatabase, Node -# In[111]: -ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"] +## configure behaviour + +# add inverse relations as "<relation" +add_inverse_relations = True + +# try to find and re-use existing nodes in neo4j (slow!) +keep_nodes = False +# label added to all nodes +project_label = '_ismi_inv_rel' + +# OpenMind base URL baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" +# neo4j base URL +neo4jBaseURL = "http://localhost:7474/db/data/" + + entsURL=baseURL+"method=get_ents&oc=%s" entURL=baseURL+"method=get_ent&id=%s&include_content=True" @@ -19,39 +32,57 @@ defs_json = readJSON(baseURL+"method=get_defs") +# current list of all definitions ismi_defs = [atts['ov'] for atts in defs_json['defs']] +#ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"] -gdb = GraphDatabase("http://localhost:7474/db/data/", username="neo4j", password="neo5j") + +gdb = GraphDatabase(neo4jBaseURL, username="neo4j", password="neo5j") n4j_nodes = {} ismi_relations = {} n4j_relations = {} -keep_nodes = False - ent_exclude_attrs = [ 'lw', 'node_type', 'nov' ] +def fixName(name, is_src_rel=False, is_tar_rel=False): + # these are too embarrasing... 
+ if 'FLORUIT' in name: + name = name.replace('FLORUIT', 'FLOURISH') + + elif 'floruit' in name: + name = name.replace('floruit', 'flourish') + + if is_src_rel: + name = name + '>' + + if is_tar_rel: + name = '<' + name + + return name + def getNode(ismi_id=None): if ismi_id is not None: - res = gdb.query("match (n {ismi_id: %s}) return n"%40635, returns=(Node)) + res = gdb.query("match (n {ismi_id: %s}) return n"%ismi_id, returns=(Node)) if len(res) > 0: return res[0] return None + def nodeFromEnt(ent, etype): attrs = {} # go through all attributes for att in ent['atts']: ct = att.get('content_type', None) - if ct in ['text', 'arabic', 'bool', 'url']: - # normal text attribute + if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']: + # normal text attribute (assume no content_type is text too...) key = att['name'] val = att['ov'] @@ -62,6 +93,18 @@ # keep attribute attrs[key] = val + elif ct == 'num': + # number attribute + key = att['name'] + val = att['ov'] + + if key in ent_exclude_attrs: + # exclude attribute + continue + + # keep attribute, assume num is int + attrs[key] = int(val) + elif ct == 'date': # date attribute key = att['name'] @@ -73,7 +116,7 @@ continue else: - #print("WARN: attribute with unknown content_type: %s"%repr(att)) + print("WARN: attribute with unknown content_type: %s"%repr(att)) # ignore other content types continue @@ -83,7 +126,7 @@ print("ERROR: entity type doesn't match!") return null - attrs['type'] = oc + attrs['type'] = fixName(oc) ismi_id = ent['id'] # rename id to ismi_id @@ -97,7 +140,7 @@ # create node with attributes node = gdb.nodes.create(**attrs) # add labels - node.labels.add(['project_ismi', etype]) + node.labels.add([project_label, fixName(etype)]) return node @@ -144,7 +187,13 @@ print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) continue - n4j_rel = gdb.relationships.create(src, rel_name, tar) + if add_inverse_relations: + n4j_rel = [gdb.relationships.create(src, 
fixName(rel_name, is_src_rel=True), tar), + gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] + + else: + n4j_rel = gdb.relationships.create(src, fixName(rel_name), tar) + n4j_relations[rel_id] = n4j_rel return n4j_relations