Mercurial > hg > drupalISMI
changeset 18:0827156df210
added contraction of relations into attributes.
added some docstrings.
author | casties |
---|---|
date | Mon, 07 Sep 2015 16:57:10 +0200 |
parents | 4dfd832e9cd9 |
children | ca1e02a2a9c4 |
files | importFromOpenMind/importer/ismi2neo4j.py |
diffstat | 1 files changed, 71 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/importFromOpenMind/importer/ismi2neo4j.py Thu Sep 03 18:48:21 2015 +0200 +++ b/importFromOpenMind/importer/ismi2neo4j.py Mon Sep 07 16:57:10 2015 +0200 @@ -7,11 +7,14 @@ # add inverse relations as "<relation" add_inverse_relations = True +# add relations to these objects as attributes with the relations name +contract_relations_into_attributes = ['PLACE', 'ALIAS'] + # try to find and re-use existing nodes in neo4j (slow!) keep_nodes = False # label added to all nodes -project_label = '_ismi_inv_rel' +project_label = '_ismi2' # OpenMind base URL baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" @@ -50,8 +53,8 @@ 'nov' ] -def fixName(name, is_src_rel=False, is_tar_rel=False): - # these are too embarrasing... +def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False): + # these are too embarrassing... if 'FLORUIT' in name: name = name.replace('FLORUIT', 'FLOURISH') @@ -59,10 +62,18 @@ name = name.replace('floruit', 'flourish') if is_src_rel: - name = name + '>' + #name = name + '>' + pass if is_tar_rel: name = '<' + name + + if att_from_rel: + # clean up relations as attribute names + name = name.replace('is_', '') + name = name.replace('has_', '') + name = name.replace('was_', '') + name = name.replace('_of', '') return name @@ -77,6 +88,10 @@ def nodeFromEnt(ent, etype): + """Create a Neo4J node from the given JSON entity. + + Creates the node in gdb and returns the node. + """ attrs = {} # go through all attributes for att in ent['atts']: @@ -144,9 +159,11 @@ return node -# In[77]: - def relsFromEnt(ent, relations): + """Extract all relations from JSON entity. + + Adds JSON to dict relations under relation's id. + """ # go through src_rels and tar_rels rels = ent.get('src_rels', []) + ent.get('tar_rels', []) for rel in rels: @@ -162,9 +179,15 @@ return relations -# In[110]: - def n4jrelationsFromRels(rels, nodes): + """Create relations in Neo4J. + + Args: + rels: dict of JSON relations + nodes: dict of existing Neo4J nodes + Returns: + dict of Neo4J relations + """ # go through all rels print("importing %s relations"%len(rels)) cnt = 0 @@ -187,6 +210,41 @@ print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) continue + if contract_relations_into_attributes: + # contract source relations + tar_type = rel['tar_oc'] + if tar_type in contract_relations_into_attributes: + att_name = fixName(rel_name, att_from_rel=True) + # TODO: clean up attribute names + while src.get(att_name, None) is not None: + # attribute exists + if att_name[-1].isnumeric(): + # increment last digit + att_name = att_name[:-1] + str(int(att_name[-1]) + 1) + else: + att_name += '2' + + # add target node's label as attribute + #print("contracting tar to attribute %s on id=%s"%(att_name, src_id)) + src.set(att_name, tar.get('label')) + + # contract target relations + src_type = rel['src_oc'] + if src_type in contract_relations_into_attributes: + att_name = fixName(rel_name, att_from_rel=True) + # TODO: clean up attribute names + while tar.get(att_name, None) is not None: + # attribute exists + if att_name[-1].isnumeric(): + # increment last digit + att_name = att_name[:-1] + str(int(att_name[-1]) + 1) + else: + att_name += '2' + + # add target node's label as attribute + #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) + src.set(att_name, src.get('label')) + if add_inverse_relations: n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] @@ -199,9 +257,9 @@ return n4j_relations -# In[114]: - def importEnts(etype): + """Import all entities of the given type. + """ # read json for all entities of given type json = readJSON(entsURL%etype) ents = json['ents'] @@ -224,13 +282,13 @@ if keep_nodes: node = getNode(ismi_id) - if node is None: - node = nodeFromEnt(ent_data, etype) - if ismi_id in n4j_nodes: print("ERROR: entity with id=%s exists!"%ismi_id) return + if node is None: + node = nodeFromEnt(ent_data, etype) + # save node reference n4j_nodes[ismi_id] = node