# HG changeset patch # User casties # Date 1441637830 -7200 # Node ID 0827156df210deeb4b1cb393bde55c1ce0e419ac # Parent 4dfd832e9cd9cc6c9146c5da8b39856e3f10e16c added contraction of relations into attributes. added some docstrings. diff -r 4dfd832e9cd9 -r 0827156df210 importFromOpenMind/importer/ismi2neo4j.py --- a/importFromOpenMind/importer/ismi2neo4j.py Thu Sep 03 18:48:21 2015 +0200 +++ b/importFromOpenMind/importer/ismi2neo4j.py Mon Sep 07 16:57:10 2015 +0200 @@ -7,11 +7,14 @@ # add inverse relations as "' + #name = name + '>' + pass if is_tar_rel: name = '<' + name + + if att_from_rel: + # clean up relations as attribute names + name = name.replace('is_', '') + name = name.replace('has_', '') + name = name.replace('was_', '') + name = name.replace('_of', '') return name @@ -77,6 +88,10 @@ def nodeFromEnt(ent, etype): + """Create a Neo4J node from the given JSON entity. + + Creates the node in gdb and returns the node. + """ attrs = {} # go through all attributes for att in ent['atts']: @@ -144,9 +159,11 @@ return node -# In[77]: - def relsFromEnt(ent, relations): + """Extract all relations from JSON entity. + + Adds JSON to dict relations under relation's id. + """ # go through src_rels and tar_rels rels = ent.get('src_rels', []) + ent.get('tar_rels', []) for rel in rels: @@ -162,9 +179,15 @@ return relations -# In[110]: - def n4jrelationsFromRels(rels, nodes): + """Create relations in Neo4J. + + Args: + rels: dict of JSON relations + nodes: dict of existing Neo4J nodes + Returns: + dict of Neo4J relations + """ # go through all rels print("importing %s relations"%len(rels)) cnt = 0 @@ -187,6 +210,41 @@ print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) continue + if contract_relations_into_attributes: + # contract source relations + tar_type = rel['tar_oc'] + if tar_type in contract_relations_into_attributes: + att_name = fixName(rel_name, att_from_rel=True) + # TODO: clean up attribute names + while src.get(att_name, None) is not None: + # attribute exists + if att_name[-1].isnumeric(): + # increment last digit + att_name = att_name[:-1] + str(int(att_name[-1]) + 1) + else: + att_name += '2' + + # add target node's label as attribute + #print("contracting tar to attribute %s on id=%s"%(att_name, src_id)) + src.set(att_name, tar.get('label')) + + # contract target relations + src_type = rel['src_oc'] + if src_type in contract_relations_into_attributes: + att_name = fixName(rel_name, att_from_rel=True) + # TODO: clean up attribute names + while tar.get(att_name, None) is not None: + # attribute exists + if att_name[-1].isnumeric(): + # increment last digit + att_name = att_name[:-1] + str(int(att_name[-1]) + 1) + else: + att_name += '2' + + # add target node's label as attribute + #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) + src.set(att_name, src.get('label')) + if add_inverse_relations: n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] @@ -199,9 +257,9 @@ return n4j_relations -# In[114]: - def importEnts(etype): + """Import all entities of the given type. + """ # read json for all entities of given type json = readJSON(entsURL%etype) ents = json['ents'] @@ -224,13 +282,13 @@ if keep_nodes: node = getNode(ismi_id) - if node is None: - node = nodeFromEnt(ent_data, etype) - if ismi_id in n4j_nodes: print("ERROR: entity with id=%s exists!"%ismi_id) return + if node is None: + node = nodeFromEnt(ent_data, etype) + # save node reference n4j_nodes[ismi_id] = node