changeset 18:0827156df210

added contraction of relations into attributes. added some docstrings.
author casties
date Mon, 07 Sep 2015 16:57:10 +0200
parents 4dfd832e9cd9
children ca1e02a2a9c4
files importFromOpenMind/importer/ismi2neo4j.py
diffstat 1 files changed, 71 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/importFromOpenMind/importer/ismi2neo4j.py	Thu Sep 03 18:48:21 2015 +0200
+++ b/importFromOpenMind/importer/ismi2neo4j.py	Mon Sep 07 16:57:10 2015 +0200
@@ -7,11 +7,14 @@
 # add inverse relations as "<relation"
 add_inverse_relations = True
 
+# add relations to these objects as attributes named after the relation
+contract_relations_into_attributes = ['PLACE', 'ALIAS']
+
 # try to find and re-use existing nodes in neo4j (slow!)
 keep_nodes = False
 
 # label added to all nodes
-project_label = '_ismi_inv_rel'
+project_label = '_ismi2'
 
 # OpenMind base URL
 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?"
@@ -50,8 +53,8 @@
     'nov'
 ]
 
-def fixName(name, is_src_rel=False, is_tar_rel=False):
-    # these are too embarrasing...
+def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
+    # these are too embarrassing...
     if 'FLORUIT' in name:
         name = name.replace('FLORUIT', 'FLOURISH')
         
@@ -59,10 +62,18 @@
         name = name.replace('floruit', 'flourish')
         
     if is_src_rel:
-        name = name + '>'
+        #name = name + '>'
+        pass
         
     if is_tar_rel:
         name = '<' + name
+        
+    if att_from_rel:
+        # clean up relations as attribute names
+        name = name.replace('is_', '')
+        name = name.replace('has_', '')
+        name = name.replace('was_', '')
+        name = name.replace('_of', '')
 
     return name
 
@@ -77,6 +88,10 @@
 
 
 def nodeFromEnt(ent, etype):
+    """Create a Neo4J node from the given JSON entity.
+    
+    Creates the node in gdb and returns the node.
+    """ 
     attrs = {}
     # go through all attributes
     for att in ent['atts']:
@@ -144,9 +159,11 @@
     return node
 
 
-# In[77]:
-
 def relsFromEnt(ent, relations):
+    """Extract all relations from JSON entity.
+    
+    Adds JSON to dict relations under relation's id.
+    """
     # go through src_rels and tar_rels
     rels = ent.get('src_rels', []) + ent.get('tar_rels', [])
     for rel in rels:
@@ -162,9 +179,15 @@
     return relations
 
 
-# In[110]:
-
 def n4jrelationsFromRels(rels, nodes):
+    """Create relations in Neo4J.
+    
+    Args:
+        rels: dict of JSON relations
+        nodes: dict of existing Neo4J nodes
+    Returns:
+        dict of Neo4J relations
+    """
     # go through all rels
     print("importing %s relations"%len(rels))
     cnt = 0
@@ -187,6 +210,41 @@
             print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
             continue
         
+        if contract_relations_into_attributes:
+            # contract source relations
+            tar_type = rel['tar_oc']
+            if tar_type in contract_relations_into_attributes:
+                att_name = fixName(rel_name, att_from_rel=True)
+                # TODO: clean up attribute names
+                while src.get(att_name, None) is not None:
+                    # attribute exists
+                    if att_name[-1].isnumeric():
+                        # increment last digit
+                        att_name = att_name[:-1] + str(int(att_name[-1]) + 1)
+                    else:
+                        att_name += '2'
+                    
+                # add target node's label as attribute
+                #print("contracting tar to attribute %s on id=%s"%(att_name, src_id))
+                src.set(att_name, tar.get('label'))
+                
+            # contract relations whose source type is listed into the target node
+            src_type = rel['src_oc']
+            if src_type in contract_relations_into_attributes:
+                att_name = fixName(rel_name, att_from_rel=True)
+                # find an attribute name that is still unused on the target node
+                while tar.get(att_name, None) is not None:
+                    # name is taken: bump or append a numeric suffix
+                    if att_name[-1].isnumeric():
+                        # replace last digit by its successor ('9' becomes '10')
+                        att_name = att_name[:-1] + str(int(att_name[-1]) + 1)
+                    else:
+                        att_name += '2'
+                    
+                # add source node's label as attribute of the target node
+                #print("contracting src to attribute %s on id=%s"%(att_name, tar_id))
+                tar.set(att_name, src.get('label'))
+        
         if add_inverse_relations:
             n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar),
                        gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)]
@@ -199,9 +257,9 @@
     return n4j_relations
 
 
-# In[114]:
-
 def importEnts(etype):
+    """Import all entities of the given type.
+    """
     # read json for all entities of given type
     json = readJSON(entsURL%etype)
     ents = json['ents']
@@ -224,13 +282,13 @@
         if keep_nodes:
             node = getNode(ismi_id)
         
-        if node is None:
-            node = nodeFromEnt(ent_data, etype)
-        
         if ismi_id in n4j_nodes:
             print("ERROR: entity with id=%s exists!"%ismi_id)
             return
         
+        if node is None:
+            node = nodeFromEnt(ent_data, etype)
+        
         # save node reference
         n4j_nodes[ismi_id] = node