changeset 23:45a823b5bf33

updated ismi2model importer and model2neo4j exporter.
author casties
date Wed, 23 Sep 2015 19:18:59 +0200
parents 3585573999b6
children 97f2da68fb5f
files importFromOpenMind/importer/ismi2model.py importFromOpenMind/importer/model2neo4j.py
diffstat 2 files changed, 138 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/importFromOpenMind/importer/ismi2model.py	Wed Sep 23 11:54:22 2015 +0200
+++ b/importFromOpenMind/importer/ismi2model.py	Wed Sep 23 19:18:59 2015 +0200
@@ -1,6 +1,7 @@
 import urllib.request
 import json
 import networkx
+import sys
 
 ## configure behaviour
 
@@ -110,7 +111,7 @@
             # date attribute
             key = att['name']
             val = att['ov']
-            #print("don't know what to do with date: %s=%s"%(key,val))
+            print("don't know what to do with date: %s=%s"%(key,val))
             
         elif ct == 'old':
             # ignore attribute
@@ -230,7 +231,7 @@
                 nx_graph.node[tar_id][att_name] = nx_graph.node[src_id]['label']
         
         # create relation with type
-        nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name))
+        nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id)
         
         nx_relations[rel_id] = nx_rel
         
@@ -289,15 +290,22 @@
     relationsFromRels(ismi_relations, nx_nodes)
 
 
-# In[120]:
+## main
+
+print("Copy graph from OpenMind to networkx pickle")
 
+# parse command line parameters
+if len(sys.argv) > 1:
+    output_fn = sys.argv[1]
+
+# import everything
+print("Reading graph from OpenMind at %s"%baseURL)
 importAllEnts(ismi_defs)
 #importAllEnts(['TEXT'])
 
 print("Graph info: %s"%networkx.info(nx_graph))
-print("Number of nodes: %s"%networkx.number_of_nodes(nx_graph))
-print("Number of edges: %s"%networkx.number_of_edges(nx_graph))
 #print(" nodes:%s"%repr(nx_graph.nodes(data=True)))
+
 # export pickle
 networkx.write_gpickle(nx_graph, output_fn)
-print("Wrote file %s"%output_fn)
+print("Wrote networkx pickle file %s"%output_fn)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/importFromOpenMind/importer/model2neo4j.py	Wed Sep 23 19:18:59 2015 +0200
@@ -0,0 +1,124 @@
+import networkx as nx
+from neo4jrestclient.client import GraphDatabase, Node
+import sys
+
+## configure behaviour
+
+# networkx graph file
+input_fn = 'ismi_graph.gpickle'
+
+# label added to all nodes
+project_label = '_ismi4'
+
+# neo4j base URL
+neo4jBaseURL = "http://localhost:7474/db/data/"
+
+
+## setup
+
+n4j_nodes = {}
+n4j_relations = {}
+
+
+def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
+    # these names are too embarrassing: rename 'floruit' to 'flourish'
+    if 'FLORUIT' in name:
+        name = name.replace('FLORUIT', 'FLOURISH')
+        
+    elif 'floruit' in name:
+        name = name.replace('floruit', 'flourish')
+        
+    if is_src_rel:
+        #name = name + '>'
+        pass
+        
+    if is_tar_rel:
+        name = '<' + name
+        
+    if att_from_rel:
+        # clean up relations as attribute names
+        name = name.replace('is_', '')
+        name = name.replace('has_', '')
+        name = name.replace('was_', '')
+        name = name.replace('_of', '')
+
+    return name
+
+
+def copyNodes(nx_graph, n4j_graph):
+    """copy all nodes from nx_graph to n4j_graph"""
+    
+    print("Copying nodes to Neo4J")
+    cnt = 0
+    for node_id in nx.nodes_iter(nx_graph):
+        attrs = nx_graph.node[node_id]
+        type = attrs['type']
+        ismi_id = attrs['ismi_id']
+        # create node with attributes
+        n4j_node = n4j_graph.nodes.create(**attrs)
+        # add labels
+        n4j_node.labels.add([project_label, type])
+        # save reference
+        n4j_nodes[ismi_id] = n4j_node
+
+        cnt += 1
+        if cnt % 100 == 0:
+            print("  %s"%cnt)
+
+
+def copyRelations(nx_graph, n4j_graph):
+    """copy all relations from nx_graph to n4j_graph"""
+    
+    print("Copying relations to Neo4J")
+    cnt = 0
+    for nx_edge in nx.edges_iter(nx_graph):
+        (nx_src, nx_tar) = nx_edge
+        # get attributes of edge
+        attrs = nx_graph.edge[nx_src][nx_tar][0]
+        type = attrs['type']
+        # get ismi_id of source and target nodes
+        src_id = nx_graph.node[nx_src]['ismi_id']
+        tar_id = nx_graph.node[nx_tar]['ismi_id']
+        # get Neo4J nodes
+        src = n4j_nodes.get(src_id, None)
+        if src is None:
+            print("ERROR: src node %s missing!"%src_id)
+            break
+        
+        tar = n4j_nodes.get(tar_id, None)
+        if tar is None:
+            print("ERROR: tar node %s missing!"%tar_id)
+            break
+        
+        # create Neo4J relation
+        n4j_rel = n4j_graph.relationships.create(src, type, tar)
+        # add attributes
+        n4j_rel.properties = attrs
+
+        cnt += 1
+        if cnt % 100 == 0:
+            print("  %s"%cnt)
+
+
+## main
+
+print("Copy graph from networkx to Neo4J")
+
+# read command line parameters
+if len(sys.argv) > 1:
+    input_fn = sys.argv[1]
+
+# read networkx graph from pickle
+print("Reading graph from %s"%input_fn)
+nx_graph = nx.read_gpickle(input_fn)
+print("Graph info: %s"%nx.info(nx_graph))
+
+# open neo4j graph db
+print("Opening Neo4J db at %s"%neo4jBaseURL)
+n4j_graph = GraphDatabase(neo4jBaseURL, username="neo4j", password="neo5j")
+
+copyNodes(nx_graph, n4j_graph)
+
+copyRelations(nx_graph, n4j_graph)
+
+print("Done.")