diff importFromOpenMind/importer/model2neo4j_client.py @ 48:6625019a0c96

old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author casties
date Tue, 07 Feb 2017 21:06:13 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/importFromOpenMind/importer/model2neo4j_client.py	Tue Feb 07 21:06:13 2017 +0100
@@ -0,0 +1,144 @@
+import networkx as nx
+from neo4j.v1 import GraphDatabase, basic_auth
+import sys
+
+## configure behaviour
+
+# metworkx graph file
+input_fn = 'ismi_graph.gpickle'
+
+# label added to all nodes
+project_label = '_ismi'
+
+# neo4j base URL
+neo4jBaseURL = "bolt://localhost:7687"
+
+# name of type attribute
+node_type_attribute = '_type'
+rel_type_attribute = '_type'
+
+
+## setup
+
+n4j_nodes = {}
+n4j_relations = {}
+
+# active log levels for logging
+logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
+#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
+#logLevels = {'INFO', 'ERROR', 'SYSMSG'}
+
+def log(level, message):
+    if level in logLevels:
+        print("%s: %s"%(level, message))
+
+
+def createIndices(nx_graph, n4j_graph):
+    """create indices for nodes from nx_graph in n4j_graph"""
+    
+    log('INFO', "Creating node indices in Neo4J")
+    cnt = 0
+    ismi_types = set()
+    # collect types of all nodes
+    for node_id in nx.nodes_iter(nx_graph):
+        attrs = nx_graph.node[node_id]
+        # get entity type
+        ismi_types.add(attrs[node_type_attribute])
+        
+    # create constraints for all types
+    for ismi_type in ismi_types:
+        query = "CREATE CONSTRAINT ON (n:%s) ASSERT n.ismi_id IS UNIQUE"%ismi_type
+        n4j_graph.run(query)
+
+
+def copyNodes(nx_graph, n4j_graph):
+    """copy all nodes from nx_graph to n4j_graph"""
+    
+    log('INFO', "Copying nodes to Neo4J")
+    cnt = 0
+    for node_id in nx.nodes_iter(nx_graph):
+        attrs = nx_graph.node[node_id]
+        # get entity type
+        ntype = attrs[node_type_attribute]
+        # get ismi_id
+        ismi_id = attrs['ismi_id']
+        att_qs = ", ".join(["%s: {%s}"%(k, k) for k in attrs.keys()])
+        # query to create node with attributes (parameter names are attribute keys)
+        cypher = "CREATE (n:%s {%s})"%(ntype, att_qs)
+        # run query
+        n4j_graph.run(cypher, parameters=attrs)
+        # save node id
+        n4j_nodes[ismi_id] = attrs
+
+        cnt += 1
+        if cnt % 100 == 0:
+            log('INFO', "%s nodes"%cnt)
+
+    log('INFO', "%s nodes copied"%cnt)
+
+
+def copyRelations(nx_graph, n4j_graph):
+    """copy all relations from nx_graph to n4j_graph"""
+    
+    print("Copying relations to Neo4J")
+    cnt = 0
+    for nx_edge in nx.edges_iter(nx_graph):
+        (nx_src, nx_tar) = nx_edge
+        # get attributes of edge
+        attrs = nx_graph.edge[nx_src][nx_tar][0]
+        # get relation type
+        rtype = attrs[rel_type_attribute]
+        # get ismi_id of source and target nodes
+        src_id = nx_graph.node[nx_src]['ismi_id']
+        tar_id = nx_graph.node[nx_tar]['ismi_id']
+        # get Neo4J nodes
+        src = n4j_nodes.get(src_id, None)
+        if src is None:
+            print("ERROR: src node %s missing!"%src_id)
+            break
+        
+        tar = n4j_nodes.get(tar_id, None)
+        if tar is None:
+            print("ERROR: tar node %s missing!"%tar_id)
+            break
+        
+        src_type = src[node_type_attribute]
+        tar_type = tar[node_type_attribute]
+        
+        att_qs = ", ".join(["%s: {%s}"%(k, k) for k in attrs.keys()])
+        # query to create a relation with attributes
+        cypher = "MATCH (n1:%s),(n2:%s) WHERE n1.ismi_id = %s AND n2.ismi_id = %s CREATE (n1)-[r:%s {%s}]->(n2)"%(src_type, tar_type, src_id, tar_id, rtype, att_qs)
+        # run query
+        n4j_graph.run(cypher, attrs)
+
+        cnt += 1
+        if cnt % 100 == 0:
+            log('INFO', "%s relations"%cnt)
+
+    log('INFO', "%s relations copied"%cnt)
+## main
+
+print("Copy graph from networkx to Neo4J")
+
+# read commandline parameters
+if len(sys.argv) > 1:
+    input_fn = sys.argv[1]
+
+# read networkx graph from pickle
+print("Reading graph from %s"%input_fn)
+nx_graph = nx.read_gpickle(input_fn)
+print("Graph info: %s"%nx.info(nx_graph))
+
+# open neo4j graph db
+print("Opening Neo4J db at %s"%neo4jBaseURL)
+n4j_driver = GraphDatabase.driver(neo4jBaseURL, auth=basic_auth("neo4j", "neo5j"))
+# get session to pass to functions 
+n4j_graph = n4j_driver.session()
+
+createIndices(nx_graph, n4j_graph)
+
+copyNodes(nx_graph, n4j_graph)
+
+copyRelations(nx_graph, n4j_graph)
+
+print("Done.")