view importFromOpenMind/importer/model2neo4j_client.py @ 48:6625019a0c96

old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author casties
date Tue, 07 Feb 2017 21:06:13 +0100
parents
children
line wrap: on
line source

import networkx as nx
from neo4j.v1 import GraphDatabase, basic_auth
import sys

## configure behaviour

# networkx graph file (gpickle) to read; replaced by the first
# command-line argument when one is given
input_fn = 'ismi_graph.gpickle'

# label meant to be added to all nodes
# NOTE(review): not referenced by any code visible in this file -- confirm
# whether nodes should still get this label
project_label = '_ismi'

# neo4j base URL (bolt protocol)
neo4jBaseURL = "bolt://localhost:7687"

# names of the attributes that hold the type of a node / of a relation
node_type_attribute = '_type'
rel_type_attribute = '_type'


## setup

# nodes copied so far: maps ismi_id to the attribute dict of the node
# (filled by copyNodes, looked up by copyRelations)
n4j_nodes = {}
# NOTE(review): never read or written by the code visible in this file
n4j_relations = {}

# active log levels for logging (log() prints only messages of these levels)
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
# alternative, less verbose settings:
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}

def log(level, message):
    """print message prefixed with its level, if the level is active in logLevels"""
    # inactive levels are dropped silently
    if level not in logLevels:
        return

    line = "%s: %s"%(level, message)
    print(line)


def createIndices(nx_graph, n4j_graph):
    """create uniqueness constraints on ismi_id for all node types of nx_graph in n4j_graph

    nx_graph: networkx graph whose nodes carry the attribute named by
        node_type_attribute.
    n4j_graph: Neo4J session; one query per node type is sent through its
        run() method.

    Raises KeyError if a node has no type attribute.
    """
    
    log('INFO', "Creating node indices in Neo4J")
    # collect the distinct types of all nodes
    ismi_types = {nx_graph.node[node_id][node_type_attribute]
                  for node_id in nx.nodes_iter(nx_graph)}
        
    # create one constraint per type; the type name is used as the node label
    for ismi_type in ismi_types:
        query = "CREATE CONSTRAINT ON (n:%s) ASSERT n.ismi_id IS UNIQUE"%ismi_type
        n4j_graph.run(query)


def copyNodes(nx_graph, n4j_graph):
    """create one Neo4J node in n4j_graph for every node of nx_graph

    The node type attribute becomes the label and all node attributes become
    properties. Each copied node's attribute dict is remembered in n4j_nodes
    under its ismi_id.
    """
    
    log('INFO', "Copying nodes to Neo4J")
    copied = 0
    for copied, nx_id in enumerate(nx.nodes_iter(nx_graph), 1):
        props = nx_graph.node[nx_id]
        # entity type is used as label
        label = props[node_type_attribute]
        ismi_id = props['ismi_id']
        # one "key: {key}" placeholder per attribute (parameter names are attribute keys)
        placeholders = []
        for key in props.keys():
            placeholders.append("%s: {%s}"%(key, key))

        statement = "CREATE (n:%s {%s})"%(label, ", ".join(placeholders))
        n4j_graph.run(statement, parameters=props)
        # remember the node by its ismi_id
        n4j_nodes[ismi_id] = props

        # progress message every 100 nodes
        if copied % 100 == 0:
            log('INFO', "%s nodes"%copied)

    log('INFO', "%s nodes copied"%copied)


def copyRelations(nx_graph, n4j_graph):
    """copy all relations from nx_graph to n4j_graph

    nx_graph: networkx graph; every edge needs the attribute named by
        rel_type_attribute and both end nodes need an 'ismi_id' attribute.
    n4j_graph: Neo4J session; one query per edge is sent through run().

    The end nodes are looked up in n4j_nodes, so copyNodes has to run first.
    Copying stops at the first edge whose source or target node is not in
    n4j_nodes.

    Raises KeyError if an edge has no type attribute or an end node has no
    ismi_id.
    """
    
    log('INFO', "Copying relations to Neo4J")
    cnt = 0
    # parameter names for the ismi_ids of the end nodes
    # (assumed not to clash with any relation attribute name)
    src_param = '_src_ismi_id'
    tar_param = '_tar_ismi_id'
    # data=True yields the attribute dict of every single edge, so parallel
    # edges of a multigraph are each copied with their own attributes
    for (nx_src, nx_tar, attrs) in nx_graph.edges_iter(data=True):
        # get relation type
        rtype = attrs[rel_type_attribute]
        # get ismi_id of source and target nodes
        src_id = nx_graph.node[nx_src]['ismi_id']
        tar_id = nx_graph.node[nx_tar]['ismi_id']
        # get Neo4J nodes
        src = n4j_nodes.get(src_id, None)
        if src is None:
            log('ERROR', "src node %s missing!"%src_id)
            break
        
        tar = n4j_nodes.get(tar_id, None)
        if tar is None:
            log('ERROR', "tar node %s missing!"%tar_id)
            break
        
        src_type = src[node_type_attribute]
        tar_type = tar[node_type_attribute]
        
        att_qs = ", ".join("%s: {%s}"%(k, k) for k in attrs)
        # pass the ids as parameters instead of pasting them into the query
        # text, so that non-numeric ids work and keep their type
        params = dict(attrs)
        params[src_param] = src_id
        params[tar_param] = tar_id
        # query to create a relation with attributes
        cypher = ("MATCH (n1:%s),(n2:%s) WHERE n1.ismi_id = {%s} AND n2.ismi_id = {%s} "
                  "CREATE (n1)-[r:%s {%s}]->(n2)")%(src_type, tar_type, src_param, tar_param, rtype, att_qs)
        # run query
        n4j_graph.run(cypher, params)

        cnt += 1
        if cnt % 100 == 0:
            log('INFO', "%s relations"%cnt)

    log('INFO', "%s relations copied"%cnt)
## main

print("Copy graph from networkx to Neo4J")

# read commandline parameters: optional first argument is the graph file
if len(sys.argv) > 1:
    input_fn = sys.argv[1]

# read networkx graph from pickle
# NOTE(review): a gpickle file is unpickled on load -- only read files from a trusted source
print("Reading graph from %s"%input_fn)
nx_graph = nx.read_gpickle(input_fn)
print("Graph info: %s"%nx.info(nx_graph))

# open neo4j graph db
# NOTE(review): credentials are hard-coded here -- consider reading them from configuration
print("Opening Neo4J db at %s"%neo4jBaseURL)
n4j_driver = GraphDatabase.driver(neo4jBaseURL, auth=basic_auth("neo4j", "neo5j"))
# get session to pass to functions 
n4j_graph = n4j_driver.session()

try:
    # constraints first, then nodes, then the relations between them
    createIndices(nx_graph, n4j_graph)

    copyNodes(nx_graph, n4j_graph)

    copyRelations(nx_graph, n4j_graph)

finally:
    # release the session even if one of the copy steps fails
    n4j_graph.close()

print("Done.")