Mercurial > hg > drupalISMI
diff importFromOpenMind/importer/model2neo4j_client.py @ 48:6625019a0c96
old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author | casties |
---|---|
date | Tue, 07 Feb 2017 21:06:13 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/importFromOpenMind/importer/model2neo4j_client.py Tue Feb 07 21:06:13 2017 +0100 @@ -0,0 +1,144 @@ +import networkx as nx +from neo4j.v1 import GraphDatabase, basic_auth +import sys + +## configure behaviour + +# metworkx graph file +input_fn = 'ismi_graph.gpickle' + +# label added to all nodes +project_label = '_ismi' + +# neo4j base URL +neo4jBaseURL = "bolt://localhost:7687" + +# name of type attribute +node_type_attribute = '_type' +rel_type_attribute = '_type' + + +## setup + +n4j_nodes = {} +n4j_relations = {} + +# active log levels for logging +logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'} +#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'} +#logLevels = {'INFO', 'ERROR', 'SYSMSG'} + +def log(level, message): + if level in logLevels: + print("%s: %s"%(level, message)) + + +def createIndices(nx_graph, n4j_graph): + """create indices for nodes from nx_graph in n4j_graph""" + + log('INFO', "Creating node indices in Neo4J") + cnt = 0 + ismi_types = set() + # collect types of all nodes + for node_id in nx.nodes_iter(nx_graph): + attrs = nx_graph.node[node_id] + # get entity type + ismi_types.add(attrs[node_type_attribute]) + + # create constraints for all types + for ismi_type in ismi_types: + query = "CREATE CONSTRAINT ON (n:%s) ASSERT n.ismi_id IS UNIQUE"%ismi_type + n4j_graph.run(query) + + +def copyNodes(nx_graph, n4j_graph): + """copy all nodes from nx_graph to n4j_graph""" + + log('INFO', "Copying nodes to Neo4J") + cnt = 0 + for node_id in nx.nodes_iter(nx_graph): + attrs = nx_graph.node[node_id] + # get entity type + ntype = attrs[node_type_attribute] + # get ismi_id + ismi_id = attrs['ismi_id'] + att_qs = ", ".join(["%s: {%s}"%(k, k) for k in attrs.keys()]) + # query to create node with attributes (parameter names are attribute keys) + cypher = "CREATE (n:%s {%s})"%(ntype, att_qs) + # run query + n4j_graph.run(cypher, parameters=attrs) + # save node id + n4j_nodes[ismi_id] = attrs + + cnt += 1 + if cnt % 100 == 0: + log('INFO', "%s nodes"%cnt) + + log('INFO', "%s nodes copied"%cnt) + + +def copyRelations(nx_graph, n4j_graph): + """copy all relations from nx_graph to n4j_graph""" + + print("Copying relations to Neo4J") + cnt = 0 + for nx_edge in nx.edges_iter(nx_graph): + (nx_src, nx_tar) = nx_edge + # get attributes of edge + attrs = nx_graph.edge[nx_src][nx_tar][0] + # get relation type + rtype = attrs[rel_type_attribute] + # get ismi_id of source and target nodes + src_id = nx_graph.node[nx_src]['ismi_id'] + tar_id = nx_graph.node[nx_tar]['ismi_id'] + # get Neo4J nodes + src = n4j_nodes.get(src_id, None) + if src is None: + print("ERROR: src node %s missing!"%src_id) + break + + tar = n4j_nodes.get(tar_id, None) + if tar is None: + print("ERROR: tar node %s missing!"%tar_id) + break + + src_type = src[node_type_attribute] + tar_type = tar[node_type_attribute] + + att_qs = ", ".join(["%s: {%s}"%(k, k) for k in attrs.keys()]) + # query to create a relation with attributes + cypher = "MATCH (n1:%s),(n2:%s) WHERE n1.ismi_id = %s AND n2.ismi_id = %s CREATE (n1)-[r:%s {%s}]->(n2)"%(src_type, tar_type, src_id, tar_id, rtype, att_qs) + # run query + n4j_graph.run(cypher, attrs) + + cnt += 1 + if cnt % 100 == 0: + log('INFO', "%s relations"%cnt) + + log('INFO', "%s relations copied"%cnt) +## main + +print("Copy graph from networkx to Neo4J") + +# read commandline parameters +if len(sys.argv) > 1: + input_fn = sys.argv[1] + +# read networkx graph from pickle +print("Reading graph from %s"%input_fn) +nx_graph = nx.read_gpickle(input_fn) +print("Graph info: %s"%nx.info(nx_graph)) + +# open neo4j graph db +print("Opening Neo4J db at %s"%neo4jBaseURL) +n4j_driver = GraphDatabase.driver(neo4jBaseURL, auth=basic_auth("neo4j", "neo5j")) +# get session to pass to functions +n4j_graph = n4j_driver.session() + +createIndices(nx_graph, n4j_graph) + +copyNodes(nx_graph, n4j_graph) + +copyRelations(nx_graph, n4j_graph) + +print("Done.")