comparison importFromOpenMind/importer/model2neo4j_client.py @ 48:6625019a0c96

old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author casties
date Tue, 07 Feb 2017 21:06:13 +0100
parents
children
comparison
equal deleted inserted replaced
47:378dcb66a27f 48:6625019a0c96
1 import networkx as nx
2 from neo4j.v1 import GraphDatabase, basic_auth
3 import sys
4
5 ## configure behaviour
6
7 # networkx graph file (gpickle) used as input; overridden by the first command-line argument if given
8 input_fn = 'ismi_graph.gpickle'
9
10 # label added to all nodes (NOTE(review): not referenced anywhere in the visible code - confirm it is still needed)
11 project_label = '_ismi'
12
13 # neo4j base URL (Bolt) passed to GraphDatabase.driver
14 neo4jBaseURL = "bolt://localhost:7687"
15
16 # name of the attribute holding the type of a node / of a relation
17 node_type_attribute = '_type'
18 rel_type_attribute = '_type'
19
20
21 ## setup
22
23 n4j_nodes = {}  # maps ismi_id -> node attribute dict; filled by copyNodes, read by copyRelations
24 n4j_relations = {}  # NOTE(review): never read or written in the visible code
25
26 # active log levels for logging; log() drops messages whose level is not in this set
27 logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
28 #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
29 #logLevels = {'INFO', 'ERROR', 'SYSMSG'}
30
def log(level, message):
    """Print message prefixed with its level, if that level is active.

    A message is emitted only when level is a member of the module-level
    logLevels set; otherwise the call does nothing.
    """
    if level not in logLevels:
        return
    print("%s: %s"%(level, message))
34
35
def createIndices(nx_graph, n4j_graph):
    """Create a uniqueness constraint on ismi_id for every node type in nx_graph.

    nx_graph:  networkx graph whose nodes carry the node_type_attribute.
    n4j_graph: object with a run(query) method (the Neo4J session).

    Raises KeyError if a node has no node_type_attribute.
    """
    log('INFO', "Creating node indices in Neo4J")

    # collect the distinct types of all nodes; nodes(data=True) yields
    # (node, attribute dict) pairs on networkx 1.x as well as 2.x
    ismi_types = {attrs[node_type_attribute]
                  for _, attrs in nx_graph.nodes(data=True)}

    # create one constraint per type
    for ismi_type in ismi_types:
        # the type is used as node label, so it has to be written into the
        # query text itself
        query = "CREATE CONSTRAINT ON (n:%s) ASSERT n.ismi_id IS UNIQUE"%ismi_type
        n4j_graph.run(query)
52
53
def copyNodes(nx_graph, n4j_graph):
    """Copy all nodes from nx_graph to n4j_graph.

    Every node is created with its type (node_type_attribute) as label and
    all of its attributes as properties. The attribute dict of each copied
    node is remembered in the module-level n4j_nodes under its ismi_id so
    that copyRelations can look it up later.

    nx_graph:  networkx graph; every node needs node_type_attribute and 'ismi_id'.
    n4j_graph: object with a run(query, parameters=...) method (the Neo4J session).

    Raises KeyError if a node lacks the type attribute or 'ismi_id'.
    """
    log('INFO', "Copying nodes to Neo4J")
    cnt = 0
    # nodes(data=True) yields (node, attribute dict) pairs on networkx 1.x
    # as well as 2.x
    for _, attrs in nx_graph.nodes(data=True):
        # get entity type
        ntype = attrs[node_type_attribute]
        # get ismi_id
        ismi_id = attrs['ismi_id']
        # property map "key: {key}, ..." - the parameter names are the attribute keys
        att_qs = ", ".join("%s: {%s}"%(k, k) for k in attrs)
        # query to create node with attributes
        cypher = "CREATE (n:%s {%s})"%(ntype, att_qs)
        # run query, attribute values are passed as parameters
        n4j_graph.run(cypher, parameters=attrs)
        # save node attributes for copyRelations
        n4j_nodes[ismi_id] = attrs

        cnt += 1
        if cnt % 100 == 0:
            # progress message every 100 nodes
            log('INFO', "%s nodes"%cnt)

    log('INFO', "%s nodes copied"%cnt)
78
79
def copyRelations(nx_graph, n4j_graph):
    """Copy all relations from nx_graph to n4j_graph.

    For every edge the source and target nodes are looked up in the
    module-level n4j_nodes (filled by copyNodes) and a relation of the
    edge's type (rel_type_attribute) carrying all edge attributes is created
    between the matching Neo4J nodes.

    Copying stops at the first edge whose source or target node is not in
    n4j_nodes.

    nx_graph:  networkx graph; edges need rel_type_attribute, nodes 'ismi_id'.
    n4j_graph: object with a run(query, parameters) method (the Neo4J session).

    Raises KeyError if an edge lacks the type attribute or an end node lacks
    'ismi_id'.
    """
    print("Copying relations to Neo4J")
    cnt = 0
    # attribute dicts of all nodes by node key (works with networkx 1.x and 2.x)
    node_attrs = dict(nx_graph.nodes(data=True))
    # edges(data=True) yields every edge - including each parallel edge of a
    # multigraph - together with its own attribute dict
    for nx_src, nx_tar, attrs in nx_graph.edges(data=True):
        # get relation type
        rtype = attrs[rel_type_attribute]
        # get ismi_id of source and target nodes
        src_id = node_attrs[nx_src]['ismi_id']
        tar_id = node_attrs[nx_tar]['ismi_id']
        # get Neo4J nodes
        src = n4j_nodes.get(src_id, None)
        if src is None:
            print("ERROR: src node %s missing!"%src_id)
            break

        tar = n4j_nodes.get(tar_id, None)
        if tar is None:
            print("ERROR: tar node %s missing!"%tar_id)
            break

        src_type = src[node_type_attribute]
        tar_type = tar[node_type_attribute]

        att_qs = ", ".join("%s: {%s}"%(k, k) for k in attrs)
        # the ids are passed as parameters (not written into the query text)
        # so that string ids match too; work on a copy to leave the edge
        # attributes of the graph untouched
        params = dict(attrs)
        params['_src_ismi_id'] = src_id
        params['_tar_ismi_id'] = tar_id
        # query to create a relation with attributes
        cypher = ("MATCH (n1:%s),(n2:%s)"
                  " WHERE n1.ismi_id = {_src_ismi_id} AND n2.ismi_id = {_tar_ismi_id}"
                  " CREATE (n1)-[r:%s {%s}]->(n2)")%(src_type, tar_type, rtype, att_qs)
        # run query
        n4j_graph.run(cypher, params)

        cnt += 1
        if cnt % 100 == 0:
            # progress message every 100 relations
            log('INFO', "%s relations"%cnt)

    log('INFO', "%s relations copied"%cnt)
## main

print("Copy graph from networkx to Neo4J")

# read commandline parameters: an optional first argument replaces the input file name
if len(sys.argv) > 1:
    input_fn = sys.argv[1]

# read networkx graph from pickle
# NOTE(review): unpickling can execute arbitrary code - only load trusted files
print("Reading graph from %s"%input_fn)
nx_graph = nx.read_gpickle(input_fn)
print("Graph info: %s"%nx.info(nx_graph))

# open neo4j graph db
# NOTE(review): user name and password are hard-coded here
print("Opening Neo4J db at %s"%neo4jBaseURL)
n4j_driver = GraphDatabase.driver(neo4jBaseURL, auth=basic_auth("neo4j", "neo5j"))
try:
    # get session to pass to functions
    n4j_graph = n4j_driver.session()
    try:
        createIndices(nx_graph, n4j_graph)

        copyNodes(nx_graph, n4j_graph)

        copyRelations(nx_graph, n4j_graph)
    finally:
        # always close the session, also when copying failed
        n4j_graph.close()
finally:
    # release the connections held by the driver
    n4j_driver.close()

print("Done.")