Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/model2neo4j_restclient.py @ 60:1b520696760a default tip
new ismixml_splitter.py that splits openmind-data.xml into separate files per entity type.
| author | casties |
|---|---|
| date | Tue, 19 Jun 2018 21:46:49 +0200 |
| parents | 6625019a0c96 |
| children |
| rev | line source |
|---|---|
| 23 | 1 import networkx as nx |
|
29
1a1877812757
include normalized attributes in neo4j with prefix "_n_"
casties
parents:
28
diff
changeset
|
2 from neo4jrestclient.client import GraphDatabase |
| 23 | 3 import sys |
| 4 | |
## configure behaviour

# networkx graph file (gpickle) to read; the first commandline argument,
# if given, replaces this default
input_fn = 'ismi_graph.gpickle'

# label added to all nodes
project_label = '_ismi'

# neo4j base URL
neo4jBaseURL = "http://localhost:7474/db/data/"

# name of type attribute (attribute that holds the type of a node / relation)
node_type_attribute = '_type'
rel_type_attribute = '_type'


## setup

# Neo4J nodes created by copyNodes(), keyed by the node's ismi_id
n4j_nodes = {}
# NOTE(review): not read or written by any code visible in this section
n4j_relations = {}
| 25 | |
| 26 | |
def copyNodes(nx_graph, n4j_graph):
    """Copy all nodes from nx_graph to n4j_graph.

    Every networkx node becomes one Neo4J node that carries the same
    attributes. The new node is labelled with ``project_label`` and with the
    value of its ``node_type_attribute`` attribute, and is remembered in the
    module-level ``n4j_nodes`` dict under its ``ismi_id`` so that
    copyRelations() can find it again.

    Args:
        nx_graph: networkx graph; every node needs an ``ismi_id`` attribute
            and an attribute named by ``node_type_attribute``.
        n4j_graph: Neo4J database object providing ``nodes.create()``.

    Raises:
        KeyError: if a node lacks the type attribute or ``ismi_id``
            (raised before anything is created for that node).
    """

    print("Copying nodes to Neo4J")
    cnt = 0
    # nodes(data=True) yields (node_id, attrs) pairs on networkx 1.x and on
    # 2.x+, whereas nx.nodes_iter() and Graph.node only exist in old releases.
    for _node_id, attrs in nx_graph.nodes(data=True):
        ntype = attrs[node_type_attribute]
        ismi_id = attrs['ismi_id']
        # create node with attributes
        n4j_node = n4j_graph.nodes.create(**attrs)
        # add labels
        n4j_node.labels.add([project_label, ntype])
        # save reference
        n4j_nodes[ismi_id] = n4j_node

        cnt += 1
        if cnt % 100 == 0:
            # progress report every 100 nodes
            print(" %s nodes"%cnt)
| 23 | 46 |
| 47 | |
def copyRelations(nx_graph, n4j_graph):
    """Copy all relations from nx_graph to n4j_graph.

    Every networkx edge becomes one Neo4J relationship between the Neo4J
    nodes previously stored in the module-level ``n4j_nodes`` dict (see
    copyNodes()). The relationship type is the value of the edge's
    ``rel_type_attribute`` attribute; all edge attributes are stored as
    relationship properties.

    Copying stops at the first edge whose source or target node is not
    found in ``n4j_nodes``; an error message is printed and the remaining
    edges are not copied.

    Args:
        nx_graph: networkx graph; every edge needs an attribute named by
            ``rel_type_attribute`` and every node an ``ismi_id`` attribute.
        n4j_graph: Neo4J database object providing ``relationships.create()``.

    Raises:
        KeyError: if an edge lacks the type attribute or one of its nodes
            lacks ``ismi_id``.
    """

    print("Copying relations to Neo4J")
    cnt = 0
    # node -> attribute dict lookup that works on networkx 1.x and 2.x+
    # (Graph.node only exists in old releases)
    node_attrs = dict(nx_graph.nodes(data=True))
    # edges(data=True) yields every edge -- parallel edges of a multigraph
    # included -- together with its OWN attribute dict. Looking up key 0
    # for each (src, tar) pair instead would copy the first edge's type and
    # attributes onto all parallel edges.
    for nx_src, nx_tar, attrs in nx_graph.edges(data=True):
        etype = attrs[rel_type_attribute]
        # get ismi_id of source and target nodes
        src_id = node_attrs[nx_src]['ismi_id']
        tar_id = node_attrs[nx_tar]['ismi_id']
        # get Neo4J nodes
        src = n4j_nodes.get(src_id, None)
        if src is None:
            print("ERROR: src node %s missing!"%src_id)
            break

        tar = n4j_nodes.get(tar_id, None)
        if tar is None:
            print("ERROR: tar node %s missing!"%tar_id)
            break

        # create Neo4J relation
        n4j_rel = n4j_graph.relationships.create(src, etype, tar)
        # add attributes
        n4j_rel.properties = attrs

        cnt += 1
        if cnt % 100 == 0:
            # progress report every 100 relations
            print(" %s relations"%cnt)
| 23 | 80 |
| 81 | |
| 82 ## main | |
| 83 | |
# Script body: load the pickled networkx graph and replicate it into Neo4J.
print("Copy graph from networkx to Neo4J")

# an optional first commandline argument replaces the default input file
input_fn = sys.argv[1] if len(sys.argv) > 1 else input_fn

# load the networkx graph from its pickle file
# NOTE(review): unpickling can run arbitrary code -- presumably the input
# file is always produced locally; confirm it is never untrusted.
print("Reading graph from %s" % input_fn)
nx_graph = nx.read_gpickle(input_fn)
print("Graph info: %s" % nx.info(nx_graph))

# connect to the Neo4J REST endpoint
print("Opening Neo4J db at %s" % neo4jBaseURL)
n4j_graph = GraphDatabase(neo4jBaseURL, username="neo4j", password="neo5j")

# nodes first: copyRelations() looks up the Neo4J nodes stored by copyNodes()
copyNodes(nx_graph, n4j_graph)
copyRelations(nx_graph, n4j_graph)

print("Done.")
