Mercurial > hg > drupalISMI
changeset 23:45a823b5bf33
updated ismi2model importer and model2neo4j exporter.
author | casties |
---|---|
date | Wed, 23 Sep 2015 19:18:59 +0200 |
parents | 3585573999b6 |
children | 97f2da68fb5f |
files | importFromOpenMind/importer/ismi2model.py importFromOpenMind/importer/model2neo4j.py |
diffstat | 2 files changed, 138 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/importFromOpenMind/importer/ismi2model.py Wed Sep 23 11:54:22 2015 +0200 +++ b/importFromOpenMind/importer/ismi2model.py Wed Sep 23 19:18:59 2015 +0200 @@ -1,6 +1,7 @@ import urllib.request import json import networkx +import sys ## configure behaviour @@ -110,7 +111,7 @@ # date attribute key = att['name'] val = att['ov'] - #print("don't know what to do with date: %s=%s"%(key,val)) + print("don't know what to do with date: %s=%s"%(key,val)) elif ct == 'old': # ignore attribute @@ -230,7 +231,7 @@ nx_graph.node[tar_id][att_name] = nx_graph.node[src_id]['label'] # create relation with type - nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name)) + nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id) nx_relations[rel_id] = nx_rel @@ -289,15 +290,22 @@ relationsFromRels(ismi_relations, nx_nodes) -# In[120]: +## main + +print("Copy graph from OpenMind to networkx pickle") +# parse command line parameters +if len(sys.argv) > 1: + output_fn = sys.argv[1] + +# import everything +print("Reading graph from OpenMind at %s"%baseURL) importAllEnts(ismi_defs) #importAllEnts(['TEXT']) print("Graph info: %s"%networkx.info(nx_graph)) -print("Number of nodes: %s"%networkx.number_of_nodes(nx_graph)) -print("Number of edges: %s"%networkx.number_of_edges(nx_graph)) #print(" nodes:%s"%repr(nx_graph.nodes(data=True))) + # export pickle networkx.write_gpickle(nx_graph, output_fn) -print("Wrote file %s"%output_fn) +print("Wrote networkx pickle file %s"%output_fn)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/importFromOpenMind/importer/model2neo4j.py Wed Sep 23 19:18:59 2015 +0200 @@ -0,0 +1,124 @@ +import networkx as nx +from neo4jrestclient.client import GraphDatabase, Node +import sys + +## configure behaviour + +# metworkx graph file +input_fn = 'ismi_graph.gpickle' + +# label added to all nodes +project_label = '_ismi4' + +# neo4j base URL +neo4jBaseURL = "http://localhost:7474/db/data/" + + +## setup + +n4j_nodes = {} +n4j_relations = {} + + +def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False): + # these are too embarrassing... + if 'FLORUIT' in name: + name = name.replace('FLORUIT', 'FLOURISH') + + elif 'floruit' in name: + name = name.replace('floruit', 'flourish') + + if is_src_rel: + #name = name + '>' + pass + + if is_tar_rel: + name = '<' + name + + if att_from_rel: + # clean up relations as attribute names + name = name.replace('is_', '') + name = name.replace('has_', '') + name = name.replace('was_', '') + name = name.replace('_of', '') + + return name + + +def copyNodes(nx_graph, n4j_graph): + """copy all nodes from nx_graph to n4j_graph""" + + print("Copying nodes to Neo4J") + cnt = 0 + for node_id in nx.nodes_iter(nx_graph): + attrs = nx_graph.node[node_id] + type = attrs['type'] + ismi_id = attrs['ismi_id'] + # create node with attributes + n4j_node = n4j_graph.nodes.create(**attrs) + # add labels + n4j_node.labels.add([project_label, type]) + # save reference + n4j_nodes[ismi_id] = n4j_node + + cnt += 1 + if cnt % 100 == 0: + print(" %s"%cnt) + + +def copyRelations(nx_graph, n4j_graph): + """copy all relations from nx_graph to n4j_graph""" + + print("Copying relations to Neo4J") + cnt = 0 + for nx_edge in nx.edges_iter(nx_graph): + (nx_src, nx_tar) = nx_edge + # get attributes of edge + attrs = nx_graph.edge[nx_src][nx_tar][0] + type = attrs['type'] + # get ismi_id of source and target nodes + src_id = nx_graph.node[nx_src]['ismi_id'] + tar_id = nx_graph.node[nx_tar]['ismi_id'] + # get Neo4J nodes + src = n4j_nodes.get(src_id, None) + if src is None: + print("ERROR: src node %s missing!"%src_id) + break + + tar = n4j_nodes.get(tar_id, None) + if tar is None: + print("ERROR: tar node %s missing!"%tar_id) + break + + # create Neo4J relation + n4j_rel = n4j_graph.relationships.create(src, type, tar) + # add attributes + n4j_rel.properties = attrs + + cnt += 1 + if cnt % 100 == 0: + print(" %s"%cnt) + + +## main + +print("Copy graph from networkx to Neo4J") + +# read commandline parameters +if len(sys.argv) > 1: + input_fn = sys.argv[1] + +# read networkx graph from pickle +print("Reading graph from %s"%input_fn) +nx_graph = nx.read_gpickle(input_fn) +print("Graph info: %s"%nx.info(nx_graph)) + +# open neo4j graph db +print("Opening Neo4J db at %s"%neo4jBaseURL) +n4j_graph = GraphDatabase(neo4jBaseURL, username="neo4j", password="neo5j") + +copyNodes(nx_graph, n4j_graph) + +copyRelations(nx_graph, n4j_graph) + +print("Done.")