view importFromOpenMind/importer/model2neo4j.py @ 29:1a1877812757

include normalized attributes in neo4j with prefix "_n_"
author casties
date Thu, 10 Dec 2015 12:11:25 -0500
parents a9bfd49355f8
children 48bbba800c03
line wrap: on
line source

import networkx as nx
from neo4jrestclient.client import GraphDatabase
import sys

## configure behaviour

# networkx graph file (gpickle format) that is read by the main section;
# the first commandline argument, when given, replaces this default
input_fn = 'ismi_graph.gpickle'

# label added to all nodes (in addition to the label taken from the node's 'type')
project_label = '_ismi'

# neo4j base URL (REST endpoint handed to GraphDatabase)
neo4jBaseURL = "http://localhost:7474/db/data/"


## setup

# created Neo4J nodes keyed by ismi_id; filled by copyNodes, read by copyRelations
n4j_nodes = {}
# NOTE(review): not referenced anywhere in the visible part of this file
n4j_relations = {}


def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return a cleaned-up version of a relation or attribute name.

    name         -- the original name
    is_src_rel   -- accepted for symmetry with is_tar_rel; currently leaves
                    the name unchanged (a '>' suffix was once considered)
    is_tar_rel   -- if true, the name is prefixed with '<'
    att_from_rel -- if true, every occurrence of 'is_', 'has_', 'was_' and
                    '_of' is removed (relation names used as attribute names)

    Independently of the flags, 'FLORUIT' is replaced by 'FLOURISH' and
    'floruit' by 'flourish'.
    """
    # these are too embarrassing...
    # Both spellings are handled independently so that a name containing the
    # upper-case AND the lower-case form is fixed completely.
    name = name.replace('FLORUIT', 'FLOURISH').replace('floruit', 'flourish')

    if is_tar_rel:
        name = '<' + name

    if att_from_rel:
        # clean up relations as attribute names (order of removal matters
        # only for overlapping fragments and is kept as it always was)
        for fragment in ('is_', 'has_', 'was_', '_of'):
            name = name.replace(fragment, '')

    return name


def copyNodes(nx_graph, n4j_graph):
    """Create one Neo4J node in n4j_graph for every node of nx_graph.

    All attributes of the networkx node become properties of the Neo4J node.
    The node is labelled with project_label and with the value of its 'type'
    attribute, and is remembered in the module-level n4j_nodes dict under its
    'ismi_id' attribute. Progress is printed every 100 nodes.
    """
    print("Copying nodes to Neo4J")
    for count, nx_id in enumerate(nx.nodes_iter(nx_graph), 1):
        props = nx_graph.node[nx_id]
        # read both keys up front, before anything is created in Neo4J
        node_type, ismi_id = props['type'], props['ismi_id']
        # new Neo4J node carrying all attributes, then its labels
        created = n4j_graph.nodes.create(**props)
        created.labels.add([project_label, node_type])
        # keep a handle on the new node for later lookup by ismi_id
        n4j_nodes[ismi_id] = created

        if count % 100 == 0:
            print("  %s nodes"%count)


def copyRelations(nx_graph, n4j_graph):
    """Copy all relations (edges) from nx_graph to n4j_graph.

    Every networkx edge becomes a Neo4J relationship whose type is the edge's
    'type' attribute and whose properties are the edge's attributes. Source
    and target are looked up by 'ismi_id' in the module-level n4j_nodes dict,
    so copyNodes has to run before this function.

    Copying stops at the first edge whose source or target node is not found
    in n4j_nodes; an error message is printed in that case. Progress is
    printed every 100 relations.
    """
    print("Copying relations to Neo4J")
    cnt = 0
    # data=True hands out the attribute dict of each individual edge. Looking
    # the attributes up as edge[src][tar][0] would give every parallel edge
    # between the same two nodes the attributes of the edge with key 0 (and
    # fail if that key does not exist).
    for nx_src, nx_tar, attrs in nx_graph.edges_iter(data=True):
        ntype = attrs['type']
        # get ismi_id of source and target nodes
        src_id = nx_graph.node[nx_src]['ismi_id']
        tar_id = nx_graph.node[nx_tar]['ismi_id']
        # get Neo4J nodes
        src = n4j_nodes.get(src_id)
        if src is None:
            print("ERROR: src node %s missing!"%src_id)
            break

        tar = n4j_nodes.get(tar_id)
        if tar is None:
            print("ERROR: tar node %s missing!"%tar_id)
            break

        # create Neo4J relation
        n4j_rel = n4j_graph.relationships.create(src, ntype, tar)
        # add attributes
        n4j_rel.properties = attrs

        cnt += 1
        if cnt % 100 == 0:
            print("  %s relations"%cnt)

## main

print("Copy graph from networkx to Neo4J")

# an optional first commandline argument overrides the default input file
if sys.argv[1:]:
    input_fn = sys.argv[1]

# load the pickled networkx graph and show a short summary
print("Reading graph from %s"%input_fn)
nx_graph = nx.read_gpickle(input_fn)
print("Graph info: %s"%nx.info(nx_graph))

# connect to the Neo4J REST endpoint
print("Opening Neo4J db at %s"%neo4jBaseURL)
n4j_graph = GraphDatabase(neo4jBaseURL, username="neo4j", password="neo5j")

# nodes go first: copyRelations resolves its endpoints through the
# n4j_nodes dict that copyNodes fills
copyNodes(nx_graph, n4j_graph)
copyRelations(nx_graph, n4j_graph)

print("Done.")