Mercurial > hg > drupalISMI
view importFromOpenMind/importer/model2neo4j_import.py @ 52:763b5d29fa5e
add int type to ismi_id attribute in neo4j csv import file.
author | casties |
---|---|
date | Fri, 17 Mar 2017 17:51:18 +0100 |
parents | 6625019a0c96 |
children |
line wrap: on
line source
"""Export a pickled networkx graph to CSV files for the Neo4j import tool.

Usage: model2neo4j_import.py [input.gpickle [nodes.csv relations.csv]]

NOTE(review): this script uses the networkx 1.x API (nodes_iter, edges_iter,
G.node, read_gpickle) -- confirm the installed networkx version before
upgrading.
"""
import networkx as nx
import csv
import sys

## configure behaviour

# networkx graph file
input_fn = 'ismi_graph.gpickle'

# neo4j import file
node_import_fn = "neo4j-nodes.csv"
relation_import_fn = "neo4j-relations.csv"

# name of type attribute
node_type_attribute = '_type'
rel_type_attribute = '_type'

## setup

# ismi_ids of all nodes written so far (filled by writeNodes)
n4j_nodes = {}
n4j_relations = {}

# active log levels for logging
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}


def log(level, message):
    """Print message prefixed with level if level is in logLevels."""
    if level in logLevels:
        print("%s: %s"%(level, message))


def _typedFieldName(att):
    """Return the CSV column name for attribute att (ismi_id gets an :int type suffix)."""
    if att == 'ismi_id':
        return 'ismi_id:int'
    return att


def getNodeFields(nx_graph):
    """Return a list of the attribute names used by any node of nx_graph.

    The 'ismi_id' attribute is listed as 'ismi_id:int'.
    The order of the list is not defined (it is built from a set).
    """
    log('INFO', "Creating node field list")
    ismi_fields = set()
    # collect attribute names of all nodes
    for node_id in nx.nodes_iter(nx_graph):
        attrs = nx_graph.node[node_id]
        for att in attrs.keys():
            ismi_fields.add(_typedFieldName(att))

    return list(ismi_fields)


def getRelationFields(nx_graph):
    """Return a list of the attribute names used by any edge of nx_graph.

    The 'ismi_id' attribute is listed as 'ismi_id:int'.
    The order of the list is not defined (it is built from a set).
    """
    log('INFO', "Creating relation field list")
    ismi_fields = set()
    # data=True yields the attribute dict of every single edge, so parallel
    # edges of a multigraph each contribute their own attribute names
    for (nx_src, nx_tar, attrs) in nx_graph.edges_iter(data=True):
        for att in attrs.keys():
            ismi_fields.add(_typedFieldName(att))

    return list(ismi_fields)


def writeNodes(nx_graph, node_writer):
    """Write all nodes from nx_graph to node_writer.

    node_writer must provide writerow(dict), e.g. a csv.DictWriter.
    Each row holds the node attributes plus ':LABEL' (value of the type
    attribute) and ':ID' (the ismi_id); 'ismi_id' is renamed to 'ismi_id:int'.
    The ismi_id of every written node is recorded in n4j_nodes.

    Raises KeyError if a node has no type attribute or no 'ismi_id'.
    """
    log('INFO', "Writing nodes to CSV file")
    cnt = 0
    for node_id in nx.nodes_iter(nx_graph):
        # work on a copy so the graph itself is not modified
        attrs = nx_graph.node[node_id].copy()
        # use entity type as label
        attrs[':LABEL'] = attrs[node_type_attribute]
        # use ismi_id as node id and re-key it to carry the type suffix
        ismi_id = attrs.pop('ismi_id')
        attrs[':ID'] = ismi_id
        attrs['ismi_id:int'] = ismi_id

        # write row
        node_writer.writerow(attrs)

        # save node id
        n4j_nodes[ismi_id] = ismi_id

        cnt += 1
        if cnt % 100 == 0:
            log('INFO', "%s nodes"%cnt)

    log('INFO', "%s nodes written"%cnt)


def writeRelations(nx_graph, relation_writer):
    """Write all relations from nx_graph to relation_writer.

    relation_writer must provide writerow(dict), e.g. a csv.DictWriter.
    Each row holds the edge attributes plus ':TYPE' (value of the type
    attribute), ':START_ID' and ':END_ID' (ismi_ids of the end nodes);
    'ismi_id' is renamed to 'ismi_id:int'.

    Writing stops at the first edge whose source or target ismi_id is not
    in n4j_nodes, so writeNodes has to run before this function.

    Raises KeyError if an edge has no type attribute or no 'ismi_id'.
    """
    log('INFO', "Writing relations to CSV file")
    cnt = 0
    # data=True yields every edge with its own attribute dict; looking up
    # edge key 0 per (src, tar) pair would repeat the first of several
    # parallel edges and fail if key 0 does not exist
    for (nx_src, nx_tar, edge_attrs) in nx_graph.edges_iter(data=True):
        # work on a copy so the graph itself is not modified
        attrs = edge_attrs.copy()
        # use relation type as type
        attrs[':TYPE'] = attrs[rel_type_attribute]
        # get ismi_id of source and target nodes
        src_id = nx_graph.node[nx_src]['ismi_id']
        tar_id = nx_graph.node[nx_tar]['ismi_id']
        # check that both end nodes have been written
        if n4j_nodes.get(src_id, None) is None:
            log("ERROR", "src node %s missing!"%src_id)
            break

        if n4j_nodes.get(tar_id, None) is None:
            log("ERROR", "tar node %s missing!"%tar_id)
            break

        # use as start and end id
        attrs[':START_ID'] = src_id
        attrs[':END_ID'] = tar_id

        # change ismi_id key to add type
        attrs['ismi_id:int'] = attrs.pop('ismi_id')

        # write row
        relation_writer.writerow(attrs)

        cnt += 1
        if cnt % 100 == 0:
            log('INFO', "%s relations"%cnt)

    log('INFO', "%s relations written"%cnt)


## main

# status messages use the 'SYSMSG' level, the only system level in logLevels
log('SYSMSG', "Copy graph from networkx to Neo4J")

# read commandline parameters
if len(sys.argv) > 1:
    input_fn = sys.argv[1]

# output file names are only taken when both are given
if len(sys.argv) > 3:
    node_import_fn = sys.argv[2]
    relation_import_fn = sys.argv[3]

# read networkx graph from pickle
# NOTE(review): unpickling can execute arbitrary code; only use trusted files
log('SYSMSG', "Reading graph from %s"%input_fn)
nx_graph = nx.read_gpickle(input_fn)
log('SYSMSG', "Graph info: %s"%nx.info(nx_graph))

# get field lists
node_fields = [':LABEL', ':ID'] + getNodeFields(nx_graph)
relation_fields = [':TYPE', ':START_ID', ':END_ID'] + getRelationFields(nx_graph)

# write neo4j CSV import file for nodes
log('SYSMSG', "Opening node import file at %s"%node_import_fn)
with open(node_import_fn, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=node_fields,
                            dialect=csv.excel, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()
    writeNodes(nx_graph, writer)

# write neo4j CSV import file for relations
log('SYSMSG', "Opening relation import file at %s"%relation_import_fn)
with open(relation_import_fn, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=relation_fields,
                            dialect=csv.excel, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()
    writeRelations(nx_graph, writer)

print("Done.")