view importFromOpenMind/importer/compare_models.py @ 47:378dcb66a27f

new compare_models comparing the existence of nodes and relations in two graphs. fixed bugs in ismixml2model.
author casties
date Mon, 06 Feb 2017 18:44:43 +0100
parents
children 6625019a0c96
line wrap: on
line source

import networkx as nx
import sys
import csv

## configuration

# default networkx graph files (gpickle) that are compared with each other
input1_fn = "ismi_graph1.gpickle"
input2_fn = "ismi_graph2.gpickle"

# attribute key that holds the type of a node / of a relation
node_type_attribute = rel_type_attribute = "_type"

# log levels that log() actually prints; messages of other levels are dropped
logLevels = {
    "DEBUG",
    "INFO",
    "WARNING",
    "ERROR",
    "SYSMSG",
}
# less verbose alternatives:
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}

def log(level, message):
    """Print ``message`` prefixed by ``level`` if that level is active.

    A level is active when it is contained in the module-level ``logLevels``
    set; messages of any other level are silently dropped.
    """
    if level not in logLevels:
        return

    line = "%s: %s" % (level, message)
    print(line)


def invertRelations(nx_graph):
    """Add an inverse relation for every relation in the graph.

    For each existing edge (source -> target) a new edge (target -> source)
    is added. The new edge carries a copy of the original edge's attributes
    with two changes:

    * the relation type (stored under ``rel_type_attribute``) is replaced by
      ``fixName(rel_type, is_tar_rel=True)``
    * ``ismi_id`` is replaced by its negated value

    The graph is modified in place and nothing is returned. Progress is
    printed every 100 relations.

    Raises KeyError if an edge lacks the type attribute or ``ismi_id``.

    NOTE(review): ``fixName`` is neither defined nor imported in this module;
    it has to be provided before this function can be called.
    NOTE(review): presumably meant for directed (multi)graphs -- on an
    undirected graph the "inverse" edge is the same edge. Verify with caller.
    """
    print("Adding inverse relations.")
    # snapshot the edges (with their attribute dicts) because we add edges
    # to the graph inside the loop; data=True yields one entry per edge, so
    # parallel edges each get their own inverse
    edges = list(nx_graph.edges(data=True))

    cnt = 0
    for (nx_src, nx_tar, rel_attrs) in edges:
        # work on a copy so the original relation keeps its attributes
        invrel_atts = dict(rel_attrs)
        rel_type = invrel_atts[rel_type_attribute]
        rel_id = invrel_atts['ismi_id']
        # create new relation
        invrel_atts[rel_type_attribute] = fixName(rel_type, is_tar_rel=True)
        invrel_atts['ismi_id'] = -rel_id
        # attr_dict is the networkx 1.x keyword, matching the 1.x calls
        # (nodes_iter/edges_iter) used elsewhere in this file
        nx_graph.add_edge(nx_tar, nx_src, attr_dict=invrel_atts)

        cnt += 1
        if cnt % 100 == 0:
            print("  %s relations"%cnt)
        
    
def compare_nodes(nx_graph1, nx_graph2):
    """Report the nodes that exist in only one of the two graphs.

    Nodes are looked up in the other graph with ``has_node``. For each
    direction with at least one missing node, the count is logged as WARNING
    and the list of missing nodes as DEBUG. Nothing is returned.
    """
    log('INFO', "Compare graph nodes: %r vs %r" % (nx_graph1, nx_graph2))

    # nodes of graph 1 that graph 2 does not have
    absent_in_2 = [node for node in nx.nodes_iter(nx_graph1)
                   if not nx_graph2.has_node(node)]
    if absent_in_2:
        log('WARNING', "%s nodes missing in graph 2" % len(absent_in_2))
        log('DEBUG', "nodes: %s" % (absent_in_2,))

    # nodes of graph 2 that graph 1 does not have
    absent_in_1 = [node for node in nx.nodes_iter(nx_graph2)
                   if not nx_graph1.has_node(node)]
    if absent_in_1:
        log('WARNING', "%s nodes missing in graph 1" % len(absent_in_1))
        log('DEBUG', "nodes: %s" % (absent_in_1,))

    

def compare_relations(nx_graph1, nx_graph2):
    """Report the relations that exist in only one of the two graphs.

    A relation is identified by its (source, target) node pair and looked up
    in the other graph with ``has_edge``; edge attributes are not compared.
    For each direction with at least one missing relation, the count is
    logged as WARNING and the list of (source, target) pairs as DEBUG.
    Nothing is returned.
    """
    log('INFO', "Compare graph relations: %r vs %r" % (nx_graph1, nx_graph2))

    # relations of graph 1 that graph 2 does not have
    absent_in_2 = [(src, tar) for (src, tar) in nx.edges_iter(nx_graph1)
                   if not nx_graph2.has_edge(src, tar)]
    if absent_in_2:
        log('WARNING', "%s relations missing in graph 2" % len(absent_in_2))
        log('DEBUG', "relations: %s" % (absent_in_2,))

    # relations of graph 2 that graph 1 does not have
    absent_in_1 = [(src, tar) for (src, tar) in nx.edges_iter(nx_graph2)
                   if not nx_graph1.has_edge(src, tar)]
    if absent_in_1:
        log('WARNING', "%s relations missing in graph 1" % len(absent_in_1))
        log('DEBUG', "relations: %s" % (absent_in_1,))


## main script

print("Modify networkx graph")

# optional commandline parameters: <graph 1 file> <graph 2 file>
# (only used when both are given, otherwise the configured names are kept)
if len(sys.argv) >= 3:
    input1_fn, input2_fn = sys.argv[1:3]

# load the first graph from its pickle file and show a summary
# NOTE(review): gpickle files are pickles -- only read files from a trusted source
print("Reading graph 1 from %s" % (input1_fn,))
nx_graph1 = nx.read_gpickle(input1_fn)
graph1_info = nx.info(nx_graph1)
print("Graph 1 info: %s" % (graph1_info,))

# load the second graph the same way
print("Reading graph 2 from %s" % (input2_fn,))
nx_graph2 = nx.read_gpickle(input2_fn)
graph2_info = nx.info(nx_graph2)
print("Graph 2 info: %s" % (graph2_info,))

# run the comparisons: first nodes, then relations
compare_nodes(nx_graph1, nx_graph2)
compare_relations(nx_graph1, nx_graph2)


print("Done.")