view importFromOpenMind/importer/compare_models.py @ 48:6625019a0c96

old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author casties
date Tue, 07 Feb 2017 21:06:13 +0100
parents 378dcb66a27f
children
line wrap: on
line source

import networkx as nx
import sys
import csv
from sqlalchemy.sql.expression import false

## configure behaviour

# networkx graph files (gpickle) to compare.
# These defaults are replaced when two file names are given on the command line.
input1_fn = 'ismi_graph1.gpickle'
input2_fn = 'ismi_graph2.gpickle'

# name of type attribute
# NOTE(review): neither name is referenced in the visible part of this file.
node_type_attribute = '_type'
rel_type_attribute = '_type'

# also compare attributes
# check_attributes: compare_nodes() calls compare_attributes() for nodes present in both graphs.
# check_attribute_content: compare_attributes() also compares the attribute values,
# not only the attribute names.
check_attributes = True
check_attribute_content = False

# active log levels for logging; log() prints only messages whose level is in this set.
# The commented-out alternatives below are less verbose presets.
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}

def log(level, message):
    """Print message prefixed by its level, if that level is active.

    level: log level name; only levels contained in logLevels are printed.
    message: text to print after the level prefix.
    """
    if level not in logLevels:
        # level is switched off, stay silent
        return

    line = "%s: %s" % (level, message)
    print(line)


def compare_attributes(attrs1, attrs2):
    """Compare two attribute dicts and report whether they match.

    Keys starting with '_n_' are ignored on both sides. Every other key has
    to be present in both dicts. Values are only compared if the module-level
    flag check_attribute_content is set; two string values are compared with
    all carriage returns removed.

    attrs1, attrs2: dicts mapping attribute names to values.
    Returns True if the attributes match, False at the first difference found
    (the difference is logged at DEBUG level).
    """
    for a in attrs1:
        if a.startswith('_n_'):
            continue

        if a not in attrs2:
            log('DEBUG', "attribute %s missing in attrs2" % a)
            return False

        if check_attribute_content:
            val1 = attrs1[a]
            val2 = attrs2[a]
            # Only normalise line endings when both sides are strings: val2
            # need not be a string just because val1 is one, and calling
            # replace() on e.g. None or a number would raise AttributeError.
            if isinstance(val1, str) and isinstance(val2, str):
                val1 = val1.replace('\r', '')
                val2 = val2.replace('\r', '')

            if val1 != val2:
                log('DEBUG', "attribute %s different in attrs2: \n%s\n vs \n%s\n" % (a, repr(val1), repr(val2)))
                return False

    # values were already compared above, so only look for extra keys in attrs2
    for a in attrs2:
        if a.startswith('_n_'):
            continue

        if a not in attrs1:
            log('DEBUG', "attribute %s missing in attrs1" % a)
            return False

    return True

def compare_nodes(nx_graph1, nx_graph2):
    """Compare the nodes of two graphs and log the differences.

    Reports, at WARNING level with the node list at DEBUG level:
    nodes of graph 1 that are missing in graph 2, nodes of graph 2 that are
    missing in graph 1 and, if check_attributes is set, nodes present in
    both graphs for which compare_attributes() finds a difference.

    nx_graph1, nx_graph2: the networkx graphs to compare.
    Returns None; the result is only reported through log().
    """
    log('INFO', "Compare graph nodes: %s vs %s" % (repr(nx_graph1), repr(nx_graph2)))
    missing_nodes2 = []
    attribute_differences = []

    # nodes(data=True) yields (node, attribute dict) pairs in networkx 1.x as
    # well as in 2.x and later, whereas nx.nodes_iter() and Graph.node were
    # removed from newer releases.
    attrs_by_node2 = dict(nx_graph2.nodes(data=True))

    # iterate all nodes in graph 1
    for n, attrs1 in nx_graph1.nodes(data=True):
        if n not in attrs_by_node2:
            missing_nodes2.append(n)

        elif check_attributes and not compare_attributes(attrs1, attrs_by_node2[n]):
            attribute_differences.append(n)

    if missing_nodes2:
        log('WARNING', "%s nodes missing in graph 2" % len(missing_nodes2))
        log('DEBUG', "nodes: %s" % missing_nodes2)

    # iterate all nodes in graph 2 (iterating a graph yields its nodes)
    missing_nodes1 = [n for n in nx_graph2 if not nx_graph1.has_node(n)]

    if missing_nodes1:
        log('WARNING', "%s nodes missing in graph 1" % len(missing_nodes1))
        log('DEBUG', "nodes: %s" % (missing_nodes1))

    if attribute_differences:
        log('WARNING', "%s nodes with attribute differences" % len(attribute_differences))
        log('DEBUG', "nodes: %s" % (attribute_differences))

def compare_relations(nx_graph1, nx_graph2):
    """Compare the relations (edges) of two graphs and log the differences.

    A relation counts as present in the other graph if has_edge() finds an
    edge between the same source and target node; edge attributes are not
    compared. Missing relations are reported at WARNING level, the list of
    (source, target) pairs at DEBUG level.

    nx_graph1, nx_graph2: the networkx graphs to compare.
    Returns None; the result is only reported through log().
    """
    log('INFO', "Compare graph relations: %s vs %s" % (repr(nx_graph1), repr(nx_graph2)))

    # edges() yields (source, target) pairs in networkx 1.x as well as in 2.x
    # and later, whereas nx.edges_iter() was removed in networkx 2.0.

    # iterate all edges in graph 1
    missing_rels2 = [(s, t) for (s, t) in nx_graph1.edges()
                     if not nx_graph2.has_edge(s, t)]

    if missing_rels2:
        log('WARNING', "%s relations missing in graph 2" % len(missing_rels2))
        log('DEBUG', "relations: %s" % missing_rels2)

    # iterate all edges in graph 2
    missing_rels1 = [(s, t) for (s, t) in nx_graph2.edges()
                     if not nx_graph1.has_edge(s, t)]

    if missing_rels1:
        log('WARNING', "%s relations missing in graph 1" % len(missing_rels1))
        log('DEBUG', "relations: %s" % (missing_rels1))

## main

# This script compares two graphs and does not modify them.
print("Compare networkx graphs")

# read commandline parameters: two file names replace the default inputs
if len(sys.argv) > 2:
    input1_fn = sys.argv[1]
    input2_fn = sys.argv[2]
elif len(sys.argv) == 2:
    # a single file name cannot be compared with anything; say so instead of
    # silently falling back to the defaults
    log('WARNING', "expected two graph files, ignoring single argument %s" % sys.argv[1])

# read networkx graph from pickle
# NOTE: gpickle files are unpickled on load, so only read files from a trusted source.
print("Reading graph 1 from %s" % input1_fn)
nx_graph1 = nx.read_gpickle(input1_fn)
print("Graph 1 info: %s" % nx.info(nx_graph1))

print("Reading graph 2 from %s" % input2_fn)
nx_graph2 = nx.read_gpickle(input2_fn)
print("Graph 2 info: %s" % nx.info(nx_graph2))

# operate: differences are reported through log()
compare_nodes(nx_graph1, nx_graph2)
compare_relations(nx_graph1, nx_graph2)


print("Done.")