Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/compare_models.py @ 48:6625019a0c96
old model2neo4j renamed to model2neo4j_restclient. new model2neo4j_client and model2neo4j_import. fixed ismixml2model and compare_models.
author | casties |
---|---|
date | Tue, 07 Feb 2017 21:06:13 +0100 |
parents | 378dcb66a27f |
children |
comparison
equal
deleted
inserted
replaced
47:378dcb66a27f | 48:6625019a0c96 |
---|---|
1 import networkx as nx | 1 import networkx as nx |
2 import sys | 2 import sys |
3 import csv | 3 import csv |
4 from sqlalchemy.sql.expression import false | |
4 | 5 |
5 ## configure behaviour | 6 ## configure behaviour |
6 | 7 |
7 # networkx graph files | 8 # networkx graph files |
8 input1_fn = 'ismi_graph1.gpickle' | 9 input1_fn = 'ismi_graph1.gpickle' |
10 | 11 |
11 # name of type attribute | 12 # name of type attribute |
12 node_type_attribute = '_type' | 13 node_type_attribute = '_type' |
13 rel_type_attribute = '_type' | 14 rel_type_attribute = '_type' |
14 | 15 |
16 # also compare attributes | |
17 check_attributes = True | |
18 check_attribute_content = False | |
19 | |
15 # active log levels for logging | 20 # active log levels for logging |
16 logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'} | 21 logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'} |
17 #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'} | 22 #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'} |
18 #logLevels = {'INFO', 'ERROR', 'SYSMSG'} | 23 #logLevels = {'INFO', 'ERROR', 'SYSMSG'} |
19 | 24 |
20 def log(level, message): | 25 def log(level, message): |
21 if level in logLevels: | 26 if level in logLevels: |
22 print("%s: %s"%(level, message)) | 27 print("%s: %s"%(level, message)) |
23 | 28 |
24 | 29 |
25 def invertRelations(nx_graph): | 30 def compare_attributes(attrs1, attrs2): |
26 """Add inverse relations to each relation""" | 31 """compare two sets of attributes""" |
27 | 32 for a in attrs1.keys(): |
28 print("Adding inverse relations.") | 33 if a.startswith('_n_'): |
29 # copy list of edges because we add edges in the loop | 34 continue |
30 edges = nx.edges(nx_graph)[:] | 35 |
31 # iterate list | 36 if a not in attrs2: |
32 cnt = 0 | 37 log('DEBUG', "attribute %s missing in attrs2"%a) |
33 for nx_edge in edges: | 38 return False |
34 (nx_src, nx_tar) = nx_edge | 39 |
35 # get attributes of edge | 40 elif check_attribute_content: |
36 rel_attrs = nx_graph.edge[nx_src][nx_tar][0][:] | 41 val1 = attrs1[a] |
37 rel_type = rel_attrs[rel_type_attribute] | 42 val2 = attrs2[a] |
38 rel_id = rel_attrs['ismi_id'] | 43 if isinstance(val1, str): |
39 # create new relation | 44 val1 = val1.replace('\r', '') |
40 rel_attrs[rel_type_attribute] = fixName(rel_type, is_tar_rel=True) | 45 val2 = val2.replace('\r', '') |
41 rel_attrs['ismi_id': -rel_id] | 46 |
42 nx_graph.add_edge(nx_tar, nx_src, attr_dict=invrel_atts) | 47 if val1 != val2: |
48 log('DEBUG', "attribute %s different in attrs2: \n%s\n vs \n%s\n"%(a, repr(val1), repr(val2))) | |
49 return False | |
50 | |
51 for a in attrs2.keys(): | |
52 if a.startswith('_n_'): | |
53 continue | |
54 | |
55 if a not in attrs1: | |
56 log('DEBUG', "attribute %s missing in attrs1"%a) | |
57 return False | |
58 | |
59 return True | |
43 | 60 |
44 cnt += 1 | |
45 if cnt % 100 == 0: | |
46 print(" %s relations"%cnt) | |
47 | |
48 | |
49 def compare_nodes(nx_graph1, nx_graph2): | 61 def compare_nodes(nx_graph1, nx_graph2): |
50 """compare nodes of two graphs""" | 62 """compare nodes of two graphs""" |
51 | 63 |
52 log('INFO', "Compare graph nodes: %s vs %s"%(repr(nx_graph1), repr(nx_graph2))) | 64 log('INFO', "Compare graph nodes: %s vs %s"%(repr(nx_graph1), repr(nx_graph2))) |
53 cnt = 0 | 65 cnt = 0 |
54 missing_nodes1 = [] | 66 missing_nodes1 = [] |
55 missing_nodes2 = [] | 67 missing_nodes2 = [] |
68 attribute_differences = [] | |
56 # iterate all nodes in graph 1 | 69 # iterate all nodes in graph 1 |
57 for n in nx.nodes_iter(nx_graph1): | 70 for n in nx.nodes_iter(nx_graph1): |
58 #attrs = nx_graph.node[n] | |
59 | |
60 if not nx_graph2.has_node(n): | 71 if not nx_graph2.has_node(n): |
61 missing_nodes2.append(n) | 72 missing_nodes2.append(n) |
73 | |
74 else: | |
75 attrs1 = nx_graph1.node[n] | |
76 attrs2 = nx_graph2.node[n] | |
77 if check_attributes and not compare_attributes(attrs1, attrs2): | |
78 attribute_differences.append(n) | |
62 | 79 |
63 if len(missing_nodes2) > 0: | 80 if len(missing_nodes2) > 0: |
64 log('WARNING', "%s nodes missing in graph 2"%len(missing_nodes2)) | 81 log('WARNING', "%s nodes missing in graph 2"%len(missing_nodes2)) |
65 log('DEBUG', "nodes: %s"%missing_nodes2) | 82 log('DEBUG', "nodes: %s"%missing_nodes2) |
66 #log('DEBUG', "nodes: %s"%([nx_graph1.node[n] for n in missing_nodes2])) | 83 #log('DEBUG', "nodes: %s"%([nx_graph1.node[n] for n in missing_nodes2])) |
73 missing_nodes1.append(n) | 90 missing_nodes1.append(n) |
74 | 91 |
75 if len(missing_nodes1) > 0: | 92 if len(missing_nodes1) > 0: |
76 log('WARNING', "%s nodes missing in graph 1"%len(missing_nodes1)) | 93 log('WARNING', "%s nodes missing in graph 1"%len(missing_nodes1)) |
77 log('DEBUG', "nodes: %s"%(missing_nodes1)) | 94 log('DEBUG', "nodes: %s"%(missing_nodes1)) |
95 | |
96 if len(attribute_differences) > 0: | |
97 log('WARNING', "%s nodes with attribute differences"%len(attribute_differences)) | |
98 log('DEBUG', "nodes: %s"%(attribute_differences)) | |
78 | 99 |
79 | 100 |
80 def compare_relations(nx_graph1, nx_graph2): | 101 def compare_relations(nx_graph1, nx_graph2): |
81 """compare relations of two graphs""" | 102 """compare relations of two graphs""" |
82 | 103 |