Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/compare_models.py @ 47:378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
fixed bugs in ismixml2model.
author | casties |
---|---|
date | Mon, 06 Feb 2017 18:44:43 +0100 |
parents | |
children | 6625019a0c96 |
rev | line source |
---|---|
47
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
1 import networkx as nx |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
2 import sys |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
3 import csv |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
4 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
## configure behaviour

# Default networkx graph files (gpickle format) to compare.
# Both can be overridden by giving two file names on the command line.
input1_fn = 'ismi_graph1.gpickle'
input2_fn = 'ismi_graph2.gpickle'

# Name of the attribute that holds the type of a node / of a relation.
node_type_attribute = '_type'
rel_type_attribute = '_type'

# Log levels that log() actually prints; all other levels are dropped.
# Swap in one of the commented alternatives for less verbose output.
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
19 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
def log(level, message):
    """Print ``message`` prefixed with ``level`` if that level is active.

    A level is active when it is contained in the module-level ``logLevels``
    set; messages for any other level are dropped silently.
    """
    if level not in logLevels:
        return
    print("%s: %s"%(level, message))
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
23 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
24 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
def invertRelations(nx_graph):
    """Add an inverse relation for every relation in the graph.

    For each existing edge (source, target) a new edge (target, source) is
    added. The new edge carries a copy of the original edge's attributes,
    except that the relation type (``rel_type_attribute``) is replaced by
    ``fixName(rel_type, is_tar_rel=True)`` and ``ismi_id`` is negated.
    The graph is modified in place; nothing is returned.

    Attributes are read from edge key 0, so the graph is expected to be a
    multigraph and only the first of several parallel edges is used.

    NOTE(review): ``fixName`` is neither defined nor imported in the visible
    part of this module -- it has to be provided before this function can
    run. TODO confirm where it is supposed to come from.
    """
    print("Adding inverse relations.")
    # take a snapshot of the edges because we add edges inside the loop
    edges = list(nx_graph.edges())
    cnt = 0
    for (nx_src, nx_tar) in edges:
        # copy the attributes so the original relation stays untouched
        # (a dict can not be copied by slicing)
        rel_attrs = dict(nx_graph[nx_src][nx_tar][0])
        rel_type = rel_attrs[rel_type_attribute]
        rel_id = rel_attrs['ismi_id']
        # create the inverse relation: inverted type name, negated id
        rel_attrs[rel_type_attribute] = fixName(rel_type, is_tar_rel=True)
        rel_attrs['ismi_id'] = -rel_id
        # attr_dict is the networkx 1.x way of passing an attribute dict
        nx_graph.add_edge(nx_tar, nx_src, attr_dict=rel_attrs)

        # progress output every 100 relations
        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
47 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
48 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
def compare_nodes(nx_graph1, nx_graph2):
    """Compare the nodes of two graphs by node id.

    Only the existence of each node id in the other graph is checked; node
    attributes are not compared. For each direction that has missing nodes,
    the number of missing nodes is logged at level WARNING and the list of
    missing node ids at level DEBUG. Nothing is returned.
    """
    log('INFO', "Compare graph nodes: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))

    # Iterating a graph yields its node ids and ``in`` tests node membership;
    # this avoids nx.nodes_iter(), which was removed in networkx 2.0.
    missing_nodes2 = [n for n in nx_graph1 if n not in nx_graph2]
    if missing_nodes2:
        log('WARNING', "%s nodes missing in graph 2"%len(missing_nodes2))
        log('DEBUG', "nodes: %s"%missing_nodes2)

    # same check in the other direction
    missing_nodes1 = [n for n in nx_graph2 if n not in nx_graph1]
    if missing_nodes1:
        log('WARNING', "%s nodes missing in graph 1"%len(missing_nodes1))
        log('DEBUG', "nodes: %s"%(missing_nodes1))
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
78 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
79 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
def compare_relations(nx_graph1, nx_graph2):
    """Compare the relations (edges) of two graphs.

    Only the existence of an edge between the same (source, target) pair in
    the other graph is checked; edge attributes such as the relation type
    are not compared. For each direction that has missing relations, the
    number of missing relations is logged at level WARNING and the list of
    missing (source, target) pairs at level DEBUG. Nothing is returned.
    """
    log('INFO', "Compare graph relations: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))

    # graph.edges() yields (source, target) pairs; this avoids
    # nx.edges_iter(), which was removed in networkx 2.0.
    missing_rels2 = [(s, t) for (s, t) in nx_graph1.edges()
                     if not nx_graph2.has_edge(s, t)]
    if missing_rels2:
        log('WARNING', "%s relations missing in graph 2"%len(missing_rels2))
        log('DEBUG', "relations: %s"%missing_rels2)

    # iterate all edges in graph 2 for the opposite direction
    missing_rels1 = [(s, t) for (s, t) in nx_graph2.edges()
                     if not nx_graph1.has_edge(s, t)]
    if missing_rels1:
        log('WARNING', "%s relations missing in graph 1"%len(missing_rels1))
        log('DEBUG', "relations: %s"%(missing_rels1))
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
108 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
109 |
378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
casties
parents:
diff
changeset
|
## main

print("Modify networkx graph")

# Optional command line parameters: <graph 1 file> <graph 2 file>.
# Both have to be given, otherwise the configured defaults are kept.
if len(sys.argv) > 2:
    input1_fn, input2_fn = sys.argv[1:3]

# Load both graphs from their pickle files.
# NOTE(review): gpickle files are Python pickles and unpickling can execute
# arbitrary code -- only read files from a trusted source.
print("Reading graph 1 from %s"%input1_fn)
nx_graph1 = nx.read_gpickle(input1_fn)
print("Graph 1 info: %s"%nx.info(nx_graph1))

print("Reading graph 2 from %s"%input2_fn)
nx_graph2 = nx.read_gpickle(input2_fn)
print("Graph 2 info: %s"%nx.info(nx_graph2))

# Run the comparisons; results are reported through log().
compare_nodes(nx_graph1, nx_graph2)
compare_relations(nx_graph1, nx_graph2)


print("Done.")