Mercurial > hg > drupalISMI
changeset 47:378dcb66a27f
new compare_models comparing the existence of nodes and relations in two graphs.
fixed bugs in ismixml2model.
author | casties |
---|---|
date | Mon, 06 Feb 2017 18:44:43 +0100 |
parents | f3945ef1e6a4 |
children | 6625019a0c96 |
files | importFromOpenMind/importer/compare_models.py importFromOpenMind/importer/ismixml2model.py |
diffstat | 2 files changed, 150 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
# importFromOpenMind/importer/compare_models.py
# (new file in this changeset; reconstructed from the flattened diff)
"""Compare the existence of nodes and relations in two networkx graphs.

Usage: compare_models.py [graph1.gpickle graph2.gpickle]

Both graphs are read from gpickle files. Every node and every
(source, target) relation that exists in one graph but not in the other
is reported through log().
"""
import csv
import sys

import networkx as nx

## configure behaviour

# networkx graph files
input1_fn = 'ismi_graph1.gpickle'
input2_fn = 'ismi_graph2.gpickle'

# name of type attribute
node_type_attribute = '_type'
rel_type_attribute = '_type'

# active log levels for logging
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'ERROR', 'SYSMSG'}


def log(level, message):
    """Print message prefixed with level if level is in logLevels."""
    if level in logLevels:
        print("%s: %s"%(level, message))


def invertRelations(nx_graph):
    """Add an inverse relation for each relation in nx_graph.

    For every (source, target) edge a new edge (target, source) is added.
    Its attributes are a copy of the original attributes, except that the
    type attribute is replaced by the result of fixName() and 'ismi_id'
    is negated. The graph is modified in place.

    The attribute lookup uses edge key 0, i.e. it assumes a multigraph;
    parallel edges all read the attributes of key 0.
    """
    print("Adding inverse relations.")
    # snapshot the edge list because we add edges inside the loop
    edges = list(nx.edges(nx_graph))
    cnt = 0
    for (nx_src, nx_tar) in edges:
        # copy the attributes so the original edge is left untouched
        rel_attrs = dict(nx_graph[nx_src][nx_tar][0])
        rel_type = rel_attrs[rel_type_attribute]
        rel_id = rel_attrs['ismi_id']
        # create new relation
        # NOTE(review): fixName is neither defined nor imported in this
        # file; it has to be brought into scope before this function
        # can be called.
        rel_attrs[rel_type_attribute] = fixName(rel_type, is_tar_rel=True)
        rel_attrs['ismi_id'] = -rel_id
        nx_graph.add_edge(nx_tar, nx_src, attr_dict=rel_attrs)

        cnt += 1
        if cnt % 100 == 0:
            print("  %s relations"%cnt)


def compare_nodes(nx_graph1, nx_graph2):
    """Report nodes that exist in only one of the two graphs.

    Nodes are compared by node key only; attributes are not compared.
    Returns a tuple (missing_nodes1, missing_nodes2): the nodes missing
    in graph 1 and the nodes missing in graph 2.
    """
    log('INFO', "Compare graph nodes: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))

    # nodes of graph 1 that graph 2 does not have
    missing_nodes2 = [n for n in nx.nodes_iter(nx_graph1)
                      if not nx_graph2.has_node(n)]
    if missing_nodes2:
        log('WARNING', "%s nodes missing in graph 2"%len(missing_nodes2))
        log('DEBUG', "nodes: %s"%missing_nodes2)

    # nodes of graph 2 that graph 1 does not have
    missing_nodes1 = [n for n in nx.nodes_iter(nx_graph2)
                      if not nx_graph1.has_node(n)]
    if missing_nodes1:
        log('WARNING', "%s nodes missing in graph 1"%len(missing_nodes1))
        log('DEBUG', "nodes: %s"%(missing_nodes1))

    return (missing_nodes1, missing_nodes2)


def compare_relations(nx_graph1, nx_graph2):
    """Report relations that exist in only one of the two graphs.

    Relations are compared as (source, target) pairs only; attributes
    are not compared.
    Returns a tuple (missing_rels1, missing_rels2): the relations missing
    in graph 1 and the relations missing in graph 2.
    """
    log('INFO', "Compare graph relations: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))

    # edges of graph 1 that graph 2 does not have
    missing_rels2 = [(s, t) for (s, t) in nx.edges_iter(nx_graph1)
                     if not nx_graph2.has_edge(s, t)]
    if missing_rels2:
        log('WARNING', "%s relations missing in graph 2"%len(missing_rels2))
        log('DEBUG', "relations: %s"%missing_rels2)

    # edges of graph 2 that graph 1 does not have
    missing_rels1 = [(s, t) for (s, t) in nx.edges_iter(nx_graph2)
                     if not nx_graph1.has_edge(s, t)]
    if missing_rels1:
        log('WARNING', "%s relations missing in graph 1"%len(missing_rels1))
        log('DEBUG', "relations: %s"%(missing_rels1))

    return (missing_rels1, missing_rels2)


## main

def main(argv=None):
    """Read two graphs and compare their nodes and relations.

    argv defaults to sys.argv. If it holds at least two arguments they
    replace the default input file names input1_fn and input2_fn.
    """
    if argv is None:
        argv = sys.argv

    print("Compare networkx graphs")

    # read commandline parameters
    fn1, fn2 = input1_fn, input2_fn
    if len(argv) > 2:
        fn1, fn2 = argv[1], argv[2]

    # read networkx graphs from pickle
    # NOTE: read_gpickle unpickles the file; only load files you trust.
    print("Reading graph 1 from %s"%fn1)
    nx_graph1 = nx.read_gpickle(fn1)
    print("Graph 1 info: %s"%nx.info(nx_graph1))

    print("Reading graph 2 from %s"%fn2)
    nx_graph2 = nx.read_gpickle(fn2)
    print("Graph 2 info: %s"%nx.info(nx_graph2))

    # operate
    compare_nodes(nx_graph1, nx_graph2)
    compare_relations(nx_graph1, nx_graph2)

    print("Done.")


if __name__ == "__main__":
    main()
--- a/importFromOpenMind/importer/ismixml2model.py Fri Feb 03 18:46:16 2017 +0100 +++ b/importFromOpenMind/importer/ismixml2model.py Mon Feb 06 18:44:43 2017 +0100 @@ -157,7 +157,7 @@ # set type attrs[node_type_attribute] = fixName(oc) - ismi_id = ent_elem.get('id') + ismi_id = int(ent_elem.get('id')) # rename id to ismi_id attrs['ismi_id'] = ismi_id @@ -169,7 +169,7 @@ # attrs['_n_label'] = ent.get('nov') # create node - #log('DEBUG', "new node(%s, %s)"%(ismi_id, attrs)) + log('DEBUG', "new node(%s, %s)"%(ismi_id, attrs)) nx_graph.add_node(ismi_id, **attrs) node = nx_graph.node[ismi_id] @@ -179,16 +179,16 @@ def relationFromRel(rel_elem): """Create graph relation from etree element. """ - rel_id = rel_elem.get('id') + rel_id = int(rel_elem.get('id')) rel_name = rel_elem.get('object-class') - src_id = rel_elem.get('source-id') - tar_id = rel_elem.get('target-id') + src_id = int(rel_elem.get('source-id')) + tar_id = int(rel_elem.get('target-id')) if not src_id in nx_nodes: - log("ERROR", "relation %s src node %s missing!"%(rel_id,src_id)) + log("WARNING", "relation %s src node %s missing!"%(rel_id,src_id)) return None if not tar_id in nx_nodes: - log("ERROR", "relation %s tar node %s missing!"%(rel_id,tar_id)) + log("WARNING", "relation %s tar node %s missing!"%(rel_id,tar_id)) return None ov = rel_elem.text or '' @@ -258,7 +258,7 @@ attrs[rel_type_attribute] = fixName(rel_name) attrs['ismi_id'] = rel_id - log('DEBUG', "new edge(%s, %s, %s)"%(src_id, tar_id, attrs)) + #log('DEBUG', "new edge(%s, %s, %s)"%(src_id, tar_id, attrs)) # create relation with type nx_rel = nx_graph.add_edge(src_id, tar_id, attr_dict=attrs) @@ -275,7 +275,13 @@ # iterate through entities element for ent_elem in ents_elem: cnt += 1 - ismi_id = ent_elem.get('id') + + oc = ent_elem.get('object-class') + if oc in exclude_objects_of_type: + # skip this entity + continue + + ismi_id = int(ent_elem.get('id')) log('DEBUG', "reading entity[%s]"%ismi_id) if ismi_id in nx_nodes: @@ -303,7 +309,8 @@ # 
iterate through entities element for rel_elem in rels_elem: cnt += 1 - ismi_id = rel_elem.get('id') + + ismi_id = int(rel_elem.get('id')) log('DEBUG', "reading relation[%s]"%ismi_id) if ismi_id in nx_relations: