changeset 47:378dcb66a27f

new compare_models comparing the existence of nodes and relations in two graphs. fixed bugs in ismixml2model.
author casties
date Mon, 06 Feb 2017 18:44:43 +0100
parents f3945ef1e6a4
children 6625019a0c96
files importFromOpenMind/importer/compare_models.py importFromOpenMind/importer/ismixml2model.py
diffstat 2 files changed, 150 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/importFromOpenMind/importer/compare_models.py	Mon Feb 06 18:44:43 2017 +0100
@@ -0,0 +1,133 @@
+import networkx as nx
+import sys
+import csv
+
+## configure behaviour
+
+# default networkx graph files (gpickle); replaced by the first two command line arguments if both are given
+input1_fn = 'ismi_graph1.gpickle'
+input2_fn = 'ismi_graph2.gpickle'
+
+# name of the attribute that holds the type of a node / of a relation
+node_type_attribute = '_type'
+rel_type_attribute = '_type'
+
+# active log levels for logging: log() only prints messages whose level is in this set
+logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
+#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
+#logLevels = {'INFO', 'ERROR', 'SYSMSG'}
+
+def log(level, message):
+    """Print "level: message" if level is one of the active logLevels."""
+    if level not in logLevels:
+        # level is switched off
+        return
+    print("%s: %s" % (level, message))
+
+
+def invertRelations(nx_graph):
+    """Add an inverse relation for every relation in nx_graph.
+
+    For each existing edge (src, tar) a new edge (tar, src) is added. The new
+    edge carries a copy of the original attributes in which the type attribute
+    is replaced by fixName(type, is_tar_rel=True) and 'ismi_id' is negated.
+    The graph is modified in place; nothing is returned.
+
+    NOTE(review): fixName is neither defined nor imported in this file and
+    this function is not called by the script below -- confirm where fixName
+    should come from before using this function.
+    """
+    print("Adding inverse relations.")
+    # snapshot the edges together with their attribute dicts because we add
+    # edges in the loop; data=True also yields each parallel edge separately
+    edges = list(nx_graph.edges(data=True))
+    for cnt, (nx_src, nx_tar, attrs) in enumerate(edges, 1):
+        # work on a copy so the original relation keeps its attributes
+        rel_attrs = dict(attrs)
+        rel_type = rel_attrs[rel_type_attribute]
+        rel_id = rel_attrs['ismi_id']
+        # create new relation in the opposite direction
+        rel_attrs[rel_type_attribute] = fixName(rel_type, is_tar_rel=True)
+        rel_attrs['ismi_id'] = -rel_id
+        nx_graph.add_edge(nx_tar, nx_src, attr_dict=rel_attrs)
+
+        # progress output
+        if cnt % 100 == 0:
+            print("  %s relations"%cnt)
+        
+    
+def compare_nodes(nx_graph1, nx_graph2):
+    """Report nodes that exist in only one of the two graphs.
+
+    Logs the number (WARNING) and the list (DEBUG) of nodes of graph 1 that
+    graph 2 does not have, then the same for nodes of graph 2 that graph 1
+    does not have. Nothing is returned.
+    """
+    log('INFO', "Compare graph nodes: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))
+
+    # nodes of graph 1 without counterpart in graph 2
+    only_in_1 = [n for n in nx.nodes_iter(nx_graph1) if not nx_graph2.has_node(n)]
+    if only_in_1:
+        log('WARNING', "%s nodes missing in graph 2"%len(only_in_1))
+        log('DEBUG', "nodes: %s"%only_in_1)
+
+    # nodes of graph 2 without counterpart in graph 1
+    only_in_2 = [n for n in nx.nodes_iter(nx_graph2) if not nx_graph1.has_node(n)]
+    if only_in_2:
+        log('WARNING', "%s nodes missing in graph 1"%len(only_in_2))
+        log('DEBUG', "nodes: %s"%(only_in_2))
+    
+
+def compare_relations(nx_graph1, nx_graph2):
+    """Report relations that exist in only one of the two graphs.
+
+    A relation is identified by its (source, target) node pair. Logs the
+    number (WARNING) and the list (DEBUG) of edges of graph 1 that graph 2
+    does not have, then the same for edges of graph 2 that graph 1 does not
+    have. Nothing is returned.
+    """
+    log('INFO', "Compare graph relations: %s vs %s"%(repr(nx_graph1), repr(nx_graph2)))
+
+    # edges of graph 1 without counterpart in graph 2
+    only_in_1 = [(s, t) for (s, t) in nx.edges_iter(nx_graph1)
+                 if not nx_graph2.has_edge(s, t)]
+    if only_in_1:
+        log('WARNING', "%s relations missing in graph 2"%len(only_in_1))
+        log('DEBUG', "relations: %s"%only_in_1)
+
+    # edges of graph 2 without counterpart in graph 1
+    only_in_2 = [(s, t) for (s, t) in nx.edges_iter(nx_graph2)
+                 if not nx_graph1.has_edge(s, t)]
+    if only_in_2:
+        log('WARNING', "%s relations missing in graph 1"%len(only_in_2))
+        log('DEBUG', "relations: %s"%(only_in_2))
+    
+
+## main
+
+# banner: this script only compares two graphs, it does not modify them
+print("Compare networkx graphs")
+
+# read commandline parameters: both file names must be given to replace the defaults
+if len(sys.argv) > 2:
+    input1_fn = sys.argv[1]
+    input2_fn = sys.argv[2]
+
+# read networkx graphs from pickle
+# NOTE(review): gpickle files are Python pickles and unpickling can execute
+# arbitrary code -- only load graph files from trusted sources.
+print("Reading graph 1 from %s"%input1_fn)
+nx_graph1 = nx.read_gpickle(input1_fn)
+print("Graph 1 info: %s"%nx.info(nx_graph1))
+
+print("Reading graph 2 from %s"%input2_fn)
+nx_graph2 = nx.read_gpickle(input2_fn)
+print("Graph 2 info: %s"%nx.info(nx_graph2))
+
+# operate: report nodes and relations that exist in only one of the graphs
+compare_nodes(nx_graph1, nx_graph2)
+compare_relations(nx_graph1, nx_graph2)
+
+
+print("Done.")
--- a/importFromOpenMind/importer/ismixml2model.py	Fri Feb 03 18:46:16 2017 +0100
+++ b/importFromOpenMind/importer/ismixml2model.py	Mon Feb 06 18:44:43 2017 +0100
@@ -157,7 +157,7 @@
     # set type
     attrs[node_type_attribute] = fixName(oc)
                 
-    ismi_id = ent_elem.get('id')
+    ismi_id = int(ent_elem.get('id'))
     # rename id to ismi_id
     attrs['ismi_id'] = ismi_id
             
@@ -169,7 +169,7 @@
         #    attrs['_n_label'] = ent.get('nov')
     
     # create node
-    #log('DEBUG', "new node(%s, %s)"%(ismi_id, attrs))
+    log('DEBUG', "new node(%s, %s)"%(ismi_id, attrs))
     nx_graph.add_node(ismi_id, **attrs)
     node = nx_graph.node[ismi_id]
     
@@ -179,16 +179,16 @@
 def relationFromRel(rel_elem):
     """Create graph relation from etree element.    
     """
-    rel_id = rel_elem.get('id')
+    rel_id = int(rel_elem.get('id'))
     rel_name = rel_elem.get('object-class')
-    src_id = rel_elem.get('source-id')
-    tar_id = rel_elem.get('target-id')
+    src_id = int(rel_elem.get('source-id'))
+    tar_id = int(rel_elem.get('target-id'))
     if not src_id in nx_nodes:
-        log("ERROR", "relation %s src node %s missing!"%(rel_id,src_id))
+        log("WARNING", "relation %s src node %s missing!"%(rel_id,src_id))
         return None
     
     if not tar_id in nx_nodes:
-        log("ERROR", "relation %s tar node %s missing!"%(rel_id,tar_id))
+        log("WARNING", "relation %s tar node %s missing!"%(rel_id,tar_id))
         return None
 
     ov = rel_elem.text or ''
@@ -258,7 +258,7 @@
         
     attrs[rel_type_attribute] = fixName(rel_name)
     attrs['ismi_id'] = rel_id
-    log('DEBUG', "new edge(%s, %s, %s)"%(src_id, tar_id, attrs))
+    #log('DEBUG', "new edge(%s, %s, %s)"%(src_id, tar_id, attrs))
     # create relation with type
     nx_rel = nx_graph.add_edge(src_id, tar_id, attr_dict=attrs)
     
@@ -275,7 +275,13 @@
     # iterate through entities element
     for ent_elem in ents_elem:
         cnt += 1
-        ismi_id = ent_elem.get('id')
+        
+        oc = ent_elem.get('object-class')
+        if oc in exclude_objects_of_type:
+            # skip this entity
+            continue
+        
+        ismi_id = int(ent_elem.get('id'))
         log('DEBUG', "reading entity[%s]"%ismi_id)
         
         if ismi_id in nx_nodes:
@@ -303,7 +309,8 @@
     # iterate through entities element
     for rel_elem in rels_elem:
         cnt += 1
-        ismi_id = rel_elem.get('id')
+
+        ismi_id = int(rel_elem.get('id'))
         log('DEBUG', "reading relation[%s]"%ismi_id)
         
         if ismi_id in nx_relations: