# HG changeset patch # User casties # Date 1477677319 -7200 # Node ID 1867bc2180c5be84d03c5a2ba0757274c613bff4 # Parent 9ab136f412a12f7994229868bdf79202006c536e better check_ismi_log analyser. diff -r 9ab136f412a1 -r 1867bc2180c5 importFromOpenMind/importer/check_ismi_log.py --- a/importFromOpenMind/importer/check_ismi_log.py Fri Oct 21 19:20:17 2016 +0200 +++ b/importFromOpenMind/importer/check_ismi_log.py Fri Oct 28 19:55:19 2016 +0200 @@ -3,7 +3,10 @@ import re +# max number of lines to read (for testing) maxLinecnt = None + +# active log levels for logging #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'} logLevels = {'ERROR', 'SYSMSG'} @@ -22,12 +25,48 @@ att = " %s=%s "%('name',node['name']) elif nt == 'RELATION': - att = " %s=%s "%('oc',node['object_class']) + att = " %s=%s "%('oc',node['object-class']) s = "%s%s[%s]"%(nt, att, node) return s +nodeCsvFieldList = ['node_type', 'id', 'row_id', 'object_class', 'user', 'public', 'type', + 'version', 'modification_time', 'system_status', 'content_type', + 'source_id', 'source_modif', 'source_obj_class', 'target_id', 'target_modif', 'target_obj_class', + 'own_value_b64'] + +nodeCsvFieldMap = {'node_type':'node-type', 'id':'id', 'row_id':'row-id', 'object_class':'object-class', 'user':'user', + 'public':'public', 'type':'type', + 'version':'version', 'modification_time':'mtime', 'system_status':'system-status', 'content_type':'content-type', + 'source_id':'source-id', 'source_modif':'source-mtime', 'source_obj_class':'source-oc', + 'target_id':'target-id', 'target_modif':'target-mtime', 'target_obj_class':'target-oc', + 'own_value_b64':'b64-value'} + + +def printHeaderCsv(outFile): + s = "" + for cf in nodeCsvFieldList: + if s == "": + s += "%s"%cf + else: + s += ",%s"%cf + + print(s, file=outFile) + + +def printNodeCsv(node, outFile): + s = "" + for cf in nodeCsvFieldList: + nf = nodeCsvFieldMap[cf] + if s == "": + s += "%s"%(node.get(nf, '')) + else: + s += ",%s"%(node.get(nf, '')) + + print(s, file=outFile) + 
+ def parseStart(line): tstamp = None tm = re.match('(\d+-\d+-\d+ \d+:\d+:\d+)', line) @@ -48,8 +87,17 @@ segs = match.group(2).split(', ') for seg in segs: k, v = seg.split('=', 1) + + # fix bug with relation's object-class parameter + if k == 'object_class': + k = 'object-class' + data[k] = v.strip('"') + # normalize attribute's name to object-class + if k == 'name': + data['object-class'] = v.strip('"') + return data return None @@ -95,7 +143,7 @@ if n['node-type'] == 'RELATION' \ and prev['source-id'] == n['source-id'] \ and prev['target-id'] == n['target-id'] \ - and prev['object_class'] == n['object_class']: + and prev['object-class'] == n['object-class']: # relation with same source, target and type log("DEBUG", "similar relations: %s vs %s"%(prev, n)) return n @@ -103,7 +151,7 @@ return None -def compareNodeLists(prev, cur, ctx): +def compareNodeLists(prev, cur, ctx, lostFile=None): prevNodes = {} curNodes = {} @@ -184,6 +232,8 @@ log("ERROR", "in %s"%ctx) for n in addPrevNodes: log("ERROR","lost node: %s"%prettyPrintNode(n)) + if lostFile is not None: + printNodeCsv(n, lostFile) if len(addCurNodes) > 0: #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes]) @@ -192,8 +242,13 @@ -def analyseLogfile(filename): - with open(filename) as f: +def analyseLogfile(inFilename, outFilename=None): + outFile = None + if outFilename is not None: + outFile = open(outFilename, mode='w') + printHeaderCsv(outFile) + + with open(inFilename) as f: saving = 0 linecnt = 0 saveCtx = None @@ -244,7 +299,7 @@ log("INFO", "saves: %s"%len(saves)) if len(prevSaves) > 0: - compareNodeLists(prevSaves, saves, saveCtx) + compareNodeLists(prevSaves, saves, saveCtx, outFile) prevSaves = [] saves = [] @@ -257,4 +312,4 @@ # run analysis -analyseLogfile('ismi-161011.log') +analyseLogfile('ismi-161011.log', 'ismi-161011-lost.csv')