changeset 38:9ab136f412a1

new first version of check_ismi_log analyser.
author casties
date Fri, 21 Oct 2016 19:20:17 +0200
parents eccbb6239b89
children 1867bc2180c5
files importFromOpenMind/importer/check_ismi_log.py
diffstat 1 files changed, 260 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/importFromOpenMind/importer/check_ismi_log.py	Fri Oct 21 19:20:17 2016 +0200
@@ -0,0 +1,260 @@
+
+# coding: utf-8
+
+import re
+
+# Levels that log() prints; messages with any other level are dropped.
+# For verbose output use e.g. {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}.
+logLevels = {'ERROR', 'SYSMSG'}
+
+# Stop scanning the logfile after this many lines; None means no limit.
+maxLinecnt = None
+
+def log(level, message):
+    """Print ``message`` prefixed by ``level`` if that level is enabled.
+
+    A level is enabled when it is a member of the module-level
+    ``logLevels`` set; messages of any other level are silently dropped.
+    """
+    if level not in logLevels:
+        return
+
+    print("%s: %s"%(level, message))
+
+
+def prettyPrintNode(node):
+    """Return a one-line description of a parsed node for log messages.
+
+    The result consists of the node type, a short ``label=value`` tag that
+    depends on the node type (empty for types other than ENTITY, ATTRIBUTE
+    and RELATION) and the complete node dict in square brackets.
+
+    Raises KeyError if the node lacks 'node-type' or the key needed for
+    the tag of its type.
+
+    NOTE(review): ENTITY reads 'object-class' while RELATION reads
+    'object_class' -- presumably this mirrors the log format; confirm
+    against a real logfile.
+    """
+    # node type -> (label shown in the tag, key read from the node)
+    tagByType = {
+        'ENTITY': ('oc', 'object-class'),
+        'ATTRIBUTE': ('name', 'name'),
+        'RELATION': ('oc', 'object_class'),
+    }
+    kind = node['node-type']
+    tag = ''
+    if kind in tagByType:
+        label, key = tagByType[kind]
+        tag = " %s=%s "%(label, node[key])
+
+    return "%s%s[%s]"%(kind, tag, node)
+
+
+def parseStart(line):
+    """Parse a 'START Saving' log line into a save-context dict.
+
+    Returns a dict with
+      'time': the ``digits-digits-digits digits:digits:digits`` timestamp
+              at the very beginning of the line, or None if there is none,
+      'oc':   the word following 'START Saving',
+      'id':   the digits following '[ID=' (may be an empty string),
+    or None if the line contains no 'START Saving <word> [ID=' part.
+    """
+    # raw strings: '\d', '\w' and '\[' are invalid escapes in a normal
+    # string literal and trigger a SyntaxWarning on recent Python versions
+    sm = re.search(r'START Saving (\w+) \[ID=(\d*)', line)
+    if sm is None:
+        return None
+
+    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
+    tstamp = tm.group(1) if tm else None
+    return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
+
+
+def parseSave(line):
+    """Parse the first ``TYPE[key=value, key=value, ...]`` record in a line.
+
+    Returns a dict holding the upper-case record type under 'node-type'
+    plus one entry per ``key=value`` pair, with surrounding double quotes
+    stripped from the values. Returns None if the line contains no such
+    record or the bracket content does not start with a ``key=value`` pair.
+
+    The content is split on ', '. A piece without '=' is taken to be the
+    continuation of the previous value (i.e. the value itself contained
+    ', ') and is re-joined to it instead of raising ValueError.
+    """
+    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
+    if match is None:
+        return None
+
+    pairs = []
+    for seg in match.group(2).split(', '):
+        key, sep, value = seg.partition('=')
+        if sep:
+            pairs.append([key, value])
+        elif pairs:
+            # no '=': the ', ' separator occurred inside the previous value
+            pairs[-1][1] += ', ' + seg
+        else:
+            # not even a first key=value pair: not a node record
+            return None
+
+    data = {'node-type': match.group(1)}
+    for key, value in pairs:
+        # strip quotes only after re-joining so inner pieces stay intact
+        data[key] = value.strip('"')
+
+    return data
+
+
+def equalNodes(prev, cur):
+    """Return True if two parsed nodes agree in all compared fields.
+
+    Compared in this order: 'id', 'node-type', 'source-id', 'target-id'
+    and 'b64-value'. The first differing field is reported at INFO level
+    and ends the comparison with False.
+
+    'id' and 'node-type' must be present in both nodes (KeyError
+    otherwise). The remaining fields are optional; a field missing from
+    both nodes counts as equal. 'b64-value' is treated like the other
+    optional fields so that a node without it no longer raises KeyError.
+    """
+    log("DEBUG", "compare: %s vs %s"%(prev, cur))
+
+    for key in ('id', 'node-type'):
+        if prev[key] != cur[key]:
+            log("INFO", "node %s mismatch!"%key)
+            return False
+
+    # (key in the node dict, label used in the log message)
+    optional = (
+        ('source-id', 'source_id'),
+        ('target-id', 'target_id'),
+        ('b64-value', 'ownvalue'),
+    )
+    for key, label in optional:
+        if prev.get(key) != cur.get(key):
+            log("INFO", "node %s mismatch!"%label)
+            return False
+
+    return True
+
+
+def getSimilarNode(prev, curList):
+    """Return the first node in ``curList`` that is similar to ``prev``.
+
+    Similar means: same node type and
+      - for ATTRIBUTE nodes the same 'name',
+      - for RELATION nodes the same 'source-id', 'target-id' and
+        'object_class'.
+    Nodes of any other type never have a similar node. Returns None if
+    nothing matches.
+    """
+    kind = prev['node-type']
+    if kind == 'ATTRIBUTE':
+        keys = ('name',)
+        msg = "similar attributes: %s vs %s"
+    elif kind == 'RELATION':
+        keys = ('source-id', 'target-id', 'object_class')
+        msg = "similar relations: %s vs %s"
+    else:
+        return None
+
+    for cand in curList:
+        if cand['node-type'] != kind:
+            continue
+
+        # all identifying keys have to agree
+        if all(prev[k] == cand[k] for k in keys):
+            log("DEBUG", msg%(prev, cand))
+            return cand
+
+    return None
+
+
+def _groupSavesById(nodes, label):
+    """Group nodes by 'id'; return (id -> list of saves, ids in first-seen order)."""
+    grouped = {}
+    order = []
+    for n in nodes:
+        nid = n['id']
+        if nid in grouped:
+            log("DEBUG", "duplicate save of %s node id=%s"%(label, nid))
+            grouped[nid].append(n)
+        else:
+            grouped[nid] = [n]
+            order.append(nid)
+
+    return grouped, order
+
+
+def _lastSave(versions, label):
+    """Return the last save of a node, logging when it was saved repeatedly."""
+    if len(versions) > 1:
+        log("DEBUG", "multi-save %s node: %s"%(label, versions))
+
+    # use the last version(?)
+    return versions[-1]
+
+
+def compareNodeLists(prev, cur, ctx):
+    """Compare the nodes saved before ('prev') and after ('cur') one save.
+
+    prev, cur: lists of node dicts as returned by parseSave().
+    ctx: description of the save, only used in the error message.
+
+    Nodes are matched by 'id'; if a node was saved several times the last
+    save is used. Matched nodes are compared with equalNodes(), which
+    reports differences itself at INFO level (its result is not used
+    here). A prev node without a cur node of the same id counts as lost,
+    unless it is an ATTRIBUTE with empty 'b64-value' or a similar new node
+    exists (see getSimilarNode()). Lost nodes are reported as ERROR, the
+    remaining new nodes as INFO.
+
+    Nodes are processed in the order of their first save, so the output
+    and the pairing of similar nodes are the same on every run.
+    """
+    #
+    # read nodes
+    #
+    prevNodes, prevOrder = _groupSavesById(prev, 'prev')
+    curNodes, curOrder = _groupSavesById(cur, 'cur')
+
+    #
+    # compare nodes
+    #
+    checked = set()
+    addPrevNodes = []
+    for nid in prevOrder:
+        prevNode = _lastSave(prevNodes[nid], 'prev')
+        if nid not in curNodes:
+            if prevNode['node-type'] == 'ATTRIBUTE' \
+              and prevNode.get('b64-value') == '':
+                # empty attribute - ignore
+                continue
+
+            log("DEBUG", "node %s not in cur saves! %s"%(nid, prevNode))
+            addPrevNodes.append(prevNode)
+            continue
+
+        equalNodes(prevNode, _lastSave(curNodes[nid], 'cur'))
+        checked.add(nid)
+
+    # make list of additional current (=new) nodes
+    addCurNodes = []
+    for nid in curOrder:
+        if nid in checked:
+            continue
+
+        # reduce multi-saves to a single node here as well, the code below
+        # expects node dicts, not lists of them
+        node = _lastSave(curNodes[nid], 'cur')
+        addCurNodes.append(node)
+        log("DEBUG", "new node %s"%node)
+
+    # compare missing and new nodes
+    for n in list(addPrevNodes):
+        sn = getSimilarNode(n, addCurNodes)
+        if sn is not None:
+            # similar is good enough
+            addPrevNodes.remove(n)
+            addCurNodes.remove(sn)
+
+    if addPrevNodes:
+        log("ERROR", "in %s"%ctx)
+        for n in addPrevNodes:
+            log("ERROR","lost node: %s"%prettyPrintNode(n))
+
+    for n in addCurNodes:
+        log("INFO", "new node: %s"%prettyPrintNode(n))
+        
+
+
+def analyseLogfile(filename):
+    """Scan a logfile and check every save block for lost nodes.
+
+    A save block starts with a '*************** START Saving' line and
+    ends with a '*************** END Saving' line. In between, every
+    'INFO transactionlog' line containing 'save previous' is collected as
+    a previous node and every other one containing 'save' as a saved
+    node. At the end of a block the two lists are handed to
+    compareNodeLists(), provided there are previous nodes at all.
+
+    Overlapping save blocks are reported as errors; an END without START
+    stops the scan. At most ``maxLinecnt`` lines are read if that
+    module-level limit is set.
+    """
+    saving = 0
+    linecnt = 0
+    saveCtx = None
+    prevSaves = []
+    saves = []
+
+    with open(filename) as f:
+        for linecnt, line in enumerate(f, 1):
+            if '*************** START Saving' in line:
+                saving += 1
+                log('DEBUG', line)
+                saveCtx = parseStart(line)
+
+                if saving > 1:
+                    log("ERROR", "Concurrent save (%s) in #%s of %s"%(saving, linecnt, line))
+                    # TODO: what now?
+
+            elif 'INFO transactionlog' in line:
+                # pick the list this line belongs to, if any
+                if 'save previous' in line:
+                    target = prevSaves
+                elif 'save' in line:
+                    target = saves
+                else:
+                    target = None
+
+                if target is not None:
+                    # parse once and reuse the result
+                    data = parseSave(line)
+                    if data is None:
+                        log("DEBUG", "Error parsing line: %s"%line)
+                    else:
+                        target.append(data)
+
+            elif '*************** END Saving' in line:
+                saving -= 1
+                log('DEBUG', line)
+
+                if saving > 0:
+                    log("ERROR", "Concurrent end save (%s) in #%s of %s"%(saving, linecnt, line))
+
+                elif saving < 0:
+                    log("ERROR", "Too many END saves!")
+                    break
+
+                log("INFO", "saving %s"%saveCtx)
+                log("INFO", "prev saves: %s"%len(prevSaves))
+                log("INFO", "saves: %s"%len(saves))
+
+                if prevSaves:
+                    compareNodeLists(prevSaves, saves, saveCtx)
+
+                prevSaves = []
+                saves = []
+
+            # checked for every line, also for unparsable ones
+            if maxLinecnt is not None and linecnt >= maxLinecnt:
+                break
+
+    log("SYSMSG", "%s lines of logfile scanned"%linecnt)
+        
+        
+# run analysis only when executed as a script, not when imported
+if __name__ == '__main__':
+    import sys
+
+    # optional first argument: logfile to analyse (default: the original
+    # hard-coded file)
+    analyseLogfile(sys.argv[1] if len(sys.argv) > 1 else 'ismi-161011.log')