# coding: utf-8
#
# Origin: HG changeset patch
#   User casties
#   Date 1477070417 -7200
#   Node ID 9ab136f412a12f7994229868bdf79202006c536e
#   Parent eccbb6239b89ebd5edeeda3ff50deaf7925bbb49
#   Message: new first version of check_ismi_log analyser.
#   File: importFromOpenMind/importer/check_ismi_log.py (added by this changeset)
"""Check a log file for nodes that got lost between save transactions.

The log is scanned for blocks delimited by ``START Saving`` / ``END Saving``
marker lines. Inside a block, ``INFO transactionlog`` lines containing
``save previous`` are collected as the previous node versions and those
containing ``save`` as the newly saved versions. At the end of each block
both collections are compared and nodes that only appear in the previous
versions are reported as lost.
"""

import re

# Stop scanning after this many lines; None means scan the whole file.
maxLinecnt = None
# Levels that log() actually prints; messages of any other level are dropped.
# Alternative, more verbose setting:
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
logLevels = {'ERROR', 'SYSMSG'}

# Raw strings: '\d', '\w' and '\[' are invalid escapes in plain literals.
_TIMESTAMP_RE = re.compile(r'(\d+-\d+-\d+ \d+:\d+:\d+)')
_START_RE = re.compile(r'START Saving (\w+) \[ID=(\d*)')
_NODE_RE = re.compile(r'([A-Z]+)\[([^\]]+)\]')


def log(level, message):
    """Print ``"<level>: <message>"`` if *level* is enabled in ``logLevels``."""
    if level in logLevels:
        print("%s: %s" % (level, message))


def _objectClass(node):
    """Return the node's object class, or None if the node has none.

    NOTE(review): the original code read 'object-class' for entities but
    'object_class' for relations; which spelling the log really uses is not
    visible from this file, so both are accepted -- confirm against a log.
    """
    return node.get('object-class', node.get('object_class'))


def prettyPrintNode(node):
    """Return a one-line description of *node* for log messages.

    The result is ``"<node-type><summary>[<node dict>]"`` where the summary
    is the object class for ENTITY and RELATION nodes, the name for
    ATTRIBUTE nodes, and empty for any other node type.
    """
    nt = node['node-type']
    att = ''
    if nt in ('ENTITY', 'RELATION'):
        att = " %s=%s " % ('oc', _objectClass(node))

    elif nt == 'ATTRIBUTE':
        att = " %s=%s " % ('name', node.get('name'))

    return "%s%s[%s]" % (nt, att, node)


def parseStart(line):
    """Parse a ``START Saving`` marker line.

    Returns a dict with keys ``'time'`` (timestamp at the start of the line,
    or None if the line does not begin with one), ``'oc'`` and ``'id'``, or
    None if the line contains no ``START Saving <oc> [ID=<id>`` marker.
    """
    tstamp = None
    tm = _TIMESTAMP_RE.match(line)
    if tm:
        tstamp = tm.group(1)

    sm = _START_RE.search(line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    return None


def parseSave(line):
    """Parse the first ``TYPE[key=value, key=value, ...]`` record in *line*.

    Returns a dict holding ``'node-type'`` (the upper-case word before the
    bracket) plus one entry per key, with surrounding double quotes stripped
    from the values. Returns None if the line contains no such record or the
    bracket holds no ``key=value`` pair at all.
    """
    match = _NODE_RE.search(line)
    if not match:
        return None

    pairs = []
    for seg in match.group(2).split(', '):
        # split on the first '=' only: values may contain '=' themselves
        key, sep, value = seg.partition('=')
        if sep:
            pairs.append([key, value])
        elif pairs:
            # no '=' in this segment: it is the tail of a value that
            # contained ', ' itself, so glue it back onto that value
            pairs[-1][1] += ', ' + seg
        else:
            # bracket content does not start with key=value: not a record
            return None

    data = {'node-type': match.group(1)}
    for key, value in pairs:
        data[key] = value.strip('"')

    return data


def equalNodes(prev, cur):
    """Return True if *prev* and *cur* describe the same node state.

    Compares id, node-type, source-id, target-id and b64-value; the first
    difference found is logged at INFO level and makes the result False.
    The last three fields are optional and count as equal when both nodes
    lack them.
    """
    log("DEBUG", "compare: %s vs %s" % (prev, cur))
    if prev['id'] != cur['id']:
        log("INFO", "node id mismatch!")
        return False

    if prev['node-type'] != cur['node-type']:
        log("INFO", "node node-type mismatch!")
        return False

    if prev.get('source-id', None) != cur.get('source-id', None):
        log("INFO", "node source_id mismatch!")
        return False

    if prev.get('target-id', None) != cur.get('target-id', None):
        log("INFO", "node target_id mismatch!")
        return False

    if prev.get('b64-value', None) != cur.get('b64-value', None):
        log("INFO", "node ownvalue mismatch!")
        return False

    return True


def getSimilarNode(prev, curList):
    """Return the first node in *curList* that is similar to *prev*, or None.

    Attributes are similar when they have the same name; relations when
    source id, target id and object class all agree. Nodes of any other
    type never have a similar node.
    """
    nt = prev['node-type']
    if nt == 'ATTRIBUTE':
        for n in curList:
            if n['node-type'] == 'ATTRIBUTE' \
                    and prev['name'] == n['name']:
                # attribute with same name
                log("DEBUG", "similar attributes: %s vs %s" % (prev, n))
                return n

    elif nt == 'RELATION':
        for n in curList:
            if n['node-type'] == 'RELATION' \
                    and prev['source-id'] == n['source-id'] \
                    and prev['target-id'] == n['target-id'] \
                    and _objectClass(prev) == _objectClass(n):
                # relation with same source, target and type
                log("DEBUG", "similar relations: %s vs %s" % (prev, n))
                return n

    return None


def _collectById(nodes, label):
    """Group *nodes* by their 'id'; each value lists the saves in log order."""
    byId = {}
    for n in nodes:
        nid = n['id']
        if nid in byId:
            log("DEBUG", "duplicate save of %s node id=%s" % (label, nid))
        byId.setdefault(nid, []).append(n)

    return byId


def _lastVersion(versions, label):
    """Return the last save of a node, logging when there were several."""
    if len(versions) > 1:
        log("DEBUG", "multi-save %s node: %s" % (label, versions))

    # use the last version(?)
    return versions[-1]


def compareNodeLists(prev, cur, ctx):
    """Compare previous and current node saves of one save transaction.

    *prev* and *cur* are lists of node dicts as returned by parseSave();
    *ctx* identifies the transaction and is only used in the error message.
    Nodes present in *prev* but missing from *cur* are logged as ERROR
    ("lost node") unless they are empty attributes or a similar node (see
    getSimilarNode) exists among the nodes that are new in *cur*. Remaining
    new nodes are logged at INFO level. Returns None.
    """
    prevNodes = _collectById(prev, 'prev')
    curNodes = _collectById(cur, 'cur')

    #
    # compare nodes
    #
    curNodeUnchecked = set(curNodes)
    addPrevNodes = []

    for nid, versions in prevNodes.items():
        prevNode = _lastVersion(versions, 'prev')

        if nid not in curNodes:
            if prevNode['node-type'] == 'ATTRIBUTE' \
                    and prevNode.get('b64-value') == '':
                # empty attribute - ignore
                continue

            log("DEBUG", "node %s not in cur saves! %s" % (nid, prevNode))
            addPrevNodes.append(prevNode)
            continue

        curNode = _lastVersion(curNodes[nid], 'cur')

        # result is not used; differences are only logged (at INFO level)
        equalNodes(prevNode, curNode)

        curNodeUnchecked.remove(nid)

    # make list of additional current (=new) nodes, in dict insertion order.
    # A node saved several times is represented by its last version; storing
    # the whole list here would break getSimilarNode() and prettyPrintNode().
    addCurNodes = []
    for nid, versions in curNodes.items():
        if nid in curNodeUnchecked:
            newNode = _lastVersion(versions, 'cur')
            addCurNodes.append(newNode)
            log("DEBUG", "new node %s" % newNode)

    # compare missing and new nodes (iterate a copy: the list is modified)
    for n in list(addPrevNodes):
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if addPrevNodes:
        log("ERROR", "in %s" % ctx)
        for n in addPrevNodes:
            log("ERROR", "lost node: %s" % prettyPrintNode(n))

    for n in addCurNodes:
        log("INFO", "new node: %s" % prettyPrintNode(n))


def analyseLogfile(filename):
    """Scan the log file *filename* and report lost nodes per save block.

    Tracks START/END Saving markers, collects the transactionlog records in
    between and runs compareNodeLists() at every END marker that has
    previous-version records. Nested START markers and surplus END markers
    are reported as ERROR; a surplus END marker also stops the scan.
    Scanning stops early once ``maxLinecnt`` lines were read (if set).
    """
    with open(filename) as f:
        saving = 0
        linecnt = 0
        saveCtx = None
        prevSaves = []
        saves = []

        for line in f:
            linecnt += 1
            if '*************** START Saving' in line:
                saving += 1
                log('DEBUG', line)
                saveCtx = parseStart(line)

                if saving > 1:
                    log("ERROR", "Concurrent save (%s) in #%s of %s" % (saving, linecnt, line))
                    # TODO: what now?

            elif 'INFO transactionlog' in line:
                # 'save previous' must be tested first: it also contains 'save'
                if 'save previous' in line:
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s" % line)
                    else:
                        prevSaves.append(data)

                elif 'save' in line:
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s" % line)
                    else:
                        saves.append(data)

            elif '*************** END Saving' in line:
                saving -= 1
                log('DEBUG', line)

                if saving > 0:
                    log("ERROR", "Concurrent end save (%s) in #%s of %s" % (saving, linecnt, line))

                elif saving < 0:
                    log("ERROR", "Too many END saves!")
                    break

                log("INFO", "saving %s" % saveCtx)
                log("INFO", "prev saves: %s" % len(prevSaves))
                log("INFO", "saves: %s" % len(saves))

                if prevSaves:
                    compareNodeLists(prevSaves, saves, saveCtx)

                prevSaves = []
                saves = []

            if maxLinecnt is not None and linecnt >= maxLinecnt:
                break

        log("SYSMSG", "%s lines of logfile scanned" % linecnt)


if __name__ == '__main__':
    # run analysis
    analyseLogfile('ismi-161011.log')