Mercurial > hg > drupalISMI
view importFromOpenMind/importer/check_ismi_transaction_log.py @ 57:be1c7d6814b6
small change to xml format.
author | casties |
---|---|
date | Mon, 10 Jul 2017 17:18:39 +0200 |
parents | 1f160e595167 |
children |
line wrap: on
line source
# coding: utf-8
"""Check an ISMI OpenMind transaction log for lost nodes.

Reads a server logfile, collects the "save previous" node dumps (state
before a save transaction) and the following "save" node dumps (state
after the transaction) per thread, compares both sets when the
transaction ends and reports nodes that exist in the previous state but
not in the new one (i.e. nodes lost by the save). Lost nodes can
optionally be written to a CSV file.

Usage: check_ismi_log logfile csvfile
"""
import sys, re

# max number of lines to read (None = unlimited; set a number for testing)
maxLinecnt = None

# do not output deleted nodes
omitDeleted = True

# active log levels for logging
#logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
logLevels = {'ERROR', 'SYSMSG'}


def log(level, message):
    """Print message to stdout if level is in the active logLevels."""
    if level in logLevels:
        print("%s: %s"%(level, message))


def prettyPrintNode(node):
    """Return a short human-readable string for a node dict.

    Shows the node type, its most distinguishing attribute
    (object-class or name) and the full dict.
    """
    nt = node['node-type']
    att = ''
    if nt == 'ENTITY':
        att = " %s=%s "%('oc', node['object-class'])
    elif nt == 'ATTRIBUTE':
        att = " %s=%s "%('name', node['name'])
    elif nt == 'RELATION':
        att = " %s=%s "%('oc', node['object-class'])
    s = "%s%s[%s]"%(nt, att, node)
    return s


# CSV column names (output order) for lost-node rows.
nodeCsvFieldList = ['_logtime', 'node_type', 'id', 'row_id', 'object_class',
                    'user', 'public', 'type', 'version', 'modification_time',
                    'system_status', 'content_type', 'source_id', 'source_modif',
                    'source_obj_class', 'target_id', 'target_modif',
                    'target_obj_class', 'own_value_b64']

# Maps CSV column name -> key in the parsed node dict.
nodeCsvFieldMap = {'_logtime': '_logtime', 'node_type': 'node-type', 'id': 'id',
                   'row_id': 'row-id', 'object_class': 'object-class',
                   'user': 'user', 'public': 'public', 'type': 'type',
                   'version': 'version', 'modification_time': 'mtime',
                   'system_status': 'system-status', 'content_type': 'content-type',
                   'source_id': 'source-id', 'source_modif': 'source-mtime',
                   'source_obj_class': 'source-oc', 'target_id': 'target-id',
                   'target_modif': 'target-mtime', 'target_obj_class': 'target-oc',
                   'own_value_b64': 'b64-value'}


def printHeaderCsv(outFile):
    """Write the CSV header row with all column names to outFile."""
    print(",".join(nodeCsvFieldList), file=outFile)


def printNodeCsv(node, outFile):
    """Write one node as a CSV row to outFile.

    Missing fields become empty strings.
    NB: values are not quoted or escaped (matches original format).
    """
    print(",".join("%s"%(node.get(nodeCsvFieldMap[cf], '')) for cf in nodeCsvFieldList),
          file=outFile)


def parseThread(line):
    """Parse timestamp and thread id from the start of a log line.

    Returns {'time': ..., 'tid': ...} or None if the line does not
    start with a "YYYY-MM-DD HH:MM:SS,mmm tid" prefix.
    """
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+),\d+ (\S+)', line)
    if tm:
        tstamp = tm.group(1)
        tid = tm.group(2)
        return {'time': tstamp, 'tid': tid}
    return None


def parseStart(line):
    """Parse a START/END Saving or START/END remove entity marker line.

    Returns a context dict with 'tid', 'time', 'id' (and 'oc' for save
    lines) or None if no marker matches.
    NB: assumes the line has a parseable timestamp prefix (raises
    TypeError otherwise — markers are only matched on server log lines).
    """
    pt = parseThread(line)
    tstamp = pt['time']
    tid = pt['tid']
    sm = re.search(r'START Saving (\w+) id=(\w+)', line)
    if sm:
        return {'tid': tid, 'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
    sm = re.search(r'START remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
    if sm:
        return {'tid': tid, 'time': tstamp, 'id': sm.group(1)}
    sm = re.search(r'END Saving (\w+) id=(\w+)', line)
    if sm:
        return {'tid': tid, 'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
    sm = re.search(r'END remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
    if sm:
        return {'tid': tid, 'time': tstamp, 'id': sm.group(1)}
    return None


def parseSave(line):
    """Parse a node dump like ENTITY[id=1, ...] from a save log line.

    Returns a dict with 'node-type' plus all key=value pairs inside the
    brackets (quotes stripped), or None if the line has no node dump.
    """
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if match:
        data = {'node-type': match.group(1)}
        segs = match.group(2).split(', ')
        for seg in segs:
            k, v = seg.split('=', 1)
            data[k] = v.strip('"')
            # normalize attribute's name to object-class
            if k == 'name':
                data['object-class'] = v.strip('"')
        return data
    return None


def equalNodes(prev, cur):
    """Compare two nodes by id, type, source/target ids and own-value.

    Logs an INFO message for the first mismatching field.
    Returns True if all compared fields match.
    """
    #log("DEBUG", "compare: %s vs %s"%(prev, cur))
    if prev['id'] != cur['id']:
        log("INFO", "node id mismatch!")
        return False
    if prev['node-type'] != cur['node-type']:
        log("INFO", "node node-type mismatch!")
        return False
    if prev.get('source-id', None) != cur.get('source-id', None):
        log("INFO", "node source_id mismatch!")
        return False
    if prev.get('target-id', None) != cur.get('target-id', None):
        log("INFO", "node target_id mismatch!")
        return False
    if prev['b64-value'] != cur['b64-value']:
        log("INFO", "node ownvalue mismatch!")
        return False
    return True


def getSimilarNode(prev, curList):
    """Find a node in curList that is "similar" to prev.

    Similar means: an attribute with the same name, or a relation with
    the same source id, target id and object class. Returns the similar
    node or None.
    """
    nt = prev['node-type']
    if nt == 'ATTRIBUTE':
        for n in curList:
            if n['node-type'] == 'ATTRIBUTE' \
                    and prev['name'] == n['name']:
                # attribute with same name
                log("DEBUG", "similar attributes: %s vs %s"%(prev, n))
                return n
    elif nt == 'RELATION':
        for n in curList:
            if n['node-type'] == 'RELATION' \
                    and prev['source-id'] == n['source-id'] \
                    and prev['target-id'] == n['target-id'] \
                    and prev['object-class'] == n['object-class']:
                # relation with same source, target and type
                log("DEBUG", "similar relations: %s vs %s"%(prev, n))
                return n
    return None


def compareNodeLists(prev, cur, ctx, lostFile=None):
    """compare list of previous and current nodes.

    prints results to the log and output file.

    prev/cur are lists of node dicts, ctx is the save context (for error
    messages), lostFile is an optional open file for CSV rows of lost
    nodes.
    """
    prevNodes = {}
    curNodes = {}
    #
    # read nodes (index by id; repeated saves of one id become a list)
    #
    for n in prev:
        nid = n['id']
        if nid not in prevNodes:
            prevNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of prev node id="+nid)
            if isinstance(prevNodes[nid], list):
                prevNodes[nid].append(n)
            else:
                prevNodes[nid] = [prevNodes[nid], n]
    for n in cur:
        nid = n['id']
        if nid not in curNodes:
            curNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of cur node id="+nid)
            if isinstance(curNodes[nid], list):
                curNodes[nid].append(n)
            else:
                curNodes[nid] = [curNodes[nid], n]
    #
    # compare nodes
    #
    curNodeUnchecked = set(curNodes.keys())
    addPrevNodes = []
    addCurNodes = []
    for nid in prevNodes:
        prevNode = prevNodes[nid]
        if isinstance(prevNode, list):
            log("DEBUG", "multi-save prev node: %s"%prevNode)
            # use the last version(?)
            prevNode = prevNode[-1]
        if nid not in curNodes:
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
                # empty attribute - ignore
                continue
            else:
                log("DEBUG", "node %s not in cur saves! %s"%(nid, prevNode))
                addPrevNodes.append(prevNode)
            continue
        curNode = curNodes[nid]
        if isinstance(curNode, list):
            log("DEBUG", "multi-save cur node: %s"%curNode)
            # use the last version?
            curNode = curNode[-1]
        # result only logged (INFO) inside equalNodes; not treated as lost
        equalNodes(prevNode, curNode)
        curNodeUnchecked.remove(nid)
    # make list of additional current (=new) nodes
    for nid in curNodeUnchecked:
        curNode = curNodes[nid]
        # list can contain lists
        if isinstance(curNode, list):
            # use the last version(?)
            curNode = curNode[-1]
        addCurNodes.append(curNode)
        log("DEBUG", "new node %s"%curNodes[nid])
    # compare missing and new nodes (iterate a copy; we remove members)
    for n in addPrevNodes.copy():
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)
    if len(addPrevNodes) > 0:
        #print("ERROR: lost nodes: %s"%[prettyPrintNode(n) for n in addPrevNodes])
        log("ERROR", "in %s"%ctx)
        for n in addPrevNodes:
            log("ERROR", "lost node: %s"%prettyPrintNode(n))
            if lostFile is not None:
                printNodeCsv(n, lostFile)
    if len(addCurNodes) > 0:
        #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes])
        for n in addCurNodes:
            log("INFO", "new node: %s"%prettyPrintNode(n))


def analyseLogfile(inFilename, outFilename=None):
    """Analyse the logfile inFilename for lost nodes.

    If outFilename is given, lost nodes are additionally written to it
    as CSV. The output file is always closed on exit (fix: the original
    leaked the file handle).
    """
    outFile = None
    if outFilename is not None:
        outFile = open(outFilename, mode='w')
        printHeaderCsv(outFile)
    try:
        with open(inFilename) as f:
            linecnt = 0
            savecnt = 0
            saving = 0
            openSaves = {}
            savingPrev = 0
            deleting = 0
            saveCtx = None
            deleteCtx = None
            prevSaves = {}
            saves = {}
            for line in f:
                linecnt += 1
                #
                # parsing lines in order of specific match, not order of operation!
                #
                if '*** START Saving' in line:
                    savecnt += 1
                    saving += 1
                    # make sure delete is off
                    deleting = 0
                    log('DEBUG', line)
                    # parse time and id
                    saveCtx = parseStart(line)
                    tid = saveCtx['tid']
                    saveId = "%s@%s"%(saveCtx['id'], tid)
                    if saveId in openSaves:
                        log('ERROR', "Duplicate save for same id! %s"%saveCtx)
                        # assume the first save was spurious
                        saving -= 1
                    openSaves[saveId] = saveCtx
                    if len(openSaves) > 1:
                        log("ERROR", "Multiple open save (%s) in #%s of %s"%(saving, linecnt, line))
                        # TODO: what now?
                elif '*** END Saving' in line:
                    saving -= 1
                    saveCtx = parseStart(line)
                    tid = saveCtx['tid']
                    saveId = "%s@%s"%(saveCtx['id'], tid)
                    if saveId in openSaves:
                        log('DEBUG', "End save matches start save: %s"%saveCtx)
                        del openSaves[saveId]
                    elif "null@%s"%saveCtx['tid'] in openSaves:
                        # new entities are saved with id=null first
                        log('DEBUG', "Assume end save (of %s) matches start save null: %s"%(len(openSaves), saveCtx))
                        del openSaves["null@%s"%saveCtx['tid']]
                    else:
                        log("ERROR", "End save without start save! %s"%saveCtx)
                    # make sure delete is off
                    deleting = 0
                    log('DEBUG', line)
                    if len(openSaves) > 0:
                        log('WARNING', "Still open saves (%s) during end save in #%s of %s"%(saving, linecnt, line))
                    elif saving < 0:
                        log('ERROR', "Too many END saves!")
                        break
                    if tid not in saves:
                        saves[tid] = []
                    if tid not in prevSaves:
                        prevSaves[tid] = []
                    log("INFO", "saving %s"%saveCtx)
                    log("INFO", "prev saves: %s"%len(prevSaves[tid]))
                    log("INFO", "saves: %s"%len(saves[tid]))
                    # transaction complete: compare before/after node sets
                    if len(prevSaves[tid]) > 0:
                        compareNodeLists(prevSaves[tid], saves[tid], saveCtx, outFile)
                    del prevSaves[tid]
                    del saves[tid]
                elif '** START save entity' in line:
                    pass
                elif '** END save entity' in line:
                    pass
                elif '** START remove entity' in line:
                    deleting += 1
                    log('DEBUG', line)
                    deleteCtx = parseStart(line)
                    if deleting > 1:
                        log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
                        # TODO: what now?
                        break
                elif '** END remove entity' in line:
                    deleting -= 1
                    log('DEBUG', line)
                    ctx = parseStart(line)
                    if deleteCtx['id'] != ctx['id']:
                        log('ERROR', "END delete has different ID!")
                        break
                    if deleting > 1:
                        log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
                        # TODO: what now?
                        break
                    deleteCtx = None
                elif '* START save entity' in line:
                    pass
                elif '* START save previous' in line:
                    savingPrev += 1
                elif '* ...END save entity' in line:
                    pass
                elif '* END ...save previous' in line:
                    savingPrev -= 1
                    if savingPrev < 0:
                        log("ERROR", "Too many END save previous!")
                elif 'save previous' in line:
                    # node dump of the previous (pre-save) state
                    # check thread ID
                    td = parseThread(line)
                    tid = td['tid']
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s"%line)
                        continue
                    # add log time
                    data['_logtime'] = td['time']
                    if omitDeleted and deleting > 0 and savingPrev > 0:
                        # this should be a save prev from deleting
                        delId = deleteCtx['id']
                        # check if node is related to deleted id
                        if data.get('id', None) == delId or data.get('source-id', None) == delId \
                                or data.get('target-id', None) == delId:
                            log('DEBUG', "intentionally deleted node: %s"%data)
                        else:
                            log('ERROR', "Node without matching id in delete! %s"%data)
                            if tid in prevSaves:
                                prevSaves[tid].append(data)
                            else:
                                prevSaves[tid] = [data]
                    else:
                        if tid in prevSaves:
                            prevSaves[tid].append(data)
                        else:
                            prevSaves[tid] = [data]
                elif 'save' in line:
                    # node dump of the new (post-save) state
                    # check thread ID
                    td = parseThread(line)
                    tid = td['tid']
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s"%line)
                        continue
                    # add log time
                    data['_logtime'] = td['time']
                    if tid in saves:
                        saves[tid].append(data)
                    else:
                        saves[tid] = [data]
                if maxLinecnt is not None and linecnt >= maxLinecnt:
                    break
            if len(saves) > 0:
                log('ERROR', "%s unprocessed saves!"%len(saves))
                log('DEBUG', saves)
            if len(prevSaves) > 0:
                log('ERROR', "%s unprocessed previous saves!"%len(prevSaves))
                log('DEBUG', prevSaves)
        log("SYSMSG", "%s form saves in %s lines of logfile"%(savecnt, linecnt))
    finally:
        # fix: close the CSV output file (was leaked in the original)
        if outFile is not None:
            outFile.close()


#
# public static void main :-)
#
if __name__ == '__main__':
    # fix: guard main so importing this module has no side effects
    input_fn = None
    output_fn = None
    # parse command line parameters
    if len(sys.argv) > 2:
        input_fn = sys.argv[1]
        output_fn = sys.argv[2]
        # run analysis
        analyseLogfile(input_fn, output_fn)
    else:
        print("ERROR: missing parameters!")
        print("use: check_ismi_log logfile csvfile")
        sys.exit(1)