importFromOpenMind/importer/check_ismi_transaction_log.py @ 43:bb5390f752fc

new ISMI transaction log analyser.
author: casties
date: Thu, 08 Dec 2016 19:19:02 +0100
parents:
children: 4692e90215eb

# coding: utf-8

import sys, re

# max number of lines to read (for testing)
maxLinecnt = None

# do not output deleted nodes
omitDeleted = True

# active log levels for logging
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'ERROR', 'SYSMSG'}


def log(level, message):
    if level in logLevels:
        print("%s: %s"%(level, message))


def prettyPrintNode(node):
    nt = node['node-type']
    att = ''
    if nt == 'ENTITY':
        att = " %s=%s "%('oc', node['object-class'])

    elif nt == 'ATTRIBUTE':
        att = " %s=%s "%('name', node['name'])

    elif nt == 'RELATION':
        att = " %s=%s "%('oc', node['object-class'])

    s = "%s%s[%s]"%(nt, att, node)
    return s

# CSV column names for the output file
nodeCsvFieldList = ['node_type', 'id', 'row_id', 'object_class', 'user', 'public', 'type',
                    'version', 'modification_time', 'system_status', 'content_type',
                    'source_id', 'source_modif', 'source_obj_class', 'target_id', 'target_modif', 'target_obj_class',
                    'own_value_b64']

# maps CSV column names to the keys of the parsed node dicts
nodeCsvFieldMap = {'node_type':'node-type', 'id':'id', 'row_id':'row-id', 'object_class':'object-class', 'user':'user',
                   'public':'public', 'type':'type',
                   'version':'version', 'modification_time':'mtime', 'system_status':'system-status', 'content_type':'content-type',
                   'source_id':'source-id', 'source_modif':'source-mtime', 'source_obj_class':'source-oc',
                   'target_id':'target-id', 'target_modif':'target-mtime', 'target_obj_class':'target-oc',
                   'own_value_b64':'b64-value'}

def printHeaderCsv(outFile):
    """print the CSV header line to outFile."""
    s = ""
    for cf in nodeCsvFieldList:
        if s == "":
            s += "%s"%cf
        else:
            s += ",%s"%cf

    print(s, file=outFile)


def printNodeCsv(node, outFile):
    """print one node as a CSV line to outFile."""
    s = ""
    for cf in nodeCsvFieldList:
        nf = nodeCsvFieldMap[cf]
        if s == "":
            s += "%s"%(node.get(nf, ''))
        else:
            s += ",%s"%(node.get(nf, ''))

    print(s, file=outFile)

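# Hedged usage sketch for the CSV helpers above; the node dict and its values
# are hypothetical, only the column order (nodeCsvFieldList) and key mapping
# (nodeCsvFieldMap) come from the definitions above.
def _printNodeCsvExample():
    import io
    buf = io.StringIO()
    printHeaderCsv(buf)
    printNodeCsv({'node-type': 'ENTITY', 'id': '42', 'object-class': 'CODEX'}, buf)
    # buf now holds the header row and a data row like "ENTITY,42,,CODEX,,,..."
    # with empty values for all columns missing from the node dict
    return buf.getvalue()
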
def parseStart(line):
    """parse timestamp, object class and id from a START/END Saving or remove entity log line."""
    tstamp = None
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
    if tm:
        tstamp = tm.group(1)

    sm = re.search(r'START Saving (\w+) id=(\w+)', line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    sm = re.search(r'START remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
    if sm:
        return {'time': tstamp, 'id': sm.group(1)}

    sm = re.search(r'END Saving (\w+) id=(\w+)', line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    sm = re.search(r'END remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
    if sm:
        return {'time': tstamp, 'id': sm.group(1)}

    return None

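# Hedged example of the kind of log lines parseStart() recognizes; the exact
# ISMI log wording, user and object class are assumptions, only the matched
# patterns come from the regular expressions above.
def _parseStartExample():
    save = parseStart('2016-12-08 19:19:02 *** START Saving CODEX id=12345')
    # -> {'time': '2016-12-08 19:19:02', 'oc': 'CODEX', 'id': '12345'}
    remove = parseStart('** START remove entity: user=u1 entity=Entity[rowId=7, id=12345, ...')
    # -> {'time': None, 'id': '12345'} (no leading timestamp in this line)
    return save, remove
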
def parseSave(line):
    """parse the node dump ("TYPE[key=value, ...]") in a save log line into a dict."""
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if match:
        data = {'node-type': match.group(1)}
        segs = match.group(2).split(', ')
        for seg in segs:
            k, v = seg.split('=', 1)

            data[k] = v.strip('"')

            # normalize attribute's name to object-class
            if k == 'name':
                data['object-class'] = v.strip('"')

        return data

    return None

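# Hedged example of the node dump format parseSave() expects; the field names
# and values shown are assumptions based on how the parsed dicts are used
# elsewhere in this script.
def _parseSaveExample():
    node = parseSave('save previous node: ATTRIBUTE[id=500, name="title", b64-value=Zm9v]')
    # -> {'node-type': 'ATTRIBUTE', 'id': '500', 'name': 'title',
    #     'object-class': 'title', 'b64-value': 'Zm9v'}
    return node
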
def equalNodes(prev, cur):
    """compare two nodes field by field, logging the first mismatch."""
    #log("DEBUG", "compare: %s vs %s"%(prev, cur))
    if prev['id'] != cur['id']:
        log("INFO", "node id mismatch!")
        return False

    if prev['node-type'] != cur['node-type']:
        log("INFO", "node node-type mismatch!")
        return False

    if prev.get('source-id', None) != cur.get('source-id', None):
        log("INFO", "node source_id mismatch!")
        return False

    if prev.get('target-id', None) != cur.get('target-id', None):
        log("INFO", "node target_id mismatch!")
        return False

    if prev['b64-value'] != cur['b64-value']:
        log("INFO", "node ownvalue mismatch!")
        return False

    return True

def getSimilarNode(prev, curList):
    """find a node in curList that is similar to prev.

    similar means an attribute with the same name, or a relation with the
    same source, target and object class.
    """
    nt = prev['node-type']
    if nt == 'ATTRIBUTE':
        for n in curList:
            if n['node-type'] == 'ATTRIBUTE' \
                    and prev['name'] == n['name']:
                # attribute with same name
                log("DEBUG", "similar attributes: %s vs %s"%(prev, n))
                return n

    elif nt == 'RELATION':
        for n in curList:
            if n['node-type'] == 'RELATION' \
                    and prev['source-id'] == n['source-id'] \
                    and prev['target-id'] == n['target-id'] \
                    and prev['object-class'] == n['object-class']:
                # relation with same source, target and type
                log("DEBUG", "similar relations: %s vs %s"%(prev, n))
                return n

    return None

def compareNodeLists(prev, cur, ctx, lostFile=None):
    """compare list of previous and current nodes.

    prints results to the log and output file.
    """
    prevNodes = {}
    curNodes = {}

    #
    # read nodes
    #
    for n in prev:
        nid = n['id']
        if nid not in prevNodes:
            prevNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of prev node id="+nid)
            if isinstance(prevNodes[nid], list):
                prevNodes[nid].append(n)
            else:
                prevNodes[nid] = [prevNodes[nid], n]

    for n in cur:
        nid = n['id']
        if nid not in curNodes:
            curNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of cur node id="+nid)
            if isinstance(curNodes[nid], list):
                curNodes[nid].append(n)
            else:
                curNodes[nid] = [curNodes[nid], n]

    #
    # compare nodes
    #
    curNodeUnchecked = set(curNodes.keys())
    addPrevNodes = []
    addCurNodes = []

    for nid in prevNodes:
        prevNode = prevNodes[nid]
        if isinstance(prevNode, list):
            log("DEBUG", "multi-save prev node: %s"%prevNode)
            # use the last version(?)
            prevNode = prevNode[-1]

        if nid not in curNodes:
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
                # empty attribute - ignore
                continue

            else:
                log("DEBUG", "node %s not in cur saves! %s"%(nid, prevNode))
                addPrevNodes.append(prevNode)
                continue

        curNode = curNodes[nid]
        if isinstance(curNode, list):
            log("DEBUG", "multi-save cur node: %s"%curNode)
            # use the last version?
            curNode = curNode[-1]

        equalNodes(prevNode, curNode)

        curNodeUnchecked.remove(nid)

    # make list of additional current (=new) nodes
    for nid in curNodeUnchecked:
        curNode = curNodes[nid]
        # list can contain lists
        if isinstance(curNode, list):
            # use the last version(?)
            curNode = curNode[-1]

        addCurNodes.append(curNode)
        log("DEBUG", "new node %s"%curNodes[nid])

    # compare missing and new nodes
    for n in addPrevNodes.copy():
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if len(addPrevNodes) > 0:
        #print("ERROR: lost nodes: %s"%[prettyPrintNode(n) for n in addPrevNodes])
        log("ERROR", "in %s"%ctx)
        for n in addPrevNodes:
            log("ERROR", "lost node: %s"%prettyPrintNode(n))
            if lostFile is not None:
                printNodeCsv(n, lostFile)

    if len(addCurNodes) > 0:
        #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes])
        for n in addCurNodes:
            log("INFO", "new node: %s"%prettyPrintNode(n))

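# Hedged example of how compareNodeLists() flags a lost node; the node dicts
# and the context are hypothetical, only the keys used by the comparison
# above are assumed.
def _compareNodeListsExample():
    prev = [{'node-type': 'ATTRIBUTE', 'id': '11', 'name': 'title', 'b64-value': 'YQ=='}]
    cur = []
    # logs "lost node: ATTRIBUTE name=title [...]" because id 11 is missing
    # from the current saves and no current attribute shares its name
    compareNodeLists(prev, cur, {'time': None, 'oc': 'TEXT', 'id': '10'})
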
def analyseLogfile(inFilename, outFilename=None):
    """scan the transaction logfile and check all save and delete operations."""
    outFile = None
    if outFilename is not None:
        outFile = open(outFilename, mode='w')
        printHeaderCsv(outFile)

    with open(inFilename) as f:
        linecnt = 0
        saving = 0
        openSaves = {}
        savingPrev = 0
        deleting = 0
        saveCtx = None
        deleteCtx = None
        prevSaves = []
        saves = []

        for line in f:
            linecnt += 1
            #
            # parsing lines in order of specific match, not order of operation!
            #
            if '*** START Saving' in line:
                saving += 1
                # make sure delete is off
                deleting = 0
                log('DEBUG', line)
                # parse time and id
                saveCtx = parseStart(line)
                saveId = saveCtx['id']
                if saveId in openSaves:
                    log('ERROR', "Duplicate save for same id! %s"%saveCtx)
                    # assume the first save was spurious
                    saving -= 1

                openSaves[saveId] = saveCtx

                if len(openSaves) > 1:
                    log("ERROR", "Multiple open saves (%s) in #%s of %s"%(saving, linecnt, line))
                    # TODO: what now?

            elif '*** END Saving' in line:
                saving -= 1
                saveCtx = parseStart(line)
                saveId = saveCtx['id']
                if saveId in openSaves:
                    log('DEBUG', "End save matches start save: %s"%saveCtx)
                    del openSaves[saveId]

                elif 'null' in openSaves:
                    log('DEBUG', "Assume end save (of %s) matches start save null: %s"%(len(openSaves), saveCtx))
                    del openSaves['null']

                else:
                    log("ERROR", "End save without start save! %s"%saveCtx)

                # make sure delete is off
                deleting = 0
                log('DEBUG', line)

                if len(openSaves) > 0:
                    log('WARNING', "Still open saves (%s) during end save in #%s of %s"%(saving, linecnt, line))

                elif saving < 0:
                    log('ERROR', "Too many END saves!")
                    break

                log("INFO", "saving %s"%saveCtx)
                log("INFO", "prev saves: %s"%len(prevSaves))
                log("INFO", "saves: %s"%len(saves))

                if len(prevSaves) > 0:
                    compareNodeLists(prevSaves, saves, saveCtx, outFile)

                prevSaves = []
                saves = []

            elif '** START save entity' in line:
                pass

            elif '** END save entity' in line:
                pass

            elif '** START remove entity' in line:
                deleting += 1
                log('DEBUG', line)
                deleteCtx = parseStart(line)

                if deleting > 1:
                    log("ERROR", "Concurrent delete (%s) in #%s of %s"%(deleting, linecnt, line))
                    # TODO: what now?
                    break

            elif '** END remove entity' in line:
                deleting -= 1
                log('DEBUG', line)
                ctx = parseStart(line)
                if deleteCtx['id'] != ctx['id']:
                    log('ERROR', "END delete has different ID!")
                    break

                if deleting > 1:
                    log("ERROR", "Concurrent delete (%s) in #%s of %s"%(deleting, linecnt, line))
                    # TODO: what now?
                    break

                deleteCtx = None

            elif '* START save entity' in line:
                pass

            elif '* START save previous' in line:
                savingPrev += 1

            elif '* ...END save entity' in line:
                pass

            elif '* END ...save previous' in line:
                savingPrev -= 1

                if savingPrev < 0:
                    log("ERROR", "Too many END save previous!")

            elif 'save previous' in line:
                data = parseSave(line)
                if data is None:
                    log("DEBUG", "Error parsing line: %s"%line)
                    continue

                if omitDeleted and deleting > 0 and savingPrev > 0:
                    # this should be a save prev from deleting
                    delId = deleteCtx['id']
                    # check if node is related to deleted id
                    if data.get('id', None) == delId or data.get('source-id', None) == delId \
                            or data.get('target-id', None) == delId:
                        log('DEBUG', "intentionally deleted node: %s"%data)

                    else:
                        log('ERROR', "Node without matching id in delete! %s"%data)
                        prevSaves.append(data)

                else:
                    prevSaves.append(data)

            elif 'save' in line:
                data = parseSave(line)
                if data is None:
                    log("DEBUG", "Error parsing line: %s"%line)
                    continue

                saves.append(data)

            if maxLinecnt is not None and linecnt >= maxLinecnt:
                break

    log("SYSMSG", "%s lines of logfile scanned"%linecnt)


#
# public static void main :-)
#

input_fn = None
output_fn = None

# parse command line parameters
if len(sys.argv) > 2:
    input_fn = sys.argv[1]
    output_fn = sys.argv[2]

    # run analysis
    analyseLogfile(input_fn, output_fn)

else:
    print("ERROR: missing parameters!")
    print("use: check_ismi_log logfile csvfile")
    sys.exit(1)