43
|
1
|
|
2 # coding: utf-8
|
|
3
|
|
4 import sys, re
|
|
5
|
|
6 # max number of lines to read (for testing)
|
|
7 maxLinecnt = None
|
|
8
|
|
9 # do not output deleted nodes
|
|
10 omitDeleted = True
|
|
11
|
|
12 # active log levels for logging
|
49
|
13 #logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
|
43
|
14 #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
|
49
|
15 logLevels = {'ERROR', 'SYSMSG'}
|
43
|
16
|
|
17 def log(level, message):
|
|
18 if level in logLevels:
|
|
19 print("%s: %s"%(level, message))
|
|
20
|
|
21
|
|
22 def prettyPrintNode(node):
|
|
23 nt = node['node-type']
|
|
24 att = ''
|
|
25 if nt == 'ENTITY':
|
|
26 att = " %s=%s "%('oc',node['object-class'])
|
|
27
|
|
28 elif nt == 'ATTRIBUTE':
|
|
29 att = " %s=%s "%('name',node['name'])
|
|
30
|
|
31 elif nt == 'RELATION':
|
|
32 att = " %s=%s "%('oc',node['object-class'])
|
|
33
|
|
34 s = "%s%s[%s]"%(nt, att, node)
|
|
35 return s
|
|
36
|
|
37
|
|
38 nodeCsvFieldList = ['node_type', 'id', 'row_id', 'object_class', 'user', 'public', 'type',
|
|
39 'version', 'modification_time', 'system_status', 'content_type',
|
|
40 'source_id', 'source_modif', 'source_obj_class', 'target_id', 'target_modif', 'target_obj_class',
|
|
41 'own_value_b64']
|
|
42
|
|
43 nodeCsvFieldMap = {'node_type':'node-type', 'id':'id', 'row_id':'row-id', 'object_class':'object-class', 'user':'user',
|
|
44 'public':'public', 'type':'type',
|
|
45 'version':'version', 'modification_time':'mtime', 'system_status':'system-status', 'content_type':'content-type',
|
|
46 'source_id':'source-id', 'source_modif':'source-mtime', 'source_obj_class':'source-oc',
|
|
47 'target_id':'target-id', 'target_modif':'target-mtime', 'target_obj_class':'target-oc',
|
|
48 'own_value_b64':'b64-value'}
|
|
49
|
|
50
|
|
51 def printHeaderCsv(outFile):
|
|
52 s = ""
|
|
53 for cf in nodeCsvFieldList:
|
|
54 if s == "":
|
|
55 s += "%s"%cf
|
|
56 else:
|
|
57 s += ",%s"%cf
|
|
58
|
|
59 print(s, file=outFile)
|
|
60
|
|
61
|
|
62 def printNodeCsv(node, outFile):
|
|
63 s = ""
|
|
64 for cf in nodeCsvFieldList:
|
|
65 nf = nodeCsvFieldMap[cf]
|
|
66 if s == "":
|
|
67 s += "%s"%(node.get(nf, ''))
|
|
68 else:
|
|
69 s += ",%s"%(node.get(nf, ''))
|
|
70
|
|
71 print(s, file=outFile)
|
|
72
|
|
73
|
44
|
74 def parseThread(line):
|
|
75 tm = re.match('(\d+-\d+-\d+ \d+:\d+:\d+),\d+ (\S+)', line)
|
43
|
76 if tm:
|
|
77 tstamp = tm.group(1)
|
44
|
78 tid = tm.group(2)
|
|
79
|
|
80 return {'time': tstamp, 'tid': tid}
|
|
81
|
|
82 return None
|
|
83
|
|
84
|
|
85 def parseStart(line):
|
|
86 pt = parseThread(line)
|
|
87 tstamp = pt['time']
|
|
88 tid = pt['tid']
|
43
|
89
|
|
90 sm = re.search('START Saving (\w+) id=(\w+)', line)
|
|
91 if sm:
|
44
|
92 return {'tid': tid, 'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
|
43
|
93
|
|
94 sm = re.search('START remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
|
|
95 if sm:
|
44
|
96 return {'tid': tid, 'time': tstamp, 'id': sm.group(1)}
|
43
|
97
|
|
98 sm = re.search('END Saving (\w+) id=(\w+)', line)
|
|
99 if sm:
|
44
|
100 return {'tid': tid, 'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
|
43
|
101
|
|
102 sm = re.search('END remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
|
|
103 if sm:
|
44
|
104 return {'tid': tid, 'time': tstamp, 'id': sm.group(1)}
|
43
|
105
|
|
106 return None
|
|
107
|
|
108
|
|
109 def parseSave(line):
|
|
110 match = re.search('([A-Z]+)\[([^\]]+)\]', line)
|
|
111 if match:
|
|
112 data = {'node-type': match.group(1)}
|
|
113 segs = match.group(2).split(', ')
|
|
114 for seg in segs:
|
|
115 k, v = seg.split('=', 1)
|
|
116
|
|
117 data[k] = v.strip('"')
|
|
118
|
|
119 # normalize attribute's name to object-class
|
|
120 if k == 'name':
|
|
121 data['object-class'] = v.strip('"')
|
|
122
|
|
123 return data
|
|
124
|
|
125 return None
|
|
126
|
|
127
|
|
128 def equalNodes(prev, cur):
|
|
129 #log("DEBUG", "compare: %s vs %s"%(prev, cur))
|
|
130 if prev['id'] != cur['id']:
|
|
131 log("INFO", "node id mismatch!")
|
|
132 return False
|
|
133
|
|
134 if prev['node-type'] != cur['node-type']:
|
|
135 log("INFO", "node node-type mismatch!")
|
|
136 return False
|
|
137
|
|
138 if prev.get('source-id', None) != cur.get('source-id', None):
|
|
139 log("INFO", "node source_id mismatch!")
|
|
140 return False
|
|
141
|
|
142 if prev.get('target-id', None) != cur.get('target-id', None):
|
|
143 log("INFO", "node target_id mismatch!")
|
|
144 return False
|
|
145
|
|
146 if prev['b64-value'] != cur['b64-value']:
|
|
147 log("INFO", "node ownvalue mismatch!")
|
|
148 return False
|
|
149
|
|
150 return True
|
|
151
|
|
152
|
|
153 def getSimilarNode(prev, curList):
|
|
154 nt = prev['node-type']
|
|
155 if nt == 'ATTRIBUTE':
|
|
156 for n in curList:
|
|
157 if n['node-type'] == 'ATTRIBUTE' \
|
|
158 and prev['name'] == n['name']:
|
|
159 # attribute with same name
|
|
160 log("DEBUG", "similar attributes: %s vs %s"%(prev, n))
|
|
161 return n
|
|
162
|
|
163 elif nt == 'RELATION':
|
|
164 for n in curList:
|
|
165 if n['node-type'] == 'RELATION' \
|
|
166 and prev['source-id'] == n['source-id'] \
|
|
167 and prev['target-id'] == n['target-id'] \
|
|
168 and prev['object-class'] == n['object-class']:
|
|
169 # relation with same source, target and type
|
|
170 log("DEBUG", "similar relations: %s vs %s"%(prev, n))
|
|
171 return n
|
|
172
|
|
173 return None
|
|
174
|
|
175
|
|
176 def compareNodeLists(prev, cur, ctx, lostFile=None):
|
|
177 """compare list of previous and current nodes.
|
|
178
|
|
179 prints results to the log and output file.
|
|
180 """
|
|
181 prevNodes = {}
|
|
182 curNodes = {}
|
|
183
|
|
184 #
|
|
185 # read nodes
|
|
186 #
|
|
187 for n in prev:
|
|
188 nid = n['id']
|
|
189 if nid not in prevNodes:
|
|
190 prevNodes[nid] = n
|
|
191 else:
|
|
192 log("DEBUG", "duplicate save of prev node id="+nid)
|
|
193 if isinstance(prevNodes[nid], list):
|
|
194 prevNodes[nid].append(n)
|
|
195 else:
|
|
196 prevNodes[nid] = [prevNodes[nid], n]
|
|
197
|
|
198 for n in cur:
|
|
199 nid = n['id']
|
|
200 if nid not in curNodes:
|
|
201 curNodes[nid] = n
|
|
202 else:
|
|
203 log("DEBUG", "duplicate save of cur node id="+nid)
|
|
204 if isinstance(curNodes[nid], list):
|
|
205 curNodes[nid].append(n)
|
|
206 else:
|
|
207 curNodes[nid] = [curNodes[nid], n]
|
|
208
|
|
209 #
|
|
210 # compare nodes
|
|
211 #
|
|
212 curNodeUnchecked = set(curNodes.keys())
|
|
213 addPrevNodes = []
|
|
214 addCurNodes = []
|
|
215
|
|
216 for nid in prevNodes:
|
|
217 prevNode = prevNodes[nid]
|
|
218 if isinstance(prevNode, list):
|
|
219 log("DEBUG", "multi-save prev node: %s"%prevNode)
|
|
220 # use the last version(?)
|
|
221 prevNode = prevNode[-1]
|
|
222
|
|
223 if nid not in curNodes:
|
|
224 if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
|
|
225 # emtpy attribute - ignore
|
|
226 continue
|
|
227
|
|
228 else:
|
|
229 log("DEBUG", "node %s not in cur saves! %s"%(nid,prevNode))
|
|
230 addPrevNodes.append(prevNode)
|
|
231 continue
|
|
232
|
|
233 curNode = curNodes[nid]
|
|
234 if isinstance(curNode, list):
|
|
235 log("DEBUG", "multi-save cur node: %s"%curNode)
|
|
236 # use the last version?
|
|
237 curNode = curNode[-1]
|
|
238
|
|
239 equalNodes(prevNode, curNode)
|
|
240
|
|
241 curNodeUnchecked.remove(nid)
|
|
242
|
|
243 # make list of additional current (=new) nodes
|
|
244 for nid in curNodeUnchecked:
|
|
245 curNode = curNodes[nid]
|
|
246 # list can contain lists
|
|
247 if isinstance(curNode, list):
|
|
248 # use the last version(?)
|
|
249 curNode = curNode[-1]
|
|
250
|
|
251 addCurNodes.append(curNode)
|
|
252 log("DEBUG", "new node %s"%curNodes[nid])
|
|
253
|
|
254 # compare missing and new nodes
|
|
255 for n in addPrevNodes.copy():
|
|
256 sn = getSimilarNode(n, addCurNodes)
|
|
257 if sn is not None:
|
|
258 # similar is good enough
|
|
259 addPrevNodes.remove(n)
|
|
260 addCurNodes.remove(sn)
|
|
261
|
|
262 if len(addPrevNodes) > 0:
|
|
263 #print("ERROR: lost nodes: %s"%[prettyPrintNode(n) for n in addPrevNodes])
|
|
264 log("ERROR", "in %s"%ctx)
|
|
265 for n in addPrevNodes:
|
|
266 log("ERROR","lost node: %s"%prettyPrintNode(n))
|
|
267 if lostFile is not None:
|
|
268 printNodeCsv(n, lostFile)
|
|
269
|
|
270 if len(addCurNodes) > 0:
|
|
271 #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes])
|
|
272 for n in addCurNodes:
|
|
273 log("INFO", "new node: %s"%prettyPrintNode(n))
|
|
274
|
|
275
|
|
276
|
|
277 def analyseLogfile(inFilename, outFilename=None):
|
|
278 outFile = None
|
|
279 if outFilename is not None:
|
|
280 outFile = open(outFilename, mode='w')
|
|
281 printHeaderCsv(outFile)
|
|
282
|
|
283 with open(inFilename) as f:
|
|
284 linecnt = 0
|
49
|
285 savecnt = 0
|
43
|
286 saving = 0
|
|
287 openSaves = {}
|
|
288 savingPrev = 0
|
|
289 deleting = 0
|
|
290 saveCtx = None
|
|
291 deleteCtx = None
|
44
|
292 prevSaves = {}
|
|
293 saves = {}
|
43
|
294
|
|
295 for line in f:
|
|
296 linecnt += 1
|
|
297 #
|
|
298 # parsing lines in order of specific match, not order of operation!
|
|
299 #
|
|
300 if '*** START Saving' in line:
|
49
|
301 savecnt += 1
|
43
|
302 saving += 1
|
|
303 # make sure delete is off
|
|
304 deleting = 0
|
|
305 log('DEBUG', line)
|
|
306 # parse time and id
|
|
307 saveCtx = parseStart(line)
|
44
|
308 tid = saveCtx['tid']
|
|
309 saveId = "%s@%s"%(saveCtx['id'], tid)
|
43
|
310 if saveId in openSaves:
|
|
311 log('ERROR', "Duplicate save for same id! %s"%saveCtx)
|
|
312 # assume the first save was spurious
|
|
313 saving -= 1
|
|
314
|
|
315 openSaves[saveId] = saveCtx
|
|
316
|
|
317 if len(openSaves) > 1:
|
|
318 log("ERROR", "Multiple open save (%s) in #%s of %s"%(saving, linecnt, line))
|
|
319 # TODO: what now?
|
|
320
|
|
321 elif '*** END Saving' in line:
|
|
322 saving -= 1
|
|
323 saveCtx = parseStart(line)
|
44
|
324 tid = saveCtx['tid']
|
|
325 saveId = "%s@%s"%(saveCtx['id'], tid)
|
43
|
326 if saveId in openSaves:
|
|
327 log('DEBUG', "End save matches start save: %s"%saveCtx)
|
|
328 del openSaves[saveId]
|
|
329
|
44
|
330 elif "null@%s"%saveCtx['tid'] in openSaves:
|
43
|
331 log('DEBUG', "Assume end save (of %s) matches start save null: %s"%(len(openSaves), saveCtx))
|
44
|
332 del openSaves["null@%s"%saveCtx['tid']]
|
43
|
333
|
|
334 else:
|
|
335 log("ERROR", "End save without start save! %s"%saveCtx)
|
|
336
|
|
337 # make sure delete is off
|
|
338 deleting = 0
|
|
339 log('DEBUG', line)
|
|
340
|
|
341 if len(openSaves) > 0:
|
|
342 log('WARNING', "Still open saves (%s) during end save in #%s of %s"%(saving, linecnt, line))
|
|
343
|
|
344 elif saving < 0:
|
|
345 log('ERROR', "Too many END saves!")
|
|
346 break
|
|
347
|
44
|
348 if tid not in saves:
|
|
349 saves[tid] = []
|
|
350
|
|
351 if tid not in prevSaves:
|
|
352 prevSaves[tid] = []
|
|
353
|
43
|
354 log("INFO", "saving %s"%saveCtx)
|
44
|
355 log("INFO", "prev saves: %s"%len(prevSaves[tid]))
|
|
356 log("INFO", "saves: %s"%len(saves[tid]))
|
43
|
357
|
44
|
358 if len(prevSaves[tid]) > 0:
|
|
359 compareNodeLists(prevSaves[tid], saves[tid], saveCtx, outFile)
|
43
|
360
|
44
|
361 del prevSaves[tid]
|
|
362 del saves[tid]
|
43
|
363
|
|
364 elif '** START save entity' in line:
|
|
365 pass
|
|
366
|
|
367 elif '** END save entity' in line:
|
|
368 pass
|
|
369
|
|
370 elif '** START remove entity' in line:
|
|
371 deleting += 1
|
|
372 log('DEBUG', line)
|
|
373 deleteCtx = parseStart(line)
|
|
374
|
|
375 if deleting > 1:
|
|
376 log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
|
|
377 # TODO: what now?
|
|
378 break
|
|
379
|
|
380 elif '** END remove entity' in line:
|
|
381 deleting -= 1
|
|
382 log('DEBUG', line)
|
|
383 ctx = parseStart(line)
|
|
384 if deleteCtx['id'] != ctx['id']:
|
|
385 log('ERROR', "END delete has different ID!")
|
|
386 break
|
|
387
|
|
388 if deleting > 1:
|
|
389 log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
|
|
390 # TODO: what now?
|
|
391 break
|
|
392
|
|
393 deleteCtx = None
|
|
394
|
|
395 elif '* START save entity' in line:
|
|
396 pass
|
|
397
|
|
398 elif '* START save previous' in line:
|
|
399 savingPrev += 1
|
|
400
|
|
401 elif '* ...END save entity' in line:
|
|
402 pass
|
|
403
|
|
404 elif '* END ...save previous' in line:
|
|
405 savingPrev -= 1
|
|
406
|
50
|
407 if savingPrev < 0:
|
43
|
408 log("ERROR", "Too many END save previous!")
|
|
409
|
|
410 elif 'save previous' in line:
|
44
|
411 # check thread ID
|
|
412 td = parseThread(line)
|
|
413 tid = td['tid']
|
|
414
|
43
|
415 data = parseSave(line)
|
|
416 if data is None:
|
|
417 log("DEBUG", "Error parsing line: %s"%line)
|
|
418 continue
|
|
419
|
|
420 if omitDeleted and deleting > 0 and savingPrev > 0:
|
|
421 # this should be a save prev from deleting
|
|
422 delId = deleteCtx['id']
|
|
423 # check if node is related to deleted id
|
|
424 if data.get('id', None) == delId or data.get('source-id', None) == delId \
|
|
425 or data.get('target-id', None) == delId:
|
|
426 log('DEBUG', "intentionally deleted node: %s"%data)
|
|
427
|
|
428 else:
|
|
429 log('ERROR', "Node without matching id in delete! %s"%data)
|
44
|
430 if tid in prevSaves:
|
|
431 prevSaves[tid].append(data)
|
|
432
|
|
433 else:
|
|
434 prevSaves[tid] = [data]
|
43
|
435
|
|
436 else:
|
44
|
437 if tid in prevSaves:
|
|
438 prevSaves[tid].append(data)
|
|
439
|
|
440 else:
|
|
441 prevSaves[tid] = [data]
|
43
|
442
|
|
443 elif 'save' in line:
|
44
|
444 # check thread ID
|
|
445 td = parseThread(line)
|
|
446 tid = td['tid']
|
|
447
|
43
|
448 data = parseSave(line)
|
|
449 if data is None:
|
|
450 log("DEBUG", "Error parsing line: %s"%line)
|
|
451 continue
|
|
452
|
44
|
453 if tid in saves:
|
|
454 saves[tid].append(data)
|
|
455
|
|
456 else:
|
|
457 saves[tid] = [data]
|
43
|
458
|
|
459 if maxLinecnt is not None and linecnt >= maxLinecnt:
|
|
460 break
|
|
461
|
44
|
462 if len(saves) > 0:
|
|
463 log('ERROR', "%s unprocessed saves!"%len(saves))
|
|
464 log('DEBUG', saves)
|
|
465
|
|
466 if len(prevSaves) > 0:
|
|
467 log('ERROR', "%s unprocessed previous saves!"%len(prevSaves))
|
|
468 log('DEBUG', prevSaves)
|
|
469
|
49
|
470 log("SYSMSG", "%s form saves in %s lines of logfile"%(savecnt, linecnt))
|
43
|
471
|
|
472
|
|
473 #
|
|
474 # public static void main :-)
|
|
475 #
|
|
476
|
|
477 input_fn = None
|
|
478 output_fn = None
|
|
479
|
|
480 # parse command line parameters
|
|
481 if len(sys.argv) > 2:
|
|
482 input_fn = sys.argv[1]
|
|
483 output_fn = sys.argv[2]
|
|
484
|
|
485 # run analysis
|
|
486 analyseLogfile(input_fn, output_fn)
|
|
487
|
|
488 else:
|
|
489 print("ERROR: missing parameters!")
|
|
490 print("use: check_ismi_log logfile csvfile")
|
|
491 exit(1)
|