43
|
1
|
|
2 # coding: utf-8
|
|
3
|
|
4 import sys, re
|
|
5
|
|
6 # max number of lines to read (for testing)
|
|
7 maxLinecnt = None
|
|
8
|
|
9 # do not output deleted nodes
|
|
10 omitDeleted = True
|
|
11
|
|
12 # active log levels for logging
|
|
13 logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
|
|
14 #logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
|
|
15 #logLevels = {'ERROR', 'SYSMSG'}
|
|
16
|
|
17 def log(level, message):
|
|
18 if level in logLevels:
|
|
19 print("%s: %s"%(level, message))
|
|
20
|
|
21
|
|
22 def prettyPrintNode(node):
|
|
23 nt = node['node-type']
|
|
24 att = ''
|
|
25 if nt == 'ENTITY':
|
|
26 att = " %s=%s "%('oc',node['object-class'])
|
|
27
|
|
28 elif nt == 'ATTRIBUTE':
|
|
29 att = " %s=%s "%('name',node['name'])
|
|
30
|
|
31 elif nt == 'RELATION':
|
|
32 att = " %s=%s "%('oc',node['object-class'])
|
|
33
|
|
34 s = "%s%s[%s]"%(nt, att, node)
|
|
35 return s
|
|
36
|
|
37
|
|
38 nodeCsvFieldList = ['node_type', 'id', 'row_id', 'object_class', 'user', 'public', 'type',
|
|
39 'version', 'modification_time', 'system_status', 'content_type',
|
|
40 'source_id', 'source_modif', 'source_obj_class', 'target_id', 'target_modif', 'target_obj_class',
|
|
41 'own_value_b64']
|
|
42
|
|
43 nodeCsvFieldMap = {'node_type':'node-type', 'id':'id', 'row_id':'row-id', 'object_class':'object-class', 'user':'user',
|
|
44 'public':'public', 'type':'type',
|
|
45 'version':'version', 'modification_time':'mtime', 'system_status':'system-status', 'content_type':'content-type',
|
|
46 'source_id':'source-id', 'source_modif':'source-mtime', 'source_obj_class':'source-oc',
|
|
47 'target_id':'target-id', 'target_modif':'target-mtime', 'target_obj_class':'target-oc',
|
|
48 'own_value_b64':'b64-value'}
|
|
49
|
|
50
|
|
51 def printHeaderCsv(outFile):
|
|
52 s = ""
|
|
53 for cf in nodeCsvFieldList:
|
|
54 if s == "":
|
|
55 s += "%s"%cf
|
|
56 else:
|
|
57 s += ",%s"%cf
|
|
58
|
|
59 print(s, file=outFile)
|
|
60
|
|
61
|
|
62 def printNodeCsv(node, outFile):
|
|
63 s = ""
|
|
64 for cf in nodeCsvFieldList:
|
|
65 nf = nodeCsvFieldMap[cf]
|
|
66 if s == "":
|
|
67 s += "%s"%(node.get(nf, ''))
|
|
68 else:
|
|
69 s += ",%s"%(node.get(nf, ''))
|
|
70
|
|
71 print(s, file=outFile)
|
|
72
|
|
73
|
|
74 def parseStart(line):
|
|
75 tstamp = None
|
|
76 tm = re.match('(\d+-\d+-\d+ \d+:\d+:\d+)', line)
|
|
77 if tm:
|
|
78 tstamp = tm.group(1)
|
|
79
|
|
80 sm = re.search('START Saving (\w+) id=(\w+)', line)
|
|
81 if sm:
|
|
82 return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
|
|
83
|
|
84 sm = re.search('START remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
|
|
85 if sm:
|
|
86 return {'time': tstamp, 'id': sm.group(1)}
|
|
87
|
|
88 sm = re.search('END Saving (\w+) id=(\w+)', line)
|
|
89 if sm:
|
|
90 return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
|
|
91
|
|
92 sm = re.search('END remove entity: user=\S+ entity=Entity\[rowId=\d+, id=(\d+)', line)
|
|
93 if sm:
|
|
94 return {'time': tstamp, 'id': sm.group(1)}
|
|
95
|
|
96 return None
|
|
97
|
|
98
|
|
99 def parseSave(line):
|
|
100 match = re.search('([A-Z]+)\[([^\]]+)\]', line)
|
|
101 if match:
|
|
102 data = {'node-type': match.group(1)}
|
|
103 segs = match.group(2).split(', ')
|
|
104 for seg in segs:
|
|
105 k, v = seg.split('=', 1)
|
|
106
|
|
107 data[k] = v.strip('"')
|
|
108
|
|
109 # normalize attribute's name to object-class
|
|
110 if k == 'name':
|
|
111 data['object-class'] = v.strip('"')
|
|
112
|
|
113 return data
|
|
114
|
|
115 return None
|
|
116
|
|
117
|
|
118 def equalNodes(prev, cur):
|
|
119 #log("DEBUG", "compare: %s vs %s"%(prev, cur))
|
|
120 if prev['id'] != cur['id']:
|
|
121 log("INFO", "node id mismatch!")
|
|
122 return False
|
|
123
|
|
124 if prev['node-type'] != cur['node-type']:
|
|
125 log("INFO", "node node-type mismatch!")
|
|
126 return False
|
|
127
|
|
128 if prev.get('source-id', None) != cur.get('source-id', None):
|
|
129 log("INFO", "node source_id mismatch!")
|
|
130 return False
|
|
131
|
|
132 if prev.get('target-id', None) != cur.get('target-id', None):
|
|
133 log("INFO", "node target_id mismatch!")
|
|
134 return False
|
|
135
|
|
136 if prev['b64-value'] != cur['b64-value']:
|
|
137 log("INFO", "node ownvalue mismatch!")
|
|
138 return False
|
|
139
|
|
140 return True
|
|
141
|
|
142
|
|
143 def getSimilarNode(prev, curList):
|
|
144 nt = prev['node-type']
|
|
145 if nt == 'ATTRIBUTE':
|
|
146 for n in curList:
|
|
147 if n['node-type'] == 'ATTRIBUTE' \
|
|
148 and prev['name'] == n['name']:
|
|
149 # attribute with same name
|
|
150 log("DEBUG", "similar attributes: %s vs %s"%(prev, n))
|
|
151 return n
|
|
152
|
|
153 elif nt == 'RELATION':
|
|
154 for n in curList:
|
|
155 if n['node-type'] == 'RELATION' \
|
|
156 and prev['source-id'] == n['source-id'] \
|
|
157 and prev['target-id'] == n['target-id'] \
|
|
158 and prev['object-class'] == n['object-class']:
|
|
159 # relation with same source, target and type
|
|
160 log("DEBUG", "similar relations: %s vs %s"%(prev, n))
|
|
161 return n
|
|
162
|
|
163 return None
|
|
164
|
|
165
|
|
166 def compareNodeLists(prev, cur, ctx, lostFile=None):
|
|
167 """compare list of previous and current nodes.
|
|
168
|
|
169 prints results to the log and output file.
|
|
170 """
|
|
171 prevNodes = {}
|
|
172 curNodes = {}
|
|
173
|
|
174 #
|
|
175 # read nodes
|
|
176 #
|
|
177 for n in prev:
|
|
178 nid = n['id']
|
|
179 if nid not in prevNodes:
|
|
180 prevNodes[nid] = n
|
|
181 else:
|
|
182 log("DEBUG", "duplicate save of prev node id="+nid)
|
|
183 if isinstance(prevNodes[nid], list):
|
|
184 prevNodes[nid].append(n)
|
|
185 else:
|
|
186 prevNodes[nid] = [prevNodes[nid], n]
|
|
187
|
|
188 for n in cur:
|
|
189 nid = n['id']
|
|
190 if nid not in curNodes:
|
|
191 curNodes[nid] = n
|
|
192 else:
|
|
193 log("DEBUG", "duplicate save of cur node id="+nid)
|
|
194 if isinstance(curNodes[nid], list):
|
|
195 curNodes[nid].append(n)
|
|
196 else:
|
|
197 curNodes[nid] = [curNodes[nid], n]
|
|
198
|
|
199 #
|
|
200 # compare nodes
|
|
201 #
|
|
202 curNodeUnchecked = set(curNodes.keys())
|
|
203 addPrevNodes = []
|
|
204 addCurNodes = []
|
|
205
|
|
206 for nid in prevNodes:
|
|
207 prevNode = prevNodes[nid]
|
|
208 if isinstance(prevNode, list):
|
|
209 log("DEBUG", "multi-save prev node: %s"%prevNode)
|
|
210 # use the last version(?)
|
|
211 prevNode = prevNode[-1]
|
|
212
|
|
213 if nid not in curNodes:
|
|
214 if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
|
|
215 # emtpy attribute - ignore
|
|
216 continue
|
|
217
|
|
218 else:
|
|
219 log("DEBUG", "node %s not in cur saves! %s"%(nid,prevNode))
|
|
220 addPrevNodes.append(prevNode)
|
|
221 continue
|
|
222
|
|
223 curNode = curNodes[nid]
|
|
224 if isinstance(curNode, list):
|
|
225 log("DEBUG", "multi-save cur node: %s"%curNode)
|
|
226 # use the last version?
|
|
227 curNode = curNode[-1]
|
|
228
|
|
229 equalNodes(prevNode, curNode)
|
|
230
|
|
231 curNodeUnchecked.remove(nid)
|
|
232
|
|
233 # make list of additional current (=new) nodes
|
|
234 for nid in curNodeUnchecked:
|
|
235 curNode = curNodes[nid]
|
|
236 # list can contain lists
|
|
237 if isinstance(curNode, list):
|
|
238 # use the last version(?)
|
|
239 curNode = curNode[-1]
|
|
240
|
|
241 addCurNodes.append(curNode)
|
|
242 log("DEBUG", "new node %s"%curNodes[nid])
|
|
243
|
|
244 # compare missing and new nodes
|
|
245 for n in addPrevNodes.copy():
|
|
246 sn = getSimilarNode(n, addCurNodes)
|
|
247 if sn is not None:
|
|
248 # similar is good enough
|
|
249 addPrevNodes.remove(n)
|
|
250 addCurNodes.remove(sn)
|
|
251
|
|
252 if len(addPrevNodes) > 0:
|
|
253 #print("ERROR: lost nodes: %s"%[prettyPrintNode(n) for n in addPrevNodes])
|
|
254 log("ERROR", "in %s"%ctx)
|
|
255 for n in addPrevNodes:
|
|
256 log("ERROR","lost node: %s"%prettyPrintNode(n))
|
|
257 if lostFile is not None:
|
|
258 printNodeCsv(n, lostFile)
|
|
259
|
|
260 if len(addCurNodes) > 0:
|
|
261 #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes])
|
|
262 for n in addCurNodes:
|
|
263 log("INFO", "new node: %s"%prettyPrintNode(n))
|
|
264
|
|
265
|
|
266
|
|
267 def analyseLogfile(inFilename, outFilename=None):
|
|
268 outFile = None
|
|
269 if outFilename is not None:
|
|
270 outFile = open(outFilename, mode='w')
|
|
271 printHeaderCsv(outFile)
|
|
272
|
|
273 with open(inFilename) as f:
|
|
274 linecnt = 0
|
|
275 saving = 0
|
|
276 openSaves = {}
|
|
277 savingPrev = 0
|
|
278 deleting = 0
|
|
279 saveCtx = None
|
|
280 deleteCtx = None
|
|
281 prevSaves = []
|
|
282 saves = []
|
|
283
|
|
284 for line in f:
|
|
285 linecnt += 1
|
|
286 #
|
|
287 # parsing lines in order of specific match, not order of operation!
|
|
288 #
|
|
289 if '*** START Saving' in line:
|
|
290 saving += 1
|
|
291 # make sure delete is off
|
|
292 deleting = 0
|
|
293 log('DEBUG', line)
|
|
294 # parse time and id
|
|
295 saveCtx = parseStart(line)
|
|
296 saveId = saveCtx['id']
|
|
297 if saveId in openSaves:
|
|
298 log('ERROR', "Duplicate save for same id! %s"%saveCtx)
|
|
299 # assume the first save was spurious
|
|
300 saving -= 1
|
|
301
|
|
302 openSaves[saveId] = saveCtx
|
|
303
|
|
304 if len(openSaves) > 1:
|
|
305 log("ERROR", "Multiple open save (%s) in #%s of %s"%(saving, linecnt, line))
|
|
306 # TODO: what now?
|
|
307
|
|
308 elif '*** END Saving' in line:
|
|
309 saving -= 1
|
|
310 saveCtx = parseStart(line)
|
|
311 saveId = saveCtx['id']
|
|
312 if saveId in openSaves:
|
|
313 log('DEBUG', "End save matches start save: %s"%saveCtx)
|
|
314 del openSaves[saveId]
|
|
315
|
|
316 elif 'null' in openSaves:
|
|
317 log('DEBUG', "Assume end save (of %s) matches start save null: %s"%(len(openSaves), saveCtx))
|
|
318 del openSaves['null']
|
|
319
|
|
320 else:
|
|
321 log("ERROR", "End save without start save! %s"%saveCtx)
|
|
322
|
|
323 # make sure delete is off
|
|
324 deleting = 0
|
|
325 log('DEBUG', line)
|
|
326
|
|
327 if len(openSaves) > 0:
|
|
328 log('WARNING', "Still open saves (%s) during end save in #%s of %s"%(saving, linecnt, line))
|
|
329
|
|
330 elif saving < 0:
|
|
331 log('ERROR', "Too many END saves!")
|
|
332 break
|
|
333
|
|
334 log("INFO", "saving %s"%saveCtx)
|
|
335 log("INFO", "prev saves: %s"%len(prevSaves))
|
|
336 log("INFO", "saves: %s"%len(saves))
|
|
337
|
|
338 if len(prevSaves) > 0:
|
|
339 compareNodeLists(prevSaves, saves, saveCtx, outFile)
|
|
340
|
|
341 prevSaves = []
|
|
342 saves = []
|
|
343
|
|
344 elif '** START save entity' in line:
|
|
345 pass
|
|
346
|
|
347 elif '** END save entity' in line:
|
|
348 pass
|
|
349
|
|
350 elif '** START remove entity' in line:
|
|
351 deleting += 1
|
|
352 log('DEBUG', line)
|
|
353 deleteCtx = parseStart(line)
|
|
354
|
|
355 if deleting > 1:
|
|
356 log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
|
|
357 # TODO: what now?
|
|
358 break
|
|
359
|
|
360 elif '** END remove entity' in line:
|
|
361 deleting -= 1
|
|
362 log('DEBUG', line)
|
|
363 ctx = parseStart(line)
|
|
364 if deleteCtx['id'] != ctx['id']:
|
|
365 log('ERROR', "END delete has different ID!")
|
|
366 break
|
|
367
|
|
368 if deleting > 1:
|
|
369 log("ERROR", "Concurrent delete (%s) in #%s of %s"%(saving, linecnt, line))
|
|
370 # TODO: what now?
|
|
371 break
|
|
372
|
|
373 deleteCtx = None
|
|
374
|
|
375 elif '* START save entity' in line:
|
|
376 pass
|
|
377
|
|
378 elif '* START save previous' in line:
|
|
379 savingPrev += 1
|
|
380
|
|
381 elif '* ...END save entity' in line:
|
|
382 pass
|
|
383
|
|
384 elif '* END ...save previous' in line:
|
|
385 savingPrev -= 1
|
|
386
|
|
387 if saving < 0:
|
|
388 log("ERROR", "Too many END save previous!")
|
|
389
|
|
390 elif 'save previous' in line:
|
|
391 data = parseSave(line)
|
|
392 if data is None:
|
|
393 log("DEBUG", "Error parsing line: %s"%line)
|
|
394 continue
|
|
395
|
|
396 if omitDeleted and deleting > 0 and savingPrev > 0:
|
|
397 # this should be a save prev from deleting
|
|
398 delId = deleteCtx['id']
|
|
399 # check if node is related to deleted id
|
|
400 if data.get('id', None) == delId or data.get('source-id', None) == delId \
|
|
401 or data.get('target-id', None) == delId:
|
|
402 log('DEBUG', "intentionally deleted node: %s"%data)
|
|
403
|
|
404 else:
|
|
405 log('ERROR', "Node without matching id in delete! %s"%data)
|
|
406 prevSaves.append(data)
|
|
407
|
|
408 else:
|
|
409 prevSaves.append(data)
|
|
410
|
|
411 elif 'save' in line:
|
|
412 data = parseSave(line)
|
|
413 if data is None:
|
|
414 log("DEBUG", "Error parsing line: %s"%line)
|
|
415 continue
|
|
416
|
|
417 saves.append(parseSave(line))
|
|
418
|
|
419 if maxLinecnt is not None and linecnt >= maxLinecnt:
|
|
420 break
|
|
421
|
|
422 log("SYSMSG", "%s lines of logfile scanned"%linecnt)
|
|
423
|
|
424
|
|
425 #
|
|
426 # public static void main :-)
|
|
427 #
|
|
428
|
|
429 input_fn = None
|
|
430 output_fn = None
|
|
431
|
|
432 # parse command line parameters
|
|
433 if len(sys.argv) > 2:
|
|
434 input_fn = sys.argv[1]
|
|
435 output_fn = sys.argv[2]
|
|
436
|
|
437 # run analysis
|
|
438 analyseLogfile(input_fn, output_fn)
|
|
439
|
|
440 else:
|
|
441 print("ERROR: missing parameters!")
|
|
442 print("use: check_ismi_log logfile csvfile")
|
|
443 exit(1)
|