Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/check_ismi_log.py @ 41:5b3cd0b66b30
fix bug with multiple-saved nodes.
author | casties |
---|---|
date | Wed, 02 Nov 2016 16:56:27 +0100 |
parents | f38ca3eb1088 |
children | 4dee9586cc44 |
rev | line source |
---|---|
38 | 1 |
2 # coding: utf-8 | |
3 | |
40
f38ca3eb1088
check_ismi_log analyser ignores deleted entities now.
casties
parents:
39
diff
changeset
|
4 import sys, re |
38 | 5 |
# Upper bound on the number of log lines to scan; None means no limit.
# Set this to a small number for testing.
maxLinecnt = None

# When True, previous-save nodes that belong to an entity being deleted
# are left out of the comparison instead of being reported as lost.
omitDeleted = True

# Levels that log() actually prints; swap in one of the alternatives
# below for quieter output.
logLevels = {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
#logLevels = {'ERROR', 'SYSMSG'}
38 | 16 |
def log(level, message):
    """Print `message` prefixed with `level` if that level is active.

    Levels that are not contained in the module-level `logLevels` set are
    silently dropped.
    """
    if level not in logLevels:
        return

    print("%s: %s"%(level, message))
20 | |
21 | |
def prettyPrintNode(node):
    """Return a one-line description of a node dict for log messages.

    The result has the form ``TYPE key=value [node]``: entities and
    relations show their object class (``oc``), attributes show their
    ``name``, any other node type shows no extra field.

    Missing keys are rendered as ``None`` instead of raising KeyError,
    because this function is used while reporting problems and should not
    fail on an incomplete node.
    """
    nt = node.get('node-type')
    if nt in ('ENTITY', 'RELATION'):
        att = " %s=%s "%('oc', node.get('object-class'))

    elif nt == 'ATTRIBUTE':
        att = " %s=%s "%('name', node.get('name'))

    else:
        att = ''

    return "%s%s[%s]"%(nt, att, node)
36 | |
37 | |
# CSV column name paired with the key under which the value is stored in
# a parsed node dict. The order of the pairs is the order of the columns.
_nodeCsvFields = (
    ('node_type', 'node-type'),
    ('id', 'id'),
    ('row_id', 'row-id'),
    ('object_class', 'object-class'),
    ('user', 'user'),
    ('public', 'public'),
    ('type', 'type'),
    ('version', 'version'),
    ('modification_time', 'mtime'),
    ('system_status', 'system-status'),
    ('content_type', 'content-type'),
    ('source_id', 'source-id'),
    ('source_modif', 'source-mtime'),
    ('source_obj_class', 'source-oc'),
    ('target_id', 'target-id'),
    ('target_modif', 'target-mtime'),
    ('target_obj_class', 'target-oc'),
    ('own_value_b64', 'b64-value'),
)

# CSV column names in output order
nodeCsvFieldList = [column for column, _ in _nodeCsvFields]

# CSV column name -> node dict key
nodeCsvFieldMap = dict(_nodeCsvFields)
49 | |
50 | |
def printHeaderCsv(outFile):
    """Write the CSV header line to `outFile`.

    The header consists of the column names in `nodeCsvFieldList`,
    separated by commas and followed by a newline.
    """
    print(",".join(nodeCsvFieldList), file=outFile)
60 | |
61 | |
def printNodeCsv(node, outFile):
    """Write one node dict as a CSV row to `outFile`.

    Columns follow `nodeCsvFieldList`; each value is looked up in `node`
    under the key given by `nodeCsvFieldMap`, and a missing key yields an
    empty field.

    The row is produced by the csv module so that every column is always
    present (empty leading fields keep their separators) and values that
    contain commas, quotes or newlines are quoted instead of corrupting
    the row.
    """
    import csv

    row = [node.get(nodeCsvFieldMap[cf], '') for cf in nodeCsvFieldList]
    # '\n' keeps the same line ending the previous print() based code wrote
    csv.writer(outFile, lineterminator='\n').writerow(row)
72 | |
73 | |
def parseStart(line):
    """Parse the start line of a save or delete operation.

    Returns a dict describing the operation, or None if the line is
    neither of the two:

    - ``START Saving <class> [ID=<digits>`` gives
      ``{'time': ..., 'oc': <class>, 'id': <digits>}``
    - ``Deleting entity [ID=<digits>`` gives
      ``{'time': ..., 'id': <digits>}``

    ``time`` is the ``d-d-d d:d:d`` timestamp at the very beginning of the
    line, or None if the line does not start with one.  All values are
    strings; ``id`` may be the empty string if no digits follow ``ID=``.
    """
    tstamp = None
    # raw strings: '\d', '\w' and '\[' are not valid string escapes
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
    if tm:
        tstamp = tm.group(1)

    sm = re.search(r'START Saving (\w+) \[ID=(\d*)', line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    sm = re.search(r'Deleting entity \[ID=(\d*)', line)
    if sm:
        return {'time': tstamp, 'id': sm.group(1)}

    return None
89 | |
90 | |
def parseSave(line):
    """Parse a ``TYPE[key=value, key=value, ...]`` node record from a line.

    Returns a dict with the upper-case type under ``node-type`` plus one
    entry per ``key=value`` pair (surrounding double quotes are removed
    from the values), or None if the line contains no such record.

    Two keys are normalised:

    - ``object_class`` is stored as ``object-class``
    - the value of ``name`` is additionally stored as ``object-class``

    A piece without ``=`` is treated as the continuation of the previous
    value (a value that itself contained ``", "``) rather than raising
    ValueError; such a piece before the first key is ignored.
    """
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if not match:
        return None

    data = {'node-type': match.group(1)}
    key = None
    for seg in match.group(2).split(', '):
        if '=' in seg:
            key, value = seg.split('=', 1)

            # fix bug with relation's object-class parameter
            if key == 'object_class':
                key = 'object-class'

            value = value.strip('"')

        elif key is not None:
            # the split on ', ' cut the previous value apart: glue it back
            value = (data[key] + ', ' + seg).strip('"')

        else:
            # nothing to attach this piece to
            continue

        data[key] = value

        # normalize attribute's name to object-class
        if key == 'name':
            data['object-class'] = value

    return data
112 | |
113 | |
def equalNodes(prev, cur):
    """Return True if two node dicts describe the same node content.

    The nodes are compared on id, node type, source id, target id and
    base64 own value, in that order.  The first difference is logged at
    INFO level and ends the comparison with False.

    All fields are read with ``get`` so that a node lacking one of them
    (for example a node without ``b64-value``) compares as None instead
    of raising KeyError.
    """
    log("DEBUG", "compare: %s vs %s"%(prev, cur))

    # (node dict key, name used in the log message)
    checks = (
        ('id', 'id'),
        ('node-type', 'node-type'),
        ('source-id', 'source_id'),
        ('target-id', 'target_id'),
        ('b64-value', 'ownvalue'),
    )
    for key, label in checks:
        if prev.get(key) != cur.get(key):
            log("INFO", "node %s mismatch!"%label)
            return False

    return True
137 | |
138 | |
def getSimilarNode(prev, curList):
    """Return the first node in `curList` that is similar to `prev`.

    Similar means: same node type and
    - for attributes the same ``name``
    - for relations the same ``source-id``, ``target-id`` and
      ``object-class``.

    Other node types never have a similar node.  Returns None if nothing
    matches; a match is logged at DEBUG level.
    """
    kind = prev['node-type']
    if kind == 'ATTRIBUTE':
        # attribute with same name
        keys = ('name',)
        message = "similar attributes: %s vs %s"

    elif kind == 'RELATION':
        # relation with same source, target and type
        keys = ('source-id', 'target-id', 'object-class')
        message = "similar relations: %s vs %s"

    else:
        return None

    for candidate in curList:
        if candidate['node-type'] != kind:
            continue

        if all(prev[k] == candidate[k] for k in keys):
            log("DEBUG", message%(prev, candidate))
            return candidate

    return None
160 | |
161 | |
def _groupNodesById(nodes, label):
    """Collect every saved version of each node, keyed by node id.

    Returns a tuple ``(order, versions)``: `order` lists the ids in the
    order they were first seen and `versions` maps each id to the list of
    its node dicts in save order.  `label` ("prev" or "cur") is only used
    in the debug message for repeated saves.
    """
    order = []
    versions = {}
    for n in nodes:
        nid = n['id']
        if nid in versions:
            log("DEBUG", "duplicate save of %s node id=%s"%(label, nid))
            versions[nid].append(n)
        else:
            order.append(nid)
            versions[nid] = [n]

    return order, versions


def compareNodeLists(prev, cur, ctx, lostFile=None):
    """Compare the list of previous nodes with the list of current nodes.

    prev     -- node dicts saved as "previous" state
    cur      -- node dicts saved as current state
    ctx      -- context of the save, only used in the error message
    lostFile -- optional file object; lost nodes are appended as CSV rows

    Nodes are matched by id; if a node was saved several times its last
    version is used.  A previous node without a current node of the same
    id counts as lost unless it is an attribute with an empty value or a
    similar new node exists (see getSimilarNode).  Lost nodes are logged
    as ERROR and written to `lostFile`, remaining new nodes are logged as
    INFO.

    Nodes are processed in the order they appear in the input lists so
    that matching and reporting give the same result on every run.
    """
    prevOrder, prevNodes = _groupNodesById(prev, "prev")
    curOrder, curNodes = _groupNodesById(cur, "cur")

    #
    # compare nodes
    #
    checked = set()
    addPrevNodes = []

    for nid in prevOrder:
        versions = prevNodes[nid]
        if len(versions) > 1:
            log("DEBUG", "multi-save prev node: %s"%versions)

        # use the last version(?)
        prevNode = versions[-1]

        if nid not in curNodes:
            # get(): a node without b64-value is not an empty attribute
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode.get('b64-value') == '':
                # empty attribute - ignore
                continue

            log("DEBUG", "node %s not in cur saves! %s"%(nid,prevNode))
            addPrevNodes.append(prevNode)
            continue

        versions = curNodes[nid]
        if len(versions) > 1:
            log("DEBUG", "multi-save cur node: %s"%versions)

        # use the last version?
        curNode = versions[-1]

        # differences are only logged, the result is not used
        equalNodes(prevNode, curNode)
        checked.add(nid)

    # make list of additional current (=new) nodes
    addCurNodes = []
    for nid in curOrder:
        if nid in checked:
            continue

        # use the last version(?)
        curNode = curNodes[nid][-1]
        addCurNodes.append(curNode)
        log("DEBUG", "new node %s"%(curNode,))

    # compare missing and new nodes (iterate a copy, the list is modified)
    for n in list(addPrevNodes):
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if addPrevNodes:
        log("ERROR", "in %s"%ctx)
        for n in addPrevNodes:
            log("ERROR","lost node: %s"%prettyPrintNode(n))
            if lostFile is not None:
                printNodeCsv(n, lostFile)

    for n in addCurNodes:
        log("INFO", "new node: %s"%prettyPrintNode(n))
260 | |
261 | |
262 | |
def _scanLogLines(lines, outFile):
    """Run the save/delete state machine over the lines of a log.

    Collects the "save previous" and "save" node records between a
    ``START Saving`` and the matching ``END Saving`` line and hands them
    to compareNodeLists.  Returns the number of lines read.
    """
    linecnt = 0
    saving = 0
    savingPrev = 0
    deleting = 0
    saveCtx = None
    deleteCtx = None
    prevSaves = []
    saves = []

    for line in lines:
        linecnt += 1
        if '*************** START Saving' in line:
            saving += 1
            # make sure delete is off
            deleting = 0
            log('DEBUG', line)
            # parse time and id
            saveCtx = parseStart(line)

            if saving > 1:
                log("ERROR", "Concurrent save (%s) in #%s of %s"%(saving, linecnt, line))
                # TODO: what now?

        elif 'Deleting entity' in line:
            deleting += 1
            log('DEBUG', line)
            deleteCtx = parseStart(line)

            if deleting > 1:
                log("ERROR", "Concurrent delete (%s) in #%s of %s"%(deleting, linecnt, line))
                # TODO: what now?
                break

        elif 'transactionlog' in line:
            if '* START save previous' in line:
                savingPrev += 1

            elif '* End ...save previous' in line:
                if deleting > 0 and savingPrev > 0:
                    # this should be the end of the save prev from deleting
                    deleting -= 1
                    deleteCtx = None

                savingPrev -= 1

                if savingPrev < 0:
                    log("ERROR", "Too many END save previous!")

            elif 'save previous' in line:
                data = parseSave(line)
                if data is None:
                    log("DEBUG", "Error parsing line: %s"%line)

                elif omitDeleted and deleting > 0 and savingPrev > 0:
                    # this should be a save prev from deleting;
                    # deleteCtx is None if the delete line could not be parsed
                    delId = deleteCtx.get('id') if deleteCtx else None
                    # check if node is related to deleted id
                    if delId is not None and delId in (data.get('id'),
                                                       data.get('source-id'),
                                                       data.get('target-id')):
                        log('DEBUG', "intentionally deleted node: %s"%data)

                    else:
                        log('ERROR', "Node without matching id in delete! %s"%data)
                        prevSaves.append(data)

                else:
                    prevSaves.append(data)

            elif 'save' in line:
                data = parseSave(line)
                if data is None:
                    log("DEBUG", "Error parsing line: %s"%line)

                else:
                    saves.append(data)

        elif '*************** END Saving' in line:
            saving -= 1
            # make sure delete is off
            deleting = 0
            log('DEBUG', line)

            if saving > 0:
                log("ERROR", "Concurrent end save (%s) in #%s of %s"%(saving, linecnt, line))

            elif saving < 0:
                log("ERROR", "Too many END saves!")
                break

            log("INFO", "saving %s"%saveCtx)
            log("INFO", "prev saves: %s"%len(prevSaves))
            log("INFO", "saves: %s"%len(saves))

            if prevSaves:
                compareNodeLists(prevSaves, saves, saveCtx, outFile)

            prevSaves = []
            saves = []

        if maxLinecnt is not None and linecnt >= maxLinecnt:
            break

    return linecnt


def analyseLogfile(inFilename, outFilename=None):
    """Scan a log file and report nodes that were lost by save operations.

    inFilename  -- name of the log file to read
    outFilename -- optional name of a CSV file; it is (over)written with
                   a header line followed by one row per lost node

    Scanning stops early after `maxLinecnt` lines (if set), on a
    concurrent delete or when more END than START save lines were seen.
    The number of scanned lines is logged as SYSMSG at the end.
    """
    outFile = None
    try:
        if outFilename is not None:
            outFile = open(outFilename, mode='w')
            printHeaderCsv(outFile)

        with open(inFilename) as f:
            linecnt = _scanLogLines(f, outFile)

        log("SYSMSG", "%s lines of logfile scanned"%linecnt)

    finally:
        # close explicitly so that buffered CSV rows reach the disk
        if outFile is not None:
            outFile.close()
374 | |
40
f38ca3eb1088
check_ismi_log analyser ignores deleted entities now.
casties
parents:
39
diff
changeset
|
375 |
f38ca3eb1088
check_ismi_log analyser ignores deleted entities now.
casties
parents:
39
diff
changeset
|
#
# public static void main :-)
#
# Script entry: expects the log file to read and the CSV file to write as
# the first two command line arguments.
#

input_fn = None
output_fn = None

# parse command line parameters
if len(sys.argv) > 2:
    input_fn = sys.argv[1]
    output_fn = sys.argv[2]

    # run analysis
    analyseLogfile(input_fn, output_fn)

else:
    print("ERROR: missing parameters!")
    print("use: check_ismi_log logfile csvfile")
    # sys.exit instead of exit(): the latter is only a helper that the
    # site module adds for interactive sessions
    sys.exit(1)