# coding: utf-8

import re

# max number of lines to read (for testing); None = scan the whole file
maxLinecnt = None

# active log levels for logging — only messages whose level is in this set
# are printed by log()
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
logLevels = {'ERROR', 'SYSMSG'}
|
def log(level, message):
    """Print *message* prefixed with *level* when that level is enabled in logLevels."""
    if level not in logLevels:
        return
    print("%s: %s"%(level, message))
def prettyPrintNode(node):
    """Return a one-line human-readable description of a parsed node dict."""
    kind = node['node-type']
    # identity attribute shown per node type: (label, node dict key)
    detail = {
        'ENTITY': ('oc', 'object-class'),
        'ATTRIBUTE': ('name', 'name'),
        'RELATION': ('oc', 'object-class'),
    }.get(kind)
    extra = '' if detail is None else " %s=%s "%(detail[0], node[detail[1]])
    return "%s%s[%s]"%(kind, extra, node)
# column order for the CSV output of lost nodes
nodeCsvFieldList = ['node_type', 'id', 'row_id', 'object_class', 'user', 'public', 'type',
                    'version', 'modification_time', 'system_status', 'content_type',
                    'source_id', 'source_modif', 'source_obj_class', 'target_id', 'target_modif', 'target_obj_class',
                    'own_value_b64']

# maps CSV column name -> key in the parsed node dict (as produced by parseSave)
nodeCsvFieldMap = {'node_type':'node-type', 'id':'id', 'row_id':'row-id', 'object_class':'object-class', 'user':'user',
                   'public':'public', 'type':'type',
                   'version':'version', 'modification_time':'mtime', 'system_status':'system-status', 'content_type':'content-type',
                   'source_id':'source-id', 'source_modif':'source-mtime', 'source_obj_class':'source-oc',
                   'target_id':'target-id', 'target_modif':'target-mtime', 'target_obj_class':'target-oc',
                   'own_value_b64':'b64-value'}
def printHeaderCsv(outFile):
    """Write the CSV header row (column names from nodeCsvFieldList) to outFile."""
    # join at C speed instead of manual string concatenation in a loop
    print(",".join(nodeCsvFieldList), file=outFile)
def printNodeCsv(node, outFile):
    """Write one parsed node as a CSV row to outFile.

    Columns follow nodeCsvFieldList; each is looked up in *node* via
    nodeCsvFieldMap, missing fields become empty strings.

    NOTE(review): values are not CSV-escaped — a value containing a comma
    would break the row; confirm whether csv.writer should be used instead.
    """
    row = ",".join("%s"%node.get(nodeCsvFieldMap[cf], '') for cf in nodeCsvFieldList)
    print(row, file=outFile)
def parseStart(line):
    """Parse a 'START Saving' log line.

    Returns {'time': leading timestamp or None, 'oc': object class,
    'id': entity id (may be '')} when the START marker matches, else None.
    """
    tstamp = None
    # optional leading timestamp like "2016-10-11 12:34:56"
    # (raw strings avoid invalid-escape warnings for \d, \w, \[ on Python 3)
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
    if tm:
        tstamp = tm.group(1)

    sm = re.search(r'START Saving (\w+) \[ID=(\d*)', line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    return None
def parseSave(line):
    """Parse a transactionlog 'save' line into a node dict.

    Expects a segment like NODETYPE[key=val, key="val", ...]. Returns a
    dict with 'node-type' plus one entry per key=value pair (surrounding
    double quotes stripped), or None when the line does not match.
    """
    # raw string avoids invalid-escape warnings for \[ and \] on Python 3
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if match is None:
        return None

    data = {'node-type': match.group(1)}
    for seg in match.group(2).split(', '):
        k, v = seg.split('=', 1)

        # fix bug with relation's object-class parameter
        if k == 'object_class':
            k = 'object-class'

        data[k] = v.strip('"')

        # normalize attribute's name to object-class
        if k == 'name':
            data['object-class'] = v.strip('"')

    return data
def equalNodes(prev, cur):
    """Compare two parsed nodes field by field.

    Logs and returns False on the first differing field, True when all
    checked fields agree. source-id/target-id are optional (compared as
    None when absent); the other fields must be present.
    """
    log("DEBUG", "compare: %s vs %s"%(prev, cur))

    # (log label, accessor) pairs checked in order; first mismatch wins
    checks = (
        ("id", lambda n: n['id']),
        ("node-type", lambda n: n['node-type']),
        ("source_id", lambda n: n.get('source-id', None)),
        ("target_id", lambda n: n.get('target-id', None)),
        ("ownvalue", lambda n: n['b64-value']),
    )
    for label, field in checks:
        if field(prev) != field(cur):
            log("INFO", "node %s mismatch!"%label)
            return False

    return True
def getSimilarNode(prev, curList):
    """Find a node in curList that matches *prev* on identity-defining fields.

    ATTRIBUTE nodes match on 'name'; RELATION nodes match on source-id,
    target-id and object-class. Returns the first match, or None (also
    None for any other node type).
    """
    nodeType = prev['node-type']

    if nodeType == 'ATTRIBUTE':
        for candidate in curList:
            if candidate['node-type'] != 'ATTRIBUTE':
                continue
            if prev['name'] != candidate['name']:
                continue
            # attribute with same name
            log("DEBUG", "similar attributes: %s vs %s"%(prev, candidate))
            return candidate

    elif nodeType == 'RELATION':
        keyFields = ('source-id', 'target-id', 'object-class')
        for candidate in curList:
            if candidate['node-type'] != 'RELATION':
                continue
            if all(prev[f] == candidate[f] for f in keyFields):
                # relation with same source, target and type
                log("DEBUG", "similar relations: %s vs %s"%(prev, candidate))
                return candidate

    return None
def compareNodeLists(prev, cur, ctx, lostFile=None):
    """Compare the node list of the previous save against the current save.

    Logs nodes missing from the current save ("lost") and nodes that only
    appear in the current save ("new"). Lost nodes are additionally written
    as CSV rows to lostFile when given. ctx only labels the ERROR output.
    """
    prevNodes = {}
    curNodes = {}

    #
    # read nodes
    #
    # index by node id; when the same id is saved multiple times, keep all
    # versions (dict values are then either a single node dict or a list)
    for n in prev:
        nid = n['id']
        if nid not in prevNodes:
            prevNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of prev node id="+nid)
            if isinstance(prevNodes[nid], list):
                prevNodes[nid].append(n)
            else:
                prevNodes[nid] = [prevNodes[nid], n]

    for n in cur:
        nid = n['id']
        if nid not in curNodes:
            curNodes[nid] = n
        else:
            log("DEBUG", "duplicate save of cur node id="+nid)
            if isinstance(curNodes[nid], list):
                curNodes[nid].append(n)
            else:
                curNodes[nid] = [curNodes[nid], n]

    #
    # compare nodes
    #
    curNodeUnchecked = set(curNodes.keys())
    addPrevNodes = []   # nodes only present in the previous save (candidates for "lost")
    addCurNodes = []    # nodes only present in the current save (candidates for "new")

    for nid in prevNodes:
        prevNode = prevNodes[nid]
        if isinstance(prevNode, list):
            log("DEBUG", "multi-save prev node: %s"%prevNode)
            # use the last version(?)
            prevNode = prevNode[-1]

        if nid not in curNodes:
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
                # empty attribute - ignore
                continue

            else:
                log("DEBUG", "node %s not in cur saves! %s"%(nid,prevNode))
                addPrevNodes.append(prevNode)
                continue

        curNode = curNodes[nid]
        if isinstance(curNode, list):
            log("DEBUG", "multi-save cur node: %s"%curNode)
            # use the last version?
            curNode = curNode[-1]

        # NOTE(review): the comparison result is discarded — only the log
        # output of equalNodes is used; confirm whether a mismatch should
        # also be reported as lost/changed.
        equalNodes(prevNode, curNode)

        curNodeUnchecked.remove(nid)

    # make list of additional current (=new) nodes
    # NOTE(review): for multi-saved ids the entry appended here is still a
    # list — the last-version unwrapping above is not applied; a list value
    # would break getSimilarNode below. Verify with real log data.
    for nid in curNodeUnchecked:
        addCurNodes.append(curNodes[nid])
        log("DEBUG", "new node %s"%curNodes[nid])

    # compare missing and new nodes: a "similar" counterpart (same identity
    # fields, possibly a different id) cancels out a lost/new pair
    for n in addPrevNodes.copy():
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if len(addPrevNodes) > 0:
        #print("ERROR: lost nodes: %s"%[prettyPrintNode(n) for n in addPrevNodes])
        log("ERROR", "in %s"%ctx)
        for n in addPrevNodes:
            log("ERROR","lost node: %s"%prettyPrintNode(n))
            if lostFile is not None:
                printNodeCsv(n, lostFile)

    if len(addCurNodes) > 0:
        #print("INFO: new nodes: %s"%[prettyPrintNode(n) for n in addCurNodes])
        for n in addCurNodes:
            log("INFO", "new node: %s"%prettyPrintNode(n))
def analyseLogfile(inFilename, outFilename=None):
    """Scan a transaction log file and compare each save block's
    "previous" node list against its current node list.

    inFilename: path of the log file to read.
    outFilename: optional path of a CSV file that receives lost nodes.
    """
    outFile = None
    if outFilename is not None:
        outFile = open(outFilename, mode='w')
        printHeaderCsv(outFile)

    try:
        with open(inFilename) as f:
            saving = 0      # nesting counter of open START...END blocks
            linecnt = 0
            saveCtx = None  # context parsed from the current START line
            prevSaves = []  # nodes logged as "save previous"
            saves = []      # nodes logged as plain "save"

            for line in f:
                linecnt += 1
                if '*************** START Saving' in line:
                    saving += 1
                    log('DEBUG', line)
                    saveCtx = parseStart(line)

                    if saving > 1:
                        log("ERROR", "Concurrent save (%s) in #%s of %s"%(saving, linecnt, line))
                        # TODO: what now?

                elif 'INFO transactionlog' in line:
                    # 'save previous' must be tested first: a plain 'save'
                    # substring also matches those lines
                    if 'save previous' in line:
                        data = parseSave(line)
                        if data is None:
                            log("DEBUG", "Error parsing line: %s"%line)
                            continue

                        prevSaves.append(data)

                    elif 'save' in line:
                        data = parseSave(line)
                        if data is None:
                            log("DEBUG", "Error parsing line: %s"%line)
                            continue

                        # BUGFIX: append the already-parsed result instead
                        # of parsing the same line a second time
                        saves.append(data)

                elif '*************** END Saving' in line:
                    saving -= 1
                    log('DEBUG', line)

                    if saving > 0:
                        log("ERROR", "Concurrent end save (%s) in #%s of %s"%(saving, linecnt, line))

                    elif saving < 0:
                        log("ERROR", "Too many END saves!")
                        break

                    log("INFO", "saving %s"%saveCtx)
                    log("INFO", "prev saves: %s"%len(prevSaves))
                    log("INFO", "saves: %s"%len(saves))

                    if len(prevSaves) > 0:
                        compareNodeLists(prevSaves, saves, saveCtx, outFile)

                    # reset collectors for the next save block
                    prevSaves = []
                    saves = []

                if maxLinecnt is not None and linecnt >= maxLinecnt:
                    break

        log("SYSMSG", "%s lines of logfile scanned"%linecnt)

    finally:
        # BUGFIX: the CSV output file was opened but never closed
        if outFile is not None:
            outFile.close()
# run analysis
# NOTE(review): executes on import; input/output filenames are hard-coded
analyseLogfile('ismi-161011.log', 'ismi-161011-lost.csv')