comparison importFromOpenMind/importer/check_ismi_log.py @ 38:9ab136f412a1

new first version of check_ismi_log analyser.
author casties
date Fri, 21 Oct 2016 19:20:17 +0200
parents
children 1867bc2180c5
comparison
equal deleted inserted replaced
37:eccbb6239b89 38:9ab136f412a1
1
2 # coding: utf-8
3
4 import re
5
# Upper bound on the number of logfile lines to scan; None scans everything.
maxLinecnt = None

# Levels that log() actually prints. The calls in this file use the levels
# DEBUG, INFO, ERROR and SYSMSG; anything not listed here is dropped.
#logLevels = {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
logLevels = {'SYSMSG', 'ERROR'}
9
def log(level, message):
    """Print ``message`` prefixed by ``level`` when that level is enabled.

    A level is enabled when it is a member of the module-level
    ``logLevels`` set; messages of any other level are silently dropped.
    """
    if level not in logLevels:
        return

    print("%s: %s"%(level, message))
13
14
def prettyPrintNode(node):
    """Return a one-line, human readable description of a parsed node.

    The result is the node type, followed by one highlighted field for the
    known node types (object class for entities and relations, name for
    attributes), followed by the complete node dict in square brackets.
    Raises KeyError if 'node-type' or the highlighted field is missing.
    """
    nodeType = node['node-type']

    # (node type, label shown in the output, key read from the node)
    highlights = (
        ('ENTITY', 'oc', 'object-class'),
        ('ATTRIBUTE', 'name', 'name'),
        ('RELATION', 'oc', 'object_class'),
    )

    summary = ''
    for kind, label, key in highlights:
        if nodeType == kind:
            summary = " %s=%s "%(label, node[key])
            break

    return "%s%s[%s]"%(nodeType, summary, node)
29
30
def parseStart(line):
    """Parse a 'START Saving' log line into a save-context dict.

    Returns a dict with the keys 'time' (the timestamp found at the very
    beginning of the line, or None if the line does not start with one),
    'oc' (the word following 'START Saving') and 'id' (the digits following
    '[ID=', possibly an empty string).

    Returns None if the line contains no 'START Saving <word> [ID=' marker.
    """
    # The patterns are raw strings: sequences such as '\d' or '\[' in a
    # normal string literal are invalid escape sequences.
    tstamp = None
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
    if tm:
        tstamp = tm.group(1)

    sm = re.search(r'START Saving (\w+) \[ID=(\d*)', line)
    if sm:
        return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}

    return None
42
43
def parseSave(line):
    """Parse the first 'TYPE[key=value, key=value, ...]' group of a log line.

    Returns a dict that maps 'node-type' to the upper-case word in front of
    the bracket and every key inside the bracket to its value, with
    surrounding double quotes removed from the values.

    Returns None if the line contains no such group.
    """
    # raw string: '\[' and '\]' are invalid escapes in a normal literal
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if match is None:
        return None

    data = {'node-type': match.group(1)}
    lastKey = None
    for seg in match.group(2).split(', '):
        key, sep, value = seg.partition('=')
        if sep:
            data[key] = value
            lastKey = key

        elif lastKey is not None:
            # A fragment without '=' is assumed to come from a value that
            # itself contained ', '; glue it back onto the previous value
            # instead of failing on the tuple unpacking.
            data[lastKey] += ', ' + seg

        # a key-less fragment before the first key is dropped

    # strip the quotes only after the fragments have been re-joined
    for key in data:
        data[key] = data[key].strip('"')

    return data
56
57
def equalNodes(prev, cur):
    """Tell whether two parsed nodes agree in all compared fields.

    The fields are compared in a fixed order: id, node type, source id,
    target id and base64 value. The first difference is reported with an
    INFO message and ends the comparison with False. 'source-id' and
    'target-id' may be absent from a node (absent counts as None); the
    other fields are required and raise KeyError if missing.
    """
    log("DEBUG", "compare: %s vs %s"%(prev, cur))

    # (key, key is required, message logged on mismatch)
    checks = (
        ('id', True, "node id mismatch!"),
        ('node-type', True, "node node-type mismatch!"),
        ('source-id', False, "node source_id mismatch!"),
        ('target-id', False, "node target_id mismatch!"),
        ('b64-value', True, "node ownvalue mismatch!"),
    )

    for key, required, complaint in checks:
        if required:
            differs = prev[key] != cur[key]
        else:
            differs = prev.get(key, None) != cur.get(key, None)

        if differs:
            log("INFO", complaint)
            return False

    return True
81
82
def getSimilarNode(prev, curList):
    """Return the first node in ``curList`` that is similar to ``prev``.

    Two attributes are similar when they carry the same name. Two relations
    are similar when source id, target id and object class all agree.
    Nodes of any other type never have a similar node.

    Returns None if nothing similar is found.
    """
    nodeType = prev['node-type']

    if nodeType == 'ATTRIBUTE':
        for candidate in curList:
            if candidate['node-type'] != 'ATTRIBUTE':
                continue
            if prev['name'] != candidate['name']:
                continue

            # attribute with same name
            log("DEBUG", "similar attributes: %s vs %s"%(prev, candidate))
            return candidate

    elif nodeType == 'RELATION':
        for candidate in curList:
            if candidate['node-type'] != 'RELATION':
                continue
            if prev['source-id'] != candidate['source-id']:
                continue
            if prev['target-id'] != candidate['target-id']:
                continue
            if prev['object_class'] != candidate['object_class']:
                continue

            # relation with same source, target and type
            log("DEBUG", "similar relations: %s vs %s"%(prev, candidate))
            return candidate

    return None
104
105
def _groupNodesById(nodes, label):
    """Group ``nodes`` by their 'id', keeping all versions in save order.

    ``label`` ('prev' or 'cur') only appears in the DEBUG message that is
    logged whenever an id is seen again.
    """
    grouped = {}
    for n in nodes:
        nid = n['id']
        if nid in grouped:
            log("DEBUG", "duplicate save of %s node id=%s"%(label, nid))

        grouped.setdefault(nid, []).append(n)

    return grouped


def compareNodeLists(prev, cur, ctx):
    """Compare the nodes saved before ('prev') and after ('cur') one save.

    prev -- list of parsed nodes from the 'save previous' log lines
    cur  -- list of parsed nodes from the 'save' log lines
    ctx  -- save context, only used in the ERROR message for lost nodes

    Nodes are matched by id; if an id was saved several times its last
    version is used. Previous nodes whose id is missing from ``cur`` count
    as lost, unless they are attributes with an empty 'b64-value' or a
    similar new node (see getSimilarNode) takes their place. Lost nodes are
    logged as ERROR, remaining new nodes as INFO. Returns None.
    """
    #
    # read nodes
    #
    prevNodes = _groupNodesById(prev, 'prev')
    curNodes = _groupNodesById(cur, 'cur')

    #
    # compare nodes
    #
    matchedIds = set()
    addPrevNodes = []
    addCurNodes = []

    for nid, prevVersions in prevNodes.items():
        if len(prevVersions) > 1:
            log("DEBUG", "multi-save prev node: %s"%(prevVersions,))

        # use the last version(?)
        prevNode = prevVersions[-1]

        curVersions = curNodes.get(nid)
        if curVersions is None:
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode['b64-value'] == '':
                # empty attribute - ignore
                continue

            log("DEBUG", "node %s not in cur saves! %s"%(nid, prevNode))
            addPrevNodes.append(prevNode)
            continue

        if len(curVersions) > 1:
            log("DEBUG", "multi-save cur node: %s"%(curVersions,))

        # The result is not used here: differences between matched nodes
        # are only reported through equalNodes' own log messages.
        equalNodes(prevNode, curVersions[-1])
        matchedIds.add(nid)

    # Make list of additional current (=new) nodes, in save order. A new
    # node that was saved several times is represented by its last version,
    # so that the code below always deals with node dicts, never with lists.
    for nid, curVersions in curNodes.items():
        if nid in matchedIds:
            continue

        newNode = curVersions[-1]
        addCurNodes.append(newNode)
        log("DEBUG", "new node %s"%(newNode,))

    # compare missing and new nodes (iterate a copy, the list is modified)
    for n in list(addPrevNodes):
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if addPrevNodes:
        log("ERROR", "in %s"%(ctx,))
        for n in addPrevNodes:
            log("ERROR", "lost node: %s"%prettyPrintNode(n))

    for n in addCurNodes:
        log("INFO", "new node: %s"%prettyPrintNode(n))
192
193
194
def analyseLogfile(filename):
    """Scan a logfile and check every save operation for lost nodes.

    filename -- path of the logfile, opened in text mode

    Lines between a 'START Saving' and an 'END Saving' marker that contain
    'INFO transactionlog' are collected: 'save previous' lines as the state
    before the save, other 'save' lines as the state after it. At each END
    marker the two lists are handed to compareNodeLists (only if there were
    previous saves) and then reset.

    Scanning stops after ``maxLinecnt`` lines if that module-level limit is
    set, or when more END than START markers have been seen. The number of
    scanned lines is logged as SYSMSG at the end.
    """
    with open(filename) as f:
        saving = 0      # number of currently open START markers
        linecnt = 0
        saveCtx = None
        prevSaves = []
        saves = []

        for linecnt, line in enumerate(f, 1):
            if '*************** START Saving' in line:
                saving += 1
                log('DEBUG', line)
                saveCtx = parseStart(line)

                if saving > 1:
                    log("ERROR", "Concurrent save (%s) in #%s of %s"%(saving, linecnt, line))
                    # TODO: what now?

            elif 'INFO transactionlog' in line:
                # 'save previous' has to be tested first, it contains 'save'
                if 'save previous' in line:
                    target = prevSaves
                elif 'save' in line:
                    target = saves
                else:
                    target = None

                if target is not None:
                    # parse once and do not skip the line limit check below
                    # when the line can not be parsed
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s"%line)
                    else:
                        target.append(data)

            elif '*************** END Saving' in line:
                saving -= 1
                log('DEBUG', line)

                if saving > 0:
                    log("ERROR", "Concurrent end save (%s) in #%s of %s"%(saving, linecnt, line))

                elif saving < 0:
                    log("ERROR", "Too many END saves!")
                    break

                log("INFO", "saving %s"%(saveCtx,))
                log("INFO", "prev saves: %s"%len(prevSaves))
                log("INFO", "saves: %s"%len(saves))

                if prevSaves:
                    compareNodeLists(prevSaves, saves, saveCtx)

                prevSaves = []
                saves = []

            if maxLinecnt is not None and linecnt >= maxLinecnt:
                break

        log("SYSMSG", "%s lines of logfile scanned"%linecnt)
257
258
# Run analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    import sys

    # The logfile may be given as first command line argument; without an
    # argument the previously hard-coded file name is used.
    analyseLogfile(sys.argv[1] if len(sys.argv) > 1 else 'ismi-161011.log')