Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/check_ismi_log.py @ 38:9ab136f412a1
new first version of check_ismi_log analyser.
author | casties |
---|---|
date | Fri, 21 Oct 2016 19:20:17 +0200 |
parents | |
children | 1867bc2180c5 |
comparison
equal
deleted
inserted
replaced
37:eccbb6239b89 | 38:9ab136f412a1 |
---|---|
1 | |
2 # coding: utf-8 | |
3 | |
4 import re | |
5 | |
# Upper bound on the number of logfile lines to scan (None = no limit).
maxLinecnt = None
# Levels that log() actually prints; messages of any other level are dropped.
# More verbose alternative: {'INFO', 'WARNING', 'ERROR', 'SYSMSG'}
logLevels = {'ERROR', 'SYSMSG'}


def log(level, message):
    """Print ``message`` prefixed with ``level`` if that level is enabled.

    A level is enabled when it is a member of the module-level ``logLevels``
    set; messages for all other levels are silently discarded.
    """
    if level not in logLevels:
        return

    print(level, message, sep=": ")
13 | |
14 | |
def prettyPrintNode(node):
    """Return a one-line description of a parsed node record.

    The result is ``<node-type><highlight>[<node dict>]``, where the
    highlight is the most telling field of the record: the ``object-class``
    of an ENTITY, the ``name`` of an ATTRIBUTE or the ``object_class`` of a
    RELATION. Other node types get no highlight.
    """
    kind = node['node-type']
    # node type -> (label shown in the output, key read from the record)
    highlights = {
        'ENTITY': ('oc', 'object-class'),
        'ATTRIBUTE': ('name', 'name'),
        'RELATION': ('oc', 'object_class'),
    }

    detail = ''
    if kind in highlights:
        label, key = highlights[kind]
        detail = " %s=%s " % (label, node[key])

    return "%s%s[%s]" % (kind, detail, node)
29 | |
30 | |
def parseStart(line):
    """Parse a "START Saving" marker line of the logfile.

    Returns a dict with the keys ``time`` (the timestamp at the very start
    of the line, or None if the line does not begin with one), ``oc`` (the
    word following "START Saving") and ``id`` (the digits following "[ID=",
    possibly an empty string). Returns None if the line contains no
    "START Saving <word> [ID=" marker.
    """
    # Raw strings: '\d', '\w' and '\[' are not valid string escapes and
    # trigger warnings in non-raw literals on current Python versions.
    tstamp = None
    tm = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+)', line)
    if tm:
        tstamp = tm.group(1)

    sm = re.search(r'START Saving (\w+) \[ID=(\d*)', line)
    if sm is None:
        return None

    return {'time': tstamp, 'oc': sm.group(1), 'id': sm.group(2)}
42 | |
43 | |
def parseSave(line):
    """Parse a node record of the form ``TYPE[key=value, key="value", ...]``.

    Returns a dict holding the upper-case type under ``node-type`` plus one
    entry per ``key=value`` pair, with surrounding double quotes removed
    from the values. Returns None if the line contains no such record.

    A segment without "=" is treated as the continuation of the previous
    value (a quoted value that itself contained ", ") instead of raising
    ValueError. A leading segment without "=" is ignored.
    """
    match = re.search(r'([A-Z]+)\[([^\]]+)\]', line)
    if match is None:
        return None

    data = {'node-type': match.group(1)}
    key = None
    for seg in match.group(2).split(', '):
        if '=' in seg:
            key, value = seg.split('=', 1)
            data[key] = value
        elif key is not None:
            # value was split at an embedded ", " - glue it back together
            data[key] += ', ' + seg

    # strip the quotes only after continuation segments have been re-joined
    return {k: v.strip('"') for k, v in data.items()}
56 | |
57 | |
def equalNodes(prev, cur):
    """Return True if two parsed node records describe the same node state.

    Compared are ``id`` and ``node-type`` (which must be present in both
    records) and the optional fields ``source-id``, ``target-id`` and
    ``b64-value``, where a missing field counts as None. The first
    difference found is reported at INFO level and ends the comparison.
    """
    log("DEBUG", "compare: %s vs %s"%(prev, cur))

    # mandatory fields
    for key in ('id', 'node-type'):
        if prev[key] != cur[key]:
            log("INFO", "node %s mismatch!" % key)
            return False

    # optional fields: (record key, name used in the log message).
    # b64-value is looked up with get() like the other two, so that a
    # record without a value does not abort the analysis with a KeyError.
    optional = (
        ('source-id', 'source_id'),
        ('target-id', 'target_id'),
        ('b64-value', 'ownvalue'),
    )
    for key, label in optional:
        if prev.get(key) != cur.get(key):
            log("INFO", "node %s mismatch!" % label)
            return False

    return True
81 | |
82 | |
def getSimilarNode(prev, curList):
    """Return the first node in ``curList`` that is similar to ``prev``.

    Similar means: same node type and, for an ATTRIBUTE, the same ``name``;
    for a RELATION, the same ``source-id``, ``target-id`` and
    ``object_class``. Nodes of any other type never have a similar node.
    Returns None if nothing matches.
    """
    kind = prev['node-type']
    if kind == 'ATTRIBUTE':
        # attribute with same name
        keys = ('name',)
        message = "similar attributes: %s vs %s"
    elif kind == 'RELATION':
        # relation with same source, target and type
        keys = ('source-id', 'target-id', 'object_class')
        message = "similar relations: %s vs %s"
    else:
        return None

    for candidate in curList:
        if candidate['node-type'] != kind:
            continue

        if all(prev[k] == candidate[k] for k in keys):
            log("DEBUG", message % (prev, candidate))
            return candidate

    return None
104 | |
105 | |
def _groupSavesById(nodes, label):
    """Group parsed save records by node id, keeping the order of the saves."""
    grouped = {}
    for n in nodes:
        nid = n['id']
        if nid in grouped:
            log("DEBUG", "duplicate save of %s node id=%s" % (label, nid))

        grouped.setdefault(nid, []).append(n)

    return grouped


def _lastSave(versions, label):
    """Return the last saved version of a node, noting multiple saves."""
    if len(versions) > 1:
        log("DEBUG", "multi-save %s node: %s" % (label, versions))

    # use the last version(?)
    return versions[-1]


def compareNodeLists(prev, cur, ctx):
    """Compare the nodes saved before and after one save operation.

    ``prev`` and ``cur`` are lists of parsed node records (dicts with at
    least ``id`` and ``node-type``); ``ctx`` describes the save operation
    and is only used in the error output.

    Nodes are matched by id. Previous nodes without a counterpart in
    ``cur`` are reported as "lost node" at ERROR level, unless they are
    empty ATTRIBUTEs or a similar new node exists (see getSimilarNode).
    Remaining nodes that only occur in ``cur`` are reported as "new node"
    at INFO level. Returns None.
    """
    #
    # read nodes
    #
    prevNodes = _groupSavesById(prev, "prev")
    curNodes = _groupSavesById(cur, "cur")

    #
    # compare nodes
    #
    matchedIds = set()
    addPrevNodes = []
    addCurNodes = []

    for nid, versions in prevNodes.items():
        prevNode = _lastSave(versions, "prev")

        if nid not in curNodes:
            if prevNode['node-type'] == 'ATTRIBUTE' and prevNode.get('b64-value') == '':
                # empty attribute - ignore
                continue

            log("DEBUG", "node %s not in cur saves! %s"%(nid,prevNode))
            addPrevNodes.append(prevNode)
            continue

        curNode = _lastSave(curNodes[nid], "cur")
        # result is not used; differences are only reported through log()
        equalNodes(prevNode, curNode)
        matchedIds.add(nid)

    # make list of additional current (=new) nodes, in the order of their
    # first save. A node saved several times is represented by its last
    # version, so that the list only ever holds single node records.
    for nid, versions in curNodes.items():
        if nid in matchedIds:
            continue

        node = versions[-1]
        addCurNodes.append(node)
        log("DEBUG", "new node %s" % (node,))

    # compare missing and new nodes
    for n in list(addPrevNodes):
        sn = getSimilarNode(n, addCurNodes)
        if sn is not None:
            # similar is good enough
            addPrevNodes.remove(n)
            addCurNodes.remove(sn)

    if addPrevNodes:
        log("ERROR", "in %s"%ctx)
        for n in addPrevNodes:
            log("ERROR","lost node: %s"%prettyPrintNode(n))

    for n in addCurNodes:
        log("INFO", "new node: %s"%prettyPrintNode(n))
192 | |
193 | |
194 | |
def analyseLogfile(filename):
    """Scan a logfile and check every save operation for lost nodes.

    Lines between a "START Saving" and an "END Saving" marker that contain
    "INFO transactionlog" are collected: "save previous" lines as the state
    before the save, other "save" lines as the state after it. At each END
    marker the two lists are handed to compareNodeLists (only if there are
    previous saves) and then cleared.

    Scanning stops after ``maxLinecnt`` lines if that module-level limit is
    set, or when more END than START markers have been seen. The number of
    scanned lines is reported at SYSMSG level. Returns None.
    """
    saving = 0
    linecnt = 0
    saveCtx = None
    prevSaves = []
    saves = []

    with open(filename) as f:
        for line in f:
            linecnt += 1
            if '*************** START Saving' in line:
                saving += 1
                log('DEBUG', line)
                saveCtx = parseStart(line)

                if saving > 1:
                    log("ERROR", "Concurrent save (%s) in #%s of %s"%(saving, linecnt, line))
                    # TODO: what now?

            elif 'INFO transactionlog' in line:
                # 'save previous' has to be tested first because such a
                # line contains 'save' as well
                if 'save previous' in line:
                    target = prevSaves
                elif 'save' in line:
                    target = saves
                else:
                    target = None

                if target is not None:
                    # parse once and reuse the result
                    data = parseSave(line)
                    if data is None:
                        log("DEBUG", "Error parsing line: %s"%line)
                    else:
                        target.append(data)

            elif '*************** END Saving' in line:
                saving -= 1
                log('DEBUG', line)

                if saving > 0:
                    log("ERROR", "Concurrent end save (%s) in #%s of %s"%(saving, linecnt, line))

                elif saving < 0:
                    log("ERROR", "Too many END saves!")
                    break

                log("INFO", "saving %s"%saveCtx)
                log("INFO", "prev saves: %s"%len(prevSaves))
                log("INFO", "saves: %s"%len(saves))

                if prevSaves:
                    compareNodeLists(prevSaves, saves, saveCtx)

                prevSaves = []
                saves = []

            # checked for every line, including lines that failed to parse
            if maxLinecnt is not None and linecnt >= maxLinecnt:
                break

    log("SYSMSG", "%s lines of logfile scanned"%linecnt)
257 | |
258 | |
# run analysis (only when executed as a script, not when imported)
if __name__ == '__main__':
    import sys

    # the logfile can be given as first command line argument;
    # without an argument the original default file is analysed
    analyseLogfile(sys.argv[1] if len(sys.argv) > 1 else 'ismi-161011.log')