Mercurial > hg > MPIWGWeb
comparison updatePersonalWWW.py @ 0:bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
author | casties |
---|---|
date | Thu, 10 Jan 2013 17:52:13 +0100 |
parents | |
children | c0dcb747cc41 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bca61e893fcc |
---|---|
1 import psycopg2 as psycopg | |
2 psyco = 2 | |
3 | |
4 import logging | |
5 from MPIWGHelper import unicodify, utf8ify | |
6 | |
7 from xml import sax | |
8 from xml.sax.handler import ContentHandler | |
9 | |
10 # namespace for FileMaker8 | |
11 fm_ns = 'http://www.filemaker.com/fmpxmlresult' | |
12 | |
13 # list of fields that are taken from XML and checked against DB as conflicts | |
14 #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] | |
15 checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work'] | |
16 | |
17 | |
18 def sql_quote(v): | |
19 # quote dictionary | |
20 quote_dict = {"\'": "''", "\\": "\\\\"} | |
21 for dkey in quote_dict.keys(): | |
22 if v.find(dkey) >= 0: | |
23 v=quote_dict[dkey].join(v.split(dkey)) | |
24 return "'%s'"%v | |
25 | |
26 def SimpleSearch(curs,query, args=None): | |
27 """execute sql query and return data""" | |
28 logging.debug("executing: "+query) | |
29 if psyco == 1: | |
30 query = query.encode("UTF-8") | |
31 #if args is not None: | |
32 # args = [ sql_quote(a) for a in args ] | |
33 #logging.debug(query) | |
34 #logging.debug(args) | |
35 | |
36 curs.execute(query, args) | |
37 logging.debug("sql done") | |
38 try: | |
39 return curs.fetchall() | |
40 except: | |
41 return None | |
42 | |
43 class xml_handler(ContentHandler): | |
44 | |
45 def __init__(self): | |
46 ''' | |
47 SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. | |
48 @param dsn: database connection string | |
49 @param table: name of the table the xml shall be imported into | |
50 ''' | |
51 | |
52 # set up parser | |
53 self.result={} | |
54 self.event = None | |
55 # self.top_dispatcher = { | |
56 # (saxtools.START_ELEMENT, fm_ns, u'METADATA'): | |
57 # self.handle_meta_fields, | |
58 # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): | |
59 # self.handle_data, | |
60 # } | |
61 | |
62 # connect database | |
63 | |
64 | |
65 | |
66 | |
67 self.dbIDs = {} | |
68 self.rowcnt = 0 | |
69 | |
70 self.currentName=None | |
71 | |
72 self.newDataset = [] | |
73 self.conflicts = [] | |
74 self.ok = [] | |
75 self.fieldNames=[] | |
76 self.currentRow={} | |
77 self.currentTag="" | |
78 return | |
79 | |
80 def startElement(self, name, attrs): | |
81 logging.debug(name) | |
82 if (name.lower() == "field") : | |
83 self.handle_meta_fields(attrs) | |
84 if (name.lower() == "row") : | |
85 logging.debug("handleROW") | |
86 self.currentRow={} # new Row | |
87 self.currentData=0 | |
88 | |
89 | |
90 if (name.lower()=="data"): | |
91 | |
92 self.currentName=self.fieldNames[self.currentData] | |
93 self.currentData+=1 | |
94 self.currentTag="data" | |
95 logging.debug("currentData"+str(self.currentData)) | |
96 logging.debug("currentName"+str(self.currentName)) | |
97 self.currentRow[self.currentName]="" #anlegen des eintrages | |
98 | |
99 def endElement(self,name): | |
100 if (name.lower() == "data") : | |
101 self.currentTag="" | |
102 if (name.lower() == "row"): | |
103 self.handle_end_row() | |
104 | |
105 def characters(self,content): | |
106 | |
107 if self.currentName is not None: | |
108 logging.debug(self.currentName+" "+content) | |
109 contentTmp = self.currentRow.get(self.currentName,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.) | |
110 self.currentRow[self.currentName]=contentTmp+content; | |
111 | |
112 | |
113 def handle_end_row(self): | |
114 | |
115 logging.debug("edd ROW") | |
116 | |
117 if self.result.has_key(self.currentRow['key']): | |
118 logging.error("Key %s not unique"%self.currentRow['key']) | |
119 | |
120 logging.debug("currentrow:"+self.currentName) | |
121 logging.debug("currentname:"+self.currentRow['key']) | |
122 | |
123 if self.currentName is not None: | |
124 self.result[self.currentRow['key']]=self.currentRow.copy() | |
125 # | |
126 # | |
127 # return | |
128 | |
129 def handle_meta_fields(self,attrs): | |
130 | |
131 #First round through the generator corresponds to the | |
132 #start element event | |
133 logging.debug("START -FIELD") | |
134 name = attrs.get('NAME') | |
135 name=name.replace(" ","_")# make sure no spaces | |
136 self.fieldNames.append(name) | |
137 | |
138 self.update_fields = self.fieldNames | |
139 | |
140 logging.debug("xml-fieldnames:"+repr(self.fieldNames)) | |
141 # get list of fields in db table | |
142 | |
143 #print "upQ: ", self.updQuery | |
144 #print "adQ: ", self.addQuery | |
145 | |
146 return | |
147 | |
148 # def handle_meta_field(self, end_condition): | |
149 # name = self.params.get((None, u'NAME')) | |
150 # yield None | |
151 # #Element closed. Wrap up | |
152 # name=name.replace(" ","_")# make sure no spaces | |
153 # self.fieldNames.append(name) | |
154 # logging.debug("FIELD name: "+name) | |
155 # return | |
156 | |
157 # def handle_data(self, end_condition): | |
158 # dispatcher = { | |
159 # (saxtools.START_ELEMENT, fm_ns, u'ROW'): | |
160 # self.handle_row, | |
161 # } | |
162 # #First round through the generator corresponds to the | |
163 # #start element event | |
164 # logging.debug("START RESULTSET") | |
165 # self.rowcnt = 0 | |
166 # yield None | |
167 # | |
168 # #delegate is a generator that handles all the events "within" | |
169 # #this element | |
170 # delegate = None | |
171 # while not self.event == end_condition: | |
172 # delegate = saxtools.tenorsax.event_loop_body( | |
173 # dispatcher, delegate, self.event) | |
174 # yield None | |
175 # | |
176 # #Element closed. Wrap up | |
177 # logging.debug("END RESULTSET") | |
178 # | |
179 # | |
180 # | |
181 # return | |
182 | |
183 # def handle_row(self, end_condition): | |
184 # dispatcher = { | |
185 # (saxtools.START_ELEMENT, fm_ns, u'COL'): | |
186 # self.handle_col, | |
187 # } | |
188 # logging.debug("START ROW") | |
189 # self.dataSet = {} | |
190 # self.colIdx = 0 | |
191 # yield None | |
192 # | |
193 # #delegate is a generator that handles all the events "within" | |
194 # #this element | |
195 # delegate = None | |
196 # while not self.event == end_condition: | |
197 # delegate = saxtools.tenorsax.event_loop_body( | |
198 # dispatcher, delegate, self.event) | |
199 # yield None | |
200 # | |
201 # #Element closed. Wrap up | |
202 # logging.debug("END ROW") | |
203 # self.rowcnt += 1 | |
204 # # process collected row data | |
205 # update=False | |
206 # id_val='' | |
207 # | |
208 # if self.result.has_key(self.dataSet['key']): | |
209 # logging.error("Key %s not unique"%self.dataSet['key']) | |
210 # | |
211 # self.result[self.dataSet['key']]=self.dataSet | |
212 # | |
213 # | |
214 # return | |
215 | |
216 # def handle_col(self, end_condition): | |
217 # dispatcher = { | |
218 # (saxtools.START_ELEMENT, fm_ns, u'DATA'): | |
219 # self.handle_data_tag, | |
220 # } | |
221 # #print "START COL" | |
222 # yield None | |
223 # #delegate is a generator that handles all the events "within" | |
224 # #this element | |
225 # delegate = None | |
226 # while not self.event == end_condition: | |
227 # delegate = saxtools.tenorsax.event_loop_body( | |
228 # dispatcher, delegate, self.event) | |
229 # yield None | |
230 # #Element closed. Wrap up | |
231 # #print "END COL" | |
232 # self.colIdx += 1 | |
233 # return | |
234 # | |
235 # def handle_data_tag(self, end_condition): | |
236 # #print "START DATA" | |
237 # content = u'' | |
238 # yield None | |
239 # # gather child elements | |
240 # while not self.event == end_condition: | |
241 # if self.event[0] == saxtools.CHARACTER_DATA: | |
242 # content += self.params | |
243 # yield None | |
244 # #Element closed. Wrap up | |
245 # field = self.fieldNames[self.colIdx] | |
246 # self.dataSet[field.lower()] = content | |
247 # #print " DATA(", field, ") ", repr(content) | |
248 # return | |
249 | |
250 | |
251 def checkImport(dsn,resultSet): | |
252 #now connect to the database | |
253 logging.info("dsn: %s"%dsn) | |
254 dbCon = psycopg.connect(dsn) | |
255 db = dbCon.cursor() | |
256 | |
257 | |
258 qstr="select key from personal_www" | |
259 | |
260 results=SimpleSearch(db,qstr) | |
261 | |
262 keys=[] | |
263 for x in results: | |
264 if x[0]: | |
265 keys.append(unicodify(x[0])) | |
266 | |
267 | |
268 #first step detect new entries and conflicts | |
269 new=[] | |
270 conflicts={} | |
271 | |
272 for x in resultSet.iterkeys(): | |
273 | |
274 if x not in keys: | |
275 | |
276 new.append(x) | |
277 | |
278 else: | |
279 | |
280 conflict,ret=checkForConflicts(db,resultSet[x],x) | |
281 if conflict: | |
282 conflicts[x]=ret | |
283 | |
284 return new,conflicts | |
285 | |
286 def importFMPXML(filename): | |
287 ''' | |
288 method to import FileMaker XML file (FMPXMLRESULT format) into the table. | |
289 @param filename: xmlfile filename | |
290 | |
291 ''' | |
292 | |
293 parser = sax.make_parser() | |
294 #The "consumer" is our own handler | |
295 consumer = xml_handler() | |
296 #Initialize Tenorsax with handler | |
297 #handler = saxtools.tenorsax(consumer) | |
298 #Resulting tenorsax instance is the SAX handler | |
299 parser.setContentHandler(consumer) | |
300 #parser.setFeature(sax.handler.feature_namespaces, 1) | |
301 parser.parse(filename) | |
302 resultSet=consumer.result # xml now transformed into an dictionary | |
303 | |
304 return resultSet | |
305 | |
306 | |
307 | |
308 def checkForConflicts(cursor,dataSet,key): | |
309 | |
310 ret=[] | |
311 fields=",".join(checkFields) | |
312 | |
313 qstr="select %s from personal_www where key='%s'"%(fields,key) | |
314 | |
315 | |
316 sr=SimpleSearch(cursor,qstr) | |
317 | |
318 if not sr: | |
319 return True, None | |
320 | |
321 i=0 | |
322 retValue=False | |
323 | |
324 for checkField in checkFields: | |
325 dbValueR=sr[0][i] | |
326 if dbValueR: | |
327 dbValue=unicodify(dbValueR) | |
328 else: | |
329 dbValue="" | |
330 | |
331 if checkField in dataSet: | |
332 setValue=dataSet[checkField] | |
333 logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) | |
334 if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): | |
335 ret.append((checkField,dbValue,setValue)) | |
336 retValue=True | |
337 | |
338 else: | |
339 logging.warning("unknown field %s in data file!"%checkField) | |
340 | |
341 i+=1 | |
342 | |
343 return retValue,ret | |
344 | |
345 | |
346 ## | |
347 ## public static int main() | |
348 ## | |
349 | |
350 if __name__ == "__main__": | |
351 | |
352 | |
353 | |
354 loglevel = logging.DEBUG | |
355 | |
356 | |
357 logging.basicConfig(level=loglevel, | |
358 format='%(asctime)s %(levelname)s %(message)s', | |
359 datefmt='%H:%M:%S') | |
360 | |
361 resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") | |
362 news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet) | |
363 | |
364 | |
365 print "new" | |
366 print len(news),news | |
367 print "-----------" | |
368 print "conflicts" | |
369 print conflicts | |
370 | |
371 # update_fields = None | |
372 # | |
373 # if options.update_fields: | |
374 # update_fields = [string.strip(s) for s in options.update_fields.split(',')] | |
375 # | |
376 # parser = sax.make_parser() | |
377 # #The "consumer" is our own handler | |
378 # consumer = xml_handler(dsn=options.dsn,table=options.table, | |
379 # update_fields=update_fields,id_field=options.id_field, | |
380 # sync_mode=options.sync_mode) | |
381 # #Initialize Tenorsax with handler | |
382 # handler = saxtools.tenorsax(consumer) | |
383 # #Resulting tenorsax instance is the SAX handler | |
384 # parser.setContentHandler(handler) | |
385 # parser.setFeature(sax.handler.feature_namespaces, 1) | |
386 # parser.parse(options.filename) | |
387 # | |
388 # | |
389 # print "DONE!" | |
390 | |
391 |