comparison updatePersonalWWW.py @ 0:bca61e893fcc

first checkin of MPIWGWeb r2 branch from CVS into mercurial
author casties
date Thu, 10 Jan 2013 17:52:13 +0100
parents
children c0dcb747cc41
comparison
equal deleted inserted replaced
-1:000000000000 0:bca61e893fcc
1 import psycopg2 as psycopg
2 psyco = 2
3
4 import logging
5 from MPIWGHelper import unicodify, utf8ify
6
7 from xml import sax
8 from xml.sax.handler import ContentHandler
9
10 # namespace for FileMaker8
11 fm_ns = 'http://www.filemaker.com/fmpxmlresult'
12
13 # list of fields that are taken from XML and checked against DB as conflicts
14 #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
15 checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
16
17
18 def sql_quote(v):
19 # quote dictionary
20 quote_dict = {"\'": "''", "\\": "\\\\"}
21 for dkey in quote_dict.keys():
22 if v.find(dkey) >= 0:
23 v=quote_dict[dkey].join(v.split(dkey))
24 return "'%s'"%v
25
26 def SimpleSearch(curs,query, args=None):
27 """execute sql query and return data"""
28 logging.debug("executing: "+query)
29 if psyco == 1:
30 query = query.encode("UTF-8")
31 #if args is not None:
32 # args = [ sql_quote(a) for a in args ]
33 #logging.debug(query)
34 #logging.debug(args)
35
36 curs.execute(query, args)
37 logging.debug("sql done")
38 try:
39 return curs.fetchall()
40 except:
41 return None
42
43 class xml_handler(ContentHandler):
44
45 def __init__(self):
46 '''
47 SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48 @param dsn: database connection string
49 @param table: name of the table the xml shall be imported into
50 '''
51
52 # set up parser
53 self.result={}
54 self.event = None
55 # self.top_dispatcher = {
56 # (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57 # self.handle_meta_fields,
58 # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59 # self.handle_data,
60 # }
61
62 # connect database
63
64
65
66
67 self.dbIDs = {}
68 self.rowcnt = 0
69
70 self.currentName=None
71
72 self.newDataset = []
73 self.conflicts = []
74 self.ok = []
75 self.fieldNames=[]
76 self.currentRow={}
77 self.currentTag=""
78 return
79
80 def startElement(self, name, attrs):
81 logging.debug(name)
82 if (name.lower() == "field") :
83 self.handle_meta_fields(attrs)
84 if (name.lower() == "row") :
85 logging.debug("handleROW")
86 self.currentRow={} # new Row
87 self.currentData=0
88
89
90 if (name.lower()=="data"):
91
92 self.currentName=self.fieldNames[self.currentData]
93 self.currentData+=1
94 self.currentTag="data"
95 logging.debug("currentData"+str(self.currentData))
96 logging.debug("currentName"+str(self.currentName))
97 self.currentRow[self.currentName]="" #anlegen des eintrages
98
99 def endElement(self,name):
100 if (name.lower() == "data") :
101 self.currentTag=""
102 if (name.lower() == "row"):
103 self.handle_end_row()
104
105 def characters(self,content):
106
107 if self.currentName is not None:
108 logging.debug(self.currentName+" "+content)
109 contentTmp = self.currentRow.get(self.currentName,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.)
110 self.currentRow[self.currentName]=contentTmp+content;
111
112
113 def handle_end_row(self):
114
115 logging.debug("edd ROW")
116
117 if self.result.has_key(self.currentRow['key']):
118 logging.error("Key %s not unique"%self.currentRow['key'])
119
120 logging.debug("currentrow:"+self.currentName)
121 logging.debug("currentname:"+self.currentRow['key'])
122
123 if self.currentName is not None:
124 self.result[self.currentRow['key']]=self.currentRow.copy()
125 #
126 #
127 # return
128
129 def handle_meta_fields(self,attrs):
130
131 #First round through the generator corresponds to the
132 #start element event
133 logging.debug("START -FIELD")
134 name = attrs.get('NAME')
135 name=name.replace(" ","_")# make sure no spaces
136 self.fieldNames.append(name)
137
138 self.update_fields = self.fieldNames
139
140 logging.debug("xml-fieldnames:"+repr(self.fieldNames))
141 # get list of fields in db table
142
143 #print "upQ: ", self.updQuery
144 #print "adQ: ", self.addQuery
145
146 return
147
148 # def handle_meta_field(self, end_condition):
149 # name = self.params.get((None, u'NAME'))
150 # yield None
151 # #Element closed. Wrap up
152 # name=name.replace(" ","_")# make sure no spaces
153 # self.fieldNames.append(name)
154 # logging.debug("FIELD name: "+name)
155 # return
156
157 # def handle_data(self, end_condition):
158 # dispatcher = {
159 # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
160 # self.handle_row,
161 # }
162 # #First round through the generator corresponds to the
163 # #start element event
164 # logging.debug("START RESULTSET")
165 # self.rowcnt = 0
166 # yield None
167 #
168 # #delegate is a generator that handles all the events "within"
169 # #this element
170 # delegate = None
171 # while not self.event == end_condition:
172 # delegate = saxtools.tenorsax.event_loop_body(
173 # dispatcher, delegate, self.event)
174 # yield None
175 #
176 # #Element closed. Wrap up
177 # logging.debug("END RESULTSET")
178 #
179 #
180 #
181 # return
182
183 # def handle_row(self, end_condition):
184 # dispatcher = {
185 # (saxtools.START_ELEMENT, fm_ns, u'COL'):
186 # self.handle_col,
187 # }
188 # logging.debug("START ROW")
189 # self.dataSet = {}
190 # self.colIdx = 0
191 # yield None
192 #
193 # #delegate is a generator that handles all the events "within"
194 # #this element
195 # delegate = None
196 # while not self.event == end_condition:
197 # delegate = saxtools.tenorsax.event_loop_body(
198 # dispatcher, delegate, self.event)
199 # yield None
200 #
201 # #Element closed. Wrap up
202 # logging.debug("END ROW")
203 # self.rowcnt += 1
204 # # process collected row data
205 # update=False
206 # id_val=''
207 #
208 # if self.result.has_key(self.dataSet['key']):
209 # logging.error("Key %s not unique"%self.dataSet['key'])
210 #
211 # self.result[self.dataSet['key']]=self.dataSet
212 #
213 #
214 # return
215
216 # def handle_col(self, end_condition):
217 # dispatcher = {
218 # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
219 # self.handle_data_tag,
220 # }
221 # #print "START COL"
222 # yield None
223 # #delegate is a generator that handles all the events "within"
224 # #this element
225 # delegate = None
226 # while not self.event == end_condition:
227 # delegate = saxtools.tenorsax.event_loop_body(
228 # dispatcher, delegate, self.event)
229 # yield None
230 # #Element closed. Wrap up
231 # #print "END COL"
232 # self.colIdx += 1
233 # return
234 #
235 # def handle_data_tag(self, end_condition):
236 # #print "START DATA"
237 # content = u''
238 # yield None
239 # # gather child elements
240 # while not self.event == end_condition:
241 # if self.event[0] == saxtools.CHARACTER_DATA:
242 # content += self.params
243 # yield None
244 # #Element closed. Wrap up
245 # field = self.fieldNames[self.colIdx]
246 # self.dataSet[field.lower()] = content
247 # #print " DATA(", field, ") ", repr(content)
248 # return
249
250
251 def checkImport(dsn,resultSet):
252 #now connect to the database
253 logging.info("dsn: %s"%dsn)
254 dbCon = psycopg.connect(dsn)
255 db = dbCon.cursor()
256
257
258 qstr="select key from personal_www"
259
260 results=SimpleSearch(db,qstr)
261
262 keys=[]
263 for x in results:
264 if x[0]:
265 keys.append(unicodify(x[0]))
266
267
268 #first step detect new entries and conflicts
269 new=[]
270 conflicts={}
271
272 for x in resultSet.iterkeys():
273
274 if x not in keys:
275
276 new.append(x)
277
278 else:
279
280 conflict,ret=checkForConflicts(db,resultSet[x],x)
281 if conflict:
282 conflicts[x]=ret
283
284 return new,conflicts
285
286 def importFMPXML(filename):
287 '''
288 method to import FileMaker XML file (FMPXMLRESULT format) into the table.
289 @param filename: xmlfile filename
290
291 '''
292
293 parser = sax.make_parser()
294 #The "consumer" is our own handler
295 consumer = xml_handler()
296 #Initialize Tenorsax with handler
297 #handler = saxtools.tenorsax(consumer)
298 #Resulting tenorsax instance is the SAX handler
299 parser.setContentHandler(consumer)
300 #parser.setFeature(sax.handler.feature_namespaces, 1)
301 parser.parse(filename)
302 resultSet=consumer.result # xml now transformed into an dictionary
303
304 return resultSet
305
306
307
308 def checkForConflicts(cursor,dataSet,key):
309
310 ret=[]
311 fields=",".join(checkFields)
312
313 qstr="select %s from personal_www where key='%s'"%(fields,key)
314
315
316 sr=SimpleSearch(cursor,qstr)
317
318 if not sr:
319 return True, None
320
321 i=0
322 retValue=False
323
324 for checkField in checkFields:
325 dbValueR=sr[0][i]
326 if dbValueR:
327 dbValue=unicodify(dbValueR)
328 else:
329 dbValue=""
330
331 if checkField in dataSet:
332 setValue=dataSet[checkField]
333 logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
334 if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
335 ret.append((checkField,dbValue,setValue))
336 retValue=True
337
338 else:
339 logging.warning("unknown field %s in data file!"%checkField)
340
341 i+=1
342
343 return retValue,ret
344
345
346 ##
347 ## public static int main()
348 ##
349
350 if __name__ == "__main__":
351
352
353
354 loglevel = logging.DEBUG
355
356
357 logging.basicConfig(level=loglevel,
358 format='%(asctime)s %(levelname)s %(message)s',
359 datefmt='%H:%M:%S')
360
361 resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
362 news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet)
363
364
365 print "new"
366 print len(news),news
367 print "-----------"
368 print "conflicts"
369 print conflicts
370
371 # update_fields = None
372 #
373 # if options.update_fields:
374 # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
375 #
376 # parser = sax.make_parser()
377 # #The "consumer" is our own handler
378 # consumer = xml_handler(dsn=options.dsn,table=options.table,
379 # update_fields=update_fields,id_field=options.id_field,
380 # sync_mode=options.sync_mode)
381 # #Initialize Tenorsax with handler
382 # handler = saxtools.tenorsax(consumer)
383 # #Resulting tenorsax instance is the SAX handler
384 # parser.setContentHandler(handler)
385 # parser.setFeature(sax.handler.feature_namespaces, 1)
386 # parser.parse(options.filename)
387 #
388 #
389 # print "DONE!"
390
391