Annotation of MPIWGWeb/updatePersonalWWW.py, revision 1.1.2.14
1.1.2.11 dwinter 1: import psycopg2 as psycopg
2: psyco = 2
1.1.2.1 dwinter 3:
4: import logging
1.1.2.8 casties 5: from MPIWGHelper import unicodify, utf8ify
1.1.2.1 dwinter 6:
7: from xml import sax
1.1.2.12 dwinter 8: from xml.sax.handler import ContentHandler
1.1.2.1 dwinter 9:
# XML namespace used by FileMaker 8 exports
fm_ns = 'http://www.filemaker.com/fmpxmlresult'

# Fields that are read from the XML and compared with the DB to detect
# conflicts.  Earlier version of the list, kept for reference:
#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
checkFields = [
    'key',
    'first_name',
    'last_name',
    'title',
    'titles_new',
    'home_inst',
    'current_work',
]
16:
1.1.2.1 dwinter 17:
def sql_quote(v):
    """Return the string *v* as a single-quoted, escaped SQL literal.

    Every single quote is doubled and every backslash is doubled, then the
    result is wrapped in single quotes.

    @param v: string to quote
    @return: the quoted string
    """
    # Neither substitution produces the other's trigger character, so the
    # order of the two replacements does not affect the result.
    escaped = v.replace("\\", "\\\\").replace("'", "''")
    return "'%s'" % escaped
25:
def SimpleSearch(curs, query, args=None):
    """Execute an SQL query on *curs* and return the fetched rows.

    @param curs: cursor object providing execute() and fetchall()
    @param query: SQL statement, optionally containing placeholders
    @param args: optional parameters handed to curs.execute() unchanged
    @return: the result of curs.fetchall(), or None if fetching rows fails
    """
    logging.debug("executing: %s", query)
    if psyco == 1:
        # with the module-level switch psyco == 1 the query is sent as
        # UTF-8 encoded bytes
        query = query.encode("UTF-8")

    curs.execute(query, args)
    logging.debug("sql done")
    try:
        return curs.fetchall()
    except Exception:
        # Best effort: a failing fetch yields None.  Unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        logging.debug("fetchall failed, returning None", exc_info=True)
        return None
42:
class xml_handler(ContentHandler):
    """SAX content handler collecting FileMaker XML rows (FMPXMLRESULT format).

    FIELD elements define the field names (spaces replaced by underscores).
    Within each ROW the n-th DATA element is stored under the n-th field
    name.  Finished rows are collected in ``self.result``, a dictionary
    keyed by the value of each row's ``key`` field.
    """

    def __init__(self):
        """Set up the (empty) parser state."""
        # old-style compatible call of the base class initialiser
        ContentHandler.__init__(self)

        # set up parser
        self.result = {}
        self.event = None

        self.dbIDs = {}
        self.rowcnt = 0

        self.currentName = None

        self.newDataset = []
        self.conflicts = []
        self.ok = []
        self.fieldNames = []
        self.currentRow = {}
        self.currentTag = ""
        # index of the next DATA element within the current row; set here
        # as well so that the attribute always exists
        self.currentData = 0

    def startElement(self, name, attrs):
        """Dispatch on the (case-insensitive) element name."""
        logging.debug(name)
        tag = name.lower()
        if tag == "field":
            self.handle_meta_fields(attrs)
        elif tag == "row":
            logging.debug("handleROW")
            self.currentRow = {}  # new row
            self.currentData = 0
        elif tag == "data":
            self.currentName = self.fieldNames[self.currentData]
            self.currentData += 1
            self.currentTag = "data"
            logging.debug("currentData%s", self.currentData)
            logging.debug("currentName%s", self.currentName)
            # create the entry so that empty DATA elements yield ""
            self.currentRow[self.currentName] = ""

    def endElement(self, name):
        """Close a DATA element or finish the current ROW."""
        tag = name.lower()
        if tag == "data":
            self.currentTag = ""
        if tag == "row":
            self.handle_end_row()

    def characters(self, content):
        """Append character data to the field of the open DATA element.

        The parser may deliver the text of one element in several chunks,
        so the content is accumulated instead of overwritten.  Text outside
        of a DATA element (e.g. whitespace between elements) is ignored.
        """
        if self.currentTag != "data" or self.currentName is None:
            return
        logging.debug(self.currentName + " " + content)
        self.currentRow[self.currentName] = (
            self.currentRow.get(self.currentName, "") + content)

    def handle_end_row(self):
        """Store the finished row in ``self.result`` under its ``key`` value.

        Rows without a ``key`` field are logged and skipped; a duplicate key
        is logged and the later row replaces the earlier one.
        """
        logging.debug("edd ROW")

        if self.currentName is None:
            # no DATA element has been seen so far, nothing to store
            return

        key = self.currentRow.get('key')
        if key is None:
            logging.error("Row without 'key' field skipped")
            return

        if key in self.result:
            logging.error("Key %s not unique" % key)

        logging.debug("currentname: %s", self.currentName)
        logging.debug("currentkey: %s", key)

        self.result[key] = self.currentRow.copy()

    def handle_meta_fields(self, attrs):
        """Register the field named by the NAME attribute of a FIELD element."""
        logging.debug("START -FIELD")
        name = attrs.get('NAME')
        name = name.replace(" ", "_")  # make sure no spaces
        self.fieldNames.append(name)

        self.update_fields = self.fieldNames

        logging.debug("xml-fieldnames:" + repr(self.fieldNames))
        return
146:
1.1.2.12 dwinter 147: # def handle_meta_field(self, end_condition):
148: # name = self.params.get((None, u'NAME'))
149: # yield None
150: # #Element closed. Wrap up
151: # name=name.replace(" ","_")# make sure no spaces
152: # self.fieldNames.append(name)
153: # logging.debug("FIELD name: "+name)
154: # return
155:
156: # def handle_data(self, end_condition):
157: # dispatcher = {
158: # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
159: # self.handle_row,
160: # }
161: # #First round through the generator corresponds to the
162: # #start element event
163: # logging.debug("START RESULTSET")
164: # self.rowcnt = 0
165: # yield None
166: #
167: # #delegate is a generator that handles all the events "within"
168: # #this element
169: # delegate = None
170: # while not self.event == end_condition:
171: # delegate = saxtools.tenorsax.event_loop_body(
172: # dispatcher, delegate, self.event)
173: # yield None
174: #
175: # #Element closed. Wrap up
176: # logging.debug("END RESULTSET")
177: #
178: #
179: #
180: # return
181:
182: # def handle_row(self, end_condition):
183: # dispatcher = {
184: # (saxtools.START_ELEMENT, fm_ns, u'COL'):
185: # self.handle_col,
186: # }
187: # logging.debug("START ROW")
188: # self.dataSet = {}
189: # self.colIdx = 0
190: # yield None
191: #
192: # #delegate is a generator that handles all the events "within"
193: # #this element
194: # delegate = None
195: # while not self.event == end_condition:
196: # delegate = saxtools.tenorsax.event_loop_body(
197: # dispatcher, delegate, self.event)
198: # yield None
199: #
200: # #Element closed. Wrap up
201: # logging.debug("END ROW")
202: # self.rowcnt += 1
203: # # process collected row data
204: # update=False
205: # id_val=''
206: #
207: # if self.result.has_key(self.dataSet['key']):
208: # logging.error("Key %s not unique"%self.dataSet['key'])
209: #
210: # self.result[self.dataSet['key']]=self.dataSet
211: #
212: #
213: # return
214:
215: # def handle_col(self, end_condition):
216: # dispatcher = {
217: # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
218: # self.handle_data_tag,
219: # }
220: # #print "START COL"
221: # yield None
222: # #delegate is a generator that handles all the events "within"
223: # #this element
224: # delegate = None
225: # while not self.event == end_condition:
226: # delegate = saxtools.tenorsax.event_loop_body(
227: # dispatcher, delegate, self.event)
228: # yield None
229: # #Element closed. Wrap up
230: # #print "END COL"
231: # self.colIdx += 1
232: # return
233: #
234: # def handle_data_tag(self, end_condition):
235: # #print "START DATA"
236: # content = u''
237: # yield None
238: # # gather child elements
239: # while not self.event == end_condition:
240: # if self.event[0] == saxtools.CHARACTER_DATA:
241: # content += self.params
242: # yield None
243: # #Element closed. Wrap up
244: # field = self.fieldNames[self.colIdx]
245: # self.dataSet[field.lower()] = content
246: # #print " DATA(", field, ") ", repr(content)
247: # return
1.1.2.1 dwinter 248:
249:
def checkImport(dsn, resultSet):
    """Compare an imported data set with the contents of table personal_www.

    @param dsn: database connection string handed to psycopg.connect()
    @param resultSet: dictionary mapping key -> row dictionary
    @return: tuple (new, conflicts); ``new`` is the list of keys of
        resultSet that do not occur in the table, ``conflicts`` maps each
        key for which checkForConflicts() reports a conflict to the details
        returned by it
    """
    # NOTE(review): the DSN is logged verbatim and may contain a password.
    logging.info("dsn: %s" % dsn)
    dbCon = psycopg.connect(dsn)
    try:
        db = dbCon.cursor()

        # SimpleSearch returns None when no rows could be fetched
        rows = SimpleSearch(db, "select key from personal_www") or []
        # a set gives constant-time membership tests in the loop below
        knownKeys = set(unicodify(row[0]) for row in rows if row[0])

        # first step: detect new entries and conflicts
        new = []
        conflicts = {}
        for key in resultSet:
            if key not in knownKeys:
                new.append(key)
                continue
            conflict, ret = checkForConflicts(db, resultSet[key], key)
            if conflict:
                conflicts[key] = ret

        return new, conflicts
    finally:
        # always release the connection, also when a query fails
        dbCon.close()
284:
def importFMPXML(filename):
    """Parse a FileMaker XML file (FMPXMLRESULT format) into a dictionary.

    @param filename: name of the XML file handed to the SAX parser
    @return: the ``result`` dictionary collected by the xml_handler
    """
    # our own handler consumes the SAX events
    handler = xml_handler()

    reader = sax.make_parser()
    reader.setContentHandler(handler)
    reader.parse(filename)

    # the XML has now been transformed into a dictionary
    return handler.result
304:
305:
306:
def checkForConflicts(cursor, dataSet, key):
    """Compare one imported row with the stored row of table personal_www.

    Only the fields listed in the module-level ``checkFields`` are compared;
    values are compared with surrounding whitespace stripped.

    @param cursor: database cursor used for the lookup
    @param dataSet: dictionary field name -> value taken from the data file
    @param key: value of column ``key`` identifying the stored row
    @return: tuple (conflict, details); (True, None) when no stored row is
        found, otherwise ``conflict`` tells whether any field differs and
        ``details`` is a list of (field, dbValue, setValue) tuples
    """
    # The column list comes from the module constant; the key originates
    # from the imported file and is therefore passed as a query parameter
    # instead of being pasted into the SQL string.
    qstr = "select %s from personal_www where key=%%s" % ",".join(checkFields)
    sr = SimpleSearch(cursor, qstr, (key,))

    if not sr:
        return True, None

    dbRow = sr[0]
    ret = []
    for i, checkField in enumerate(checkFields):
        dbValueR = dbRow[i]
        if dbValueR:
            dbValue = unicodify(dbValueR)
        else:
            dbValue = ""

        if checkField not in dataSet:
            logging.warning("unknown field %s in data file!" % checkField)
            continue

        setValue = dataSet[checkField]
        logging.debug(" %s %s %s %s" % (repr(key), checkField, repr(dbValue), repr(setValue)))
        if dbValue.strip() != setValue.strip():
            ret.append((checkField, dbValue, setValue))

    return bool(ret), ret
343:
344:
345: ##
346: ## public static int main()
347: ##
348:
if __name__ == "__main__":
    # Command line use: [xml file [dsn]]; both arguments are optional.
    import sys

    loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # NOTE(review): the defaults keep the previously hard-coded input path
    # and DSN (which embeds a password); consider moving them out of the
    # source.
    filename = "/Users/dwinter/Desktop/personalwww.xml"
    dsn = "dbname=personalwww user=www password=e1nste1n"
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    if len(sys.argv) > 2:
        dsn = sys.argv[2]

    resultSet = importFMPXML(filename=filename)
    news, conflicts = checkImport(dsn=dsn, resultSet=resultSet)

    # single-argument print(...) behaves the same as the print statement
    print("new")
    print("%s %s" % (len(news), news))
    print("-----------")
    print("conflicts")
    print(conflicts)
369:
370: # update_fields = None
371: #
372: # if options.update_fields:
373: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
374: #
375: # parser = sax.make_parser()
376: # #The "consumer" is our own handler
377: # consumer = xml_handler(dsn=options.dsn,table=options.table,
378: # update_fields=update_fields,id_field=options.id_field,
379: # sync_mode=options.sync_mode)
380: # #Initialize Tenorsax with handler
381: # handler = saxtools.tenorsax(consumer)
382: # #Resulting tenorsax instance is the SAX handler
383: # parser.setContentHandler(handler)
384: # parser.setFeature(sax.handler.feature_namespaces, 1)
385: # parser.parse(options.filename)
386: #
387: #
388: # print "DONE!"
389:
1.1.2.10 dwinter 390:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>