Annotation of MPIWGWeb/updatePersonalWWW.py, revision 1.1.2.15
1.1.2.11 dwinter 1: import psycopg2 as psycopg
2: psyco = 2
1.1.2.1 dwinter 3:
4: import logging
1.1.2.8 casties 5: from MPIWGHelper import unicodify, utf8ify
1.1.2.1 dwinter 6:
7: from xml import sax
1.1.2.12 dwinter 8: from xml.sax.handler import ContentHandler
1.1.2.1 dwinter 9:
1.1.2.4 casties 10: # namespace for FileMaker8
1.1.2.1 dwinter 11: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
1.1.2.4 casties 12:
13: # list of fields that are taken from XML and checked against DB as conflicts
1.1.2.1 dwinter 14: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
1.1.2.10 dwinter 15: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
16:
1.1.2.1 dwinter 17:
18: def sql_quote(v):
19: # quote dictionary
20: quote_dict = {"\'": "''", "\\": "\\\\"}
21: for dkey in quote_dict.keys():
22: if v.find(dkey) >= 0:
23: v=quote_dict[dkey].join(v.split(dkey))
24: return "'%s'"%v
25:
26: def SimpleSearch(curs,query, args=None):
27: """execute sql query and return data"""
28: logging.debug("executing: "+query)
29: if psyco == 1:
30: query = query.encode("UTF-8")
31: #if args is not None:
32: # args = [ sql_quote(a) for a in args ]
33: #logging.debug(query)
34: #logging.debug(args)
35:
36: curs.execute(query, args)
37: logging.debug("sql done")
38: try:
39: return curs.fetchall()
40: except:
41: return None
42:
1.1.2.12 dwinter 43: class xml_handler(ContentHandler):
1.1.2.1 dwinter 44:
45: def __init__(self):
46: '''
47: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48: @param dsn: database connection string
49: @param table: name of the table the xml shall be imported into
50: '''
51:
52: # set up parser
53: self.result={}
54: self.event = None
1.1.2.12 dwinter 55: # self.top_dispatcher = {
56: # (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57: # self.handle_meta_fields,
58: # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59: # self.handle_data,
60: # }
1.1.2.1 dwinter 61:
62: # connect database
63:
64:
65:
66:
67: self.dbIDs = {}
68: self.rowcnt = 0
69:
1.1.2.12 dwinter 70: self.currentName=None
1.1.2.1 dwinter 71:
72: self.newDataset = []
73: self.conflicts = []
74: self.ok = []
75: self.fieldNames=[]
1.1.2.12 dwinter 76: self.currentRow={}
77: self.currentTag=""
1.1.2.1 dwinter 78: return
79:
1.1.2.12 dwinter 80: def startElement(self, name, attrs):
81: logging.debug(name)
82: if (name.lower() == "field") :
83: self.handle_meta_fields(attrs)
84: if (name.lower() == "row") :
85: logging.debug("handleROW")
86: self.currentRow={} # new Row
87: self.currentData=0
88:
89:
90: if (name.lower()=="data"):
91:
92: self.currentName=self.fieldNames[self.currentData]
93: self.currentData+=1
94: self.currentTag="data"
1.1.2.14 dwinter 95: logging.debug("currentData"+str(self.currentData))
96: logging.debug("currentName"+str(self.currentName))
97: self.currentRow[self.currentName]="" #anlegen des eintrages
1.1.2.12 dwinter 98:
99: def endElement(self,name):
100: if (name.lower() == "data") :
101: self.currentTag=""
102: if (name.lower() == "row"):
103: self.handle_end_row()
104:
105: def characters(self,content):
106:
107: if self.currentName is not None:
108: logging.debug(self.currentName+" "+content)
1.1.2.15! dwinter 109: contentTmp = self.currentRow.get(self.currentName,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.)
! 110: self.currentRow[self.currentName]=contentTmp+content;
1.1.2.12 dwinter 111:
112:
113: def handle_end_row(self):
114:
115: logging.debug("edd ROW")
116:
117: if self.result.has_key(self.currentRow['key']):
118: logging.error("Key %s not unique"%self.currentRow['key'])
119:
1.1.2.14 dwinter 120: logging.debug("currentrow:"+self.currentName)
121: logging.debug("currentname:"+self.currentRow['key'])
122:
1.1.2.12 dwinter 123: if self.currentName is not None:
124: self.result[self.currentRow['key']]=self.currentRow.copy()
125: #
126: #
127: # return
128:
129: def handle_meta_fields(self,attrs):
130:
1.1.2.1 dwinter 131: #First round through the generator corresponds to the
132: #start element event
1.1.2.12 dwinter 133: logging.debug("START -FIELD")
134: name = attrs.get('NAME')
135: name=name.replace(" ","_")# make sure no spaces
136: self.fieldNames.append(name)
1.1.2.1 dwinter 137:
138: self.update_fields = self.fieldNames
139:
140: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
141: # get list of fields in db table
142:
143: #print "upQ: ", self.updQuery
144: #print "adQ: ", self.addQuery
145:
146: return
147:
1.1.2.12 dwinter 148: # def handle_meta_field(self, end_condition):
149: # name = self.params.get((None, u'NAME'))
150: # yield None
151: # #Element closed. Wrap up
152: # name=name.replace(" ","_")# make sure no spaces
153: # self.fieldNames.append(name)
154: # logging.debug("FIELD name: "+name)
155: # return
156:
157: # def handle_data(self, end_condition):
158: # dispatcher = {
159: # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
160: # self.handle_row,
161: # }
162: # #First round through the generator corresponds to the
163: # #start element event
164: # logging.debug("START RESULTSET")
165: # self.rowcnt = 0
166: # yield None
167: #
168: # #delegate is a generator that handles all the events "within"
169: # #this element
170: # delegate = None
171: # while not self.event == end_condition:
172: # delegate = saxtools.tenorsax.event_loop_body(
173: # dispatcher, delegate, self.event)
174: # yield None
175: #
176: # #Element closed. Wrap up
177: # logging.debug("END RESULTSET")
178: #
179: #
180: #
181: # return
182:
183: # def handle_row(self, end_condition):
184: # dispatcher = {
185: # (saxtools.START_ELEMENT, fm_ns, u'COL'):
186: # self.handle_col,
187: # }
188: # logging.debug("START ROW")
189: # self.dataSet = {}
190: # self.colIdx = 0
191: # yield None
192: #
193: # #delegate is a generator that handles all the events "within"
194: # #this element
195: # delegate = None
196: # while not self.event == end_condition:
197: # delegate = saxtools.tenorsax.event_loop_body(
198: # dispatcher, delegate, self.event)
199: # yield None
200: #
201: # #Element closed. Wrap up
202: # logging.debug("END ROW")
203: # self.rowcnt += 1
204: # # process collected row data
205: # update=False
206: # id_val=''
207: #
208: # if self.result.has_key(self.dataSet['key']):
209: # logging.error("Key %s not unique"%self.dataSet['key'])
210: #
211: # self.result[self.dataSet['key']]=self.dataSet
212: #
213: #
214: # return
215:
216: # def handle_col(self, end_condition):
217: # dispatcher = {
218: # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
219: # self.handle_data_tag,
220: # }
221: # #print "START COL"
222: # yield None
223: # #delegate is a generator that handles all the events "within"
224: # #this element
225: # delegate = None
226: # while not self.event == end_condition:
227: # delegate = saxtools.tenorsax.event_loop_body(
228: # dispatcher, delegate, self.event)
229: # yield None
230: # #Element closed. Wrap up
231: # #print "END COL"
232: # self.colIdx += 1
233: # return
234: #
235: # def handle_data_tag(self, end_condition):
236: # #print "START DATA"
237: # content = u''
238: # yield None
239: # # gather child elements
240: # while not self.event == end_condition:
241: # if self.event[0] == saxtools.CHARACTER_DATA:
242: # content += self.params
243: # yield None
244: # #Element closed. Wrap up
245: # field = self.fieldNames[self.colIdx]
246: # self.dataSet[field.lower()] = content
247: # #print " DATA(", field, ") ", repr(content)
248: # return
1.1.2.1 dwinter 249:
250:
251: def checkImport(dsn,resultSet):
252: #now connect to the database
1.1.2.3 casties 253: logging.info("dsn: %s"%dsn)
1.1.2.1 dwinter 254: dbCon = psycopg.connect(dsn)
255: db = dbCon.cursor()
256:
257:
258: qstr="select key from personal_www"
259:
260: results=SimpleSearch(db,qstr)
261:
262: keys=[]
263: for x in results:
264: if x[0]:
1.1.2.8 casties 265: keys.append(unicodify(x[0]))
1.1.2.1 dwinter 266:
267:
268: #first step detect new entries and conflicts
269: new=[]
270: conflicts={}
271:
272: for x in resultSet.iterkeys():
273:
274: if x not in keys:
275:
276: new.append(x)
277:
278: else:
279:
280: conflict,ret=checkForConflicts(db,resultSet[x],x)
281: if conflict:
282: conflicts[x]=ret
283:
284: return new,conflicts
285:
286: def importFMPXML(filename):
287: '''
288: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
289: @param filename: xmlfile filename
290:
291: '''
292:
293: parser = sax.make_parser()
294: #The "consumer" is our own handler
295: consumer = xml_handler()
296: #Initialize Tenorsax with handler
1.1.2.12 dwinter 297: #handler = saxtools.tenorsax(consumer)
1.1.2.1 dwinter 298: #Resulting tenorsax instance is the SAX handler
1.1.2.12 dwinter 299: parser.setContentHandler(consumer)
300: #parser.setFeature(sax.handler.feature_namespaces, 1)
1.1.2.1 dwinter 301: parser.parse(filename)
302: resultSet=consumer.result # xml now transformed into an dictionary
303:
304: return resultSet
305:
306:
307:
308: def checkForConflicts(cursor,dataSet,key):
309:
310: ret=[]
311: fields=",".join(checkFields)
312:
313: qstr="select %s from personal_www where key='%s'"%(fields,key)
314:
315:
316: sr=SimpleSearch(cursor,qstr)
317:
318: if not sr:
319: return True, None
320:
321: i=0
322: retValue=False
323:
324: for checkField in checkFields:
325: dbValueR=sr[0][i]
326: if dbValueR:
1.1.2.10 dwinter 327: dbValue=unicodify(dbValueR)
1.1.2.1 dwinter 328: else:
329: dbValue=""
330:
1.1.2.7 casties 331: if checkField in dataSet:
332: setValue=dataSet[checkField]
333: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
334: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
335: ret.append((checkField,dbValue,setValue))
336: retValue=True
337:
338: else:
339: logging.warning("unknown field %s in data file!"%checkField)
340:
1.1.2.1 dwinter 341: i+=1
342:
343: return retValue,ret
344:
345:
346: ##
347: ## public static int main()
348: ##
349:
350: if __name__ == "__main__":
351:
352:
353:
354: loglevel = logging.DEBUG
355:
356:
357: logging.basicConfig(level=loglevel,
358: format='%(asctime)s %(levelname)s %(message)s',
359: datefmt='%H:%M:%S')
360:
361: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
1.1.2.12 dwinter 362: news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet)
1.1.2.1 dwinter 363:
364:
365: print "new"
366: print len(news),news
367: print "-----------"
368: print "conflicts"
369: print conflicts
370:
371: # update_fields = None
372: #
373: # if options.update_fields:
374: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
375: #
376: # parser = sax.make_parser()
377: # #The "consumer" is our own handler
378: # consumer = xml_handler(dsn=options.dsn,table=options.table,
379: # update_fields=update_fields,id_field=options.id_field,
380: # sync_mode=options.sync_mode)
381: # #Initialize Tenorsax with handler
382: # handler = saxtools.tenorsax(consumer)
383: # #Resulting tenorsax instance is the SAX handler
384: # parser.setContentHandler(handler)
385: # parser.setFeature(sax.handler.feature_namespaces, 1)
386: # parser.parse(options.filename)
387: #
388: #
389: # print "DONE!"
390:
1.1.2.10 dwinter 391:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>