Annotation of MPIWGWeb/updatePersonalWWW.py, revision 1.1.2.12
1.1.2.11 dwinter 1: import psycopg2 as psycopg
2: psyco = 2
1.1.2.1 dwinter 3:
4: import logging
1.1.2.8 casties 5: from MPIWGHelper import unicodify, utf8ify
1.1.2.1 dwinter 6:
7: from xml import sax
1.1.2.12! dwinter 8: from xml.sax.handler import ContentHandler
! 9: #from amara import saxtools
1.1.2.1 dwinter 10:
1.1.2.4 casties 11: # namespace for FileMaker8
1.1.2.1 dwinter 12: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
1.1.2.4 casties 13:
14: # list of fields that are taken from XML and checked against DB as conflicts
1.1.2.1 dwinter 15: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
1.1.2.10 dwinter 16: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
17:
1.1.2.1 dwinter 18:
19: def sql_quote(v):
20: # quote dictionary
21: quote_dict = {"\'": "''", "\\": "\\\\"}
22: for dkey in quote_dict.keys():
23: if v.find(dkey) >= 0:
24: v=quote_dict[dkey].join(v.split(dkey))
25: return "'%s'"%v
26:
27: def SimpleSearch(curs,query, args=None):
28: """execute sql query and return data"""
29: logging.debug("executing: "+query)
30: if psyco == 1:
31: query = query.encode("UTF-8")
32: #if args is not None:
33: # args = [ sql_quote(a) for a in args ]
34: #logging.debug(query)
35: #logging.debug(args)
36:
37: curs.execute(query, args)
38: logging.debug("sql done")
39: try:
40: return curs.fetchall()
41: except:
42: return None
43:
1.1.2.12! dwinter 44: class xml_handler(ContentHandler):
1.1.2.1 dwinter 45:
46: def __init__(self):
47: '''
48: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
49: @param dsn: database connection string
50: @param table: name of the table the xml shall be imported into
51: '''
52:
53: # set up parser
54: self.result={}
55: self.event = None
1.1.2.12! dwinter 56: # self.top_dispatcher = {
! 57: # (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
! 58: # self.handle_meta_fields,
! 59: # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
! 60: # self.handle_data,
! 61: # }
1.1.2.1 dwinter 62:
63: # connect database
64:
65:
66:
67:
68: self.dbIDs = {}
69: self.rowcnt = 0
70:
1.1.2.12! dwinter 71: self.currentName=None
1.1.2.1 dwinter 72:
73: self.newDataset = []
74: self.conflicts = []
75: self.ok = []
76: self.fieldNames=[]
1.1.2.12! dwinter 77: self.currentRow={}
! 78: self.currentTag=""
1.1.2.1 dwinter 79: return
80:
1.1.2.12! dwinter 81: def startElement(self, name, attrs):
! 82: logging.debug(name)
! 83: if (name.lower() == "field") :
! 84: self.handle_meta_fields(attrs)
! 85: if (name.lower() == "row") :
! 86: logging.debug("handleROW")
! 87: self.currentRow={} # new Row
! 88: self.currentData=0
! 89:
! 90:
! 91: if (name.lower()=="data"):
! 92:
! 93: self.currentName=self.fieldNames[self.currentData]
! 94: self.currentData+=1
! 95: self.currentTag="data"
! 96:
! 97: def endElement(self,name):
! 98: if (name.lower() == "data") :
! 99: self.currentTag=""
! 100: if (name.lower() == "row"):
! 101: self.handle_end_row()
! 102:
! 103: def characters(self,content):
! 104:
! 105: if self.currentName is not None:
! 106: logging.debug(self.currentName+" "+content)
! 107: self.currentRow[self.currentName]=content;
! 108:
! 109:
! 110: def handle_end_row(self):
! 111:
! 112: logging.debug("edd ROW")
! 113:
! 114: if self.result.has_key(self.currentRow['key']):
! 115: logging.error("Key %s not unique"%self.currentRow['key'])
! 116:
! 117: if self.currentName is not None:
! 118: self.result[self.currentRow['key']]=self.currentRow.copy()
! 119: #
! 120: #
! 121: # return
! 122:
! 123: def handle_meta_fields(self,attrs):
! 124:
1.1.2.1 dwinter 125: #First round through the generator corresponds to the
126: #start element event
1.1.2.12! dwinter 127: logging.debug("START -FIELD")
! 128: name = attrs.get('NAME')
! 129: name=name.replace(" ","_")# make sure no spaces
! 130: self.fieldNames.append(name)
1.1.2.1 dwinter 131:
132: self.update_fields = self.fieldNames
133:
134: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
135: # get list of fields in db table
136:
137: #print "upQ: ", self.updQuery
138: #print "adQ: ", self.addQuery
139:
140: return
141:
1.1.2.12! dwinter 142: # def handle_meta_field(self, end_condition):
! 143: # name = self.params.get((None, u'NAME'))
! 144: # yield None
! 145: # #Element closed. Wrap up
! 146: # name=name.replace(" ","_")# make sure no spaces
! 147: # self.fieldNames.append(name)
! 148: # logging.debug("FIELD name: "+name)
! 149: # return
! 150:
! 151: # def handle_data(self, end_condition):
! 152: # dispatcher = {
! 153: # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
! 154: # self.handle_row,
! 155: # }
! 156: # #First round through the generator corresponds to the
! 157: # #start element event
! 158: # logging.debug("START RESULTSET")
! 159: # self.rowcnt = 0
! 160: # yield None
! 161: #
! 162: # #delegate is a generator that handles all the events "within"
! 163: # #this element
! 164: # delegate = None
! 165: # while not self.event == end_condition:
! 166: # delegate = saxtools.tenorsax.event_loop_body(
! 167: # dispatcher, delegate, self.event)
! 168: # yield None
! 169: #
! 170: # #Element closed. Wrap up
! 171: # logging.debug("END RESULTSET")
! 172: #
! 173: #
! 174: #
! 175: # return
! 176:
! 177: # def handle_row(self, end_condition):
! 178: # dispatcher = {
! 179: # (saxtools.START_ELEMENT, fm_ns, u'COL'):
! 180: # self.handle_col,
! 181: # }
! 182: # logging.debug("START ROW")
! 183: # self.dataSet = {}
! 184: # self.colIdx = 0
! 185: # yield None
! 186: #
! 187: # #delegate is a generator that handles all the events "within"
! 188: # #this element
! 189: # delegate = None
! 190: # while not self.event == end_condition:
! 191: # delegate = saxtools.tenorsax.event_loop_body(
! 192: # dispatcher, delegate, self.event)
! 193: # yield None
! 194: #
! 195: # #Element closed. Wrap up
! 196: # logging.debug("END ROW")
! 197: # self.rowcnt += 1
! 198: # # process collected row data
! 199: # update=False
! 200: # id_val=''
! 201: #
! 202: # if self.result.has_key(self.dataSet['key']):
! 203: # logging.error("Key %s not unique"%self.dataSet['key'])
! 204: #
! 205: # self.result[self.dataSet['key']]=self.dataSet
! 206: #
! 207: #
! 208: # return
! 209:
! 210: # def handle_col(self, end_condition):
! 211: # dispatcher = {
! 212: # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
! 213: # self.handle_data_tag,
! 214: # }
! 215: # #print "START COL"
! 216: # yield None
! 217: # #delegate is a generator that handles all the events "within"
! 218: # #this element
! 219: # delegate = None
! 220: # while not self.event == end_condition:
! 221: # delegate = saxtools.tenorsax.event_loop_body(
! 222: # dispatcher, delegate, self.event)
! 223: # yield None
! 224: # #Element closed. Wrap up
! 225: # #print "END COL"
! 226: # self.colIdx += 1
! 227: # return
! 228: #
! 229: # def handle_data_tag(self, end_condition):
! 230: # #print "START DATA"
! 231: # content = u''
! 232: # yield None
! 233: # # gather child elements
! 234: # while not self.event == end_condition:
! 235: # if self.event[0] == saxtools.CHARACTER_DATA:
! 236: # content += self.params
! 237: # yield None
! 238: # #Element closed. Wrap up
! 239: # field = self.fieldNames[self.colIdx]
! 240: # self.dataSet[field.lower()] = content
! 241: # #print " DATA(", field, ") ", repr(content)
! 242: # return
1.1.2.1 dwinter 243:
244:
245: def checkImport(dsn,resultSet):
246: #now connect to the database
1.1.2.3 casties 247: logging.info("dsn: %s"%dsn)
1.1.2.1 dwinter 248: dbCon = psycopg.connect(dsn)
249: db = dbCon.cursor()
250:
251:
252: qstr="select key from personal_www"
253:
254: results=SimpleSearch(db,qstr)
255:
256: keys=[]
257: for x in results:
258: if x[0]:
1.1.2.8 casties 259: keys.append(unicodify(x[0]))
1.1.2.1 dwinter 260:
261:
262: #first step detect new entries and conflicts
263: new=[]
264: conflicts={}
265:
266: for x in resultSet.iterkeys():
267:
268: if x not in keys:
269:
270: new.append(x)
271:
272: else:
273:
274: conflict,ret=checkForConflicts(db,resultSet[x],x)
275: if conflict:
276: conflicts[x]=ret
277:
278: return new,conflicts
279:
280: def importFMPXML(filename):
281: '''
282: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
283: @param filename: xmlfile filename
284:
285: '''
286:
287: parser = sax.make_parser()
288: #The "consumer" is our own handler
289: consumer = xml_handler()
290: #Initialize Tenorsax with handler
1.1.2.12! dwinter 291: #handler = saxtools.tenorsax(consumer)
1.1.2.1 dwinter 292: #Resulting tenorsax instance is the SAX handler
1.1.2.12! dwinter 293: parser.setContentHandler(consumer)
! 294: #parser.setFeature(sax.handler.feature_namespaces, 1)
1.1.2.1 dwinter 295: parser.parse(filename)
296: resultSet=consumer.result # xml now transformed into an dictionary
297:
298: return resultSet
299:
300:
301:
302: def checkForConflicts(cursor,dataSet,key):
303:
304: ret=[]
305: fields=",".join(checkFields)
306:
307: qstr="select %s from personal_www where key='%s'"%(fields,key)
308:
309:
310: sr=SimpleSearch(cursor,qstr)
311:
312: if not sr:
313: return True, None
314:
315: i=0
316: retValue=False
317:
318: for checkField in checkFields:
319: dbValueR=sr[0][i]
320: if dbValueR:
1.1.2.10 dwinter 321: dbValue=unicodify(dbValueR)
1.1.2.1 dwinter 322: else:
323: dbValue=""
324:
1.1.2.7 casties 325: if checkField in dataSet:
326: setValue=dataSet[checkField]
327: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
328: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
329: ret.append((checkField,dbValue,setValue))
330: retValue=True
331:
332: else:
333: logging.warning("unknown field %s in data file!"%checkField)
334:
1.1.2.1 dwinter 335: i+=1
336:
337: return retValue,ret
338:
339:
340: ##
341: ## public static int main()
342: ##
343:
344: if __name__ == "__main__":
345:
346:
347:
348: loglevel = logging.DEBUG
349:
350:
351: logging.basicConfig(level=loglevel,
352: format='%(asctime)s %(levelname)s %(message)s',
353: datefmt='%H:%M:%S')
354:
355: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
1.1.2.12! dwinter 356: news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet)
1.1.2.1 dwinter 357:
358:
359: print "new"
360: print len(news),news
361: print "-----------"
362: print "conflicts"
363: print conflicts
364:
365: # update_fields = None
366: #
367: # if options.update_fields:
368: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
369: #
370: # parser = sax.make_parser()
371: # #The "consumer" is our own handler
372: # consumer = xml_handler(dsn=options.dsn,table=options.table,
373: # update_fields=update_fields,id_field=options.id_field,
374: # sync_mode=options.sync_mode)
375: # #Initialize Tenorsax with handler
376: # handler = saxtools.tenorsax(consumer)
377: # #Resulting tenorsax instance is the SAX handler
378: # parser.setContentHandler(handler)
379: # parser.setFeature(sax.handler.feature_namespaces, 1)
380: # parser.parse(options.filename)
381: #
382: #
383: # print "DONE!"
384:
1.1.2.10 dwinter 385:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>