--- MPIWGWeb/Attic/updatePersonalWWW.py 2007/04/18 10:03:13 1.1 +++ MPIWGWeb/Attic/updatePersonalWWW.py 2011/05/18 12:36:06 1.1.2.10 @@ -0,0 +1,356 @@ +try: + import psycopg2 as psycopg + psyco = 2 +except: + import psycopg + psyco = 1 + +import logging +from MPIWGHelper import unicodify, utf8ify + +from xml import sax +from amara import saxtools + +# namespace for FileMaker8 +fm_ns = 'http://www.filemaker.com/fmpxmlresult' + +# list of fields that are taken from XML and checked against DB as conflicts +#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] +checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work'] + + +def sql_quote(v): + # quote dictionary + quote_dict = {"\'": "''", "\\": "\\\\"} + for dkey in quote_dict.keys(): + if v.find(dkey) >= 0: + v=quote_dict[dkey].join(v.split(dkey)) + return "'%s'"%v + +def SimpleSearch(curs,query, args=None): + """execute sql query and return data""" + logging.debug("executing: "+query) + if psyco == 1: + query = query.encode("UTF-8") + #if args is not None: + # args = [ sql_quote(a) for a in args ] + #logging.debug(query) + #logging.debug(args) + + curs.execute(query, args) + logging.debug("sql done") + try: + return curs.fetchall() + except: + return None + +class xml_handler: + + def __init__(self): + ''' + SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. + @param dsn: database connection string + @param table: name of the table the xml shall be imported into + ''' + + # set up parser + self.result={} + self.event = None + self.top_dispatcher = { + (saxtools.START_ELEMENT, fm_ns, u'METADATA'): + self.handle_meta_fields, + (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): + self.handle_data, + } + + # connect database + + + + + self.dbIDs = {} + self.rowcnt = 0 + + + + self.newDataset = [] + self.conflicts = [] + self.ok = [] + self.fieldNames=[] + return + + def handle_meta_fields(self, end_condition): + dispatcher = { + (saxtools.START_ELEMENT, fm_ns, u'FIELD'): + self.handle_meta_field, + } + #First round through the generator corresponds to the + #start element event + logging.debug("START METADATA") + yield None + + #delegate is a generator that handles all the events "within" + #this element + delegate = None + while not self.event == end_condition: + delegate = saxtools.tenorsax.event_loop_body( + dispatcher, delegate, self.event) + yield None + + #Element closed. Wrap up + logging.debug("END METADATA") + + self.update_fields = self.fieldNames + + logging.debug("xml-fieldnames:"+repr(self.fieldNames)) + # get list of fields in db table + + #print "upQ: ", self.updQuery + #print "adQ: ", self.addQuery + + return + + def handle_meta_field(self, end_condition): + name = self.params.get((None, u'NAME')) + yield None + #Element closed. Wrap up + name=name.replace(" ","_")# make sure no spaces + self.fieldNames.append(name) + logging.debug("FIELD name: "+name) + return + + def handle_data(self, end_condition): + dispatcher = { + (saxtools.START_ELEMENT, fm_ns, u'ROW'): + self.handle_row, + } + #First round through the generator corresponds to the + #start element event + logging.debug("START RESULTSET") + self.rowcnt = 0 + yield None + + #delegate is a generator that handles all the events "within" + #this element + delegate = None + while not self.event == end_condition: + delegate = saxtools.tenorsax.event_loop_body( + dispatcher, delegate, self.event) + yield None + + #Element closed. Wrap up + logging.debug("END RESULTSET") + + + + return + + def handle_row(self, end_condition): + dispatcher = { + (saxtools.START_ELEMENT, fm_ns, u'COL'): + self.handle_col, + } + logging.debug("START ROW") + self.dataSet = {} + self.colIdx = 0 + yield None + + #delegate is a generator that handles all the events "within" + #this element + delegate = None + while not self.event == end_condition: + delegate = saxtools.tenorsax.event_loop_body( + dispatcher, delegate, self.event) + yield None + + #Element closed. Wrap up + logging.debug("END ROW") + self.rowcnt += 1 + # process collected row data + update=False + id_val='' + + if self.result.has_key(self.dataSet['key']): + logging.error("Key %s not unique"%self.dataSet['key']) + + self.result[self.dataSet['key']]=self.dataSet + + + return + + def handle_col(self, end_condition): + dispatcher = { + (saxtools.START_ELEMENT, fm_ns, u'DATA'): + self.handle_data_tag, + } + #print "START COL" + yield None + #delegate is a generator that handles all the events "within" + #this element + delegate = None + while not self.event == end_condition: + delegate = saxtools.tenorsax.event_loop_body( + dispatcher, delegate, self.event) + yield None + #Element closed. Wrap up + #print "END COL" + self.colIdx += 1 + return + + def handle_data_tag(self, end_condition): + #print "START DATA" + content = u'' + yield None + # gather child elements + while not self.event == end_condition: + if self.event[0] == saxtools.CHARACTER_DATA: + content += self.params + yield None + #Element closed. Wrap up + field = self.fieldNames[self.colIdx] + self.dataSet[field.lower()] = content + #print " DATA(", field, ") ", repr(content) + return + + +def checkImport(dsn,resultSet): + #now connect to the database + logging.info("dsn: %s"%dsn) + dbCon = psycopg.connect(dsn) + db = dbCon.cursor() + + + qstr="select key from personal_www" + + results=SimpleSearch(db,qstr) + + keys=[] + for x in results: + if x[0]: + keys.append(unicodify(x[0])) + + + #first step detect new entries and conflicts + new=[] + conflicts={} + + for x in resultSet.iterkeys(): + + if x not in keys: + + new.append(x) + + else: + + conflict,ret=checkForConflicts(db,resultSet[x],x) + if conflict: + conflicts[x]=ret + + return new,conflicts + +def importFMPXML(filename): + ''' + method to import FileMaker XML file (FMPXMLRESULT format) into the table. + @param filename: xmlfile filename + + ''' + + parser = sax.make_parser() + #The "consumer" is our own handler + consumer = xml_handler() + #Initialize Tenorsax with handler + handler = saxtools.tenorsax(consumer) + #Resulting tenorsax instance is the SAX handler + parser.setContentHandler(handler) + parser.setFeature(sax.handler.feature_namespaces, 1) + parser.parse(filename) + resultSet=consumer.result # xml now transformed into an dictionary + + return resultSet + + + +def checkForConflicts(cursor,dataSet,key): + + ret=[] + fields=",".join(checkFields) + + qstr="select %s from personal_www where key='%s'"%(fields,key) + + + sr=SimpleSearch(cursor,qstr) + + if not sr: + return True, None + + i=0 + retValue=False + + for checkField in checkFields: + dbValueR=sr[0][i] + if dbValueR: + dbValue=unicodify(dbValueR) + else: + dbValue="" + + if checkField in dataSet: + setValue=dataSet[checkField] + logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) + if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): + ret.append((checkField,dbValue,setValue)) + retValue=True + + else: + logging.warning("unknown field %s in data file!"%checkField) + + i+=1 + + return retValue,ret + + +## +## public static int main() +## + +if __name__ == "__main__": + + + + loglevel = logging.DEBUG + + + logging.basicConfig(level=loglevel, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%H:%M:%S') + + resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") + news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet) + + + print "new" + print len(news),news + print "-----------" + print "conflicts" + print conflicts + +# update_fields = None +# +# if options.update_fields: +# update_fields = [string.strip(s) for s in options.update_fields.split(',')] +# +# parser = sax.make_parser() +# #The "consumer" is our own handler +# consumer = xml_handler(dsn=options.dsn,table=options.table, +# update_fields=update_fields,id_field=options.id_field, +# sync_mode=options.sync_mode) +# #Initialize Tenorsax with handler +# handler = saxtools.tenorsax(consumer) +# #Resulting tenorsax instance is the SAX handler +# parser.setContentHandler(handler) +# parser.setFeature(sax.handler.feature_namespaces, 1) +# parser.parse(options.filename) +# +# +# print "DONE!" + +