--- MPIWGWeb/Attic/updatePersonalWWW.py 2007/04/18 10:03:13 1.1 +++ MPIWGWeb/Attic/updatePersonalWWW.py 2012/02/15 11:51:48 1.1.2.15 @@ -0,0 +1,391 @@ +import psycopg2 as psycopg +psyco = 2 + +import logging +from MPIWGHelper import unicodify, utf8ify + +from xml import sax +from xml.sax.handler import ContentHandler + +# namespace for FileMaker8 +fm_ns = 'http://www.filemaker.com/fmpxmlresult' + +# list of fields that are taken from XML and checked against DB as conflicts +#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] +checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work'] + + +def sql_quote(v): + # quote dictionary + quote_dict = {"\'": "''", "\\": "\\\\"} + for dkey in quote_dict.keys(): + if v.find(dkey) >= 0: + v=quote_dict[dkey].join(v.split(dkey)) + return "'%s'"%v + +def SimpleSearch(curs,query, args=None): + """execute sql query and return data""" + logging.debug("executing: "+query) + if psyco == 1: + query = query.encode("UTF-8") + #if args is not None: + # args = [ sql_quote(a) for a in args ] + #logging.debug(query) + #logging.debug(args) + + curs.execute(query, args) + logging.debug("sql done") + try: + return curs.fetchall() + except: + return None + +class xml_handler(ContentHandler): + + def __init__(self): + ''' + SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. + @param dsn: database connection string + @param table: name of the table the xml shall be imported into + ''' + + # set up parser + self.result={} + self.event = None +# self.top_dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'METADATA'): +# self.handle_meta_fields, +# (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): +# self.handle_data, +# } + + # connect database + + + + + self.dbIDs = {} + self.rowcnt = 0 + + self.currentName=None + + self.newDataset = [] + self.conflicts = [] + self.ok = [] + self.fieldNames=[] + self.currentRow={} + self.currentTag="" + return + + def startElement(self, name, attrs): + logging.debug(name) + if (name.lower() == "field") : + self.handle_meta_fields(attrs) + if (name.lower() == "row") : + logging.debug("handleROW") + self.currentRow={} # new Row + self.currentData=0 + + + if (name.lower()=="data"): + + self.currentName=self.fieldNames[self.currentData] + self.currentData+=1 + self.currentTag="data" + logging.debug("currentData"+str(self.currentData)) + logging.debug("currentName"+str(self.currentName)) + self.currentRow[self.currentName]="" #anlegen des eintrages + + def endElement(self,name): + if (name.lower() == "data") : + self.currentTag="" + if (name.lower() == "row"): + self.handle_end_row() + + def characters(self,content): + + if self.currentName is not None: + logging.debug(self.currentName+" "+content) + contentTmp = self.currentRow.get(self.currentName,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.) + self.currentRow[self.currentName]=contentTmp+content; + + + def handle_end_row(self): + + logging.debug("edd ROW") + + if self.result.has_key(self.currentRow['key']): + logging.error("Key %s not unique"%self.currentRow['key']) + + logging.debug("currentrow:"+self.currentName) + logging.debug("currentname:"+self.currentRow['key']) + + if self.currentName is not None: + self.result[self.currentRow['key']]=self.currentRow.copy() +# +# +# return + + def handle_meta_fields(self,attrs): + + #First round through the generator corresponds to the + #start element event + logging.debug("START -FIELD") + name = attrs.get('NAME') + name=name.replace(" ","_")# make sure no spaces + self.fieldNames.append(name) + + self.update_fields = self.fieldNames + + logging.debug("xml-fieldnames:"+repr(self.fieldNames)) + # get list of fields in db table + + #print "upQ: ", self.updQuery + #print "adQ: ", self.addQuery + + return + +# def handle_meta_field(self, end_condition): +# name = self.params.get((None, u'NAME')) +# yield None +# #Element closed. Wrap up +# name=name.replace(" ","_")# make sure no spaces +# self.fieldNames.append(name) +# logging.debug("FIELD name: "+name) +# return + +# def handle_data(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'ROW'): +# self.handle_row, +# } +# #First round through the generator corresponds to the +# #start element event +# logging.debug("START RESULTSET") +# self.rowcnt = 0 +# yield None +# +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# +# #Element closed. Wrap up +# logging.debug("END RESULTSET") +# +# +# +# return + +# def handle_row(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'COL'): +# self.handle_col, +# } +# logging.debug("START ROW") +# self.dataSet = {} +# self.colIdx = 0 +# yield None +# +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# +# #Element closed. Wrap up +# logging.debug("END ROW") +# self.rowcnt += 1 +# # process collected row data +# update=False +# id_val='' +# +# if self.result.has_key(self.dataSet['key']): +# logging.error("Key %s not unique"%self.dataSet['key']) +# +# self.result[self.dataSet['key']]=self.dataSet +# +# +# return + +# def handle_col(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'DATA'): +# self.handle_data_tag, +# } +# #print "START COL" +# yield None +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# #Element closed. Wrap up +# #print "END COL" +# self.colIdx += 1 +# return +# +# def handle_data_tag(self, end_condition): +# #print "START DATA" +# content = u'' +# yield None +# # gather child elements +# while not self.event == end_condition: +# if self.event[0] == saxtools.CHARACTER_DATA: +# content += self.params +# yield None +# #Element closed. Wrap up +# field = self.fieldNames[self.colIdx] +# self.dataSet[field.lower()] = content +# #print " DATA(", field, ") ", repr(content) +# return + + +def checkImport(dsn,resultSet): + #now connect to the database + logging.info("dsn: %s"%dsn) + dbCon = psycopg.connect(dsn) + db = dbCon.cursor() + + + qstr="select key from personal_www" + + results=SimpleSearch(db,qstr) + + keys=[] + for x in results: + if x[0]: + keys.append(unicodify(x[0])) + + + #first step detect new entries and conflicts + new=[] + conflicts={} + + for x in resultSet.iterkeys(): + + if x not in keys: + + new.append(x) + + else: + + conflict,ret=checkForConflicts(db,resultSet[x],x) + if conflict: + conflicts[x]=ret + + return new,conflicts + +def importFMPXML(filename): + ''' + method to import FileMaker XML file (FMPXMLRESULT format) into the table. + @param filename: xmlfile filename + + ''' + + parser = sax.make_parser() + #The "consumer" is our own handler + consumer = xml_handler() + #Initialize Tenorsax with handler + #handler = saxtools.tenorsax(consumer) + #Resulting tenorsax instance is the SAX handler + parser.setContentHandler(consumer) + #parser.setFeature(sax.handler.feature_namespaces, 1) + parser.parse(filename) + resultSet=consumer.result # xml now transformed into an dictionary + + return resultSet + + + +def checkForConflicts(cursor,dataSet,key): + + ret=[] + fields=",".join(checkFields) + + qstr="select %s from personal_www where key='%s'"%(fields,key) + + + sr=SimpleSearch(cursor,qstr) + + if not sr: + return True, None + + i=0 + retValue=False + + for checkField in checkFields: + dbValueR=sr[0][i] + if dbValueR: + dbValue=unicodify(dbValueR) + else: + dbValue="" + + if checkField in dataSet: + setValue=dataSet[checkField] + logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) + if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): + ret.append((checkField,dbValue,setValue)) + retValue=True + + else: + logging.warning("unknown field %s in data file!"%checkField) + + i+=1 + + return retValue,ret + + +## +## public static int main() +## + +if __name__ == "__main__": + + + + loglevel = logging.DEBUG + + + logging.basicConfig(level=loglevel, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%H:%M:%S') + + resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") + news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet) + + + print "new" + print len(news),news + print "-----------" + print "conflicts" + print conflicts + +# update_fields = None +# +# if options.update_fields: +# update_fields = [string.strip(s) for s in options.update_fields.split(',')] +# +# parser = sax.make_parser() +# #The "consumer" is our own handler +# consumer = xml_handler(dsn=options.dsn,table=options.table, +# update_fields=update_fields,id_field=options.id_field, +# sync_mode=options.sync_mode) +# #Initialize Tenorsax with handler +# handler = saxtools.tenorsax(consumer) +# #Resulting tenorsax instance is the SAX handler +# parser.setContentHandler(handler) +# parser.setFeature(sax.handler.feature_namespaces, 1) +# parser.parse(options.filename) +# +# +# print "DONE!" + +