import psycopg2 as psycopg psyco = 2 import logging from MPIWGHelper import unicodify, utf8ify from xml import sax from xml.sax.handler import ContentHandler # namespace for FileMaker8 fm_ns = 'http://www.filemaker.com/fmpxmlresult' # list of fields that are taken from XML and checked against DB as conflicts #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work'] def sql_quote(v): # quote dictionary quote_dict = {"\'": "''", "\\": "\\\\"} for dkey in quote_dict.keys(): if v.find(dkey) >= 0: v=quote_dict[dkey].join(v.split(dkey)) return "'%s'"%v def SimpleSearch(curs,query, args=None): """execute sql query and return data""" logging.debug("executing: "+query) if psyco == 1: query = query.encode("UTF-8") #if args is not None: # args = [ sql_quote(a) for a in args ] #logging.debug(query) #logging.debug(args) curs.execute(query, args) logging.debug("sql done") try: return curs.fetchall() except: return None class xml_handler(ContentHandler): def __init__(self): ''' SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. @param dsn: database connection string @param table: name of the table the xml shall be imported into ''' # set up parser self.result={} self.event = None # self.top_dispatcher = { # (saxtools.START_ELEMENT, fm_ns, u'METADATA'): # self.handle_meta_fields, # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): # self.handle_data, # } # connect database self.dbIDs = {} self.rowcnt = 0 self.currentName=None self.newDataset = [] self.conflicts = [] self.ok = [] self.fieldNames=[] self.currentRow={} self.currentTag="" return def startElement(self, name, attrs): logging.debug(name) if (name.lower() == "field") : self.handle_meta_fields(attrs) if (name.lower() == "row") : logging.debug("handleROW") self.currentRow={} # new Row self.currentData=0 if (name.lower()=="data"): self.currentName=self.fieldNames[self.currentData] self.currentData+=1 self.currentTag="data" logging.debug("currentData"+str(self.currentData)) logging.debug("currentName"+str(self.currentName)) self.currentRow[self.currentName]="" #anlegen des eintrages def endElement(self,name): if (name.lower() == "data") : self.currentTag="" if (name.lower() == "row"): self.handle_end_row() def characters(self,content): if self.currentName is not None: logging.debug(self.currentName+" "+content) self.currentRow[self.currentName]=content; def handle_end_row(self): logging.debug("edd ROW") if self.result.has_key(self.currentRow['key']): logging.error("Key %s not unique"%self.currentRow['key']) logging.debug("currentrow:"+self.currentName) logging.debug("currentname:"+self.currentRow['key']) if self.currentName is not None: self.result[self.currentRow['key']]=self.currentRow.copy() # # # return def handle_meta_fields(self,attrs): #First round through the generator corresponds to the #start element event logging.debug("START -FIELD") name = attrs.get('NAME') name=name.replace(" ","_")# make sure no spaces self.fieldNames.append(name) self.update_fields = self.fieldNames logging.debug("xml-fieldnames:"+repr(self.fieldNames)) # get list of fields in db table #print "upQ: ", self.updQuery #print "adQ: ", self.addQuery return # def handle_meta_field(self, end_condition): # name = self.params.get((None, u'NAME')) # yield None # #Element closed. Wrap up # name=name.replace(" ","_")# make sure no spaces # self.fieldNames.append(name) # logging.debug("FIELD name: "+name) # return # def handle_data(self, end_condition): # dispatcher = { # (saxtools.START_ELEMENT, fm_ns, u'ROW'): # self.handle_row, # } # #First round through the generator corresponds to the # #start element event # logging.debug("START RESULTSET") # self.rowcnt = 0 # yield None # # #delegate is a generator that handles all the events "within" # #this element # delegate = None # while not self.event == end_condition: # delegate = saxtools.tenorsax.event_loop_body( # dispatcher, delegate, self.event) # yield None # # #Element closed. Wrap up # logging.debug("END RESULTSET") # # # # return # def handle_row(self, end_condition): # dispatcher = { # (saxtools.START_ELEMENT, fm_ns, u'COL'): # self.handle_col, # } # logging.debug("START ROW") # self.dataSet = {} # self.colIdx = 0 # yield None # # #delegate is a generator that handles all the events "within" # #this element # delegate = None # while not self.event == end_condition: # delegate = saxtools.tenorsax.event_loop_body( # dispatcher, delegate, self.event) # yield None # # #Element closed. Wrap up # logging.debug("END ROW") # self.rowcnt += 1 # # process collected row data # update=False # id_val='' # # if self.result.has_key(self.dataSet['key']): # logging.error("Key %s not unique"%self.dataSet['key']) # # self.result[self.dataSet['key']]=self.dataSet # # # return # def handle_col(self, end_condition): # dispatcher = { # (saxtools.START_ELEMENT, fm_ns, u'DATA'): # self.handle_data_tag, # } # #print "START COL" # yield None # #delegate is a generator that handles all the events "within" # #this element # delegate = None # while not self.event == end_condition: # delegate = saxtools.tenorsax.event_loop_body( # dispatcher, delegate, self.event) # yield None # #Element closed. Wrap up # #print "END COL" # self.colIdx += 1 # return # # def handle_data_tag(self, end_condition): # #print "START DATA" # content = u'' # yield None # # gather child elements # while not self.event == end_condition: # if self.event[0] == saxtools.CHARACTER_DATA: # content += self.params # yield None # #Element closed. Wrap up # field = self.fieldNames[self.colIdx] # self.dataSet[field.lower()] = content # #print " DATA(", field, ") ", repr(content) # return def checkImport(dsn,resultSet): #now connect to the database logging.info("dsn: %s"%dsn) dbCon = psycopg.connect(dsn) db = dbCon.cursor() qstr="select key from personal_www" results=SimpleSearch(db,qstr) keys=[] for x in results: if x[0]: keys.append(unicodify(x[0])) #first step detect new entries and conflicts new=[] conflicts={} for x in resultSet.iterkeys(): if x not in keys: new.append(x) else: conflict,ret=checkForConflicts(db,resultSet[x],x) if conflict: conflicts[x]=ret return new,conflicts def importFMPXML(filename): ''' method to import FileMaker XML file (FMPXMLRESULT format) into the table. @param filename: xmlfile filename ''' parser = sax.make_parser() #The "consumer" is our own handler consumer = xml_handler() #Initialize Tenorsax with handler #handler = saxtools.tenorsax(consumer) #Resulting tenorsax instance is the SAX handler parser.setContentHandler(consumer) #parser.setFeature(sax.handler.feature_namespaces, 1) parser.parse(filename) resultSet=consumer.result # xml now transformed into an dictionary return resultSet def checkForConflicts(cursor,dataSet,key): ret=[] fields=",".join(checkFields) qstr="select %s from personal_www where key='%s'"%(fields,key) sr=SimpleSearch(cursor,qstr) if not sr: return True, None i=0 retValue=False for checkField in checkFields: dbValueR=sr[0][i] if dbValueR: dbValue=unicodify(dbValueR) else: dbValue="" if checkField in dataSet: setValue=dataSet[checkField] logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): ret.append((checkField,dbValue,setValue)) retValue=True else: logging.warning("unknown field %s in data file!"%checkField) i+=1 return retValue,ret ## ## public static int main() ## if __name__ == "__main__": loglevel = logging.DEBUG logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s', datefmt='%H:%M:%S') resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet) print "new" print len(news),news print "-----------" print "conflicts" print conflicts # update_fields = None # # if options.update_fields: # update_fields = [string.strip(s) for s in options.update_fields.split(',')] # # parser = sax.make_parser() # #The "consumer" is our own handler # consumer = xml_handler(dsn=options.dsn,table=options.table, # update_fields=update_fields,id_field=options.id_field, # sync_mode=options.sync_mode) # #Initialize Tenorsax with handler # handler = saxtools.tenorsax(consumer) # #Resulting tenorsax instance is the SAX handler # parser.setContentHandler(handler) # parser.setFeature(sax.handler.feature_namespaces, 1) # parser.parse(options.filename) # # # print "DONE!"