--- ZSQLExtend/importFMPXML.py 2008/09/05 19:05:57 1.27 +++ ZSQLExtend/importFMPXML.py 2011/06/23 09:14:53 1.33 @@ -6,6 +6,7 @@ import logging import sys import types import time +import re from xml import sax from amara import saxtools @@ -22,7 +23,7 @@ except: fm_ns = 'http://www.filemaker.com/fmpxmlresult' -version_string = "V0.6.1 ROC 2.7.2008" +version_string = "V0.6.7 ROC 21.6.2011" def unicodify(text, withNone=False): """decode str (utf-8 or latin-1 representation) into unicode object""" @@ -68,6 +69,20 @@ def sql_quote(v): v=string.join(string.split(v,dkey),quote_dict[dkey]) return "'%s'"%v +def sqlName(s, lc=True, more=''): + """returns restricted ASCII-only version of string""" + if s is None: + return "" + + # remove ' + s = s.replace("'","") + # all else -> "_" + s = re.sub('[^A-Za-z0-9_'+more+']','_',s) + if lc: + return s.lower() + + return s + def SimpleSearch(curs,query, args=None, ascii=False): """execute sql query and return data""" #logger.debug("executing: "+query) @@ -160,6 +175,7 @@ class xml_handler: self.replace_table = getattr(options,"replace_table",None) self.backup_table = getattr(options,"backup_table",None) self.read_before_update = getattr(options,"read_before_update",None) + self.debug_data = getattr(options,"debug_data",None) self.logger.debug("dsn: "+repr(getattr(options,"dsn",None))) self.logger.debug("table: "+repr(self.table)) @@ -172,6 +188,7 @@ class xml_handler: self.logger.debug("replace_table: "+repr(self.replace_table)) self.logger.debug("backup_table: "+repr(self.backup_table)) self.logger.debug("read_before_update: "+repr(self.read_before_update)) + self.logger.debug("debug_data: "+repr(self.debug_data)) self.dbIDs = {} self.rowcnt = 0 @@ -203,7 +220,8 @@ class xml_handler: #First round through the generator corresponds to the #start element event self.logger.info("reading metadata...") - self.logger.debug("START METADATA") + if self.debug_data: + self.logger.debug("START METADATA") yield None #delegate is a generator that handles all the events "within" @@ -215,7 +233,8 @@ class xml_handler: yield None #Element closed. Wrap up - self.logger.debug("END METADATA") + if self.debug_data: + self.logger.debug("END METADATA") # rename table for backup if self.backup_table: @@ -347,7 +366,7 @@ class xml_handler: #Element closed. Wrap up if self.lc_names: # clean name - sqlname = name.replace(" ","_").lower() + sqlname = sqlName(name) else: sqlname = name self.xml_field_names.append(name) @@ -365,7 +384,8 @@ class xml_handler: #First round through the generator corresponds to the #start element event self.logger.info("reading data...") - self.logger.debug("START RESULTSET") + if self.debug_data: + self.logger.debug("START RESULTSET") self.rowcnt = 0 yield None @@ -378,23 +398,29 @@ class xml_handler: yield None #Element closed. Wrap up - self.logger.debug("END RESULTSET") + if self.debug_data: + self.logger.debug("END RESULTSET") self.dbCon.commit() if self.sync_mode: # delete unmatched entries in db - self.logger.info("deleting unmatched rows from db") - delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) - for id in self.dbIDs.keys(): - # find all not-updated fields - if self.dbIDs[id] == 0: - self.logger.info(" delete: %s"%id) - SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) - - elif self.dbIDs[id] > 1: - self.logger.info(" sync: ID %s used more than once?"%id) - - self.dbCon.commit() + if self.rowcnt > 0: + self.logger.info("deleting unmatched rows from db") + delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) + for id in self.dbIDs.keys(): + # find all not-updated fields + if self.dbIDs[id] == 0: + self.logger.info(" delete: %s"%id) + SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) + + elif self.dbIDs[id] > 1: + self.logger.info(" sync: ID %s used more than once?"%id) + + self.dbCon.commit() + + else: + # safety in case we had an empty file + self.logger.warning("no rows read! not deleting unmatched rows!") # reinstate backup tables if self.backup_table and not self.id_field: @@ -407,6 +433,7 @@ class xml_handler: self.db.execute(qstr) self.dbCon.commit() + self.logger.info("Done (%s rows)"%self.rowcnt) return def handle_row(self, end_condition): @@ -414,7 +441,8 @@ class xml_handler: (saxtools.START_ELEMENT, fm_ns, u'COL'): self.handle_col, } - self.logger.debug("START ROW") + if self.debug_data: + self.logger.debug("START ROW") self.xml_data = {} self.colIdx = 0 yield None @@ -428,7 +456,8 @@ class xml_handler: yield None #Element closed. Wrap up - self.logger.debug("END ROW") + if self.debug_data: + self.logger.debug("END ROW") self.rowcnt += 1 # process collected row data update=False @@ -436,9 +465,17 @@ class xml_handler: # synchronize by id_field if self.id_field: if self.id_type == 'integer': - id_val = int(self.xml_data[self.xml_id]) + try: + id_val = int(self.xml_data[self.xml_id]) + except: + pass else: id_val = self.xml_data[self.xml_id] + + if not id_val: + # abort update + self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt) + return if id_val in self.dbIDs: self.dbIDs[id_val] += 1 @@ -454,7 +491,7 @@ class xml_handler: f = self.xml_field_map[fn] val = self.xml_data[fn] type = self.sql_fields[f.getName()].getType() - if type == "date" and len(val) == 0: + if type == "date" and len(val.strip()) == 0: # empty date field val = None @@ -468,7 +505,8 @@ class xml_handler: # update existing row (by id_field) if self.read_before_update: # read data - self.logger.debug("update check: %s = %s"%(id_val, args)) + if self.debug_data: + self.logger.debug("update check: %s = %s"%(id_val, args)) oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db) #i = 0 #for v in oldrow[0]: @@ -476,19 +514,22 @@ class xml_handler: # i += 1 if tuple(oldrow[0]) != tuple(args): # data has changed -- update - self.logger.debug("really update: %s = %s"%(id_val, args)) + if self.debug_data: + self.logger.debug("really update: %s = %s"%(id_val, args)) args.append(id_val) # last arg is id SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) else: # always update - self.logger.debug("update: %s = %s"%(id_val, args)) + if self.debug_data: + self.logger.debug("update: %s = %s"%(id_val, args)) args.append(id_val) # last arg is id SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) else: # create new row - self.logger.debug("insert: %s"%args) + if self.debug_data: + self.logger.debug("insert: %s"%args) SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val)) @@ -618,6 +659,9 @@ if __name__ == "__main__": opars.add_option("-d", "--debug", default=False, action="store_true", dest="debug", help="debug mode (more output)") + opars.add_option("--debug-data", default=False, action="store_true", + dest="debug_data", + help="debug mode for data (even more output)") (options, args) = opars.parse_args()