--- ZSQLExtend/importFMPXML.py 2009/02/10 17:54:00 1.29 +++ ZSQLExtend/importFMPXML.py 2012/02/15 08:41:01 1.35 @@ -6,9 +6,11 @@ import logging import sys import types import time +import re from xml import sax -from amara import saxtools +from xml.sax.handler import ContentHandler +#from amara import saxtools try: import psycopg2 as psycopg @@ -22,7 +24,7 @@ except: fm_ns = 'http://www.filemaker.com/fmpxmlresult' -version_string = "V0.6.3 ROC 10.2.2009" +version_string = "V0.6.7 ROC 21.6.2011" def unicodify(text, withNone=False): """decode str (utf-8 or latin-1 representation) into unicode object""" @@ -68,6 +70,20 @@ def sql_quote(v): v=string.join(string.split(v,dkey),quote_dict[dkey]) return "'%s'"%v +def sqlName(s, lc=True, more=''): + """returns restricted ASCII-only version of string""" + if s is None: + return "" + + # remove ' + s = s.replace("'","") + # all else -> "_" + s = re.sub('[^A-Za-z0-9_'+more+']','_',s) + if lc: + return s.lower() + + return s + def SimpleSearch(curs,query, args=None, ascii=False): """execute sql query and return data""" #logger.debug("executing: "+query) @@ -110,7 +126,7 @@ class TableColumn: return self.name -class xml_handler: +class xml_handler(ContentHandler): def __init__(self,options): """SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. @param options: dict of options @@ -136,13 +152,15 @@ class xml_handler: # set up parser + self.result={} self.event = None - self.top_dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'METADATA'): - self.handle_meta_fields, - (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): - self.handle_data_fields, - } + +# self.top_dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'METADATA'): +# self.handle_meta_fields, +# (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): +# self.handle_data_fields, +# } # connect database self.dbCon = psycopg.connect(options.dsn) @@ -178,6 +196,8 @@ class xml_handler: self.dbIDs = {} self.rowcnt = 0 + self.currentName = None + if self.id_field is not None: # prepare a list of ids for sync mode qstr="select %s from %s"%(self.id_field,self.table) @@ -197,27 +217,71 @@ class xml_handler: return - def handle_meta_fields(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'FIELD'): - self.handle_meta_field, - } + def startElement(self, name, attrs): + logging.debug(name) + if (name.lower() == "field") : + self.handle_meta_field(attrs) + if (name.lower() == "row") : + logging.debug("handleROW") + self.handle_row(attrs) + if (name.lower()=="resultset"): + self.handle_data_fields(attrs) + + if (name.lower()=="data"): + self.handle_data_tag(attrs); + + def endElement(self,name): + if (name.lower() == "resultset") : + self.currentTag="" + self.handle_end_data_fields() + if (name.lower() == "field") : + self.handle_end_meta_field() + if (name.lower() == "metadata"): + self.handle_end_meta_fields() + if (name.lower() == "row") : + logging.debug("handleROW") + self.handle_end_row() + + if (name.lower() == "col") : + self.handle_end_col() + def characters(self,content): + + try: + fn = self.xml_field_names[self.colIdx] + + contentTmp = self.xml_data.get(fn,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.) + self.xml_data[fn] = contentTmp+content + except: + logging.debug(content) + pass + +# if self.currentName is not None: +# logging.debug(self.currentName+" "+content) +# self.currentRow[self.currentName]=content; +# + def handle_end_meta_fields(self): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'FIELD'): +# self.handle_meta_field, +# } #First round through the generator corresponds to the #start element event - self.logger.info("reading metadata...") - self.logger.debug("START METADATA") - yield None +# self.logger.info("reading metadata...") +# if self.debug_data: +# self.logger.debug("START METADATA") +# #yield None #delegate is a generator that handles all the events "within" #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - - #Element closed. Wrap up - self.logger.debug("END METADATA") +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# +# #Element closed. Wrap up + if self.debug_data: + self.logger.debug("END METADATA") # rename table for backup if self.backup_table: @@ -343,13 +407,16 @@ class xml_handler: self.logger.debug("add-query: "+self.addQuery) return - def handle_meta_field(self, end_condition): - name = self.params.get((None, u'NAME')) - yield None + def handle_meta_field(self, attrs): + self.currentName = attrs.get('NAME') + #yield None + return + def handle_end_meta_field(self): #Element closed. Wrap up + name = self.currentName if self.lc_names: # clean name - sqlname = name.replace(" ","_").lower() + sqlname = sqlName(name) else: sqlname = name self.xml_field_names.append(name) @@ -359,44 +426,44 @@ class xml_handler: self.logger.debug("FIELD name: "+name) return - def handle_data_fields(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'ROW'): - self.handle_row, - } + def handle_data_fields(self, attrs): + #First round through the generator corresponds to the #start element event self.logger.info("reading data...") - self.logger.debug("START RESULTSET") + if self.debug_data: + self.logger.debug("START RESULTSET") self.rowcnt = 0 - yield None + return + def handle_end_data_fields(self): #delegate is a generator that handles all the events "within" #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - + #Element closed. Wrap up - self.logger.debug("END RESULTSET") + if self.debug_data: + self.logger.debug("END RESULTSET") self.dbCon.commit() if self.sync_mode: # delete unmatched entries in db - self.logger.info("deleting unmatched rows from db") - delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) - for id in self.dbIDs.keys(): - # find all not-updated fields - if self.dbIDs[id] == 0: - self.logger.info(" delete: %s"%id) - SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) - - elif self.dbIDs[id] > 1: - self.logger.info(" sync: ID %s used more than once?"%id) - - self.dbCon.commit() + if self.rowcnt > 0: + self.logger.info("deleting unmatched rows from db") + delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) + for id in self.dbIDs.keys(): + # find all not-updated fields + if self.dbIDs[id] == 0: + self.logger.info(" delete: %s"%id) + SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) + + elif self.dbIDs[id] > 1: + self.logger.info(" sync: ID %s used more than once?"%id) + + self.dbCon.commit() + + else: + # safety in case we had an empty file + self.logger.warning("no rows read! not deleting unmatched rows!") # reinstate backup tables if self.backup_table and not self.id_field: @@ -409,28 +476,25 @@ class xml_handler: self.db.execute(qstr) self.dbCon.commit() + self.logger.info("Done (%s rows)"%self.rowcnt) return def handle_row(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'COL'): - self.handle_col, - } - self.logger.debug("START ROW") + + if self.debug_data: + self.logger.debug("START ROW") self.xml_data = {} self.colIdx = 0 - yield None + return + + def handle_end_row(self): #delegate is a generator that handles all the events "within" #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - + #Element closed. Wrap up - self.logger.debug("END ROW") + if self.debug_data: + self.logger.debug("END ROW") self.rowcnt += 1 # process collected row data update=False @@ -462,9 +526,9 @@ class xml_handler: continue f = self.xml_field_map[fn] - val = self.xml_data[fn] + val = self.xml_data.get(fn,None) type = self.sql_fields[f.getName()].getType() - if type == "date" and len(val) == 0: + if type == "date" and len(val.strip()) == 0: # empty date field val = None @@ -512,37 +576,25 @@ class xml_handler: return - def handle_col(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'DATA'): - self.handle_data_tag, - } - #print "START COL" - yield None - #delegate is a generator that handles all the events "within" - #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - #Element closed. Wrap up - #print "END COL" + def handle_end_col(self): + + self.colIdx += 1 return - def handle_data_tag(self, end_condition): + + def handle_data_tag(self, attrs): #print "START DATA" - content = u'' - yield None - # gather child elements - while not self.event == end_condition: - if self.event[0] == saxtools.CHARACTER_DATA: - content += self.params - yield None - #Element closed. Wrap up - fn = self.xml_field_names[self.colIdx] - self.xml_data[fn] = content + self.content = u'' +# yield None +# # gather child elements +# while not self.event == end_condition: +# if self.event[0] == saxtools.CHARACTER_DATA: +# content += self.params +# yield None +# #Element closed. Wrap up +# fn = self.xml_field_names[self.colIdx] +# self.xml_data[fn] = content return @@ -582,10 +634,10 @@ def importFMPXML(options): #The "consumer" is our own handler consumer = xml_handler(options) #Initialize Tenorsax with handler - handler = saxtools.tenorsax(consumer) + #handler = saxtools.tenorsax(consumer) #Resulting tenorsax instance is the SAX handler - parser.setContentHandler(handler) - parser.setFeature(sax.handler.feature_namespaces, 1) + parser.setContentHandler(consumer) + #parser.setFeature(sax.handler.feature_namespaces, 1) parser.parse(options.filename)