#!/usr/local/bin/python
#

import string
from xml.dom.pulldom import parse
import logging
import sys

try:
    import psycopg2 as psycopg
    psyco = 2
except ImportError:
    import psycopg
    psyco = 1


def getTextFromNode(nodename):
    """get the cdata content of a node"""
    if nodename is None:
        return ""
    nodelist = nodename.childNodes
    rc = ""
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc = rc + node.data
    return rc


def sql_quote(v):
    # quote dictionary
    quote_dict = {"\'": "''", "\\": "\\\\"}
    for dkey in quote_dict.keys():
        if string.find(v, dkey) >= 0:
            v = string.join(string.split(v, dkey), quote_dict[dkey])
    return "'%s'" % v


def SimpleSearch(curs, query, args=None):
    """execute sql query and return data"""
    logging.debug("executing: "+query)
    if psyco == 1:
        query = query.encode("UTF-8")
    curs.execute(query, args)
    logging.debug("sql done")
    return curs.fetchall()


def importXMLFileFMP(dsn, table, filename, update_fields=None, id_field=None, sync_mode=False):
    '''
    Import FileMaker XML file (FMPXMLRESULT format) into the table.
    @param dsn: database connection string
    @param table: name of the table the xml shall be imported into
    @param filename: xmlfile filename
    @param update_fields: (optional) list of fields to update; default is to create all fields
    @param id_field: (optional) field which uniquely identifies an entry for updating purposes.
    @param sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
    '''
    # connect database
    dbCon = psycopg.connect(dsn)
    db = dbCon.cursor()
    assert db, "AIIEE no db cursor for %s!!" % dsn

    # read XML file
    fh = file(filename)
    logging.info("reading file "+filename)
    doc = parse(fh)
    logging.info("file read")

    dbIDs = {}
    rowcnt = 0
    ret = ""

    logging.debug("dsn: "+dsn)
    logging.debug("table: "+table)
    logging.debug("update_fields: "+repr(update_fields))
    logging.debug("id_field: "+repr(id_field))
    logging.debug("sync_mode: "+repr(sync_mode))

    if id_field is not None:
        # prepare a list of ids for sync mode
        qstr = "select %s from %s" % (id_field, table)
        for id in SimpleSearch(db, qstr):
            # value 0: not updated
            dbIDs[id[0]] = 0
            rowcnt += 1

        logging.info("%d entries in DB to sync" % rowcnt)

    fieldNames = []
    rowcnt = 0
    id_val = ''

    while 1:
        node = doc.getEvent()
        if node is None:
            break

        # METADATA tag defines number and names of fields in FMPXMLRESULT
        if node[1].nodeName == 'METADATA':
            doc.expandNode(node[1])
            names = node[1].getElementsByTagName('FIELD')
            for name in names:
                fn = name.getAttribute('NAME')
                fieldNames.append(fn)

            if update_fields is None:
                # update all fields
                update_fields = fieldNames

            logging.debug("fieldnames:"+repr(fieldNames))

            # get list of fields in db table
            qstr = """select attname from pg_attribute, pg_class where attrelid = pg_class.oid and relname = '%s'"""
            columns = [x[0] for x in SimpleSearch(db, qstr % table)]

            # adjust db table to fields in XML and fieldlist
            for fieldName in fieldNames:
                logging.debug("db-fieldname:"+repr(fieldName))
                if (fieldName not in columns) and (fieldName in update_fields):
                    # identifiers cannot be passed as bound query parameters,
                    # so they are interpolated into the statement directly
                    qstr = "alter table %s add %s %s" % (table, fieldName, 'text')
                    logging.info("db add field:"+qstr)
                    db.execute(qstr)
                    dbCon.commit()

        # ROW tags (in RESULTSET tag) hold data
        elif node[1].nodeName == 'ROW':
            rowcnt += 1
            doc.expandNode(node[1])
            cols = node[1].getElementsByTagName('COL')
            dataSet = {}
            i = 0
            # populate with data
            for col in cols:
                data = col.getElementsByTagName('DATA')
                dataSet[fieldNames[i]] = getTextFromNode(data[0])
                i += 1

            update = False

            # synchronize by id_field
            if id_field:
                id_val = dataSet[id_field]
                if id_val in dbIDs:
                    dbIDs[id_val] += 1
                    update = True

            if update:
                # update existing row (by id_field)
                setvals = []
                for fieldName in update_fields:
                    setvals.append("%s = %s" % (fieldName, sql_quote(dataSet[fieldName])))
                setStr = string.join(setvals, ',')
                id_val = dataSet[id_field]
                qstr = "UPDATE %s SET %s WHERE %s = '%s' " % (table, setStr, id_field, id_val)
                if psyco == 1:
                    qstr = qstr.encode("UTF-8")
                db.execute(qstr)
                ret += "up: %s" % id_val
            else:
                # create new row
                fields = string.join(update_fields, ',')
                values = string.join([" %s " % sql_quote(dataSet[x]) for x in update_fields], ',')
                qstr = "INSERT INTO %s (%s) VALUES (%s)" % (table, fields, values)
                if psyco == 1:
                    qstr = qstr.encode("UTF-8")
                db.execute(qstr)
                ret += "ad: %s" % dataSet.get(id_field, rowcnt)

            #logging.info(" row:"+"%d (%s)"%(rowcnt,id_val))
            if (rowcnt % 10) == 0:
                logging.info(" row: %d (%s)" % (rowcnt, id_val))
                dbCon.commit()

    dbCon.commit()

    if sync_mode:
        # delete unmatched entries in db
        for id in dbIDs.keys():
            # find all not-updated entries
            if dbIDs[id] == 0:
                logging.info(" delete: %s" % id)
                qstr = "DELETE FROM %s WHERE %s = '%s'" % (table, id_field, id)
                if psyco == 1:
                    qstr = qstr.encode("UTF-8")
                db.execute(qstr)

            elif dbIDs[id] > 1:
                logging.info(" sync: id %s used more than once?" % id)

        dbCon.commit()

    return ret


##
## public static int main()
##

from optparse import OptionParser

opars = OptionParser()
opars.add_option("-f", "--file",
                 dest="filename",
                 help="FMPXML file name", metavar="FILE")
opars.add_option("-c", "--dsn",
                 dest="dsn",
                 help="database connection string")
opars.add_option("-t", "--table",
                 dest="table",
                 help="database table name")
opars.add_option("--fields", default=None,
                 dest="update_fields",
                 help="list of fields to update (comma separated)", metavar="LIST")
opars.add_option("--id-field", default=None,
                 dest="id_field",
                 help="name of id field for synchronisation", metavar="NAME")
opars.add_option("--sync-mode", default=False, action="store_true",
                 dest="sync_mode",
                 help="do full sync (remove unmatched entries from db)")
opars.add_option("-d", "--debug", default=False, action="store_true",
                 dest="debug",
                 help="debug mode (more output)")

(options, args) = opars.parse_args()

if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
    opars.print_help()
    sys.exit(1)

if options.debug:
    loglevel = logging.DEBUG
else:
    loglevel = logging.INFO

logging.basicConfig(level=loglevel,
                    format='%(asctime)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S')

update_fields = None
if options.update_fields:
    update_fields = [string.strip(s) for s in options.update_fields.split(',')]

importXMLFileFMP(dsn=options.dsn, table=options.table, filename=options.filename,
                 update_fields=update_fields, id_field=options.id_field,
                 sync_mode=options.sync_mode)

print "DONE!"
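
# Example invocation (a sketch; the script name, DSN, table, and file names below
# are hypothetical placeholders, not part of the original tool):
#
#   python importFMPXML.py -c "dbname=mydb user=myuser" -t mytable \
#       -f fmp_export.xml --id-field=ID --sync-mode
#
# The same import can also be triggered from other Python code by calling
# importXMLFileFMP() directly with keyword arguments, as done at the end of this script.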