File:  [Repository] / ZSQLExtend / importFMPXML.py
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Thu Dec 21 12:17:32 2006 UTC (17 years, 6 months ago) by casties
Branches: MAIN
CVS tags: HEAD
more niceties

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: from xml.dom.pulldom import parse
    6: import logging
    7: import sys
    8: 
    9: try:
   10:     import psycopg2 as psycopg
   11:     psyco = 2
   12: except:
   13:     import psycopg
   14:     psyco = 1
   15: 
   16: 
   17: def getTextFromNode(nodename):
   18:     """get the cdata content of a node"""
   19:     if nodename is None:
   20:         return ""
   21:     nodelist=nodename.childNodes
   22:     rc = ""
   23:     for node in nodelist:
   24:         if node.nodeType == node.TEXT_NODE:
   25:            rc = rc + node.data
   26:     return rc
   27: 
   28: def sql_quote(v):
   29:     # quote dictionary
   30:     quote_dict = {"\'": "''", "\\": "\\\\"}
   31:     for dkey in quote_dict.keys():
   32:         if string.find(v, dkey) >= 0:
   33:             v=string.join(string.split(v,dkey),quote_dict[dkey])
   34:     return "'%s'" % v
   35: 
   36: def SimpleSearch(curs,query, args=None):
   37:     """execute sql query and return data"""
   38:     logging.debug("executing: "+query)
   39:     if psyco == 1:
   40:         query = query.encode("UTF-8")
   41:     curs.execute(query, args)
   42:     logging.debug("sql done")
   43:     try:
   44:         return curs.fetchall()
   45:     except:
   46:         return None
   47: 
   48: 
   49: def importXMLFileFMP(dsn,table,filename,update_fields=None,id_field=None,sync_mode=False):
   50:     '''
   51:     Import FileMaker XML file (FMPXMLRESULT format) into the table.
   52:     @param table: name of the table the xml shall be imported into
   53:     @param filename: xmlfile filename
   54:     @param update_fields: (optional) list of fields to update; default is to create all fields
   55:     @param id_field: (optional) field which uniquely identifies an entry for updating purposes.
   56:     @param sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
   57:     '''
   58: 
   59:     # connect database
   60:     dbCon = psycopg.connect(dsn)
   61:     db = dbCon.cursor()
   62:     assert db, "AIIEE no db cursor for %s!!"%dsn
   63: 
   64:     # read XML file
   65:     fh=file(filename)
   66:     logging.info("reading file "+filename)
   67:     doc=parse(fh)
   68:     logging.info("file read")
   69: 
   70:     dbIDs = {}
   71:     rowcnt = 0
   72:     ret = ""
   73:     
   74:     logging.debug("dsn: "+dsn)
   75:     logging.debug("table: "+table)
   76:     logging.debug("update_fields: "+repr(update_fields))
   77:     logging.debug("id_field: "+id_field)
   78:     logging.debug("sync_mode: "+repr(sync_mode))
   79:     
   80:     if id_field is not None:
   81:         # prepare a list of ids for sync mode
   82:         qstr="select %s from %s"%(id_field,table)
   83:         for id in SimpleSearch(db, qstr):
   84:             # value 0: not updated
   85:             dbIDs[id[0]] = 0;
   86:             rowcnt += 1
   87:             
   88:         logging.info("%d entries in DB to sync"%rowcnt)
   89:     
   90:     fieldNames = []
   91:     rowcnt = 0
   92:     id_val = ''
   93:     
   94:     while 1:
   95:         node=doc.getEvent()
   96:     
   97:         if node is None:
   98:             break;
   99:         
  100:         # METADATA tag defines number and names of fields in FMPXMLRESULT
  101:         if node[1].nodeName == 'METADATA':
  102:             doc.expandNode(node[1])
  103:         
  104:             names=node[1].getElementsByTagName('FIELD')
  105: 
  106:             for name in names:
  107:                 fn = name.getAttribute('NAME')
  108:                 fieldNames.append(fn)
  109:             
  110:             if update_fields is None:
  111:                 # update all fields
  112:                 update_fields = fieldNames
  113:             
  114:             logging.debug("fieldnames:"+repr(fieldNames))
  115:             # get list of fields in db table
  116:             qstr="""select attname from pg_attribute, pg_class where attrelid = pg_class.oid and relname = '%s'"""
  117:             columns=[x[0] for x in SimpleSearch(db, qstr%table)]
  118:             
  119:             # adjust db table to fields in XML and fieldlist
  120:             for fieldName in fieldNames:
  121:                 logging.debug("db-fieldname:"+repr(fieldName))                     
  122:                 if (fieldName not in columns) and (fieldName in update_fields):
  123:                     qstr="alter table %s add %%s %%s"%table
  124:                     logging.info("db add field:"+qstr%(fieldName,'text'))
  125:                     db.execute(qstr, (fieldName,'text'))
  126:                     db.commit()
  127:                     
  128:         
  129:         # ROW tags (in RESULTSET tag) hold data
  130:         elif node[1].nodeName == 'ROW':
  131:             rowcnt += 1
  132:             
  133:             doc.expandNode(node[1])
  134:             cols=node[1].getElementsByTagName('COL')
  135:             dataSet={}
  136:             i = 0
  137:             # populate with data
  138:             for col in cols:
  139:                 data=col.getElementsByTagName('DATA')
  140:                 dataSet[fieldNames[i]] = getTextFromNode(data[0])
  141:                 i += 1
  142:                 
  143:             update=False
  144:             
  145:             # synchronize by id_field
  146:             if id_field:
  147:                 id_val=dataSet[id_field]
  148:                 if id_val in dbIDs:
  149:                     dbIDs[id_val] += 1
  150:                     update=True
  151:             
  152:             if update:
  153:                 # update existing row (by id_field)
  154:                 setvals=[]
  155:                 for fieldName in update_fields:
  156:                     setvals.append("%s = %s"%(fieldName,sql_quote(dataSet[fieldName])))
  157:                 setStr=string.join(setvals, ',')
  158:                 id_val=dataSet[id_field]
  159:                 qstr="UPDATE %s SET %s WHERE %s = '%s' "%(table,setStr,id_field,id_val)
  160:                 SimpleSearch(db, qstr)
  161:                 ret += "up: %s"%id_val
  162:             else:
  163:                 # create new row
  164:                 fields=string.join(update_fields, ',')
  165:                 values=string.join([" %s "%sql_quote(dataSet[x]) for x in update_fields], ',')
  166:                 qstr="INSERT INTO %s (%s) VALUES (%s)"%(table,fields,values)
  167:                 SimpleSearch(db, qstr)
  168:                 ret += "ad: %s"%dataSet.get(id_field, rowcnt)
  169: 
  170:             #logging.info(" row:"+"%d (%s)"%(rowcnt,id_val))
  171:             if (rowcnt % 10) == 0:
  172:                 logging.info(" row:"+"%d (%s)"%(rowcnt,id_val))
  173:                 dbCon.commit()
  174: 
  175:     dbCon.commit()
  176:     if sync_mode:
  177:         # delete unmatched entries in db
  178:         for id in dbIDs.keys():
  179:             # find all not-updated fields
  180:             if dbIDs[id] == 0:
  181:                 logging.info(" delete:"+id)
  182:                 qstr = "DELETE FROM %s WHERE %%s = '%%s'"%table
  183:                 SimpleSearch(db, qstr, (id_field,id))
  184:                 
  185:             elif dbIDs[id] > 1:
  186:                 logging.info(" sync:"+"id used more than once?"+id)
  187:         
  188:         dbCon.commit()
  189:     
  190:     return ret
  191: 
  192: ##
  193: ## public static int main()
  194: ##
  195: 
  196: from optparse import OptionParser
  197: 
  198: opars = OptionParser()
  199: opars.add_option("-f", "--file", 
  200:                  dest="filename",
  201:                  help="FMPXML file name", metavar="FILE")
  202: opars.add_option("-c", "--dsn", 
  203:                  dest="dsn", 
  204:                  help="database connection string")
  205: opars.add_option("-t", "--table", 
  206:                  dest="table", 
  207:                  help="database table name")
  208: opars.add_option("--fields", default=None, 
  209:                  dest="update_fields", 
  210:                  help="list of fields to update (comma separated)", metavar="LIST")
  211: opars.add_option("--id-field", default=None, 
  212:                  dest="id_field", 
  213:                  help="name of id field for synchronisation", metavar="NAME")
  214: opars.add_option("--sync-mode", default=False, action="store_true", 
  215:                  dest="sync_mode", 
  216:                  help="do full sync (remove unmatched fields from db)")
  217: opars.add_option("-d", "--debug", default=False, action="store_true", 
  218:                  dest="debug", 
  219:                  help="debug mode (more output)")
  220: 
  221: (options, args) = opars.parse_args()
  222: 
  223: if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
  224:     opars.print_help()
  225:     sys.exit(1)
  226: 
  227: if options.debug:
  228:     loglevel = logging.DEBUG
  229: else:
  230:     loglevel = logging.INFO
  231: 
  232: logging.basicConfig(level=loglevel, 
  233:                     format='%(asctime)s %(levelname)s %(message)s',
  234:                     datefmt='%H:%M:%S')
  235: 
  236: update_fields = None
  237: 
  238: if options.update_fields:
  239:     update_fields = [string.strip(s) for s in options.update_fields.split(',')]
  240:      
  241: importXMLFileFMP(dsn=options.dsn,table=options.table,filename=options.filename,
  242:                  update_fields=update_fields,id_field=options.id_field,
  243:                  sync_mode=options.sync_mode)
  244: 
  245: print "DONE!"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>