File:  [Repository] / ZSQLExtend / importFMPXML.py
Revision 1.35: download - view: text, annotated - select for diffs - revision graph
Wed Feb 15 08:41:01 2012 UTC (12 years, 4 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
bug in character handling

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: import logging
    6: import sys
    7: import types
    8: import time
    9: import re
   10: 
   11: from xml import sax
   12: from xml.sax.handler import ContentHandler
   13: #from amara import saxtools
   14: 
   15: try:
   16:     import psycopg2 as psycopg
   17:     import psycopg2.extensions
   18:     # switch to unicode
   19:     psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
   20:     psyco = 2
   21: except:
   22:     import psycopg
   23:     psyco = 1
   24: 
   25: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
   26: 
   27: version_string = "V0.6.7 ROC 21.6.2011"
   28: 
   29: def unicodify(text, withNone=False):
   30:     """decode str (utf-8 or latin-1 representation) into unicode object"""
   31:     if withNone and text is None:
   32:         return None
   33:     if not text:
   34:         return u""
   35:     if isinstance(text, str):
   36:         try:
   37:             return text.decode('utf-8')
   38:         except:
   39:             return text.decode('latin-1')
   40:     else:
   41:         return text
   42: 
   43: def utf8ify(text, withNone=False):
   44:     """encode unicode object or string into byte string in utf-8 representation"""
   45:     if withNone and text is None:
   46:         return None
   47:     if not text:
   48:         return ""
   49:     if isinstance(text, unicode):
   50:         return text.encode('utf-8')
   51:     else:
   52:         return text
   53: 
   54: def getTextFromNode(nodename):
   55:     """get the cdata content of a node"""
   56:     if nodename is None:
   57:         return ""
   58:     nodelist=nodename.childNodes
   59:     rc = ""
   60:     for node in nodelist:
   61:         if node.nodeType == node.TEXT_NODE:
   62:            rc = rc + node.data
   63:     return rc
   64: 
   65: def sql_quote(v):
   66:     # quote dictionary
   67:     quote_dict = {"\'": "''", "\\": "\\\\"}
   68:     for dkey in quote_dict.keys():
   69:         if string.find(v, dkey) >= 0:
   70:             v=string.join(string.split(v,dkey),quote_dict[dkey])
   71:     return "'%s'"%v
   72: 
   73: def sqlName(s, lc=True, more=''):
   74:     """returns restricted ASCII-only version of string"""
   75:     if s is None:
   76:         return ""
   77:     
   78:     # remove '
   79:     s = s.replace("'","")
   80:     # all else -> "_"
   81:     s = re.sub('[^A-Za-z0-9_'+more+']','_',s)
   82:     if lc:
   83:         return s.lower()
   84:     
   85:     return s
   86: 
   87: def SimpleSearch(curs,query, args=None, ascii=False):
   88:     """execute sql query and return data"""
   89:     #logger.debug("executing: "+query)
   90:     if ascii:
   91:         # encode all in UTF-8
   92:         query = utf8ify(query)
   93:         if args is not None:
   94:             encargs = []
   95:             for a in args:
   96:                 encargs.append(utf8ify(a, withNone=True))
   97:             
   98:             args = encargs
   99: 
  100:     curs.execute(query, args)
  101:     #logger.debug("sql done")
  102:     try:
  103:         return curs.fetchall()
  104:     except:
  105:         return None
  106: 
  107: 
  108: class TableColumn:
  109:     """simple type for storing sql column name and type"""
  110:     
  111:     def __init__(self, name, type=None):
  112:         #print "new tablecolumn(%s,%s)"%(name, type)
  113:         self.name = name
  114:         self.type = type
  115:         
  116:     def getName(self):
  117:         return self.name
  118:     
  119:     def getType(self):
  120:         if self.type is not None:
  121:             return self.type
  122:         else:
  123:             return "text"
  124: 
  125:     def __str__(self):
  126:         return self.name
  127:     
  128:     
  129: class xml_handler(ContentHandler):
  130:     def __init__(self,options):
  131:         """SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
  132:         @param options: dict of options
  133:         @param options.dsn: database connection string
  134:         @param options.table: name of the table the xml shall be imported into
  135:         @param options.filename: xmlfile filename
  136:         @param options.update_fields: (optional) list of fields to update; default is to create all fields
  137:         @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
  138:         @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
  139:         @param options.lc_names: (optional) lower case and clean up field names from XML
  140:         @param options.keep_fields: (optional) don't add fields to SQL database
  141:         @param options.ascii_db: (optional) assume ascii encoding in db
  142:         @param options.replace_table: (optional) delete and re-insert data
  143:         @param options.backup_table: (optional) create backup of old table (breaks indices)
  144:         @param options.use_logger_instance: (optional) use this instance of a logger
  145:         """
  146:         
  147:         # set up logger
  148:         if hasattr(options, 'use_logger_instance'):
  149:             self.logger = options.use_logger_instance
  150:         else:
  151:             self.logger = logging.getLogger('db.import.fmpxml')
  152: 
  153:         
  154:         # set up parser
  155:         self.result={}
  156:         self.event = None
  157:         
  158: #        self.top_dispatcher = { 
  159: #            (saxtools.START_ELEMENT, fm_ns, u'METADATA'): 
  160: #            self.handle_meta_fields,
  161: #            (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): 
  162: #            self.handle_data_fields,
  163: #            }
  164:         
  165:         # connect database
  166:         self.dbCon = psycopg.connect(options.dsn)
  167:         logging.debug("DB encoding: %s"%getattr(self.dbCon, 'encoding', 'UNKNOWN'))
  168:         self.db = self.dbCon.cursor()
  169:         assert self.db, "AIIEE no db cursor for %s!!"%options.dsn
  170:     
  171:         self.table = getattr(options,"table",None)
  172:         self.update_fields = getattr(options,"update_fields",None)
  173:         self.id_field = getattr(options,"id_field",None)
  174:         self.sync_mode = getattr(options,"sync_mode",None)
  175:         self.lc_names = getattr(options,"lc_names",None)
  176:         self.keep_fields = getattr(options,"keep_fields",None)
  177:         self.ascii_db = getattr(options,"ascii_db",None)
  178:         self.replace_table = getattr(options,"replace_table",None)
  179:         self.backup_table = getattr(options,"backup_table",None)
  180:         self.read_before_update = getattr(options,"read_before_update",None)
  181:         self.debug_data = getattr(options,"debug_data",None)
  182: 
  183:         self.logger.debug("dsn: "+repr(getattr(options,"dsn",None)))
  184:         self.logger.debug("table: "+repr(self.table))
  185:         self.logger.debug("update_fields: "+repr(self.update_fields))
  186:         self.logger.debug("id_field: "+repr(self.id_field))
  187:         self.logger.debug("sync_mode: "+repr(self.sync_mode))
  188:         self.logger.debug("lc_names: "+repr(self.lc_names))
  189:         self.logger.debug("keep_fields: "+repr(self.keep_fields))
  190:         self.logger.debug("ascii_db: "+repr(self.ascii_db))
  191:         self.logger.debug("replace_table: "+repr(self.replace_table))
  192:         self.logger.debug("backup_table: "+repr(self.backup_table))
  193:         self.logger.debug("read_before_update: "+repr(self.read_before_update))
  194:         self.logger.debug("debug_data: "+repr(self.debug_data))
  195:         
  196:         self.dbIDs = {}
  197:         self.rowcnt = 0
  198:         
  199:         self.currentName = None
  200:         
  201:         if self.id_field is not None:
  202:             # prepare a list of ids for sync mode
  203:             qstr="select %s from %s"%(self.id_field,self.table)
  204:             for id in SimpleSearch(self.db, qstr):
  205:                 # value 0: not updated
  206:                 self.dbIDs[id[0]] = 0;
  207:                 self.rowcnt += 1
  208:                 
  209:             self.logger.info("%d entries in DB to sync"%self.rowcnt)
  210:         
  211:         # names of fields in XML file
  212:         self.xml_field_names = []
  213:         # map XML field names to SQL field names
  214:         self.xml_field_map = {}
  215:         # and vice versa
  216:         self.sql_field_map = {}
  217:         
  218:         return
  219: 
  220:     def startElement(self, name, attrs):
  221:         logging.debug(name)
  222:         if (name.lower() == "field") :
  223:             self.handle_meta_field(attrs)
  224:         if (name.lower() == "row") :
  225:             logging.debug("handleROW")
  226:             self.handle_row(attrs)
  227:         if (name.lower()=="resultset"):
  228:            self.handle_data_fields(attrs)
  229:           
  230:         if (name.lower()=="data"):
  231:            self.handle_data_tag(attrs);    
  232:            
  233:     def endElement(self,name):
  234:         if (name.lower() == "resultset") :
  235:             self.currentTag=""
  236:             self.handle_end_data_fields()
  237:         if (name.lower() == "field") :
  238:             self.handle_end_meta_field()
  239:         if (name.lower() == "metadata"):
  240:             self.handle_end_meta_fields()
  241:         if (name.lower() == "row") :
  242:             logging.debug("handleROW")
  243:             self.handle_end_row()
  244:         
  245:         if (name.lower() == "col") :
  246:             self.handle_end_col()
  247:     def characters(self,content):
  248:         
  249:         try:
  250:             fn = self.xml_field_names[self.colIdx]
  251:             
  252:             contentTmp = self.xml_data.get(fn,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.)
  253:             self.xml_data[fn] = contentTmp+content
  254:         except:
  255:             logging.debug(content)
  256:             pass
  257:         
  258: #        if self.currentName is not None:
  259: #            logging.debug(self.currentName+"    "+content)
  260: #            self.currentRow[self.currentName]=content;
  261: #    
  262:     def handle_end_meta_fields(self):
  263: #        dispatcher = {
  264: #            (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
  265: #            self.handle_meta_field,
  266: #            }
  267:         #First round through the generator corresponds to the
  268:         #start element event
  269: #        self.logger.info("reading metadata...")
  270: #        if self.debug_data:
  271: #            self.logger.debug("START METADATA")
  272: #        #yield None
  273:     
  274:         #delegate is a generator that handles all the events "within"
  275:         #this element
  276: #        delegate = None
  277: #        while not self.event == end_condition:
  278: #            delegate = saxtools.tenorsax.event_loop_body(
  279: #                dispatcher, delegate, self.event)
  280: #            yield None
  281: #        
  282: #        #Element closed. Wrap up
  283:         if self.debug_data:
  284:             self.logger.debug("END METADATA")
  285:         
  286:         # rename table for backup
  287:         if self.backup_table:
  288:             self.orig_table = self.table
  289:             self.tmp_table = self.table + "_tmp"
  290:             backup_name = "%s_%s"%(self.table,time.strftime('%Y_%m_%d_%H_%M_%S'))
  291:             
  292:             # remove old temp table
  293:             qstr = "DROP TABLE %s"%(self.tmp_table)
  294:             try:
  295:                 self.db.execute(qstr)
  296:             except:
  297:                 pass
  298:             
  299:             self.dbCon.commit()
  300:            
  301:             if self.id_field:
  302:                 # sync mode -- copy backup table, update current table 
  303:                 self.logger.info("copy table %s to %s"%(self.table,backup_name))
  304:                 qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(backup_name,self.table)
  305: 
  306:             else:
  307:                 # replace mode -- create empty tmp table, insert into tmp table
  308:                 self.table = self.tmp_table
  309:                 self.logger.info("create empty table %s"%(self.table))
  310:                 qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table)
  311:             
  312:             self.db.execute(qstr)
  313:             self.dbCon.commit()
  314:         
  315:         # delete data from table for replace
  316:         if self.replace_table:
  317:             self.logger.info("delete data from table %s"%(self.table))
  318:             qstr = "TRUNCATE TABLE %s"%(self.table)
  319:             self.db.execute(qstr)
  320:             self.dbCon.commit()
  321:            
  322:         # try to match date style with XML
  323:         self.db.execute("set datestyle to 'german'")
  324:         
  325:         #self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names))
  326:         # get list of fields and types of db table
  327:         qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'"
  328:         self.sql_fields={}
  329:         for f in SimpleSearch(self.db, qstr%self.table):
  330:             fn = f[0]
  331:             ft = f[1]
  332:             #print "SQL fields: %s (%s)"%(n,t)
  333:             self.sql_fields[fn] = TableColumn(fn,ft)
  334:         
  335:         # translate id_field (SQL-name) to XML-name
  336:         self.xml_id = self.sql_field_map.get(self.id_field, None)
  337:         # get type of id_field
  338:         if self.id_field:
  339:             self.id_type = self.sql_fields[self.id_field].getType()
  340:         else:
  341:             self.id_type = None
  342:         
  343:         # check fields to update
  344:         if self.update_fields is None:
  345:             if self.keep_fields:
  346:                 # update all existing fields from sql (when they are in the xml file)
  347:                 self.update_fields = {}
  348:                 for f in self.sql_fields.keys():
  349:                     if self.sql_field_map.has_key(f):
  350:                         xf = self.sql_field_map[f]
  351:                         self.update_fields[f] = self.xml_field_map[xf]
  352: 
  353:             else:
  354:                 # update all fields
  355:                 if self.lc_names:
  356:                     # create dict with sql names
  357:                     self.update_fields = {}
  358:                     for f in self.xml_field_map.values():
  359:                         self.update_fields[f.getName()] = f
  360:                         
  361:                 else:
  362:                     self.update_fields = self.xml_field_map
  363:                                 
  364:         # and translate to list of xml fields
  365:         if self.lc_names:
  366:             self.xml_update_list = [self.sql_field_map[x] for x in self.update_fields]
  367:         else:
  368:             self.xml_update_list = self.update_fields.keys()
  369: 
  370:         if not self.keep_fields:
  371:             # adjust db table to fields in XML and update_fields
  372:             for f in self.xml_field_map.values():
  373:                 self.logger.debug("sync-fieldname: %s"%f.getName())
  374:                 sf = self.sql_fields.get(f.getName(), None)
  375:                 uf = self.update_fields.get(f.getName(), None)
  376:                 if sf is not None:
  377:                     # name in db -- check type
  378:                     if f.getType() != sf.getType():
  379:                         self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType()))
  380:                 elif uf is not None:
  381:                     # add field to table
  382:                     fn = uf.getName()
  383:                     ft = uf.getType()
  384:                     qstr="alter table %s add \"%s\" %s"%(self.table,fn,ft)
  385:                     self.logger.info("db add field:"+qstr)
  386:                     
  387:                     if self.ascii_db and type(qstr)==types.UnicodeType:
  388:                         qstr=qstr.encode('utf-8')
  389:                         
  390:                     self.db.execute(qstr)
  391:                     self.dbCon.commit()
  392:                     # add field to field list
  393:                     self.sql_fields[fn] = TableColumn(fn, ft)
  394:                 
  395:         # prepare sql statements for update (do not update id_field)
  396:         setStr=string.join(["\"%s\" = %%s"%self.xml_field_map[f] for f in self.xml_update_list if f != self.xml_id], ', ')
  397:         self.updQuery="UPDATE %s SET %s WHERE \"%s\" = %%s"%(self.table,setStr,self.id_field)
  398:         # and select (for update check)
  399:         selStr=string.join([self.xml_field_map[f].getName() for f in self.xml_update_list if f != self.xml_id], ', ')
  400:         self.selQuery="SELECT %s FROM %s WHERE \"%s\" = %%s"%(selStr,self.table,self.id_field)
  401:         # and insert
  402:         fields=string.join(["\"%s\""%self.xml_field_map[x].getName() for x in self.xml_update_list], ',')
  403:         values=string.join(['%s' for f in self.xml_update_list], ',')
  404:         self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values)
  405:         self.logger.debug("update-query: "+self.updQuery)
  406:         self.logger.debug("sel-query: "+self.selQuery)
  407:         self.logger.debug("add-query: "+self.addQuery)
  408:         return
  409: 
  410:     def handle_meta_field(self, attrs):
  411:         self.currentName =  attrs.get('NAME')
  412:         #yield None
  413:         return
  414:     def handle_end_meta_field(self):
  415:         #Element closed.  Wrap up
  416:         name = self.currentName
  417:         if self.lc_names:
  418:             # clean name
  419:             sqlname = sqlName(name)
  420:         else:
  421:             sqlname = name
  422:         self.xml_field_names.append(name)
  423:         # map to sql name and default text type
  424:         self.xml_field_map[name] = TableColumn(sqlname, 'text')
  425:         self.sql_field_map[sqlname] = name
  426:         self.logger.debug("FIELD name: "+name)
  427:         return
  428: 
  429:     def handle_data_fields(self, attrs):
  430:        
  431:         #First round through the generator corresponds to the
  432:         #start element event
  433:         self.logger.info("reading data...")
  434:         if self.debug_data:
  435:             self.logger.debug("START RESULTSET")
  436:         self.rowcnt = 0
  437:         return
  438:     
  439:     def handle_end_data_fields(self):
  440:         #delegate is a generator that handles all the events "within"
  441:         #this element
  442:      
  443:         #Element closed.  Wrap up
  444:         if self.debug_data:
  445:             self.logger.debug("END RESULTSET")
  446:         self.dbCon.commit()
  447:         
  448:         if self.sync_mode:
  449:             # delete unmatched entries in db
  450:             if self.rowcnt > 0:
  451:                 self.logger.info("deleting unmatched rows from db")
  452:                 delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field)
  453:                 for id in self.dbIDs.keys():
  454:                     # find all not-updated fields
  455:                     if self.dbIDs[id] == 0:
  456:                         self.logger.info(" delete: %s"%id)
  457:                         SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db)
  458:                         
  459:                     elif self.dbIDs[id] > 1:
  460:                         self.logger.info(" sync: ID %s used more than once?"%id)
  461:                 
  462:                 self.dbCon.commit()
  463:                 
  464:             else:
  465:                 # safety in case we had an empty file
  466:                 self.logger.warning("no rows read! not deleting unmatched rows!")
  467:             
  468:         # reinstate backup tables
  469:         if self.backup_table and not self.id_field:
  470:             backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
  471:             self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
  472:             qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
  473:             self.db.execute(qstr)
  474:             self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
  475:             qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
  476:             self.db.execute(qstr)
  477:             self.dbCon.commit()
  478:         
  479:         self.logger.info("Done (%s rows)"%self.rowcnt)
  480:         return
  481: 
  482:     def handle_row(self, end_condition):
  483:        
  484:         if self.debug_data:
  485:             self.logger.debug("START ROW")
  486:         self.xml_data = {}
  487:         self.colIdx = 0
  488:     
  489:         return
  490:     
  491:     def handle_end_row(self):
  492:         #delegate is a generator that handles all the events "within"
  493:         #this element
  494:      
  495:         #Element closed.  Wrap up
  496:         if self.debug_data:
  497:             self.logger.debug("END ROW")
  498:         self.rowcnt += 1
  499:         # process collected row data
  500:         update=False
  501:         id_val=''
  502:         # synchronize by id_field
  503:         if self.id_field:
  504:             if self.id_type == 'integer':
  505:                 try:
  506:                     id_val = int(self.xml_data[self.xml_id])
  507:                 except:
  508:                     pass
  509:             else:
  510:                 id_val = self.xml_data[self.xml_id]
  511: 
  512:             if not id_val:
  513:                 # abort update
  514:                 self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt)
  515:                 return
  516:                 
  517:             if id_val in self.dbIDs:
  518:                 self.dbIDs[id_val] += 1
  519:                 update=True
  520: 
  521:         # collect all values
  522:         args = []
  523:         for fn in self.xml_update_list:
  524:             # do not update id_field
  525:             if update and fn == self.xml_id:
  526:                 continue
  527:             
  528:             f = self.xml_field_map[fn]
  529:             val = self.xml_data.get(fn,None)
  530:             type = self.sql_fields[f.getName()].getType()
  531:             if type == "date" and len(val.strip()) == 0: 
  532:                 # empty date field
  533:                 val = None
  534:                 
  535:             elif type == "integer" and len(val) == 0: 
  536:                 # empty int field
  537:                 val = None
  538:                 
  539:             args.append(val)
  540:                     
  541:         if update:
  542:             # update existing row (by id_field)
  543:             if self.read_before_update:
  544:                 # read data
  545:                 if self.debug_data:
  546:                     self.logger.debug("update check: %s = %s"%(id_val, args))
  547:                 oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db)
  548:                 #i = 0
  549:                 #for v in oldrow[0]:
  550:                 #    logging.debug("v: %s = %s (%s)"%(v,args[i],v==args[i]))
  551:                 #    i += 1
  552:                 if tuple(oldrow[0]) != tuple(args):
  553:                     # data has changed -- update
  554:                     if self.debug_data:
  555:                         self.logger.debug("really update: %s = %s"%(id_val, args))
  556:                     args.append(id_val) # last arg is id
  557:                     SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)
  558:                     
  559:             else:
  560:                 # always update
  561:                 if self.debug_data:
  562:                     self.logger.debug("update: %s = %s"%(id_val, args))
  563:                 args.append(id_val) # last arg is id
  564:                 SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)
  565: 
  566:         else:
  567:             # create new row
  568:             if self.debug_data:
  569:                 self.logger.debug("insert: %s"%args)
  570:             SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
  571: 
  572:         #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
  573:         if (self.rowcnt % 100) == 0:
  574:             self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
  575:             self.dbCon.commit()
  576:             
  577:         return
  578: 
  579:     def handle_end_col(self):
  580:       
  581:         
  582:         self.colIdx += 1
  583:         return
  584: 
  585:     
  586:     def handle_data_tag(self, attrs):
  587:         #print "START DATA"
  588:         self.content = u''
  589: #        yield None
  590: #        # gather child elements
  591: #        while not self.event == end_condition:
  592: #            if self.event[0] == saxtools.CHARACTER_DATA:
  593: #                content += self.params
  594: #            yield None
  595: #        #Element closed.  Wrap up
  596: #        fn = self.xml_field_names[self.colIdx]
  597: #        self.xml_data[fn] = content
  598:         return
  599: 
  600: 
  601: def importFMPXML(options):
  602:     """import FileMaker XML file (FMPXMLRESULT format) into the table.     
  603:         @param options: dict of options
  604:         @param options.dsn: database connection string
  605:         @param options.table: name of the table the xml shall be imported into
  606:         @param options.filename: xmlfile filename
  607:         @param options.update_fields: (optional) list of fields to update; default is to create all fields
  608:         @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
  609:         @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
  610:         @param options.lc_names: (optional) lower case and clean up field names from XML
  611:         @param options.keep_fields: (optional) don't add fields to SQL database
  612:         @param options.ascii_db: (optional) assume ascii encoding in db
  613:         @param options.replace_table: (optional) delete and re-insert data
  614:         @param options.backup_table: (optional) create backup of old table
  615:         """
  616:         
  617:     if getattr(options,'update_fields',None):
  618:         uf = {}
  619:         for f in options.update_fields.split(','):
  620:             if f.find(':') > 0:
  621:                 (n,t) = f.split(':')
  622:             else:
  623:                 n = f
  624:                 t = None
  625:             uf[n] = TableColumn(n,t)
  626:             
  627:         options.update_fields = uf
  628:     
  629:     if getattr(options,'id_field',None) and getattr(options,'replace_table',None):
  630:         logging.error("ABORT: sorry, you can't do both sync (id_field) and replace")
  631:         return
  632:         
  633:     parser = sax.make_parser()
  634:     #The "consumer" is our own handler
  635:     consumer = xml_handler(options)
  636:     #Initialize Tenorsax with handler
  637:     #handler = saxtools.tenorsax(consumer)
  638:     #Resulting tenorsax instance is the SAX handler 
  639:     parser.setContentHandler(consumer)
  640:     #parser.setFeature(sax.handler.feature_namespaces, 1)
  641:     parser.parse(options.filename)  
  642:     
  643: 
  644: if __name__ == "__main__":
  645:     from optparse import OptionParser
  646: 
  647:     opars = OptionParser()
  648:     opars.add_option("-f", "--file", 
  649:                      dest="filename",
  650:                      help="FMPXML file name", metavar="FILE")
  651:     opars.add_option("-c", "--dsn", 
  652:                      dest="dsn", 
  653:                      help="database connection string")
  654:     opars.add_option("-t", "--table", 
  655:                      dest="table", 
  656:                      help="database table name")
  657:     opars.add_option("--fields", default=None, 
  658:                      dest="update_fields", 
  659:                      help="list of fields to update (comma separated, sql-names)", metavar="LIST")
  660:     opars.add_option("--id-field", default=None, 
  661:                      dest="id_field", 
  662:                      help="name of id field for synchronisation (only appends data otherwise, sql-name)", metavar="NAME")
  663:     opars.add_option("--sync", "--sync-mode", default=False, action="store_true", 
  664:                      dest="sync_mode", 
  665:                      help="do full sync based on id field (remove unmatched fields from db)")
  666:     opars.add_option("--lc-names", default=False, action="store_true", 
  667:                      dest="lc_names", 
  668:                      help="clean and lower case field names from XML")
  669:     opars.add_option("--keep-fields", default=False, action="store_true", 
  670:                      dest="keep_fields", 
  671:                      help="don't add fields from XML to SQL table")
  672:     opars.add_option("--ascii-db", default=False, action="store_true", 
  673:                      dest="ascii_db", 
  674:                      help="the SQL database stores ASCII instead of unicode")
  675:     opars.add_option("--replace", default=False, action="store_true", 
  676:                      dest="replace_table", 
  677:                      help="replace table i.e. delete and re-insert data")
  678:     opars.add_option("--backup", default=False, action="store_true", 
  679:                      dest="backup_table", 
  680:                      help="create backup of old table")
  681:     opars.add_option("--read-before-update", default=False, action="store_true", 
  682:                      dest="read_before_update", 
  683:                      help="read all data to check if it really changed")
  684:     opars.add_option("-d", "--debug", default=False, action="store_true", 
  685:                      dest="debug", 
  686:                      help="debug mode (more output)")
  687:     opars.add_option("--debug-data", default=False, action="store_true", 
  688:                      dest="debug_data", 
  689:                      help="debug mode for data (even more output)")
  690:     
  691:     (options, args) = opars.parse_args()
  692:     
  693:     if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
  694:         print "importFMPXML "+version_string
  695:         opars.print_help()
  696:         sys.exit(1)
  697:     
  698:     if options.debug:
  699:         loglevel = logging.DEBUG
  700:     else:
  701:         loglevel = logging.INFO
  702:     
  703:     logging.basicConfig(level=loglevel, 
  704:                         format='%(asctime)s %(levelname)s %(message)s',
  705:                         datefmt='%H:%M:%S')
  706: 
  707:     importFMPXML(options)
  708: 
  709: 
  710:     
  711: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>