File:  [Repository] / ZSQLExtend / importFMPXML.py
Revision 1.34
Wed Feb 15 08:08:42 2012 UTC (12 years, 4 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
importFMPXML.py without amara

#!/usr/local/bin/python
#

import string
import logging
import sys
import types
import time
import re

from xml import sax
from xml.sax.handler import ContentHandler
#from amara import saxtools

try:
    import psycopg2 as psycopg
    import psycopg2.extensions
    # switch to unicode
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
    psyco = 2
except ImportError:
    # psycopg2 not available -- fall back to the old psycopg driver
    import psycopg
    psyco = 1

fm_ns = 'http://www.filemaker.com/fmpxmlresult'

version_string = "V0.6.7 ROC 21.6.2011"

def unicodify(text, withNone=False):
    """decode str (utf-8 or latin-1 representation) into unicode object"""
    if withNone and text is None:
        return None
    if not text:
        return u""
    if isinstance(text, str):
        try:
            return text.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8 -- assume latin-1
            return text.decode('latin-1')
    else:
        return text

def utf8ify(text, withNone=False):
    """encode unicode object or string into byte string in utf-8 representation"""
    if withNone and text is None:
        return None
    if not text:
        return ""
    if isinstance(text, unicode):
        return text.encode('utf-8')
    else:
        return text

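# Illustration (added commentary, not in the original module): the two helpers
# above round-trip between byte strings and unicode objects, e.g.
#
#   unicodify('M\xc3\xbcller')  -> u'M\xfcller'   (valid UTF-8 input)
#   unicodify('M\xfcller')      -> u'M\xfcller'   (latin-1 fallback)
#   utf8ify(u'M\xfcller')       -> 'M\xc3\xbcller'
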
def getTextFromNode(nodename):
    """get the cdata content of a node"""
    if nodename is None:
        return ""
    nodelist=nodename.childNodes
    rc = ""
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc = rc + node.data
    return rc

def sql_quote(v):
    # quote dictionary
    quote_dict = {"\'": "''", "\\": "\\\\"}
    for dkey in quote_dict.keys():
        if string.find(v, dkey) >= 0:
            v=string.join(string.split(v,dkey),quote_dict[dkey])
    return "'%s'"%v

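# Illustration (added, example value is hypothetical): sql_quote doubles single
# quotes and escapes backslashes before wrapping the value in quotes, e.g.
#
#   sql_quote("it's a test")  -> "'it''s a test'"
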
def sqlName(s, lc=True, more=''):
    """returns restricted ASCII-only version of string"""
    if s is None:
        return ""

    # remove '
    s = s.replace("'","")
    # all else -> "_"
    s = re.sub('[^A-Za-z0-9_'+more+']','_',s)
    if lc:
        return s.lower()

    return s

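# Illustration (added, example value is hypothetical): sqlName reduces a
# FileMaker field name to a restricted SQL identifier, e.g.
#
#   sqlName("Field Name (2)")  -> "field_name__2_"
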
def SimpleSearch(curs,query, args=None, ascii=False):
    """execute sql query and return data"""
    #logger.debug("executing: "+query)
    if ascii:
        # encode all in UTF-8
        query = utf8ify(query)
        if args is not None:
            encargs = []
            for a in args:
                encargs.append(utf8ify(a, withNone=True))

            args = encargs

    curs.execute(query, args)
    #logger.debug("sql done")
    try:
        return curs.fetchall()
    except:
        return None

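# Illustration (added; cursor, table and value are hypothetical): SimpleSearch
# hands the args sequence to cursor.execute for parameter substitution, e.g.
#
#   rows = SimpleSearch(cursor, 'SELECT * FROM mytable WHERE "id" = %s', ['42'])
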
class TableColumn:
    """simple type for storing sql column name and type"""

    def __init__(self, name, type=None):
        #print "new tablecolumn(%s,%s)"%(name, type)
        self.name = name
        self.type = type

    def getName(self):
        return self.name

    def getType(self):
        if self.type is not None:
            return self.type
        else:
            return "text"

    def __str__(self):
        return self.name


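# The handler below expects the FMPXMLRESULT export format. A rough sketch of
# the element structure it reacts to (element names taken from the handler
# methods; attributes abbreviated, field names hypothetical):
#
#   <FMPXMLRESULT xmlns="http://www.filemaker.com/fmpxmlresult">
#     <METADATA>
#       <FIELD NAME="id" .../>
#       <FIELD NAME="title" .../>
#     </METADATA>
#     <RESULTSET ...>
#       <ROW ...>
#         <COL><DATA>1</DATA></COL>
#         <COL><DATA>some text</DATA></COL>
#       </ROW>
#     </RESULTSET>
#   </FMPXMLRESULT>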
class xml_handler(ContentHandler):
    def __init__(self,options):
        """SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
        @param options: object (e.g. optparse Values) with the following attributes
        @param options.dsn: database connection string
        @param options.table: name of the table the xml shall be imported into
        @param options.filename: xmlfile filename
        @param options.update_fields: (optional) list of fields to update; default is to create all fields
        @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
        @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
        @param options.lc_names: (optional) lower case and clean up field names from XML
        @param options.keep_fields: (optional) don't add fields to SQL database
        @param options.ascii_db: (optional) assume ascii encoding in db
        @param options.replace_table: (optional) delete and re-insert data
        @param options.backup_table: (optional) create backup of old table (breaks indices)
        @param options.read_before_update: (optional) read and compare rows before updating
        @param options.debug_data: (optional) log data values while importing (even more output)
        @param options.use_logger_instance: (optional) use this instance of a logger
        """

        # set up logger
        if hasattr(options, 'use_logger_instance'):
            self.logger = options.use_logger_instance
        else:
            self.logger = logging.getLogger('db.import.fmpxml')


        # set up parser
        self.result={}
        self.event = None

#        self.top_dispatcher = { 
#            (saxtools.START_ELEMENT, fm_ns, u'METADATA'): 
#            self.handle_meta_fields,
#            (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): 
#            self.handle_data_fields,
#            }

        # connect database
        self.dbCon = psycopg.connect(options.dsn)
        logging.debug("DB encoding: %s"%getattr(self.dbCon, 'encoding', 'UNKNOWN'))
        self.db = self.dbCon.cursor()
        assert self.db, "AIIEE no db cursor for %s!!"%options.dsn

        self.table = getattr(options,"table",None)
        self.update_fields = getattr(options,"update_fields",None)
        self.id_field = getattr(options,"id_field",None)
        self.sync_mode = getattr(options,"sync_mode",None)
        self.lc_names = getattr(options,"lc_names",None)
        self.keep_fields = getattr(options,"keep_fields",None)
        self.ascii_db = getattr(options,"ascii_db",None)
        self.replace_table = getattr(options,"replace_table",None)
        self.backup_table = getattr(options,"backup_table",None)
        self.read_before_update = getattr(options,"read_before_update",None)
        self.debug_data = getattr(options,"debug_data",None)

        self.logger.debug("dsn: "+repr(getattr(options,"dsn",None)))
        self.logger.debug("table: "+repr(self.table))
        self.logger.debug("update_fields: "+repr(self.update_fields))
        self.logger.debug("id_field: "+repr(self.id_field))
        self.logger.debug("sync_mode: "+repr(self.sync_mode))
        self.logger.debug("lc_names: "+repr(self.lc_names))
        self.logger.debug("keep_fields: "+repr(self.keep_fields))
        self.logger.debug("ascii_db: "+repr(self.ascii_db))
        self.logger.debug("replace_table: "+repr(self.replace_table))
        self.logger.debug("backup_table: "+repr(self.backup_table))
        self.logger.debug("read_before_update: "+repr(self.read_before_update))
        self.logger.debug("debug_data: "+repr(self.debug_data))

        self.dbIDs = {}
        self.rowcnt = 0

        self.currentName = None

        if self.id_field is not None:
            # prepare a list of ids for sync mode
            qstr="select %s from %s"%(self.id_field,self.table)
            for id in SimpleSearch(self.db, qstr):
                # value 0: not updated
                self.dbIDs[id[0]] = 0
                self.rowcnt += 1

            self.logger.info("%d entries in DB to sync"%self.rowcnt)

        # names of fields in XML file
        self.xml_field_names = []
        # map XML field names to SQL field names
        self.xml_field_map = {}
        # and vice versa
        self.sql_field_map = {}

        return

    def startElement(self, name, attrs):
        logging.debug(name)
        if (name.lower() == "field") :
            self.handle_meta_field(attrs)
        if (name.lower() == "row") :
            logging.debug("handleROW")
            self.handle_row(attrs)
        if (name.lower() == "resultset"):
            self.handle_data_fields(attrs)

        if (name.lower() == "data"):
            self.handle_data_tag(attrs)

    def endElement(self,name):
        if (name.lower() == "resultset") :
            self.currentTag=""
            self.handle_end_data_fields()
        if (name.lower() == "field") :
            self.handle_end_meta_field()
        if (name.lower() == "metadata"):
            self.handle_end_meta_fields()
        if (name.lower() == "row") :
            logging.debug("handleROW")
            self.handle_end_row()

        if (name.lower() == "col") :
            self.handle_end_col()

    def characters(self,content):

        try:
            fn = self.xml_field_names[self.colIdx]
            self.xml_data[fn] = content
        except:
            logging.debug(content)
            pass

#        if self.currentName is not None:
#            logging.debug(self.currentName+"    "+content)
#            self.currentRow[self.currentName]=content;
#
    def handle_end_meta_fields(self):
#        dispatcher = {
#            (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
#            self.handle_meta_field,
#            }
        #First round through the generator corresponds to the
        #start element event
#        self.logger.info("reading metadata...")
#        if self.debug_data:
#            self.logger.debug("START METADATA")
#        #yield None

        #delegate is a generator that handles all the events "within"
        #this element
#        delegate = None
#        while not self.event == end_condition:
#            delegate = saxtools.tenorsax.event_loop_body(
#                dispatcher, delegate, self.event)
#            yield None
#
#        #Element closed. Wrap up
        if self.debug_data:
            self.logger.debug("END METADATA")

        # rename table for backup
        if self.backup_table:
            self.orig_table = self.table
            self.tmp_table = self.table + "_tmp"
            backup_name = "%s_%s"%(self.table,time.strftime('%Y_%m_%d_%H_%M_%S'))

            # remove old temp table
            qstr = "DROP TABLE %s"%(self.tmp_table)
            try:
                self.db.execute(qstr)
            except:
                pass

            self.dbCon.commit()

            if self.id_field:
                # sync mode -- copy backup table, update current table 
                self.logger.info("copy table %s to %s"%(self.table,backup_name))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(backup_name,self.table)

            else:
                # replace mode -- create empty tmp table, insert into tmp table
                self.table = self.tmp_table
                self.logger.info("create empty table %s"%(self.table))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table)

            self.db.execute(qstr)
            self.dbCon.commit()

        # delete data from table for replace
        if self.replace_table:
            self.logger.info("delete data from table %s"%(self.table))
            qstr = "TRUNCATE TABLE %s"%(self.table)
            self.db.execute(qstr)
            self.dbCon.commit()

        # try to match date style with XML
        self.db.execute("set datestyle to 'german'")

        #self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names))
        # get list of fields and types of db table
        qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'"
        self.sql_fields={}
        for f in SimpleSearch(self.db, qstr%self.table):
            fn = f[0]
            ft = f[1]
            #print "SQL fields: %s (%s)"%(n,t)
            self.sql_fields[fn] = TableColumn(fn,ft)

        # translate id_field (SQL-name) to XML-name
        self.xml_id = self.sql_field_map.get(self.id_field, None)
        # get type of id_field
        if self.id_field:
            self.id_type = self.sql_fields[self.id_field].getType()
        else:
            self.id_type = None

        # check fields to update
        if self.update_fields is None:
            if self.keep_fields:
                # update all existing fields from sql (when they are in the xml file)
                self.update_fields = {}
                for f in self.sql_fields.keys():
                    if self.sql_field_map.has_key(f):
                        xf = self.sql_field_map[f]
                        self.update_fields[f] = self.xml_field_map[xf]

            else:
                # update all fields
                if self.lc_names:
                    # create dict with sql names
                    self.update_fields = {}
                    for f in self.xml_field_map.values():
                        self.update_fields[f.getName()] = f

                else:
                    self.update_fields = self.xml_field_map

        # and translate to list of xml fields
        if self.lc_names:
            self.xml_update_list = [self.sql_field_map[x] for x in self.update_fields]
        else:
            self.xml_update_list = self.update_fields.keys()

        if not self.keep_fields:
            # adjust db table to fields in XML and update_fields
            for f in self.xml_field_map.values():
                self.logger.debug("sync-fieldname: %s"%f.getName())
                sf = self.sql_fields.get(f.getName(), None)
                uf = self.update_fields.get(f.getName(), None)
                if sf is not None:
                    # name in db -- check type
                    if f.getType() != sf.getType():
                        self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType()))
                elif uf is not None:
                    # add field to table
                    fn = uf.getName()
                    ft = uf.getType()
                    qstr="alter table %s add \"%s\" %s"%(self.table,fn,ft)
                    self.logger.info("db add field:"+qstr)

                    if self.ascii_db and type(qstr)==types.UnicodeType:
                        qstr=qstr.encode('utf-8')

                    self.db.execute(qstr)
                    self.dbCon.commit()
                    # add field to field list
                    self.sql_fields[fn] = TableColumn(fn, ft)

        # prepare sql statements for update (do not update id_field)
        setStr=string.join(["\"%s\" = %%s"%self.xml_field_map[f] for f in self.xml_update_list if f != self.xml_id], ', ')
        self.updQuery="UPDATE %s SET %s WHERE \"%s\" = %%s"%(self.table,setStr,self.id_field)
        # and select (for update check)
        selStr=string.join([self.xml_field_map[f].getName() for f in self.xml_update_list if f != self.xml_id], ', ')
        self.selQuery="SELECT %s FROM %s WHERE \"%s\" = %%s"%(selStr,self.table,self.id_field)
        # and insert
        fields=string.join(["\"%s\""%self.xml_field_map[x].getName() for x in self.xml_update_list], ',')
        values=string.join(['%s' for f in self.xml_update_list], ',')
        self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values)
        self.logger.debug("update-query: "+self.updQuery)
        self.logger.debug("sel-query: "+self.selQuery)
        self.logger.debug("add-query: "+self.addQuery)
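        # Illustration (added; table and field names are hypothetical): for a
        # table "mytable" with id_field "id" and fields "id" and "title", the
        # prepared statements take roughly this shape:
        #
        #   updQuery: UPDATE mytable SET "title" = %s WHERE "id" = %s
        #   selQuery: SELECT title FROM mytable WHERE "id" = %s
        #   addQuery: INSERT INTO mytable ("id","title") VALUES (%s,%s)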
        return

    def handle_meta_field(self, attrs):
        self.currentName = attrs.get('NAME')
        #yield None
        return

    def handle_end_meta_field(self):
        #Element closed.  Wrap up
        name = self.currentName
        if self.lc_names:
            # clean name
            sqlname = sqlName(name)
        else:
            sqlname = name
        self.xml_field_names.append(name)
        # map to sql name and default text type
        self.xml_field_map[name] = TableColumn(sqlname, 'text')
        self.sql_field_map[sqlname] = name
        self.logger.debug("FIELD name: "+name)
        return

    def handle_data_fields(self, attrs):

        #First round through the generator corresponds to the
        #start element event
        self.logger.info("reading data...")
        if self.debug_data:
            self.logger.debug("START RESULTSET")
        self.rowcnt = 0
        return

    def handle_end_data_fields(self):
        #delegate is a generator that handles all the events "within"
        #this element

        #Element closed.  Wrap up
        if self.debug_data:
            self.logger.debug("END RESULTSET")
        self.dbCon.commit()

        if self.sync_mode:
            # delete unmatched entries in db
            if self.rowcnt > 0:
                self.logger.info("deleting unmatched rows from db")
                delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field)
                for id in self.dbIDs.keys():
                    # find all not-updated fields
                    if self.dbIDs[id] == 0:
                        self.logger.info(" delete: %s"%id)
                        SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db)

                    elif self.dbIDs[id] > 1:
                        self.logger.info(" sync: ID %s used more than once?"%id)

                self.dbCon.commit()

            else:
                # safety in case we had an empty file
                self.logger.warning("no rows read! not deleting unmatched rows!")

        # reinstate backup tables
        if self.backup_table and not self.id_field:
            backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
            self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
            self.db.execute(qstr)
            self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
            self.db.execute(qstr)
            self.dbCon.commit()

        self.logger.info("Done (%s rows)"%self.rowcnt)
        return

    def handle_row(self, attrs):

        if self.debug_data:
            self.logger.debug("START ROW")
        self.xml_data = {}
        self.colIdx = 0

        return

    def handle_end_row(self):
        #delegate is a generator that handles all the events "within"
        #this element

        #Element closed.  Wrap up
        if self.debug_data:
            self.logger.debug("END ROW")
        self.rowcnt += 1
        # process collected row data
        update=False
        id_val=''
        # synchronize by id_field
        if self.id_field:
            if self.id_type == 'integer':
                try:
                    id_val = int(self.xml_data[self.xml_id])
                except:
                    pass
            else:
                id_val = self.xml_data[self.xml_id]

            if not id_val:
                # abort update
                self.logger.error("ERROR: unable to sync! empty id in row %s"%self.rowcnt)
                return

            if id_val in self.dbIDs:
                self.dbIDs[id_val] += 1
                update=True

        # collect all values
        args = []
        for fn in self.xml_update_list:
            # do not update id_field
            if update and fn == self.xml_id:
                continue

            f = self.xml_field_map[fn]
            val = self.xml_data.get(fn,None)
            type = self.sql_fields[f.getName()].getType()
            if type == "date" and val is not None and len(val.strip()) == 0:
                # empty date field
                val = None

            elif type == "integer" and val is not None and len(val) == 0:
                # empty int field
                val = None

            args.append(val)

        if update:
            # update existing row (by id_field)
            if self.read_before_update:
                # read data
                if self.debug_data:
                    self.logger.debug("update check: %s = %s"%(id_val, args))
                oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db)
                #i = 0
                #for v in oldrow[0]:
                #    logging.debug("v: %s = %s (%s)"%(v,args[i],v==args[i]))
                #    i += 1
                if tuple(oldrow[0]) != tuple(args):
                    # data has changed -- update
                    if self.debug_data:
                        self.logger.debug("really update: %s = %s"%(id_val, args))
                    args.append(id_val) # last arg is id
                    SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)

            else:
                # always update
                if self.debug_data:
                    self.logger.debug("update: %s = %s"%(id_val, args))
                args.append(id_val) # last arg is id
                SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)

        else:
            # create new row
            if self.debug_data:
                self.logger.debug("insert: %s"%args)
            SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)

        #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
        if (self.rowcnt % 100) == 0:
            self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
            self.dbCon.commit()

        return

    def handle_end_col(self):

        self.colIdx += 1
        return


    def handle_data_tag(self, attrs):
        #print "START DATA"
        self.content = u''
#        yield None
#        # gather child elements
#        while not self.event == end_condition:
#            if self.event[0] == saxtools.CHARACTER_DATA:
#                content += self.params
#            yield None
#        #Element closed.  Wrap up
#        fn = self.xml_field_names[self.colIdx]
#        self.xml_data[fn] = content
        return


def importFMPXML(options):
    """import FileMaker XML file (FMPXMLRESULT format) into the table.
        @param options: options object (see xml_handler for the attribute list)
        @param options.dsn: database connection string
        @param options.table: name of the table the xml shall be imported into
        @param options.filename: xmlfile filename
        @param options.update_fields: (optional) list of fields to update; default is to create all fields
        @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
        @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
        @param options.lc_names: (optional) lower case and clean up field names from XML
        @param options.keep_fields: (optional) don't add fields to SQL database
        @param options.ascii_db: (optional) assume ascii encoding in db
        @param options.replace_table: (optional) delete and re-insert data
        @param options.backup_table: (optional) create backup of old table
        """

    if getattr(options,'update_fields',None):
        uf = {}
        for f in options.update_fields.split(','):
            if f.find(':') > 0:
                (n,t) = f.split(':')
            else:
                n = f
                t = None
            uf[n] = TableColumn(n,t)

        options.update_fields = uf

    if getattr(options,'id_field',None) and getattr(options,'replace_table',None):
        logging.error("ABORT: sorry, you can't do both sync (id_field) and replace")
        return

    parser = sax.make_parser()
    #The "consumer" is our own handler
    consumer = xml_handler(options)
    #Initialize Tenorsax with handler
    #handler = saxtools.tenorsax(consumer)
    #Resulting tenorsax instance is the SAX handler 
    parser.setContentHandler(consumer)
    #parser.setFeature(sax.handler.feature_namespaces, 1)
    parser.parse(options.filename)

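# Example (added sketch; all names and values are hypothetical): importFMPXML()
# can also be called programmatically with any object that carries the option
# attributes documented above, e.g. an optparse Values instance:
#
#   from optparse import Values
#   opts = Values({'dsn': 'dbname=mydb user=me', 'table': 'mytable',
#                  'filename': 'export.xml', 'lc_names': True,
#                  'id_field': 'id', 'sync_mode': True})
#   importFMPXML(opts)
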
if __name__ == "__main__":
    from optparse import OptionParser

    opars = OptionParser()
    opars.add_option("-f", "--file",
                     dest="filename",
                     help="FMPXML file name", metavar="FILE")
    opars.add_option("-c", "--dsn",
                     dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table",
                     dest="table",
                     help="database table name")
    opars.add_option("--fields", default=None,
                     dest="update_fields",
                     help="list of fields to update (comma separated, sql-names)", metavar="LIST")
    opars.add_option("--id-field", default=None,
                     dest="id_field",
                     help="name of id field for synchronisation (only appends data otherwise, sql-name)", metavar="NAME")
    opars.add_option("--sync", "--sync-mode", default=False, action="store_true",
                     dest="sync_mode",
                     help="do full sync based on id field (remove unmatched fields from db)")
    opars.add_option("--lc-names", default=False, action="store_true",
                     dest="lc_names",
                     help="clean and lower case field names from XML")
    opars.add_option("--keep-fields", default=False, action="store_true",
                     dest="keep_fields",
                     help="don't add fields from XML to SQL table")
    opars.add_option("--ascii-db", default=False, action="store_true",
                     dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true",
                     dest="replace_table",
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true",
                     dest="backup_table",
                     help="create backup of old table")
    opars.add_option("--read-before-update", default=False, action="store_true",
                     dest="read_before_update",
                     help="read all data to check if it really changed")
    opars.add_option("-d", "--debug", default=False, action="store_true",
                     dest="debug",
                     help="debug mode (more output)")
    opars.add_option("--debug-data", default=False, action="store_true",
                     dest="debug_data",
                     help="debug mode for data (even more output)")

    (options, args) = opars.parse_args()

    if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
        print "importFMPXML "+version_string
        opars.print_help()
        sys.exit(1)

    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    importFMPXML(options)

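# Example invocation (added sketch; dsn, table and file name are hypothetical):
#
#   python importFMPXML.py -c "dbname=mydb user=me" -t mytable -f export.xml \
#       --lc-names --id-field id --sync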
