File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Fri Feb 11 19:08:49 2011 UTC (13 years, 4 months ago) by casties
Branches: MAIN
CVS tags: HEAD
ZSQLInlineSearch combines multiple parameters with the same name now with " ". works nicely with input type=checkbox and -op=all.

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: import logging
    6: import sys
    7: import types
    8: import time
    9: 
   10: from importASCII import ASCII_handler
   11: from importASCII import importASCII
   12: from importASCII import SimpleSearch
   13: 
   14: version_string = "V0.2.3 ROC 25.3.2010"
   15: 
   16: # mapping img_type to SQL field names
   17: imgTypeMap = {
   18:     'p':'img_p',
   19:     'd':'img_d',
   20:     's':'img_s',
   21:     'e':'img_e',
   22:     'ed':'img_ed',
   23:     'l':'img_l',
   24:     'ld':'img_ld',
   25:     'ls':'img_ls'}
   26: # list of fields in constant order (for SQL queries)
   27: imgTypes = imgTypeMap.keys()
   28: 
   29: upd_fields = "fn,,img_type,%s"
   30: #id_field = "id_text"
   31: img_type_field = "img_type"
   32: 
   33: def setup(self):
   34:     """specialized setup version"""
   35:     ASCII_handler._setup(self)
   36:     # create special updQuery for img_type fields
   37:     setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
   38:     self.updQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field)
   39:     # create special delQuery for img_type fields
   40:     delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
   41:     self.delQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,delStr,self.id_field)
   42:     # text file field for img_type
   43:     self.xml_img_type = self.sql_field_map[img_type_field]
   44:     # dict of all img fields
   45:     self.img_data = {}
   46: 
   47: 
   48: def handle_line(self, line):
   49:     """process single line of text data"""
   50:     self.logger.debug("START ROW")
   51: 
   52:     content = line.split()
   53:     self.xml_data = content
   54:     self.rowcnt += 1
   55:     # process collected row data
   56:     update=False
   57: 
   58:     # synchronize by id_field
   59:     id_val = self.xml_data[self.xml_id]
   60:     #logging.debug("id_val=%s xml_id=%s"%(id_val,self.xml_id))
   61:     
   62:     # get img_type
   63:     img_type_val = self.xml_data[self.xml_img_type]
   64: 
   65:     # collect all values
   66:     # filename is first value
   67:     fn = self.xml_data[self.sql_field_map['fn']] 
   68:     if fn.startswith('tn_'):
   69:         # ignore thumbnails
   70:         self.logger.debug("END ROW")
   71:         return
   72: 
   73:     # is the entry new?
   74:     if id_val in self.dbIDs:
   75:         self.dbIDs[id_val] += 1
   76:         update=True
   77: 
   78:     if update:
   79:         # update existing row (by id_field)
   80:         if id_val in self.img_data:
   81:             self.img_data[id_val][img_type_val] = fn
   82:         else:
   83:             self.img_data[id_val] = {img_type_val:fn}
   84: 
   85:         self.logger.debug("update: %s = %s"%(id_val, args))
   86: 
   87:     elif not self.update_mode:
   88:         # create new row (doesn't work)
   89:         self.logger.debug("insert: %s"%args)
   90:         #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
   91: 
   92:     #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
   93:     if (self.rowcnt % 100) == 0:
   94:         self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
   95: 
   96:     self.logger.debug("END ROW")
   97:     return
   98: 
   99: 
  100: def parse(self, filename):
  101:     """open file and read data"""
  102:     self.logger.info("reading data...")
  103:     self.rowcnt = 0
  104: 
  105:     fh = open(filename,"r")
  106:     self.logger.debug("BEGIN RESULTSET")
  107:     # parse line-wise
  108:     for line in fh:
  109:         self.handle_line(line)
  110: 
  111:     # done. Wrap up
  112:     self.logger.debug("END RESULTSET")
  113: 
  114:     self.logger.info("importing rows in db...")
  115:     i = 0
  116:     for id in self.dbIDs.keys():
  117:         # find all fields
  118:         if self.dbIDs[id] == 0:
  119:             # unmatched entry
  120:             self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
  121:             SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
  122: 
  123:         elif self.dbIDs[id] > 0:
  124:             # assemble query
  125:             imgd = self.img_data.get(id, None)
  126:             if imgd is None:
  127:                 self.logger.error("No data for id %s while marked for update!"%id)
  128:                 continue
  129:             
  130:             args = [ imgd.get(f,None) for f in imgTypes ]
  131:             args.append(id)
  132:             # update
  133:             self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
  134:             SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
  135: 
  136:         i += 1
  137:         if i % 100 == 0:
  138:             self.logger.info(" import: %d (%s)"%(i,id))
  139:             self.dbCon.commit()
  140: 
  141:     self.dbCon.commit()
  142:     # reinstate backup tables
  143:     if self.backup_table:
  144:         backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
  145:         self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
  146:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
  147:         self.db.execute(qstr)
  148:         self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
  149:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
  150:         self.db.execute(qstr)
  151:         self.dbCon.commit()
  152: 
  153:     return
  154: 
  155: # monkey patch ASCII_handler
  156: ASCII_handler._handle_line = ASCII_handler.handle_line
  157: ASCII_handler.handle_line = handle_line
  158: ASCII_handler._parse = ASCII_handler.parse
  159: ASCII_handler.parse = parse
  160: ASCII_handler._setup = ASCII_handler.setup
  161: ASCII_handler.setup = setup
  162: 
  163: 
  164: if __name__ == "__main__":
  165:     from optparse import OptionParser
  166: 
  167:     opars = OptionParser()
  168:     opars.add_option("-f", "--file", 
  169:                      dest="filename",
  170:                      help="text file name", metavar="FILE")
  171:     opars.add_option("-c", "--dsn", 
  172:                      dest="dsn", 
  173:                      help="database connection string")
  174:     opars.add_option("-t", "--table", 
  175:                      dest="table", 
  176:                      help="database table name")
  177:     opars.add_option("--id-field", default="id_text", 
  178:                      dest="id_field", 
  179:                      help="name of id field for synchronisation", metavar="NAME")
  180:     opars.add_option("--ascii-db", default=False, action="store_true", 
  181:                      dest="ascii_db", 
  182:                      help="the SQL database stores ASCII instead of unicode")
  183:     opars.add_option("--replace", default=False, action="store_true", 
  184:                      dest="replace_table", 
  185:                      help="replace table i.e. delete and re-insert data")
  186:     opars.add_option("--backup", default=False, action="store_true", 
  187:                      dest="backup_table", 
  188:                      help="create backup of old table (breaks indices)")
  189:     opars.add_option("-d", "--debug", default=False, action="store_true", 
  190:                      dest="debug", 
  191:                      help="debug mode (more output)")
  192:     
  193:     (options, args) = opars.parse_args()
  194:     
  195:     if (options.filename is None 
  196:         or options.dsn is None 
  197:         or options.table is None):
  198:         # not enough parameters
  199:         print "importCDLIimglist "+version_string
  200:         opars.print_help()
  201:         sys.exit(1)
  202:     
  203:     if options.debug:
  204:         loglevel = logging.DEBUG
  205:     else:
  206:         loglevel = logging.INFO
  207:     
  208:     logging.basicConfig(level=loglevel, 
  209:                         format='%(asctime)s %(levelname)s %(message)s',
  210:                         datefmt='%H:%M:%S')
  211: 
  212:     # fixed settings for CDLI imglist
  213:     options.update_fields = upd_fields%options.id_field
  214:     #options.id_field = id_field
  215:     options.update_mode = True
  216: 
  217:     importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>