File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Wed Jan 21 11:12:36 2009 UTC (15 years, 4 months ago) by casties
Branches: MAIN
CVS tags: HEAD
fixed bug when image had only thumbnail tn_xxx

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: import logging
    6: import sys
    7: import types
    8: import time
    9: 
   10: from importASCII import ASCII_handler
   11: from importASCII import importASCII
   12: from importASCII import SimpleSearch
   13: 
   14: version_string = "V0.2.1 ROC 21.1.2009"
   15: 
   16: # mapping img_type to SQL field names
   17: imgTypeMap = {
   18:     'p':'img_p',
   19:     'd':'img_d',
   20:     'e':'img_e',
   21:     'ed':'img_ed',
   22:     'l':'img_l',
   23:     'ld':'img_ld',
   24:     'ls':'img_ls'}
   25: # list of fields in constant order (for SQL queries)
   26: imgTypes = imgTypeMap.keys()
   27: 
   28: upd_fields = "fn,,img_type,id_text"
   29: id_field = "id_text"
   30: img_type_field = "img_type"
   31: 
   32: def setup(self):
   33:     """specialized setup version"""
   34:     ASCII_handler._setup(self)
   35:     # create special updQuery for img_type fields
   36:     setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
   37:     self.updQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,setStr)
   38:     # create special delQuery for img_type fields
   39:     delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
   40:     self.delQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,delStr)
   41:     # text file field for img_type
   42:     self.xml_img_type = self.sql_field_map[img_type_field]
   43:     # dict of all img fields
   44:     self.img_data = {}
   45: 
   46: 
   47: def handle_line(self, line):
   48:     """process single line of text data"""
   49:     self.logger.debug("START ROW")
   50: 
   51:     content = line.split()
   52:     self.xml_data = content
   53:     self.rowcnt += 1
   54:     # process collected row data
   55:     update=False
   56: 
   57:     # synchronize by id_field
   58:     id_val = self.xml_data[self.xml_id]
   59:     
   60:     # get img_type
   61:     img_type_val = self.xml_data[self.xml_img_type]
   62: 
   63:     # collect all values
   64:     # filename is first value
   65:     fn = self.xml_data[self.sql_field_map['fn']] 
   66:     if fn.startswith('tn_'):
   67:         # ignore thumbnails
   68:         self.logger.debug("END ROW")
   69:         return
   70: 
   71:     # is the entry new?
   72:     if id_val in self.dbIDs:
   73:         self.dbIDs[id_val] += 1
   74:         update=True
   75: 
   76:     if update:
   77:         # update existing row (by id_field)
   78:         if id_val in self.img_data:
   79:             self.img_data[id_val][img_type_val] = fn
   80:         else:
   81:             self.img_data[id_val] = {img_type_val:fn}
   82: 
   83:         self.logger.debug("update: %s = %s"%(id_val, args))
   84: 
   85:     elif not self.update_mode:
   86:         # create new row (doesn't work)
   87:         self.logger.debug("insert: %s"%args)
   88:         #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
   89: 
   90:     #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
   91:     if (self.rowcnt % 100) == 0:
   92:         self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
   93: 
   94:     self.logger.debug("END ROW")
   95:     return
   96: 
   97: 
   98: def parse(self, filename):
   99:     """open file and read data"""
  100:     self.logger.info("reading data...")
  101:     self.rowcnt = 0
  102: 
  103:     fh = open(filename,"r")
  104:     self.logger.debug("BEGIN RESULTSET")
  105:     # parse line-wise
  106:     for line in fh:
  107:         self.handle_line(line)
  108: 
  109:     # done. Wrap up
  110:     self.logger.debug("END RESULTSET")
  111: 
  112:     self.logger.info("importing rows in db...")
  113:     i = 0
  114:     for id in self.dbIDs.keys():
  115:         # find all fields
  116:         if self.dbIDs[id] == 0:
  117:             # unmatched entry
  118:             #self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
  119:             SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
  120: 
  121:         elif self.dbIDs[id] > 0:
  122:             # assemble query
  123:             imgd = self.img_data.get(id, None)
  124:             if imgd is None:
  125:                 self.logger.error("No data for id %s while marked for update!"%id)
  126:                 continue
  127:             
  128:             args = [ imgd.get(f,None) for f in imgTypes ]
  129:             args.append(id)
  130:             # update
  131:             #self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
  132:             SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
  133: 
  134:         i += 1
  135:         if i % 100 == 0:
  136:             self.logger.info(" import: %d (%s)"%(i,id))
  137:             self.dbCon.commit()
  138: 
  139:     self.dbCon.commit()
  140:     # reinstate backup tables
  141:     if self.backup_table:
  142:         backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
  143:         self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
  144:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
  145:         self.db.execute(qstr)
  146:         self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
  147:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
  148:         self.db.execute(qstr)
  149:         self.dbCon.commit()
  150: 
  151:     return
  152: 
  153: # monkey patch ASCII_handler
  154: ASCII_handler._handle_line = ASCII_handler.handle_line
  155: ASCII_handler.handle_line = handle_line
  156: ASCII_handler._parse = ASCII_handler.parse
  157: ASCII_handler.parse = parse
  158: ASCII_handler._setup = ASCII_handler.setup
  159: ASCII_handler.setup = setup
  160: 
  161: 
  162: if __name__ == "__main__":
  163:     from optparse import OptionParser
  164: 
  165:     opars = OptionParser()
  166:     opars.add_option("-f", "--file", 
  167:                      dest="filename",
  168:                      help="text file name", metavar="FILE")
  169:     opars.add_option("-c", "--dsn", 
  170:                      dest="dsn", 
  171:                      help="database connection string")
  172:     opars.add_option("-t", "--table", 
  173:                      dest="table", 
  174:                      help="database table name")
  175:     opars.add_option("--ascii-db", default=False, action="store_true", 
  176:                      dest="ascii_db", 
  177:                      help="the SQL database stores ASCII instead of unicode")
  178:     opars.add_option("--replace", default=False, action="store_true", 
  179:                      dest="replace_table", 
  180:                      help="replace table i.e. delete and re-insert data")
  181:     opars.add_option("--backup", default=False, action="store_true", 
  182:                      dest="backup_table", 
  183:                      help="create backup of old table (breaks indices)")
  184:     opars.add_option("-d", "--debug", default=False, action="store_true", 
  185:                      dest="debug", 
  186:                      help="debug mode (more output)")
  187:     
  188:     (options, args) = opars.parse_args()
  189:     
  190:     if (options.filename is None 
  191:         or options.dsn is None 
  192:         or options.table is None):
  193:         # not enough parameters
  194:         print "importCDLIimglist "+version_string
  195:         opars.print_help()
  196:         sys.exit(1)
  197:     
  198:     if options.debug:
  199:         loglevel = logging.DEBUG
  200:     else:
  201:         loglevel = logging.INFO
  202:     
  203:     logging.basicConfig(level=loglevel, 
  204:                         format='%(asctime)s %(levelname)s %(message)s',
  205:                         datefmt='%H:%M:%S')
  206: 
  207:     # fixed settings for CDLI imglist
  208:     options.update_fields = upd_fields
  209:     options.id_field = id_field
  210:     options.update_mode = True
  211: 
  212:     importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>