File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Mon Dec 31 09:47:12 2007 UTC (16 years, 6 months ago) by casties
Branches: MAIN
CVS tags: HEAD
changed cdli image list importer

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: import logging
    6: import sys
    7: import types
    8: import time
    9: 
   10: from importASCII import ASCII_handler
   11: from importASCII import importASCII
   12: from importASCII import SimpleSearch
   13: 
   14: version_string = "V0.2 ROC 29.12.2007"
   15: 
   16: # mapping img_type to SQL field names
   17: imgTypeMap = {
   18:     'p':'img_p',
   19:     'd':'img_d',
   20:     'e':'img_e',
   21:     'ed':'img_ed',
   22:     'l':'img_l',
   23:     'ld':'img_ld',
   24:     'ls':'img_ls'}
   25: # list of fields in constant order (for SQL queries)
   26: imgTypes = imgTypeMap.keys()
   27: 
   28: upd_fields = "fn,,img_type,id_text"
   29: id_field = "id_text"
   30: img_type_field = "img_type"
   31: 
   32: def setup(self):
   33:     """specialized setup version"""
   34:     ASCII_handler._setup(self)
   35:     # create special updQuery for img_type fields
   36:     setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
   37:     self.updQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,setStr)
   38:     # create special delQuery for img_type fields
   39:     delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
   40:     self.delQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,delStr)
   41:     # text file field for img_type
   42:     self.xml_img_type = self.sql_field_map[img_type_field]
   43:     # dict of all img fields
   44:     self.img_data = {}
   45: 
   46: 
   47: def handle_line(self, line):
   48:     """process single line of text data"""
   49:     self.logger.debug("START ROW")
   50: 
   51:     content = line.split()
   52:     self.xml_data = content
   53:     self.rowcnt += 1
   54:     # process collected row data
   55:     update=False
   56:     # synchronize by id_field
   57:     id_val = self.xml_data[self.xml_id]
   58:     if id_val in self.dbIDs:
   59:         self.dbIDs[id_val] += 1
   60:         update=True
   61: 
   62:     # get img_type
   63:     img_type_val = self.xml_data[self.xml_img_type]
   64: 
   65:     # collect all values
   66:     # filename is first value
   67:     fn = self.xml_data[self.sql_field_map['fn']] 
   68:     if fn.startswith('tn_'):
   69:         # ignore thumbnails
   70:         self.logger.debug("END ROW")
   71:         return
   72: 
   73:     if update:
   74:         # update existing row (by id_field)
   75:         if id_val in self.img_data:
   76:             self.img_data[id_val][img_type_val] = fn
   77:         else:
   78:             self.img_data[id_val] = {img_type_val:fn}
   79: 
   80:         self.logger.debug("update: %s = %s"%(id_val, args))
   81: 
   82:     elif not self.update_mode:
   83:         # create new row (doesn't work)
   84:         self.logger.debug("insert: %s"%args)
   85:         #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
   86: 
   87:     #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
   88:     if (self.rowcnt % 100) == 0:
   89:         self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
   90: 
   91:     self.logger.debug("END ROW")
   92:     return
   93: 
   94: 
   95: def parse(self, filename):
   96:     """open file and read data"""
   97:     self.logger.info("reading data...")
   98:     self.rowcnt = 0
   99: 
  100:     fh = open(filename,"r")
  101:     self.logger.debug("BEGIN RESULTSET")
  102:     # parse line-wise
  103:     for line in fh:
  104:         self.handle_line(line)
  105: 
  106:     # done. Wrap up
  107:     self.logger.debug("END RESULTSET")
  108: 
  109:     self.logger.info("importing rows in db...")
  110:     i = 0
  111:     for id in self.dbIDs.keys():
  112:         # find all fields
  113:         if self.dbIDs[id] == 0:
  114:             # unmatched entry
  115:             #self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
  116:             SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
  117: 
  118:         elif self.dbIDs[id] > 0:
  119:             # assemble query
  120:             args = [ self.img_data[id].get(f,None) for f in imgTypes ]
  121:             args.append(id)
  122:             # update
  123:             #self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
  124:             SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
  125: 
  126:         i += 1
  127:         if i % 100 == 0:
  128:             self.logger.info(" import: %d (%s)"%(i,id))
  129:             self.dbCon.commit()
  130: 
  131:     self.dbCon.commit()
  132:     # reinstate backup tables
  133:     if self.backup_table:
  134:         backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
  135:         self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
  136:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
  137:         self.db.execute(qstr)
  138:         self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
  139:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
  140:         self.db.execute(qstr)
  141:         self.dbCon.commit()
  142: 
  143:     return
  144: 
  145: # monkey patch ASCII_handler
  146: ASCII_handler._handle_line = ASCII_handler.handle_line
  147: ASCII_handler.handle_line = handle_line
  148: ASCII_handler._parse = ASCII_handler.parse
  149: ASCII_handler.parse = parse
  150: ASCII_handler._setup = ASCII_handler.setup
  151: ASCII_handler.setup = setup
  152: 
  153: 
  154: if __name__ == "__main__":
  155:     from optparse import OptionParser
  156: 
  157:     opars = OptionParser()
  158:     opars.add_option("-f", "--file", 
  159:                      dest="filename",
  160:                      help="text file name", metavar="FILE")
  161:     opars.add_option("-c", "--dsn", 
  162:                      dest="dsn", 
  163:                      help="database connection string")
  164:     opars.add_option("-t", "--table", 
  165:                      dest="table", 
  166:                      help="database table name")
  167:     opars.add_option("--ascii-db", default=False, action="store_true", 
  168:                      dest="ascii_db", 
  169:                      help="the SQL database stores ASCII instead of unicode")
  170:     opars.add_option("--replace", default=False, action="store_true", 
  171:                      dest="replace_table", 
  172:                      help="replace table i.e. delete and re-insert data")
  173:     opars.add_option("--backup", default=False, action="store_true", 
  174:                      dest="backup_table", 
  175:                      help="create backup of old table (breaks indices)")
  176:     opars.add_option("-d", "--debug", default=False, action="store_true", 
  177:                      dest="debug", 
  178:                      help="debug mode (more output)")
  179:     
  180:     (options, args) = opars.parse_args()
  181:     
  182:     if (options.filename is None 
  183:         or options.dsn is None 
  184:         or options.table is None):
  185:         # not enough parameters
  186:         print "importCDLIimglist "+version_string
  187:         opars.print_help()
  188:         sys.exit(1)
  189:     
  190:     if options.debug:
  191:         loglevel = logging.DEBUG
  192:     else:
  193:         loglevel = logging.INFO
  194:     
  195:     logging.basicConfig(level=loglevel, 
  196:                         format='%(asctime)s %(levelname)s %(message)s',
  197:                         datefmt='%H:%M:%S')
  198: 
  199:     # fixed settings for CDLI imglist
  200:     options.update_fields = upd_fields
  201:     options.id_field = id_field
  202:     options.update_mode = True
  203: 
  204:     importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>