File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Sat Dec 29 19:58:00 2007 UTC (16 years, 5 months ago) by casties
Branches: MAIN
CVS tags: HEAD
added cdli image list importer

    1: #!/usr/local/bin/python
    2: #
    3: 
    4: import string
    5: import logging
    6: import sys
    7: import types
    8: import time
    9: 
   10: from importASCII import ASCII_handler
   11: from importASCII import importASCII
   12: from importASCII import SimpleSearch
   13: 
   14: version_string = "V0.1 ROC 4.12.2007"
   15: 
   16: # mapping img_type to SQL field names
   17: imgTypeMap = {
   18:     'p':'img_p',
   19:     'd':'img_d',
   20:     'e':'img_e',
   21:     'ed':'img_ed',
   22:     'l':'img_l',
   23:     'ld':'img_ld',
   24:     'ls':'img_ls'}
   25: 
   26: upd_fields = "fn,,img_type,id_text"
   27: id_field = "id_text"
   28: img_type_field = "img_type"
   29: 
   30: 
   31: def setup(self):
   32:     """specialized setup version"""
   33:     ASCII_handler._setup(self)
   34:     # create special updQueries for img_type fields
   35:     self.updQueries = dict([(t,"UPDATE %s SET %s = %%s WHERE id_text = %%s"%(self.table,imgTypeMap[t])) for t in imgTypeMap.keys()])
   36:     # text file field for img_type
   37:     self.xml_img_type = self.sql_field_map[img_type_field]
   38: 
   39: 
   40: def handle_line(self, line):
   41:     """process single line of text data"""
   42:     self.logger.debug("START ROW")
   43: 
   44:     content = line.split()
   45:     self.xml_data = content
   46:     self.rowcnt += 1
   47:     # process collected row data
   48:     update=False
   49:     # synchronize by id_field
   50:     id_val = self.xml_data[self.xml_id]
   51:     if id_val in self.dbIDs:
   52:         self.dbIDs[id_val] += 1
   53:         update=True
   54: 
   55:     # get img_type
   56:     img_type_val = self.xml_data[self.xml_img_type]
   57: 
   58:     # collect all values
   59:     # filename is first value
   60:     fn = self.xml_data[self.sql_field_map['fn']] 
   61:     if fn.startswith('tn_'):
   62:         # ignore thumbnails
   63:         self.logger.debug("END ROW")
   64:         return
   65: 
   66:     args = [fn]
   67: 
   68:     if update:
   69:         # update existing row (by id_field)
   70:         # last argument is ID match
   71:         args.append(id_val)
   72:         try:
   73:             query =  self.updQueries[img_type_val]
   74:         except:
   75:             self.logger.error("unknown image type %s"%img_type_val)
   76:             return
   77: 
   78:         self.logger.debug("update: %s = %s"%(id_val, args))
   79:         SimpleSearch(self.db, query, args, ascii=self.ascii_db)
   80: 
   81:     elif not self.update_mode:
   82:         # create new row
   83:         self.logger.debug("insert: %s"%args)
   84:         #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
   85: 
   86:     #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
   87:     if (self.rowcnt % 100) == 0:
   88:         self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
   89:         self.dbCon.commit()
   90: 
   91:     self.logger.debug("END ROW")
   92:     return
   93: 
   94: # monkey patch ASCII_handler
   95: ASCII_handler._handle_line = ASCII_handler.handle_line
   96: ASCII_handler.handle_line = handle_line
   97: ASCII_handler._setup = ASCII_handler.setup
   98: ASCII_handler.setup = setup
   99: 
  100: 
  101: if __name__ == "__main__":
  102:     from optparse import OptionParser
  103: 
  104:     opars = OptionParser()
  105:     opars.add_option("-f", "--file", 
  106:                      dest="filename",
  107:                      help="text file name", metavar="FILE")
  108:     opars.add_option("-c", "--dsn", 
  109:                      dest="dsn", 
  110:                      help="database connection string")
  111:     opars.add_option("-t", "--table", 
  112:                      dest="table", 
  113:                      help="database table name")
  114:     opars.add_option("--ascii-db", default=False, action="store_true", 
  115:                      dest="ascii_db", 
  116:                      help="the SQL database stores ASCII instead of unicode")
  117:     opars.add_option("--replace", default=False, action="store_true", 
  118:                      dest="replace_table", 
  119:                      help="replace table i.e. delete and re-insert data")
  120:     opars.add_option("--backup", default=False, action="store_true", 
  121:                      dest="backup_table", 
  122:                      help="create backup of old table (breaks indices)")
  123:     opars.add_option("-d", "--debug", default=False, action="store_true", 
  124:                      dest="debug", 
  125:                      help="debug mode (more output)")
  126:     
  127:     (options, args) = opars.parse_args()
  128:     
  129:     if (options.filename is None 
  130:         or options.dsn is None 
  131:         or options.table is None):
  132:         # not enough parameters
  133:         print "importCDLIimglist "+version_string
  134:         opars.print_help()
  135:         sys.exit(1)
  136:     
  137:     if options.debug:
  138:         loglevel = logging.DEBUG
  139:     else:
  140:         loglevel = logging.INFO
  141:     
  142:     logging.basicConfig(level=loglevel, 
  143:                         format='%(asctime)s %(levelname)s %(message)s',
  144:                         datefmt='%H:%M:%S')
  145: 
  146:     # fixed settings for CDLI imglist
  147:     options.update_fields = upd_fields
  148:     options.id_field = id_field
  149:     options.update_mode = True
  150: 
  151:     importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>