#!/usr/local/bin/python
#
"""Update the image columns of a CDLI table from a text image list.

Every line of the input file is split on whitespace.  The columns are
described by ``upd_fields``; how that string is turned into column indexes
is up to ``importASCII`` (presumably an empty entry means "ignore this
column" -- TODO confirm).  For each line whose ID is already known, the image
column selected by the line's image type (see ``imgTypeMap``) is set to the
file name found on that line.

The module works by monkey patching ``importASCII.ASCII_handler``: its
``setup`` and ``handle_line`` methods are replaced by the functions defined
here, while the originals stay reachable as ``_setup`` and ``_handle_line``.
"""

import string
import logging
import sys
import types
import time

from importASCII import ASCII_handler
from importASCII import importASCII
from importASCII import SimpleSearch

version_string = "V0.1 ROC 4.12.2007"

# mapping img_type to SQL field names
imgTypeMap = {
    'p': 'img_p',
    'd': 'img_d',
    'e': 'img_e',
    'ed': 'img_ed',
    'l': 'img_l',
    'ld': 'img_ld',
    'ls': 'img_ls',
}

# fixed settings handed to importASCII when run as a script
upd_fields = "fn,,img_type,id_text"
id_field = "id_text"
img_type_field = "img_type"


def setup(self):
    """specialized setup version

    Runs the original ``ASCII_handler.setup`` (saved as ``_setup`` by the
    monkey patch below) and then adds the two attributes ``handle_line``
    relies on:

    * ``self.updQueries`` -- one UPDATE statement per key of ``imgTypeMap``
    * ``self.xml_img_type`` -- the ``self.sql_field_map`` entry of the image
      type field, used to index the split line
    """
    ASCII_handler._setup(self)
    # create special updQueries for img_type fields.
    # Table and column names are interpolated right here; the two "%%s"
    # survive as "%s" placeholders for the values passed to SimpleSearch.
    self.updQueries = dict(
        (img_type,
         "UPDATE %s SET %s = %%s WHERE %s = %%s" % (self.table, column, id_field))
        for (img_type, column) in imgTypeMap.items())
    # text file field for img_type
    self.xml_img_type = self.sql_field_map[img_type_field]


def handle_line(self, line):
    """process single line of text data

    The line is split on whitespace and stored in ``self.xml_data``;
    ``self.rowcnt`` is incremented for every line, including skipped ones.

    * Blank lines and file names starting with ``tn_`` (thumbnails) are
      skipped.
    * If the line's ID is a key of ``self.dbIDs``, its counter is incremented
      and the UPDATE statement matching the line's image type is executed
      with ``[file name, ID]`` as arguments.  An image type without a
      statement is logged as an error and the line is skipped.
    * Otherwise, and only when ``self.update_mode`` is false, the would-be
      insert is logged; no row is actually inserted.

    After every 100th counted line that was not skipped,
    ``self.dbCon.commit()`` is called.

    ``self.xml_id``, ``self.dbIDs``, ``self.db`` and ``self.dbCon`` are not
    set in this file -- presumably ``ASCII_handler`` provides them (TODO
    confirm).
    """
    self.logger.debug("START ROW")

    content = line.split()
    self.xml_data = content
    self.rowcnt += 1

    if not content:
        # a blank line has no columns; the index access below would raise
        # IndexError, so skip it the same way thumbnails are skipped
        self.logger.debug("END ROW")
        return

    # synchronize by id_field
    id_val = self.xml_data[self.xml_id]
    update = id_val in self.dbIDs
    if update:
        # NOTE: the ID is counted as seen even if the row is skipped below
        self.dbIDs[id_val] += 1

    # get img_type
    img_type_val = self.xml_data[self.xml_img_type]

    # collect all values
    # filename is first value
    fn = self.xml_data[self.sql_field_map['fn']]
    if fn.startswith('tn_'):
        # ignore thumbnails
        self.logger.debug("END ROW")
        return

    args = [fn]

    if update:
        # update existing row (by id_field)
        # last argument is ID match
        args.append(id_val)

        try:
            query = self.updQueries[img_type_val]
        except KeyError:
            # only a missing image type is expected here; anything else
            # should propagate instead of being reported as "unknown type"
            self.logger.error("unknown image type %s"%img_type_val)
            return

        self.logger.debug("update: %s = %s"%(id_val, args))
        SimpleSearch(self.db, query, args, ascii=self.ascii_db)

    elif not self.update_mode:
        # create new row
        # NOTE: the actual INSERT is disabled, the row is only logged:
        #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
        self.logger.debug("insert: %s"%args)

    if (self.rowcnt % 100) == 0:
        # progress report and intermediate commit every 100 lines
        self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
        self.dbCon.commit()

    self.logger.debug("END ROW")


# monkey patch ASCII_handler
# (the original methods are kept under a leading underscore; setup() above
# calls ASCII_handler._setup)
ASCII_handler._handle_line = ASCII_handler.handle_line
ASCII_handler.handle_line = handle_line
ASCII_handler._setup = ASCII_handler.setup
ASCII_handler.setup = setup


if __name__ == "__main__":
    from optparse import OptionParser

    opars = OptionParser()
    opars.add_option("-f", "--file",
                     dest="filename",
                     help="text file name", metavar="FILE")
    opars.add_option("-c", "--dsn",
                     dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table",
                     dest="table",
                     help="database table name")
    opars.add_option("--ascii-db", default=False, action="store_true",
                     dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true",
                     dest="replace_table",
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true",
                     dest="backup_table",
                     help="create backup of old table (breaks indices)")
    opars.add_option("-d", "--debug", default=False, action="store_true",
                     dest="debug",
                     help="debug mode (more output)")

    (options, args) = opars.parse_args()

    if (options.filename is None
            or options.dsn is None
            or options.table is None):
        # not enough parameters
        # single-argument print(...) prints the same text on Python 2 and 3
        print("importCDLIimglist "+version_string)
        opars.print_help()
        sys.exit(1)

    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # fixed settings for CDLI imglist
    options.update_fields = upd_fields
    options.id_field = id_field
    options.update_mode = True

    importASCII(options)