#!/usr/local/bin/python # import string import logging import sys import types import time from importASCII import ASCII_handler from importASCII import importASCII from importASCII import SimpleSearch version_string = "V0.2.3 ROC 25.3.2010" # mapping img_type to SQL field names imgTypeMap = { 'p':'img_p', 'd':'img_d', 's':'img_s', 'e':'img_e', 'ed':'img_ed', 'l':'img_l', 'ld':'img_ld', 'ls':'img_ls'} # list of fields in constant order (for SQL queries) imgTypes = imgTypeMap.keys() upd_fields = "fn,,img_type,%s" #id_field = "id_text" img_type_field = "img_type" def setup(self): """specialized setup version""" ASCII_handler._setup(self) # create special updQuery for img_type fields setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ') self.updQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field) # create special delQuery for img_type fields delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ') self.delQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,delStr,self.id_field) # text file field for img_type self.xml_img_type = self.sql_field_map[img_type_field] # dict of all img fields self.img_data = {} def handle_line(self, line): """process single line of text data""" self.logger.debug("START ROW") content = line.split() self.xml_data = content self.rowcnt += 1 # process collected row data update=False # synchronize by id_field id_val = self.xml_data[self.xml_id] #logging.debug("id_val=%s xml_id=%s"%(id_val,self.xml_id)) # get img_type img_type_val = self.xml_data[self.xml_img_type] # collect all values # filename is first value fn = self.xml_data[self.sql_field_map['fn']] if fn.startswith('tn_'): # ignore thumbnails self.logger.debug("END ROW") return # is the entry new? if id_val in self.dbIDs: self.dbIDs[id_val] += 1 update=True if update: # update existing row (by id_field) if id_val in self.img_data: self.img_data[id_val][img_type_val] = fn else: self.img_data[id_val] = {img_type_val:fn} self.logger.debug("update: %s = %s"%(id_val, args)) elif not self.update_mode: # create new row (doesn't work) self.logger.debug("insert: %s"%args) #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val)) if (self.rowcnt % 100) == 0: self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val)) self.logger.debug("END ROW") return def parse(self, filename): """open file and read data""" self.logger.info("reading data...") self.rowcnt = 0 fh = open(filename,"r") self.logger.debug("BEGIN RESULTSET") # parse line-wise for line in fh: self.handle_line(line) # done. Wrap up self.logger.debug("END RESULTSET") self.logger.info("importing rows in db...") i = 0 for id in self.dbIDs.keys(): # find all fields if self.dbIDs[id] == 0: # unmatched entry self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id)) SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False) elif self.dbIDs[id] > 0: # assemble query imgd = self.img_data.get(id, None) if imgd is None: self.logger.error("No data for id %s while marked for update!"%id) continue args = [ imgd.get(f,None) for f in imgTypes ] args.append(id) # update self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args)) SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False) i += 1 if i % 100 == 0: self.logger.info(" import: %d (%s)"%(i,id)) self.dbCon.commit() self.dbCon.commit() # reinstate backup tables if self.backup_table: backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name)) qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name) self.db.execute(qstr) self.logger.info("rename working table %s to %s"%(self.table,self.orig_table)) qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table) self.db.execute(qstr) self.dbCon.commit() return # monkey patch ASCII_handler ASCII_handler._handle_line = ASCII_handler.handle_line ASCII_handler.handle_line = handle_line ASCII_handler._parse = ASCII_handler.parse ASCII_handler.parse = parse ASCII_handler._setup = ASCII_handler.setup ASCII_handler.setup = setup if __name__ == "__main__": from optparse import OptionParser opars = OptionParser() opars.add_option("-f", "--file", dest="filename", help="text file name", metavar="FILE") opars.add_option("-c", "--dsn", dest="dsn", help="database connection string") opars.add_option("-t", "--table", dest="table", help="database table name") opars.add_option("--id-field", default="id_text", dest="id_field", help="name of id field for synchronisation", metavar="NAME") opars.add_option("--ascii-db", default=False, action="store_true", dest="ascii_db", help="the SQL database stores ASCII instead of unicode") opars.add_option("--replace", default=False, action="store_true", dest="replace_table", help="replace table i.e. delete and re-insert data") opars.add_option("--backup", default=False, action="store_true", dest="backup_table", help="create backup of old table (breaks indices)") opars.add_option("-d", "--debug", default=False, action="store_true", dest="debug", help="debug mode (more output)") (options, args) = opars.parse_args() if (options.filename is None or options.dsn is None or options.table is None): # not enough parameters print "importCDLIimglist "+version_string opars.print_help() sys.exit(1) if options.debug: loglevel = logging.DEBUG else: loglevel = logging.INFO logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s %(message)s', datefmt='%H:%M:%S') # fixed settings for CDLI imglist options.update_fields = upd_fields%options.id_field #options.id_field = id_field options.update_mode = True importASCII(options)