File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Fri Feb 11 19:08:49 2011 UTC (13 years, 2 months ago) by casties
Branches: MAIN
CVS tags: HEAD
ZSQLInlineSearch combines multiple parameters with the same name now with " ". works nicely with input type=checkbox and -op=all.

#!/usr/local/bin/python
#

import string
import logging
import sys
import types
import time

from importASCII import ASCII_handler
from importASCII import importASCII
from importASCII import SimpleSearch

version_string = "V0.2.3 ROC 25.3.2010"

# mapping img_type to SQL field names
imgTypeMap = {
    'p':'img_p',
    'd':'img_d',
    's':'img_s',
    'e':'img_e',
    'ed':'img_ed',
    'l':'img_l',
    'ld':'img_ld',
    'ls':'img_ls'}
# list of fields in constant order (for SQL queries)
imgTypes = imgTypeMap.keys()

upd_fields = "fn,,img_type,%s"
#id_field = "id_text"
img_type_field = "img_type"

def setup(self):
    """specialized setup version"""
    ASCII_handler._setup(self)
    # create special updQuery for img_type fields
    setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
    self.updQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field)
    # create special delQuery for img_type fields
    delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
    self.delQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,delStr,self.id_field)
    # text file field for img_type
    self.xml_img_type = self.sql_field_map[img_type_field]
    # dict of all img fields
    self.img_data = {}


def handle_line(self, line):
    """process single line of text data"""
    self.logger.debug("START ROW")

    content = line.split()
    self.xml_data = content
    self.rowcnt += 1
    # process collected row data
    update=False

    # synchronize by id_field
    id_val = self.xml_data[self.xml_id]
    #logging.debug("id_val=%s xml_id=%s"%(id_val,self.xml_id))
    
    # get img_type
    img_type_val = self.xml_data[self.xml_img_type]

    # collect all values
    # filename is first value
    fn = self.xml_data[self.sql_field_map['fn']] 
    if fn.startswith('tn_'):
        # ignore thumbnails
        self.logger.debug("END ROW")
        return

    # is the entry new?
    if id_val in self.dbIDs:
        self.dbIDs[id_val] += 1
        update=True

    if update:
        # update existing row (by id_field)
        if id_val in self.img_data:
            self.img_data[id_val][img_type_val] = fn
        else:
            self.img_data[id_val] = {img_type_val:fn}

        self.logger.debug("update: %s = %s"%(id_val, args))

    elif not self.update_mode:
        # create new row (doesn't work)
        self.logger.debug("insert: %s"%args)
        #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)

    #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
    if (self.rowcnt % 100) == 0:
        self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))

    self.logger.debug("END ROW")
    return


def parse(self, filename):
    """open file and read data"""
    self.logger.info("reading data...")
    self.rowcnt = 0

    fh = open(filename,"r")
    self.logger.debug("BEGIN RESULTSET")
    # parse line-wise
    for line in fh:
        self.handle_line(line)

    # done. Wrap up
    self.logger.debug("END RESULTSET")

    self.logger.info("importing rows in db...")
    i = 0
    for id in self.dbIDs.keys():
        # find all fields
        if self.dbIDs[id] == 0:
            # unmatched entry
            self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
            SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)

        elif self.dbIDs[id] > 0:
            # assemble query
            imgd = self.img_data.get(id, None)
            if imgd is None:
                self.logger.error("No data for id %s while marked for update!"%id)
                continue
            
            args = [ imgd.get(f,None) for f in imgTypes ]
            args.append(id)
            # update
            self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
            SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)

        i += 1
        if i % 100 == 0:
            self.logger.info(" import: %d (%s)"%(i,id))
            self.dbCon.commit()

    self.dbCon.commit()
    # reinstate backup tables
    if self.backup_table:
        backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
        self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
        qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
        self.db.execute(qstr)
        self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
        qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
        self.db.execute(qstr)
        self.dbCon.commit()

    return

# monkey patch ASCII_handler
ASCII_handler._handle_line = ASCII_handler.handle_line
ASCII_handler.handle_line = handle_line
ASCII_handler._parse = ASCII_handler.parse
ASCII_handler.parse = parse
ASCII_handler._setup = ASCII_handler.setup
ASCII_handler.setup = setup


if __name__ == "__main__":
    from optparse import OptionParser

    opars = OptionParser()
    opars.add_option("-f", "--file", 
                     dest="filename",
                     help="text file name", metavar="FILE")
    opars.add_option("-c", "--dsn", 
                     dest="dsn", 
                     help="database connection string")
    opars.add_option("-t", "--table", 
                     dest="table", 
                     help="database table name")
    opars.add_option("--id-field", default="id_text", 
                     dest="id_field", 
                     help="name of id field for synchronisation", metavar="NAME")
    opars.add_option("--ascii-db", default=False, action="store_true", 
                     dest="ascii_db", 
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true", 
                     dest="replace_table", 
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true", 
                     dest="backup_table", 
                     help="create backup of old table (breaks indices)")
    opars.add_option("-d", "--debug", default=False, action="store_true", 
                     dest="debug", 
                     help="debug mode (more output)")
    
    (options, args) = opars.parse_args()
    
    if (options.filename is None 
        or options.dsn is None 
        or options.table is None):
        # not enough parameters
        print "importCDLIimglist "+version_string
        opars.print_help()
        sys.exit(1)
    
    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    
    logging.basicConfig(level=loglevel, 
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # fixed settings for CDLI imglist
    options.update_fields = upd_fields%options.id_field
    #options.id_field = id_field
    options.update_mode = True

    importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>