File:  [Repository] / ZSQLExtend / importCDLIimglist.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Sat Dec 29 19:58:00 2007 UTC (16 years, 4 months ago) by casties
Branches: MAIN
CVS tags: HEAD
added cdli image list importer

#!/usr/local/bin/python
#

import string
import logging
import sys
import types
import time

from importASCII import ASCII_handler
from importASCII import importASCII
from importASCII import SimpleSearch

version_string = "V0.1 ROC 4.12.2007"

# Image type code (read from each line of the image list) -> name of the SQL
# column that receives the file name.  Every column is "img_" + type code.
imgTypeMap = dict((code, 'img_' + code)
                  for code in ('p', 'd', 'e', 'ed', 'l', 'ld', 'ls'))

# Fixed import settings for the CDLI image list; upd_fields and id_field are
# copied into the options passed to importASCII in the __main__ section.
# NOTE(review): the empty second entry of upd_fields presumably marks a text
# column that is not imported -- confirm against importASCII.
upd_fields = "fn,,img_type,id_text"
# field used to match lines of the text file against existing rows
id_field = "id_text"
# field of upd_fields holding the image type code (looked up in setup())
img_type_field = "img_type"


def setup(self):
    """Set up the handler for the CDLI image list.

    Runs the original ASCII_handler setup (kept as ASCII_handler._setup by
    the monkey patch at the end of this module) and then adds:

    self.updQueries   -- dict: image type code -> UPDATE statement that
                         writes the matching img_* column of self.table
                         for the row with a given id_text
    self.xml_img_type -- entry of self.sql_field_map for the img_type field;
                         handle_line() uses it as index into the split line
    """
    ASCII_handler._setup(self)

    # one UPDATE statement per image type; "%%s" leaves the two value
    # placeholders (file name, id_text) in the finished query string
    queries = {}
    for type_code, column in imgTypeMap.items():
        queries[type_code] = "UPDATE %s SET %s = %%s WHERE id_text = %%s"%(self.table, column)
    self.updQueries = queries

    # text file field for img_type
    self.xml_img_type = self.sql_field_map[img_type_field]


def handle_line(self, line):
    """Process a single line of the CDLI image list.

    The line is split on whitespace.  The ID, the image type and the file
    name are taken from the fields at index self.xml_id, self.xml_img_type
    and self.sql_field_map['fn'].

    If the ID is a key of self.dbIDs its counter is incremented and the
    UPDATE query for the image type (self.updQueries) is run with the file
    name and the ID as arguments.  IDs that are not in self.dbIDs are only
    logged (and only when self.update_mode is false); no row is inserted.

    Skipped without touching the database:
      - blank lines (not counted in self.rowcnt)
      - lines with too few fields (logged as error)
      - thumbnails, i.e. file names starting with 'tn_'
      - lines with an image type that has no update query (logged as error)

    Every 100th counted row that is processed to the end logs a progress
    message and commits self.dbCon.

    Always returns None.
    """
    self.logger.debug("START ROW")

    content = line.split()
    if not content:
        # blank line (e.g. at the end of the file): indexing the empty
        # field list below would raise IndexError and abort the import
        self.logger.debug("END ROW")
        return

    self.xml_data = content
    self.rowcnt += 1

    # pick out the fields we need; a truncated line must not abort the run
    try:
        id_val = content[self.xml_id]
        img_type_val = content[self.xml_img_type]
        # filename is first value
        fn = content[self.sql_field_map['fn']]
    except IndexError:
        self.logger.error("line %d has too few fields: %r"%(self.rowcnt, line))
        return

    # synchronize by id_field
    # NOTE(review): the counter is incremented before the thumbnail check,
    # so thumbnail lines also count as a match for their ID (kept as is).
    update = id_val in self.dbIDs
    if update:
        self.dbIDs[id_val] += 1

    if fn.startswith('tn_'):
        # ignore thumbnails
        self.logger.debug("END ROW")
        return

    args = [fn]

    if update:
        # update existing row (by id_field)
        # last argument is ID match
        args.append(id_val)
        try:
            query = self.updQueries[img_type_val]
        except KeyError:
            # only a missing image type is expected here; anything else
            # should propagate instead of being reported as "unknown type"
            self.logger.error("unknown image type %s"%img_type_val)
            return

        self.logger.debug("update: %s = %s"%(id_val, args))
        SimpleSearch(self.db, query, args, ascii=self.ascii_db)

    elif not self.update_mode:
        # create new row -- the INSERT itself is disabled, the row is only logged
        self.logger.debug("insert: %s"%args)
        #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)

    if (self.rowcnt % 100) == 0:
        # progress report and intermediate commit
        self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
        self.dbCon.commit()

    self.logger.debug("END ROW")
    return

# monkey patch ASCII_handler:
# keep each original method reachable under a leading-underscore name, then
# install the CDLI-specific version defined in this module.  The original
# must be saved *before* it is overwritten; setup() calls
# ASCII_handler._setup to run the original setup.
# NOTE(review): executing these statements a second time (e.g. after a
# reload of this module) would store the already patched methods as the
# "originals", so setup() would end up calling itself -- confirm that this
# module is only ever loaded once per process.
ASCII_handler._handle_line = ASCII_handler.handle_line
ASCII_handler.handle_line = handle_line
ASCII_handler._setup = ASCII_handler.setup
ASCII_handler.setup = setup


# Command line entry point: parse the options, configure logging and run
# importASCII with the settings fixed for the CDLI image list.
if __name__ == "__main__":
    from optparse import OptionParser

    opars = OptionParser()
    opars.add_option("-f", "--file",
                     dest="filename",
                     help="text file name", metavar="FILE")
    opars.add_option("-c", "--dsn",
                     dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table",
                     dest="table",
                     help="database table name")
    opars.add_option("--ascii-db", default=False, action="store_true",
                     dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true",
                     dest="replace_table",
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true",
                     dest="backup_table",
                     help="create backup of old table (breaks indices)")
    opars.add_option("-d", "--debug", default=False, action="store_true",
                     dest="debug",
                     help="debug mode (more output)")

    (options, args) = opars.parse_args()

    if (options.filename is None
        or options.dsn is None
        or options.table is None):
        # not enough parameters: show version and usage, exit with error status
        # (single parenthesised argument: identical output with the Python 2
        # print statement and the Python 3 print function)
        print("importCDLIimglist "+version_string)
        opars.print_help()
        sys.exit(1)

    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # fixed settings for CDLI imglist (not selectable on the command line)
    options.update_fields = upd_fields
    options.id_field = id_field
    options.update_mode = True

    importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>