#!/usr/local/bin/python
#
import string
import logging
import sys
import types
import time
from importASCII import ASCII_handler
from importASCII import importASCII
from importASCII import SimpleSearch
version_string = "V0.2.3 ROC 25.3.2010"
# mapping img_type to SQL field names
imgTypeMap = {
'p':'img_p',
'd':'img_d',
's':'img_s',
'e':'img_e',
'ed':'img_ed',
'l':'img_l',
'ld':'img_ld',
'ls':'img_ls'}
# list of fields in constant order (for SQL queries)
imgTypes = imgTypeMap.keys()
upd_fields = "fn,,img_type,%s"
#id_field = "id_text"
img_type_field = "img_type"
def setup(self):
"""specialized setup version"""
ASCII_handler._setup(self)
# create special updQuery for img_type fields
setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
self.updQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field)
# create special delQuery for img_type fields
delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
self.delQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,delStr,self.id_field)
# text file field for img_type
self.xml_img_type = self.sql_field_map[img_type_field]
# dict of all img fields
self.img_data = {}
def handle_line(self, line):
"""process single line of text data"""
self.logger.debug("START ROW")
content = line.split()
self.xml_data = content
self.rowcnt += 1
# process collected row data
update=False
# synchronize by id_field
id_val = self.xml_data[self.xml_id]
# get img_type
img_type_val = self.xml_data[self.xml_img_type]
# collect all values
# filename is first value
fn = self.xml_data[self.sql_field_map['fn']]
if fn.startswith('tn_'):
# ignore thumbnails
self.logger.debug("END ROW")
return
# is the entry new?
if id_val in self.dbIDs:
self.dbIDs[id_val] += 1
update=True
if update:
# update existing row (by id_field)
if id_val in self.img_data:
self.img_data[id_val][img_type_val] = fn
else:
self.img_data[id_val] = {img_type_val:fn}
self.logger.debug("update: %s = %s"%(id_val, args))
elif not self.update_mode:
# create new row (doesn't work)
self.logger.debug("insert: %s"%args)
#SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
#self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
if (self.rowcnt % 100) == 0:
self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
self.logger.debug("END ROW")
return
def parse(self, filename):
"""open file and read data"""
self.logger.info("reading data...")
self.rowcnt = 0
fh = open(filename,"r")
self.logger.debug("BEGIN RESULTSET")
# parse line-wise
for line in fh:
self.handle_line(line)
# done. Wrap up
self.logger.debug("END RESULTSET")
self.logger.info("importing rows in db...")
i = 0
for id in self.dbIDs.keys():
# find all fields
if self.dbIDs[id] == 0:
# unmatched entry
#self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
elif self.dbIDs[id] > 0:
# assemble query
imgd = self.img_data.get(id, None)
if imgd is None:
self.logger.error("No data for id %s while marked for update!"%id)
continue
args = [ imgd.get(f,None) for f in imgTypes ]
args.append(id)
# update
#self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
i += 1
if i % 100 == 0:
self.logger.info(" import: %d (%s)"%(i,id))
self.dbCon.commit()
self.dbCon.commit()
# reinstate backup tables
if self.backup_table:
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
self.db.execute(qstr)
self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
self.db.execute(qstr)
self.dbCon.commit()
return
# monkey patch ASCII_handler
ASCII_handler._handle_line = ASCII_handler.handle_line
ASCII_handler.handle_line = handle_line
ASCII_handler._parse = ASCII_handler.parse
ASCII_handler.parse = parse
ASCII_handler._setup = ASCII_handler.setup
ASCII_handler.setup = setup
if __name__ == "__main__":
from optparse import OptionParser
opars = OptionParser()
opars.add_option("-f", "--file",
dest="filename",
help="text file name", metavar="FILE")
opars.add_option("-c", "--dsn",
dest="dsn",
help="database connection string")
opars.add_option("-t", "--table",
dest="table",
help="database table name")
opars.add_option("--id-field", default="id_text",
dest="id_field",
help="name of id field for synchronisation", metavar="NAME")
opars.add_option("--ascii-db", default=False, action="store_true",
dest="ascii_db",
help="the SQL database stores ASCII instead of unicode")
opars.add_option("--replace", default=False, action="store_true",
dest="replace_table",
help="replace table i.e. delete and re-insert data")
opars.add_option("--backup", default=False, action="store_true",
dest="backup_table",
help="create backup of old table (breaks indices)")
opars.add_option("-d", "--debug", default=False, action="store_true",
dest="debug",
help="debug mode (more output)")
(options, args) = opars.parse_args()
if (options.filename is None
or options.dsn is None
or options.table is None):
# not enough parameters
print "importCDLIimglist "+version_string
opars.print_help()
sys.exit(1)
if options.debug:
loglevel = logging.DEBUG
else:
loglevel = logging.INFO
logging.basicConfig(level=loglevel,
format='%(asctime)s %(levelname)s %(message)s',
datefmt='%H:%M:%S')
# fixed settings for CDLI imglist
options.update_fields = upd_fields%options.id_field
#options.id_field = id_field
options.update_mode = True
importASCII(options)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>