Annotation of ZSQLExtend/importCDLIimglist.py, revision 1.2
1.1 casties 1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
7: import types
8: import time
9:
10: from importASCII import ASCII_handler
11: from importASCII import importASCII
12: from importASCII import SimpleSearch
13:
1.2 ! casties 14: version_string = "V0.2 ROC 29.12.2007"
1.1 casties 15:
16: # mapping img_type to SQL field names
17: imgTypeMap = {
18: 'p':'img_p',
19: 'd':'img_d',
20: 'e':'img_e',
21: 'ed':'img_ed',
22: 'l':'img_l',
23: 'ld':'img_ld',
24: 'ls':'img_ls'}
1.2 ! casties 25: # list of fields in constant order (for SQL queries)
! 26: imgTypes = imgTypeMap.keys()
1.1 casties 27:
28: upd_fields = "fn,,img_type,id_text"
29: id_field = "id_text"
30: img_type_field = "img_type"
31:
32: def setup(self):
33: """specialized setup version"""
34: ASCII_handler._setup(self)
1.2 ! casties 35: # create special updQuery for img_type fields
! 36: setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
! 37: self.updQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,setStr)
! 38: # create special delQuery for img_type fields
! 39: delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
! 40: self.delQuery = "UPDATE %s SET %s WHERE id_text = %%s"%(self.table,delStr)
1.1 casties 41: # text file field for img_type
42: self.xml_img_type = self.sql_field_map[img_type_field]
1.2 ! casties 43: # dict of all img fields
! 44: self.img_data = {}
1.1 casties 45:
46:
47: def handle_line(self, line):
48: """process single line of text data"""
49: self.logger.debug("START ROW")
50:
51: content = line.split()
52: self.xml_data = content
53: self.rowcnt += 1
54: # process collected row data
55: update=False
56: # synchronize by id_field
57: id_val = self.xml_data[self.xml_id]
58: if id_val in self.dbIDs:
59: self.dbIDs[id_val] += 1
60: update=True
61:
62: # get img_type
63: img_type_val = self.xml_data[self.xml_img_type]
64:
65: # collect all values
66: # filename is first value
67: fn = self.xml_data[self.sql_field_map['fn']]
68: if fn.startswith('tn_'):
69: # ignore thumbnails
70: self.logger.debug("END ROW")
71: return
72:
73: if update:
74: # update existing row (by id_field)
1.2 ! casties 75: if id_val in self.img_data:
! 76: self.img_data[id_val][img_type_val] = fn
! 77: else:
! 78: self.img_data[id_val] = {img_type_val:fn}
1.1 casties 79:
80: self.logger.debug("update: %s = %s"%(id_val, args))
81:
82: elif not self.update_mode:
1.2 ! casties 83: # create new row (doesn't work)
1.1 casties 84: self.logger.debug("insert: %s"%args)
85: #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
86:
87: #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
88: if (self.rowcnt % 100) == 0:
89: self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
1.2 ! casties 90:
! 91: self.logger.debug("END ROW")
! 92: return
! 93:
! 94:
! 95: def parse(self, filename):
! 96: """open file and read data"""
! 97: self.logger.info("reading data...")
! 98: self.rowcnt = 0
! 99:
! 100: fh = open(filename,"r")
! 101: self.logger.debug("BEGIN RESULTSET")
! 102: # parse line-wise
! 103: for line in fh:
! 104: self.handle_line(line)
! 105:
! 106: # done. Wrap up
! 107: self.logger.debug("END RESULTSET")
! 108:
! 109: self.logger.info("importing rows in db...")
! 110: i = 0
! 111: for id in self.dbIDs.keys():
! 112: # find all fields
! 113: if self.dbIDs[id] == 0:
! 114: # unmatched entry
! 115: #self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
! 116: SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
! 117:
! 118: elif self.dbIDs[id] > 0:
! 119: # assemble query
! 120: args = [ self.img_data[id].get(f,None) for f in imgTypes ]
! 121: args.append(id)
! 122: # update
! 123: #self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
! 124: SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
! 125:
! 126: i += 1
! 127: if i % 100 == 0:
! 128: self.logger.info(" import: %d (%s)"%(i,id))
! 129: self.dbCon.commit()
! 130:
! 131: self.dbCon.commit()
! 132: # reinstate backup tables
! 133: if self.backup_table:
! 134: backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
! 135: self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
! 136: qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
! 137: self.db.execute(qstr)
! 138: self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
! 139: qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
! 140: self.db.execute(qstr)
1.1 casties 141: self.dbCon.commit()
142:
143: return
144:
145: # monkey patch ASCII_handler
146: ASCII_handler._handle_line = ASCII_handler.handle_line
147: ASCII_handler.handle_line = handle_line
1.2 ! casties 148: ASCII_handler._parse = ASCII_handler.parse
! 149: ASCII_handler.parse = parse
1.1 casties 150: ASCII_handler._setup = ASCII_handler.setup
151: ASCII_handler.setup = setup
152:
153:
154: if __name__ == "__main__":
155: from optparse import OptionParser
156:
157: opars = OptionParser()
158: opars.add_option("-f", "--file",
159: dest="filename",
160: help="text file name", metavar="FILE")
161: opars.add_option("-c", "--dsn",
162: dest="dsn",
163: help="database connection string")
164: opars.add_option("-t", "--table",
165: dest="table",
166: help="database table name")
167: opars.add_option("--ascii-db", default=False, action="store_true",
168: dest="ascii_db",
169: help="the SQL database stores ASCII instead of unicode")
170: opars.add_option("--replace", default=False, action="store_true",
171: dest="replace_table",
172: help="replace table i.e. delete and re-insert data")
173: opars.add_option("--backup", default=False, action="store_true",
174: dest="backup_table",
175: help="create backup of old table (breaks indices)")
176: opars.add_option("-d", "--debug", default=False, action="store_true",
177: dest="debug",
178: help="debug mode (more output)")
179:
180: (options, args) = opars.parse_args()
181:
182: if (options.filename is None
183: or options.dsn is None
184: or options.table is None):
185: # not enough parameters
186: print "importCDLIimglist "+version_string
187: opars.print_help()
188: sys.exit(1)
189:
190: if options.debug:
191: loglevel = logging.DEBUG
192: else:
193: loglevel = logging.INFO
194:
195: logging.basicConfig(level=loglevel,
196: format='%(asctime)s %(levelname)s %(message)s',
197: datefmt='%H:%M:%S')
198:
199: # fixed settings for CDLI imglist
200: options.update_fields = upd_fields
201: options.id_field = id_field
202: options.update_mode = True
203:
204: importASCII(options)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>