Annotation of ZSQLExtend/importCDLIimglist.py, revision 1.6

1.1       casties     1: #!/usr/local/bin/python
                      2: #
                      3: 
                      4: import string
                      5: import logging
                      6: import sys
                      7: import types
                      8: import time
                      9: 
                     10: from importASCII import ASCII_handler
                     11: from importASCII import importASCII
                     12: from importASCII import SimpleSearch
                     13: 
1.5       casties    14: version_string = "V0.2.3 ROC 25.3.2010"
1.1       casties    15: 
                     16: # mapping img_type to SQL field names
                     17: imgTypeMap = {
                     18:     'p':'img_p',
                     19:     'd':'img_d',
1.4       casties    20:     's':'img_s',
1.1       casties    21:     'e':'img_e',
                     22:     'ed':'img_ed',
                     23:     'l':'img_l',
                     24:     'ld':'img_ld',
                     25:     'ls':'img_ls'}
1.2       casties    26: # list of fields in constant order (for SQL queries)
                     27: imgTypes = imgTypeMap.keys()
1.1       casties    28: 
1.5       casties    29: upd_fields = "fn,,img_type,%s"
                     30: #id_field = "id_text"
1.1       casties    31: img_type_field = "img_type"
                     32: 
                     33: def setup(self):
                     34:     """specialized setup version"""
                     35:     ASCII_handler._setup(self)
1.2       casties    36:     # create special updQuery for img_type fields
                     37:     setStr=string.join(["%s = %%s"%imgTypeMap[f] for f in imgTypes], ', ')
1.5       casties    38:     self.updQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field)
1.2       casties    39:     # create special delQuery for img_type fields
                     40:     delStr=string.join(["%s = null"%imgTypeMap[f] for f in imgTypes], ', ')
1.5       casties    41:     self.delQuery = "UPDATE %s SET %s WHERE %s = %%s"%(self.table,delStr,self.id_field)
1.1       casties    42:     # text file field for img_type
                     43:     self.xml_img_type = self.sql_field_map[img_type_field]
1.2       casties    44:     # dict of all img fields
                     45:     self.img_data = {}
1.1       casties    46: 
                     47: 
                     48: def handle_line(self, line):
                     49:     """process single line of text data"""
                     50:     self.logger.debug("START ROW")
                     51: 
                     52:     content = line.split()
                     53:     self.xml_data = content
                     54:     self.rowcnt += 1
                     55:     # process collected row data
                     56:     update=False
1.3       casties    57: 
1.1       casties    58:     # synchronize by id_field
                     59:     id_val = self.xml_data[self.xml_id]
1.6     ! casties    60:     #logging.debug("id_val=%s xml_id=%s"%(id_val,self.xml_id))
1.3       casties    61:     
1.1       casties    62:     # get img_type
                     63:     img_type_val = self.xml_data[self.xml_img_type]
                     64: 
                     65:     # collect all values
                     66:     # filename is first value
                     67:     fn = self.xml_data[self.sql_field_map['fn']] 
                     68:     if fn.startswith('tn_'):
                     69:         # ignore thumbnails
                     70:         self.logger.debug("END ROW")
                     71:         return
                     72: 
1.3       casties    73:     # is the entry new?
                     74:     if id_val in self.dbIDs:
                     75:         self.dbIDs[id_val] += 1
                     76:         update=True
                     77: 
1.1       casties    78:     if update:
                     79:         # update existing row (by id_field)
1.2       casties    80:         if id_val in self.img_data:
                     81:             self.img_data[id_val][img_type_val] = fn
                     82:         else:
                     83:             self.img_data[id_val] = {img_type_val:fn}
1.1       casties    84: 
                     85:         self.logger.debug("update: %s = %s"%(id_val, args))
                     86: 
                     87:     elif not self.update_mode:
1.2       casties    88:         # create new row (doesn't work)
1.1       casties    89:         self.logger.debug("insert: %s"%args)
                     90:         #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
                     91: 
                     92:     #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
                     93:     if (self.rowcnt % 100) == 0:
                     94:         self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
1.2       casties    95: 
                     96:     self.logger.debug("END ROW")
                     97:     return
                     98: 
                     99: 
                    100: def parse(self, filename):
                    101:     """open file and read data"""
                    102:     self.logger.info("reading data...")
                    103:     self.rowcnt = 0
                    104: 
                    105:     fh = open(filename,"r")
                    106:     self.logger.debug("BEGIN RESULTSET")
                    107:     # parse line-wise
                    108:     for line in fh:
                    109:         self.handle_line(line)
                    110: 
                    111:     # done. Wrap up
                    112:     self.logger.debug("END RESULTSET")
                    113: 
                    114:     self.logger.info("importing rows in db...")
                    115:     i = 0
                    116:     for id in self.dbIDs.keys():
                    117:         # find all fields
                    118:         if self.dbIDs[id] == 0:
                    119:             # unmatched entry
1.6     ! casties   120:             self.logger.debug("CLEAN: %s with %s"%(self.delQuery,id))
1.2       casties   121:             SimpleSearch(self.db, self.delQuery, [id], ascii=self.ascii_db, result=False)
                    122: 
                    123:         elif self.dbIDs[id] > 0:
                    124:             # assemble query
1.3       casties   125:             imgd = self.img_data.get(id, None)
                    126:             if imgd is None:
                    127:                 self.logger.error("No data for id %s while marked for update!"%id)
                    128:                 continue
                    129:             
                    130:             args = [ imgd.get(f,None) for f in imgTypes ]
1.2       casties   131:             args.append(id)
                    132:             # update
1.6     ! casties   133:             self.logger.debug("UPDATE: %s with %s"%(self.updQuery,args))
1.2       casties   134:             SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db, result=False)
                    135: 
                    136:         i += 1
                    137:         if i % 100 == 0:
                    138:             self.logger.info(" import: %d (%s)"%(i,id))
                    139:             self.dbCon.commit()
                    140: 
                    141:     self.dbCon.commit()
                    142:     # reinstate backup tables
                    143:     if self.backup_table:
                    144:         backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
                    145:         self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
                    146:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
                    147:         self.db.execute(qstr)
                    148:         self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
                    149:         qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
                    150:         self.db.execute(qstr)
1.1       casties   151:         self.dbCon.commit()
                    152: 
                    153:     return
                    154: 
                    155: # monkey patch ASCII_handler
                    156: ASCII_handler._handle_line = ASCII_handler.handle_line
                    157: ASCII_handler.handle_line = handle_line
1.2       casties   158: ASCII_handler._parse = ASCII_handler.parse
                    159: ASCII_handler.parse = parse
1.1       casties   160: ASCII_handler._setup = ASCII_handler.setup
                    161: ASCII_handler.setup = setup
                    162: 
                    163: 
                    164: if __name__ == "__main__":
                    165:     from optparse import OptionParser
                    166: 
                    167:     opars = OptionParser()
                    168:     opars.add_option("-f", "--file", 
                    169:                      dest="filename",
                    170:                      help="text file name", metavar="FILE")
                    171:     opars.add_option("-c", "--dsn", 
                    172:                      dest="dsn", 
                    173:                      help="database connection string")
                    174:     opars.add_option("-t", "--table", 
                    175:                      dest="table", 
                    176:                      help="database table name")
1.5       casties   177:     opars.add_option("--id-field", default="id_text", 
                    178:                      dest="id_field", 
                    179:                      help="name of id field for synchronisation", metavar="NAME")
1.1       casties   180:     opars.add_option("--ascii-db", default=False, action="store_true", 
                    181:                      dest="ascii_db", 
                    182:                      help="the SQL database stores ASCII instead of unicode")
                    183:     opars.add_option("--replace", default=False, action="store_true", 
                    184:                      dest="replace_table", 
                    185:                      help="replace table i.e. delete and re-insert data")
                    186:     opars.add_option("--backup", default=False, action="store_true", 
                    187:                      dest="backup_table", 
                    188:                      help="create backup of old table (breaks indices)")
                    189:     opars.add_option("-d", "--debug", default=False, action="store_true", 
                    190:                      dest="debug", 
                    191:                      help="debug mode (more output)")
                    192:     
                    193:     (options, args) = opars.parse_args()
                    194:     
                    195:     if (options.filename is None 
                    196:         or options.dsn is None 
                    197:         or options.table is None):
                    198:         # not enough parameters
                    199:         print "importCDLIimglist "+version_string
                    200:         opars.print_help()
                    201:         sys.exit(1)
                    202:     
                    203:     if options.debug:
                    204:         loglevel = logging.DEBUG
                    205:     else:
                    206:         loglevel = logging.INFO
                    207:     
                    208:     logging.basicConfig(level=loglevel, 
                    209:                         format='%(asctime)s %(levelname)s %(message)s',
                    210:                         datefmt='%H:%M:%S')
                    211: 
                    212:     # fixed settings for CDLI imglist
1.5       casties   213:     options.update_fields = upd_fields%options.id_field
                    214:     #options.id_field = id_field
1.1       casties   215:     options.update_mode = True
                    216: 
                    217:     importASCII(options)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>