Annotation of ZSQLExtend/importCDLIimglist.py, revision 1.4
1.1 casties 1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
7: import types
8: import time
9:
10: from importASCII import ASCII_handler
11: from importASCII import importASCII
12: from importASCII import SimpleSearch
13:
version_string = "V0.2.2 ROC 28.9.2009"

# img_type code (as found in the image list) -> SQL column holding the
# file name of that image type
imgTypeMap = {
    'p': 'img_p',
    'd': 'img_d',
    's': 'img_s',
    'e': 'img_e',
    'ed': 'img_ed',
    'l': 'img_l',
    'ld': 'img_ld',
    'ls': 'img_ls',
}
# the img_type codes as one list, so that every generated SQL query and
# its argument list walk the columns in the same order
imgTypes = list(imgTypeMap)

# fixed import settings for the CDLI image list
upd_fields = "fn,,img_type,id_text"
id_field = "id_text"
img_type_field = "img_type"
32:
def setup(self):
    """specialized setup version

    Runs the saved stock setup (ASCII_handler._setup) and then prepares
    what the img_type import needs on the handler:

    self.updQuery     -- UPDATE setting every img_* column of one row
    self.delQuery     -- UPDATE setting every img_* column of one row to null
    self.xml_img_type -- self.sql_field_map entry for the img_type field
    self.img_data     -- empty dict, filled while reading the text file
    """
    ASCII_handler._setup(self)
    # SQL column names in the shared imgTypes order; parse() builds its
    # argument list in the same order
    img_columns = [imgTypeMap[t] for t in imgTypes]
    # create special updQuery for img_type fields
    # (%%s survives the formatting as the %s placeholder of the query)
    set_clause = ', '.join(["%s = %%s" % col for col in img_columns])
    self.updQuery = "UPDATE %s SET %s WHERE %s = %%s" % (
        self.table, set_clause, id_field)
    # create special delQuery for img_type fields
    null_clause = ', '.join(["%s = null" % col for col in img_columns])
    self.delQuery = "UPDATE %s SET %s WHERE %s = %%s" % (
        self.table, null_clause, id_field)
    # text file field for img_type
    self.xml_img_type = self.sql_field_map[img_type_field]
    # dict of all img fields
    self.img_data = {}
1.1 casties 46:
47:
def handle_line(self, line):
    """process single line of text data

    The line is split on whitespace and stored in self.xml_data, and
    self.rowcnt is incremented. Blank lines and thumbnail entries (file
    name starting with "tn_") are otherwise ignored. For an id that is
    present in self.dbIDs the match counter is incremented and the file
    name is remembered in self.img_data[id][img_type]. Nothing is written
    to the database here.

    line -- one raw line read from the image list file
    """
    self.logger.debug("START ROW")

    fields = line.split()
    self.xml_data = fields
    self.rowcnt += 1

    if not fields:
        # blank line: there are no columns to index into
        self.logger.debug("END ROW")
        return

    # synchronize by id_field
    id_val = fields[self.xml_id]
    # get img_type
    img_type_val = fields[self.xml_img_type]
    # file name column as configured in sql_field_map
    fn = fields[self.sql_field_map['fn']]
    if fn.startswith('tn_'):
        # ignore thumbnails
        self.logger.debug("END ROW")
        return

    if id_val in self.dbIDs:
        # id exists in the database: count the match and collect the
        # file name under its img_type
        self.dbIDs[id_val] += 1
        self.img_data.setdefault(id_val, {})[img_type_val] = fn
        self.logger.debug("update: %s = %s", id_val, {img_type_val: fn})

    elif not self.update_mode:
        # creating new rows is not implemented; the row is only logged
        self.logger.debug("insert: %s", [fn, img_type_val, id_val])

    if (self.rowcnt % 100) == 0:
        # progress report every 100 rows
        self.logger.info(" row:%d (id:%s)", self.rowcnt, id_val)

    self.logger.debug("END ROW")
    return
97:
98:
def parse(self, filename):
    """open file and read data

    Feeds every line of the file to self.handle_line, then walks
    self.dbIDs: ids with a counter of 0 are passed to self.delQuery, ids
    with a counter above 0 are passed to self.updQuery together with the
    file names collected in self.img_data. Commits every 100 ids and once
    at the end. If self.backup_table is set, the original table is renamed
    to a timestamped backup name and the working table takes its place.

    filename -- path of the image list text file
    """
    self.logger.info("reading data...")
    self.rowcnt = 0

    fh = open(filename, "r")
    try:
        self.logger.debug("BEGIN RESULTSET")
        # parse line-wise
        for line in fh:
            self.handle_line(line)
    finally:
        # release the file handle even if a line could not be processed
        fh.close()

    # done. Wrap up
    self.logger.debug("END RESULTSET")

    self.logger.info("importing rows in db...")
    i = 0
    for row_id in self.dbIDs:
        matches = self.dbIDs[row_id]
        if matches == 0:
            # unmatched entry: run the delete query for this id
            SimpleSearch(self.db, self.delQuery, [row_id],
                         ascii=self.ascii_db, result=False)

        elif matches > 0:
            # assemble query
            imgd = self.img_data.get(row_id)
            if imgd is None:
                self.logger.error(
                    "No data for id %s while marked for update!", row_id)
                continue

            # one value per img_type in imgTypes order (None where the
            # file had no entry), followed by the id for the WHERE clause
            args = [imgd.get(f) for f in imgTypes]
            args.append(row_id)
            # update
            SimpleSearch(self.db, self.updQuery, args,
                         ascii=self.ascii_db, result=False)

        i += 1
        if i % 100 == 0:
            # progress report and intermediate commit every 100 ids
            self.logger.info(" import: %d (%s)", i, row_id)
            self.dbCon.commit()

    self.dbCon.commit()
    # reinstate backup tables
    if self.backup_table:
        backup_name = "%s_%s" % (
            self.orig_table, time.strftime('%Y_%m_%d_%H_%M_%S'))
        self.logger.info(
            "rename backup table %s to %s", self.orig_table, backup_name)
        qstr = "ALTER TABLE %s RENAME TO %s" % (self.orig_table, backup_name)
        self.db.execute(qstr)
        self.logger.info(
            "rename working table %s to %s", self.table, self.orig_table)
        qstr = "ALTER TABLE %s RENAME TO %s" % (self.table, self.orig_table)
        self.db.execute(qstr)
        self.dbCon.commit()

    return
153:
# monkey patch ASCII_handler: each original method stays reachable under
# the same name with a leading underscore, then the function defined in
# this module is installed under the original name
for _name, _replacement in (('handle_line', handle_line),
                            ('parse', parse),
                            ('setup', setup)):
    setattr(ASCII_handler, '_' + _name, getattr(ASCII_handler, _name))
    setattr(ASCII_handler, _name, _replacement)
del _name, _replacement
161:
162:
if __name__ == "__main__":
    from optparse import OptionParser

    opars = OptionParser()
    # options that carry a value
    opars.add_option("-f", "--file", dest="filename", metavar="FILE",
                     help="text file name")
    opars.add_option("-c", "--dsn", dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table", dest="table",
                     help="database table name")
    # on/off switches, all off unless given on the command line
    switch = dict(default=False, action="store_true")
    opars.add_option("--ascii-db", dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode",
                     **switch)
    opars.add_option("--replace", dest="replace_table",
                     help="replace table i.e. delete and re-insert data",
                     **switch)
    opars.add_option("--backup", dest="backup_table",
                     help="create backup of old table (breaks indices)",
                     **switch)
    opars.add_option("-d", "--debug", dest="debug",
                     help="debug mode (more output)",
                     **switch)

    (options, args) = opars.parse_args()

    if None in (options.filename, options.dsn, options.table):
        # not enough parameters: show version and usage, then give up
        print("importCDLIimglist "+version_string)
        opars.print_help()
        sys.exit(1)

    # INFO unless debug output was requested
    loglevel = logging.INFO
    if options.debug:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # fixed settings for CDLI imglist
    options.update_mode = True
    options.id_field = id_field
    options.update_fields = upd_fields

    importASCII(options)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>