1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
7: import types
8: import time
9:
10: from importASCII import ASCII_handler
11: from importASCII import importASCII
12: from importASCII import SimpleSearch
13:
version_string = "V0.1 ROC 4.12.2007"

# Maps each img_type code found in the image list to the name of the
# SQL column that receives the file name for that kind of image.
imgTypeMap = {
    'p': 'img_p',
    'd': 'img_d',
    'e': 'img_e',
    'ed': 'img_ed',
    'l': 'img_l',
    'ld': 'img_ld',
    'ls': 'img_ls',
}

# Comma-separated field list handed to importASCII as options.update_fields.
# NOTE(review): the empty entry between "fn" and "img_type" presumably
# stands for a column of the text file that is not imported -- confirm
# against importASCII before "fixing" the double comma.
upd_fields = "fn,,img_type,id_text"

# Field used to match rows of the text file with rows in the database.
id_field = "id_text"

# Field that carries the image type code (one of the keys of imgTypeMap).
img_type_field = "img_type"
29:
30:
def setup(self):
    """Specialised replacement for ASCII_handler.setup.

    Runs the stock setup first (reachable as ``ASCII_handler._setup``),
    then prepares one UPDATE statement per image type in
    ``self.updQueries`` and stores in ``self.xml_img_type`` the position
    of the image type field within a row of the text file.
    """
    ASCII_handler._setup(self)

    # one UPDATE statement per img_type, each one writing the img_* column
    # that belongs to that type; both values are bound at execution time
    queries = {}
    for kind, column in imgTypeMap.items():
        queries[kind] = "UPDATE %s SET %s = %%s WHERE id_text = %%s" % (
            self.table, column)
    self.updQueries = queries

    # text file field for img_type
    self.xml_img_type = self.sql_field_map[img_type_field]
38:
39:
def handle_line(self, line):
    """Process a single line of the whitespace-separated image list.

    The line is split on whitespace; the ID, the image type and the file
    name are read from the positions ``self.xml_id``,
    ``self.xml_img_type`` and ``self.sql_field_map['fn']``.  When the ID
    is a key of ``self.dbIDs`` its counter is incremented and the
    statement stored in ``self.updQueries`` for the image type is executed
    with the file name and the ID as arguments.  IDs that are not in
    ``self.dbIDs`` are only logged (and only when ``self.update_mode`` is
    off); the actual insert is disabled.

    Rows that are skipped without touching the database:
      * blank lines,
      * lines with too few fields (logged as an error),
      * file names starting with ``tn_`` (thumbnails),
      * rows whose image type has no entry in ``self.updQueries``
        (logged as an error).

    For rows that are not skipped, every 100th row (by ``self.rowcnt``)
    is reported at INFO level and ``self.dbCon`` is committed.

    Always returns None.
    """
    self.logger.debug("START ROW")

    content = line.split()
    self.xml_data = content
    self.rowcnt += 1

    if not content:
        # blank line (e.g. an empty line at the end of the file): there is
        # nothing to import, and indexing below would raise IndexError
        self.logger.debug("END ROW")
        return

    try:
        id_val = content[self.xml_id]
        img_type_val = content[self.xml_img_type]
        # filename is first value
        fn = content[self.sql_field_map['fn']]
    except IndexError:
        # malformed row: report it and carry on with the next line instead
        # of aborting the whole import
        self.logger.error("too few fields in row %d: %r", self.rowcnt, line)
        self.logger.debug("END ROW")
        return

    # synchronize by id_field: only IDs already known from the database
    # are updated; the counter records how often each ID was seen
    update = id_val in self.dbIDs
    if update:
        self.dbIDs[id_val] += 1

    if fn.startswith('tn_'):
        # ignore thumbnails
        self.logger.debug("END ROW")
        return

    args = [fn]

    if update:
        # update existing row (by id_field)
        try:
            query = self.updQueries[img_type_val]
        except KeyError:
            self.logger.error("unknown image type %s", img_type_val)
            self.logger.debug("END ROW")
            return

        # last argument is ID match
        args.append(id_val)
        self.logger.debug("update: %s = %s", id_val, args)
        SimpleSearch(self.db, query, args, ascii=self.ascii_db)

    elif not self.update_mode:
        # create new row -- the insert itself is disabled, the row is
        # only logged
        self.logger.debug("insert: %s", args)
        #SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)

    if (self.rowcnt % 100) == 0:
        # periodic progress report and commit
        self.logger.info(" row:%d (id:%s)", self.rowcnt, id_val)
        self.dbCon.commit()

    self.logger.debug("END ROW")
    return
93:
# monkey patch ASCII_handler
# keep the stock implementations reachable under underscore-prefixed names
# (the replacement setup() calls ASCII_handler._setup) ...
ASCII_handler._setup = ASCII_handler.setup
ASCII_handler._handle_line = ASCII_handler.handle_line
# ... then install the image-list specific versions defined in this module
ASCII_handler.setup = setup
ASCII_handler.handle_line = handle_line
99:
100:
if __name__ == "__main__":
    # command line entry point: parse the options, configure logging and
    # run the import with the settings fixed for the CDLI image list
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename", metavar="FILE",
                      help="text file name")
    parser.add_option("-c", "--dsn", dest="dsn",
                      help="database connection string")
    parser.add_option("-t", "--table", dest="table",
                      help="database table name")
    parser.add_option("--ascii-db", dest="ascii_db",
                      action="store_true", default=False,
                      help="the SQL database stores ASCII instead of unicode")
    parser.add_option("--replace", dest="replace_table",
                      action="store_true", default=False,
                      help="replace table i.e. delete and re-insert data")
    parser.add_option("--backup", dest="backup_table",
                      action="store_true", default=False,
                      help="create backup of old table (breaks indices)")
    parser.add_option("-d", "--debug", dest="debug",
                      action="store_true", default=False,
                      help="debug mode (more output)")

    options, args = parser.parse_args()

    # file name, DSN and table name are all mandatory
    if None in (options.filename, options.dsn, options.table):
        # not enough parameters
        print("importCDLIimglist "+version_string)
        parser.print_help()
        sys.exit(1)

    # INFO by default, DEBUG when requested on the command line
    loglevel = logging.INFO
    if options.debug:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    # fixed settings for CDLI imglist
    options.update_fields = upd_fields
    options.id_field = id_field
    options.update_mode = True

    importASCII(options)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>