1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
7:
8: from xml import sax
9: from amara import saxtools
10:
11: try:
12: import psycopg2 as psycopg
13: psyco = 2
14: except:
15: import psycopg
16: psyco = 1
17:
18: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
19:
20: def getTextFromNode(nodename):
21: """get the cdata content of a node"""
22: if nodename is None:
23: return ""
24: nodelist=nodename.childNodes
25: rc = ""
26: for node in nodelist:
27: if node.nodeType == node.TEXT_NODE:
28: rc = rc + node.data
29: return rc
30:
31: def sql_quote(v):
32: # quote dictionary
33: quote_dict = {"\'": "''", "\\": "\\\\"}
34: for dkey in quote_dict.keys():
35: if string.find(v, dkey) >= 0:
36: v=string.join(string.split(v,dkey),quote_dict[dkey])
37: return "'%s'"%v
38:
39: def SimpleSearch(curs,query, args=None):
40: """execute sql query and return data"""
41: logging.debug("executing: "+query)
42: if psyco == 1:
43: query = query.encode("UTF-8")
44: curs.execute(query, args)
45: logging.debug("sql done")
46: try:
47: return curs.fetchall()
48: except:
49: return None
50:
51:
52:
53: class xml_handler:
54:
55: def __init__(self,dsn,table,update_fields=None,id_field=None,sync_mode=False):
56: '''
57: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
58: @param dsn: database connection string
59: @param table: name of the table the xml shall be imported into
60: @param filename: xmlfile filename
61: @param update_fields: (optional) list of fields to update; default is to create all fields
62: @param id_field: (optional) field which uniquely identifies an entry for updating purposes.
63: @param sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
64: '''
65: # set up parser
66: self.event = None
67: self.top_dispatcher = {
68: (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
69: self.handle_meta_fields,
70: (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
71: self.handle_data,
72: }
73:
74: # connect database
75: self.dbCon = psycopg.connect(dsn)
76: self.db = self.dbCon.cursor()
77: assert self.db, "AIIEE no db cursor for %s!!"%dsn
78:
79: logging.debug("dsn: "+repr(dsn))
80: logging.debug("table: "+repr(table))
81: logging.debug("update_fields: "+repr(update_fields))
82: logging.debug("id_field: "+repr(id_field))
83: logging.debug("sync_mode: "+repr(sync_mode))
84:
85: self.table = table
86: self.update_fields = update_fields
87: self.id_field = id_field
88: self.sync_mode = sync_mode
89:
90: self.dbIDs = {}
91: self.rowcnt = 0
92:
93: if id_field is not None:
94: # prepare a list of ids for sync mode
95: qstr="select %s from %s"%(id_field,table)
96: for id in SimpleSearch(self.db, qstr):
97: # value 0: not updated
98: self.dbIDs[id[0]] = 0;
99: self.rowcnt += 1
100:
101: logging.info("%d entries in DB to sync"%self.rowcnt)
102:
103: self.fieldNames = []
104:
105: return
106:
107: def handle_meta_fields(self, end_condition):
108: dispatcher = {
109: (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
110: self.handle_meta_field,
111: }
112: #First round through the generator corresponds to the
113: #start element event
114: logging.debug("START METADATA")
115: yield None
116:
117: #delegate is a generator that handles all the events "within"
118: #this element
119: delegate = None
120: while not self.event == end_condition:
121: delegate = saxtools.tenorsax.event_loop_body(
122: dispatcher, delegate, self.event)
123: yield None
124:
125: #Element closed. Wrap up
126: logging.debug("END METADATA")
127: if self.update_fields is None:
128: # update all fields
129: self.update_fields = self.fieldNames
130:
131: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
132: # get list of fields in db table
133: qstr="""select attname from pg_attribute, pg_class where attrelid = pg_class.oid and relname = '%s'"""
134: columns=[x[0] for x in SimpleSearch(self.db, qstr%self.table)]
135:
136: # adjust db table to fields in XML and fieldlist
137: for fieldName in self.fieldNames:
138: logging.debug("db-fieldname:"+repr(fieldName))
139: if (fieldName not in columns) and (fieldName in self.update_fields):
140: qstr="alter table %s add %s %s"%(self.table,fieldName,'text')
141: logging.info("db add field:"+qstr)
142: self.db.execute(qstr)
143: self.dbCon.commit()
144:
145: # prepare sql statements for update
146: setStr=string.join(["%s = %%s"%f for f in self.update_fields], ', ')
147: self.updQuery="UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field)
148: # and insert
149: fields=string.join(self.update_fields, ',')
150: values=string.join(['%s' for f in self.update_fields], ',')
151: self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values)
152: #print "upQ: ", self.updQuery
153: #print "adQ: ", self.addQuery
154:
155: return
156:
157: def handle_meta_field(self, end_condition):
158: name = self.params.get((None, u'NAME'))
159: yield None
160: #Element closed. Wrap up
161: self.fieldNames.append(name)
162: logging.debug("FIELD name: "+name)
163: return
164:
165: def handle_data(self, end_condition):
166: dispatcher = {
167: (saxtools.START_ELEMENT, fm_ns, u'ROW'):
168: self.handle_row,
169: }
170: #First round through the generator corresponds to the
171: #start element event
172: logging.debug("START RESULTSET")
173: self.rowcnt = 0
174: yield None
175:
176: #delegate is a generator that handles all the events "within"
177: #this element
178: delegate = None
179: while not self.event == end_condition:
180: delegate = saxtools.tenorsax.event_loop_body(
181: dispatcher, delegate, self.event)
182: yield None
183:
184: #Element closed. Wrap up
185: logging.debug("END RESULTSET")
186: self.dbCon.commit()
187:
188: if self.sync_mode:
189: # delete unmatched entries in db
190: for id in self.dbIDs.keys():
191: # find all not-updated fields
192: if self.dbIDs[id] == 0:
193: logging.info(" delete:"+id)
194: qstr = "DELETE FROM %s WHERE %%s = '%%s'"%self.table
195: SimpleSearch(self.db, qstr, (self.id_field,id))
196:
197: elif self.dbIDs[id] > 1:
198: logging.info(" sync:"+"id used more than once?"+id)
199:
200: self.dbCon.commit()
201:
202: return
203:
204: def handle_row(self, end_condition):
205: dispatcher = {
206: (saxtools.START_ELEMENT, fm_ns, u'COL'):
207: self.handle_col,
208: }
209: logging.debug("START ROW")
210: self.dataSet = {}
211: self.colIdx = 0
212: yield None
213:
214: #delegate is a generator that handles all the events "within"
215: #this element
216: delegate = None
217: while not self.event == end_condition:
218: delegate = saxtools.tenorsax.event_loop_body(
219: dispatcher, delegate, self.event)
220: yield None
221:
222: #Element closed. Wrap up
223: logging.debug("END ROW")
224: self.rowcnt += 1
225: # process collected row data
226: update=False
227: id_val=''
228: # synchronize by id_field
229: if self.id_field:
230: id_val=self.dataSet[self.id_field]
231: if id_val in self.dbIDs:
232: self.dbIDs[id_val] += 1
233: update=True
234:
235: if update:
236: # update existing row (by id_field)
237: #setvals=[]
238: #for fieldName in self.update_fields:
239: # setvals.append("%s = %s"%(fieldName,sql_quote(self.dataSet[fieldName])))
240: #setStr=string.join(setvals, ',')
241: id_val=self.dataSet[self.id_field]
242: #qstr="UPDATE %s SET %s WHERE %s = '%s' "%(self.table,setStr,self.id_field,id_val)
243: args = [self.dataSet[f] for f in self.update_fields]
244: args.append(id_val)
245: SimpleSearch(self.db, self.updQuery, args)
246: logging.debug("update: %s"%id_val)
247: else:
248: # create new row
249: #fields=string.join(update_fields, ',')
250: #values=string.join([" %s "%sql_quote(self.dataSet[x]) for x in self.update_fields], ',')
251: #qstr="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,self.values)
252: args = [self.dataSet[f] for f in self.update_fields]
253: SimpleSearch(self.db, self.addQuery, args)
254: logging.debug("add: %s"%self.dataSet.get(self.id_field, rowcnt))
255:
256: #logging.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
257: if (self.rowcnt % 10) == 0:
258: logging.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
259: self.dbCon.commit()
260:
261: return
262:
263: def handle_col(self, end_condition):
264: dispatcher = {
265: (saxtools.START_ELEMENT, fm_ns, u'DATA'):
266: self.handle_data_tag,
267: }
268: #print "START COL"
269: yield None
270: #delegate is a generator that handles all the events "within"
271: #this element
272: delegate = None
273: while not self.event == end_condition:
274: delegate = saxtools.tenorsax.event_loop_body(
275: dispatcher, delegate, self.event)
276: yield None
277: #Element closed. Wrap up
278: #print "END COL"
279: self.colIdx += 1
280: return
281:
282: def handle_data_tag(self, end_condition):
283: #print "START DATA"
284: content = u''
285: yield None
286: # gather child elements
287: while not self.event == end_condition:
288: if self.event[0] == saxtools.CHARACTER_DATA:
289: content += self.params
290: yield None
291: #Element closed. Wrap up
292: field = self.fieldNames[self.colIdx]
293: self.dataSet[field] = content
294: #print " DATA(", field, ") ", repr(content)
295: return
296:
297:
298:
299:
300:
301: ##
302: ## public static int main()
303: ##
304:
305: from optparse import OptionParser
306:
307: opars = OptionParser()
308: opars.add_option("-f", "--file",
309: dest="filename",
310: help="FMPXML file name", metavar="FILE")
311: opars.add_option("-c", "--dsn",
312: dest="dsn",
313: help="database connection string")
314: opars.add_option("-t", "--table",
315: dest="table",
316: help="database table name")
317: opars.add_option("--fields", default=None,
318: dest="update_fields",
319: help="list of fields to update (comma separated)", metavar="LIST")
320: opars.add_option("--id-field", default=None,
321: dest="id_field",
322: help="name of id field for synchronisation (only appends data otherwise)", metavar="NAME")
323: opars.add_option("--sync-mode", default=False, action="store_true",
324: dest="sync_mode",
325: help="do full sync based on id field (remove unmatched fields from db)")
326: opars.add_option("-d", "--debug", default=False, action="store_true",
327: dest="debug",
328: help="debug mode (more output)")
329:
330: (options, args) = opars.parse_args()
331:
332: if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
333: opars.print_help()
334: sys.exit(1)
335:
336: if options.debug:
337: loglevel = logging.DEBUG
338: else:
339: loglevel = logging.INFO
340:
341: logging.basicConfig(level=loglevel,
342: format='%(asctime)s %(levelname)s %(message)s',
343: datefmt='%H:%M:%S')
344:
345: update_fields = None
346:
347: if options.update_fields:
348: update_fields = [string.strip(s) for s in options.update_fields.split(',')]
349:
350: parser = sax.make_parser()
351: #The "consumer" is our own handler
352: consumer = xml_handler(dsn=options.dsn,table=options.table,
353: update_fields=update_fields,id_field=options.id_field,
354: sync_mode=options.sync_mode)
355: #Initialize Tenorsax with handler
356: handler = saxtools.tenorsax(consumer)
357: #Resulting tenorsax instance is the SAX handler
358: parser.setContentHandler(handler)
359: parser.setFeature(sax.handler.feature_namespaces, 1)
360: parser.parse(options.filename)
361:
362:
363: print "DONE!"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>