Annotation of ZSQLExtend/importFMPXML.py, revision 1.35
1.1 casties 1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
1.7 dwinter 7: import types
1.8 casties 8: import time
1.33 casties 9: import re
1.1 casties 10:
1.5 casties 11: from xml import sax
1.34 dwinter 12: from xml.sax.handler import ContentHandler
13: #from amara import saxtools
1.5 casties 14:
try:
    # prefer the psycopg2 driver
    import psycopg2 as psycopg
    import psycopg2.extensions
    # switch to unicode: return unicode objects from the DB
    # instead of utf-8 encoded byte strings
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
    psyco = 2
except ImportError:
    # fall back to the old psycopg 1.x driver
    import psycopg
    psyco = 1
24:
# XML namespace of FileMaker FMPXMLRESULT documents
fm_ns = 'http://www.filemaker.com/fmpxmlresult'

# version string reported by the command line interface
version_string = "V0.6.7 ROC 21.6.2011"
def unicodify(text, withNone=False):
    """decode str (utf-8 or latin-1 representation) into unicode object

    @param text: byte string (or unicode object, or None)
    @param withNone: if True, None is passed through instead of becoming u""
    @returns: unicode object (or None if withNone and text is None)
    """
    if withNone and text is None:
        return None
    if not text:
        return u""
    if isinstance(text, str):
        try:
            return text.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8 -- fall back to Latin-1,
            # which can decode any byte sequence
            return text.decode('latin-1')
    else:
        return text
def utf8ify(text, withNone=False):
    """encode unicode object or string into byte string in utf-8 representation"""
    if text is None and withNone:
        return None
    if not text:
        return ""
    # only real unicode objects get encoded; byte strings pass through unchanged
    return text.encode('utf-8') if isinstance(text, unicode) else text
1.8 casties 53:
def getTextFromNode(nodename):
    """get the cdata content of a node

    Returns the concatenated data of all direct TEXT_NODE children,
    or "" for a None node.
    """
    if nodename is None:
        return ""
    return "".join(child.data for child in nodename.childNodes
                   if child.nodeType == child.TEXT_NODE)
64:
def sql_quote(v):
    """quote the string v as an SQL string literal

    Doubles single quotes and escapes backslashes, then wraps the
    result in single quotes.
    """
    # replacement order does not matter: neither replacement produces
    # characters that the other one matches
    v = v.replace("\\", "\\\\")
    v = v.replace("'", "''")
    return "'%s'" % v
1.1 casties 72:
def sqlName(s, lc=True, more=''):
    """returns restricted ASCII-only version of string

    Apostrophes are removed, every other character outside
    [A-Za-z0-9_] (plus the characters given in more) becomes '_'.
    The result is lower-cased unless lc is False.
    """
    if s is None:
        return ""
    # drop apostrophes entirely, then map all other disallowed
    # characters to underscores
    cleaned = re.sub('[^A-Za-z0-9_' + more + ']', '_', s.replace("'", ""))
    return cleaned.lower() if lc else cleaned
86:
def SimpleSearch(curs, query, args=None, ascii=False):
    """execute sql query and return data

    @param curs: DB cursor
    @param query: SQL query string (may contain %s placeholders)
    @param args: (optional) sequence of query parameters
    @param ascii: (optional) encode query and parameters as UTF-8 byte strings
    @returns: list of result rows, or None if the statement produced no result set
    """
    #logger.debug("executing: "+query)
    if ascii:
        # encode all in UTF-8
        query = utf8ify(query)
        if args is not None:
            args = [utf8ify(a, withNone=True) for a in args]

    curs.execute(query, args)
    #logger.debug("sql done")
    try:
        return curs.fetchall()
    except Exception:
        # non-SELECT statements have no result set to fetch
        return None
1.1 casties 106:
107:
class TableColumn:
    """simple type for storing sql column name and type"""

    def __init__(self, name, type=None):
        self.name = name  # column name
        self.type = type  # column type, or None to use the default

    def getName(self):
        """return the column name"""
        return self.name

    def getType(self):
        """return the column type, defaulting to 'text'"""
        return self.type if self.type is not None else "text"

    def __str__(self):
        return self.name
127:
128:
class xml_handler(ContentHandler):
    """SAX ContentHandler that imports a FileMaker FMPXMLRESULT file into an SQL table."""

    def __init__(self,options):
        """SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
        @param options: dict of options
        @param options.dsn: database connection string
        @param options.table: name of the table the xml shall be imported into
        @param options.filename: xmlfile filename
        @param options.update_fields: (optional) list of fields to update; default is to create all fields
        @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
        @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
        @param options.lc_names: (optional) lower case and clean up field names from XML
        @param options.keep_fields: (optional) don't add fields to SQL database
        @param options.ascii_db: (optional) assume ascii encoding in db
        @param options.replace_table: (optional) delete and re-insert data
        @param options.backup_table: (optional) create backup of old table (breaks indices)
        @param options.use_logger_instance: (optional) use this instance of a logger
        """

        # set up logger: a caller-supplied instance wins over the module logger
        if hasattr(options, 'use_logger_instance'):
            self.logger = options.use_logger_instance
        else:
            self.logger = logging.getLogger('db.import.fmpxml')


        # set up parser state
        self.result={}
        self.event = None

        # connect database
        self.dbCon = psycopg.connect(options.dsn)
        logging.debug("DB encoding: %s"%getattr(self.dbCon, 'encoding', 'UNKNOWN'))
        self.db = self.dbCon.cursor()
        assert self.db, "AIIEE no db cursor for %s!!"%options.dsn

        # copy options into attributes (absent options become None)
        self.table = getattr(options,"table",None)
        self.update_fields = getattr(options,"update_fields",None)
        self.id_field = getattr(options,"id_field",None)
        self.sync_mode = getattr(options,"sync_mode",None)
        self.lc_names = getattr(options,"lc_names",None)
        self.keep_fields = getattr(options,"keep_fields",None)
        self.ascii_db = getattr(options,"ascii_db",None)
        self.replace_table = getattr(options,"replace_table",None)
        self.backup_table = getattr(options,"backup_table",None)
        self.read_before_update = getattr(options,"read_before_update",None)
        self.debug_data = getattr(options,"debug_data",None)

        self.logger.debug("dsn: "+repr(getattr(options,"dsn",None)))
        self.logger.debug("table: "+repr(self.table))
        self.logger.debug("update_fields: "+repr(self.update_fields))
        self.logger.debug("id_field: "+repr(self.id_field))
        self.logger.debug("sync_mode: "+repr(self.sync_mode))
        self.logger.debug("lc_names: "+repr(self.lc_names))
        self.logger.debug("keep_fields: "+repr(self.keep_fields))
        self.logger.debug("ascii_db: "+repr(self.ascii_db))
        self.logger.debug("replace_table: "+repr(self.replace_table))
        self.logger.debug("backup_table: "+repr(self.backup_table))
        self.logger.debug("read_before_update: "+repr(self.read_before_update))
        self.logger.debug("debug_data: "+repr(self.debug_data))

        # map of id values already in the DB; value counts how often
        # the id was matched while reading the XML (0 = not matched yet)
        self.dbIDs = {}
        self.rowcnt = 0

        # NAME of the FIELD element currently being read (set in handle_meta_field)
        self.currentName = None

        if self.id_field is not None:
            # prepare a list of ids for sync mode
            qstr="select %s from %s"%(self.id_field,self.table)
            for id in SimpleSearch(self.db, qstr):
                # value 0: not updated
                self.dbIDs[id[0]] = 0;
                self.rowcnt += 1

            self.logger.info("%d entries in DB to sync"%self.rowcnt)

        # names of fields in XML file
        self.xml_field_names = []
        # map XML field names to SQL field names
        self.xml_field_map = {}
        # and vice versa
        self.sql_field_map = {}

        return
219:
1.34 dwinter 220: def startElement(self, name, attrs):
221: logging.debug(name)
222: if (name.lower() == "field") :
223: self.handle_meta_field(attrs)
224: if (name.lower() == "row") :
225: logging.debug("handleROW")
226: self.handle_row(attrs)
227: if (name.lower()=="resultset"):
228: self.handle_data_fields(attrs)
229:
230: if (name.lower()=="data"):
231: self.handle_data_tag(attrs);
232:
233: def endElement(self,name):
234: if (name.lower() == "resultset") :
235: self.currentTag=""
236: self.handle_end_data_fields()
237: if (name.lower() == "field") :
238: self.handle_end_meta_field()
239: if (name.lower() == "metadata"):
240: self.handle_end_meta_fields()
241: if (name.lower() == "row") :
242: logging.debug("handleROW")
243: self.handle_end_row()
244:
245: if (name.lower() == "col") :
246: self.handle_end_col()
247: def characters(self,content):
248:
249: try:
250: fn = self.xml_field_names[self.colIdx]
1.35 ! dwinter 251:
! 252: contentTmp = self.xml_data.get(fn,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.)
! 253: self.xml_data[fn] = contentTmp+content
1.34 dwinter 254: except:
255: logging.debug(content)
256: pass
257:
258: # if self.currentName is not None:
259: # logging.debug(self.currentName+" "+content)
260: # self.currentRow[self.currentName]=content;
261: #
    def handle_end_meta_fields(self):
        """Called at the end of the METADATA section.

        Sets up backup/temp tables, reads the current SQL table layout,
        decides which fields to update, adds missing columns to the table
        (unless keep_fields is set), and prepares the INSERT/UPDATE/SELECT
        statements used while reading the data rows.
        """
        if self.debug_data:
            self.logger.debug("END METADATA")

        # rename table for backup
        if self.backup_table:
            self.orig_table = self.table
            self.tmp_table = self.table + "_tmp"
            backup_name = "%s_%s"%(self.table,time.strftime('%Y_%m_%d_%H_%M_%S'))

            # remove old temp table (it may not exist -- ignore errors)
            qstr = "DROP TABLE %s"%(self.tmp_table)
            try:
                self.db.execute(qstr)
            except:
                pass

            self.dbCon.commit()

            if self.id_field:
                # sync mode -- copy backup table, update current table
                self.logger.info("copy table %s to %s"%(self.table,backup_name))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(backup_name,self.table)

            else:
                # replace mode -- create empty tmp table, insert into tmp table
                self.table = self.tmp_table
                self.logger.info("create empty table %s"%(self.table))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table)

            self.db.execute(qstr)
            self.dbCon.commit()

        # delete data from table for replace
        if self.replace_table:
            self.logger.info("delete data from table %s"%(self.table))
            qstr = "TRUNCATE TABLE %s"%(self.table)
            self.db.execute(qstr)
            self.dbCon.commit()

        # try to match date style with XML
        self.db.execute("set datestyle to 'german'")

        #self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names))
        # get list of fields and types of db table (PostgreSQL system catalogs)
        qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'"
        self.sql_fields={}
        for f in SimpleSearch(self.db, qstr%self.table):
            fn = f[0]
            ft = f[1]
            #print "SQL fields: %s (%s)"%(n,t)
            self.sql_fields[fn] = TableColumn(fn,ft)

        # translate id_field (SQL-name) to XML-name
        self.xml_id = self.sql_field_map.get(self.id_field, None)
        # get type of id_field
        if self.id_field:
            self.id_type = self.sql_fields[self.id_field].getType()
        else:
            self.id_type = None

        # check fields to update
        if self.update_fields is None:
            if self.keep_fields:
                # update all existing fields from sql (when they are in the xml file)
                self.update_fields = {}
                for f in self.sql_fields.keys():
                    if self.sql_field_map.has_key(f):
                        xf = self.sql_field_map[f]
                        self.update_fields[f] = self.xml_field_map[xf]

            else:
                # update all fields
                if self.lc_names:
                    # create dict with sql names
                    self.update_fields = {}
                    for f in self.xml_field_map.values():
                        self.update_fields[f.getName()] = f

                else:
                    self.update_fields = self.xml_field_map

        # and translate to list of xml fields
        if self.lc_names:
            self.xml_update_list = [self.sql_field_map[x] for x in self.update_fields]
        else:
            self.xml_update_list = self.update_fields.keys()

        if not self.keep_fields:
            # adjust db table to fields in XML and update_fields
            for f in self.xml_field_map.values():
                self.logger.debug("sync-fieldname: %s"%f.getName())
                sf = self.sql_fields.get(f.getName(), None)
                uf = self.update_fields.get(f.getName(), None)
                if sf is not None:
                    # name in db -- check type (mismatch is only logged, not fixed)
                    if f.getType() != sf.getType():
                        self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType()))
                elif uf is not None:
                    # add field to table
                    fn = uf.getName()
                    ft = uf.getType()
                    qstr="alter table %s add \"%s\" %s"%(self.table,fn,ft)
                    self.logger.info("db add field:"+qstr)

                    if self.ascii_db and type(qstr)==types.UnicodeType:
                        qstr=qstr.encode('utf-8')

                    self.db.execute(qstr)
                    self.dbCon.commit()
                    # add field to field list
                    self.sql_fields[fn] = TableColumn(fn, ft)

        # prepare sql statements for update (do not update id_field)
        setStr=string.join(["\"%s\" = %%s"%self.xml_field_map[f] for f in self.xml_update_list if f != self.xml_id], ', ')
        self.updQuery="UPDATE %s SET %s WHERE \"%s\" = %%s"%(self.table,setStr,self.id_field)
        # and select (for update check)
        selStr=string.join([self.xml_field_map[f].getName() for f in self.xml_update_list if f != self.xml_id], ', ')
        self.selQuery="SELECT %s FROM %s WHERE \"%s\" = %%s"%(selStr,self.table,self.id_field)
        # and insert
        fields=string.join(["\"%s\""%self.xml_field_map[x].getName() for x in self.xml_update_list], ',')
        values=string.join(['%s' for f in self.xml_update_list], ',')
        self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values)
        self.logger.debug("update-query: "+self.updQuery)
        self.logger.debug("sel-query: "+self.selQuery)
        self.logger.debug("add-query: "+self.addQuery)
        return
409:
1.34 dwinter 410: def handle_meta_field(self, attrs):
411: self.currentName = attrs.get('NAME')
412: #yield None
413: return
414: def handle_end_meta_field(self):
1.5 casties 415: #Element closed. Wrap up
1.34 dwinter 416: name = self.currentName
1.8 casties 417: if self.lc_names:
418: # clean name
1.33 casties 419: sqlname = sqlName(name)
1.8 casties 420: else:
421: sqlname = name
422: self.xml_field_names.append(name)
423: # map to sql name and default text type
424: self.xml_field_map[name] = TableColumn(sqlname, 'text')
425: self.sql_field_map[sqlname] = name
1.15 casties 426: self.logger.debug("FIELD name: "+name)
1.5 casties 427: return
428:
1.34 dwinter 429: def handle_data_fields(self, attrs):
430:
1.5 casties 431: #First round through the generator corresponds to the
432: #start element event
1.15 casties 433: self.logger.info("reading data...")
1.30 casties 434: if self.debug_data:
435: self.logger.debug("START RESULTSET")
1.5 casties 436: self.rowcnt = 0
1.34 dwinter 437: return
1.1 casties 438:
    def handle_end_data_fields(self):
        """Called at the end of the RESULTSET element.

        Commits the imported data; in sync mode deletes DB rows that were
        not matched by the XML file; in replace mode swaps the working
        table back in and keeps the old table as a timestamped backup.
        """
        if self.debug_data:
            self.logger.debug("END RESULTSET")
        self.dbCon.commit()

        if self.sync_mode:
            # delete unmatched entries in db
            if self.rowcnt > 0:
                self.logger.info("deleting unmatched rows from db")
                delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field)
                for id in self.dbIDs.keys():
                    # find all not-updated fields (count 0 = never seen in XML)
                    if self.dbIDs[id] == 0:
                        self.logger.info(" delete: %s"%id)
                        SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db)

                    elif self.dbIDs[id] > 1:
                        self.logger.info(" sync: ID %s used more than once?"%id)

                self.dbCon.commit()

            else:
                # safety in case we had an empty file
                self.logger.warning("no rows read! not deleting unmatched rows!")

        # reinstate backup tables (replace mode only; sync mode copied earlier)
        if self.backup_table and not self.id_field:
            backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
            self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
            self.db.execute(qstr)
            self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
            self.db.execute(qstr)
            self.dbCon.commit()

        self.logger.info("Done (%s rows)"%self.rowcnt)
        return
481:
482: def handle_row(self, end_condition):
1.34 dwinter 483:
1.30 casties 484: if self.debug_data:
485: self.logger.debug("START ROW")
1.8 casties 486: self.xml_data = {}
1.5 casties 487: self.colIdx = 0
1.1 casties 488:
1.34 dwinter 489: return
490:
491: def handle_end_row(self):
1.5 casties 492: #delegate is a generator that handles all the events "within"
493: #this element
1.34 dwinter 494:
1.5 casties 495: #Element closed. Wrap up
1.30 casties 496: if self.debug_data:
497: self.logger.debug("END ROW")
1.5 casties 498: self.rowcnt += 1
499: # process collected row data
500: update=False
501: id_val=''
502: # synchronize by id_field
503: if self.id_field:
1.17 casties 504: if self.id_type == 'integer':
1.29 casties 505: try:
506: id_val = int(self.xml_data[self.xml_id])
507: except:
508: pass
1.17 casties 509: else:
510: id_val = self.xml_data[self.xml_id]
1.29 casties 511:
512: if not id_val:
513: # abort update
514: self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt)
515: return
1.17 casties 516:
1.5 casties 517: if id_val in self.dbIDs:
518: self.dbIDs[id_val] += 1
519: update=True
1.8 casties 520:
521: # collect all values
522: args = []
523: for fn in self.xml_update_list:
1.17 casties 524: # do not update id_field
1.19 casties 525: if update and fn == self.xml_id:
1.17 casties 526: continue
527:
1.8 casties 528: f = self.xml_field_map[fn]
1.34 dwinter 529: val = self.xml_data.get(fn,None)
1.8 casties 530: type = self.sql_fields[f.getName()].getType()
1.31 casties 531: if type == "date" and len(val.strip()) == 0:
1.8 casties 532: # empty date field
533: val = None
534:
535: elif type == "integer" and len(val) == 0:
536: # empty int field
537: val = None
538:
539: args.append(val)
540:
1.5 casties 541: if update:
542: # update existing row (by id_field)
1.23 casties 543: if self.read_before_update:
544: # read data
1.28 casties 545: if self.debug_data:
546: self.logger.debug("update check: %s = %s"%(id_val, args))
1.23 casties 547: oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db)
548: #i = 0
549: #for v in oldrow[0]:
550: # logging.debug("v: %s = %s (%s)"%(v,args[i],v==args[i]))
551: # i += 1
552: if tuple(oldrow[0]) != tuple(args):
553: # data has changed -- update
1.28 casties 554: if self.debug_data:
555: self.logger.debug("really update: %s = %s"%(id_val, args))
1.23 casties 556: args.append(id_val) # last arg is id
557: SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)
558:
559: else:
560: # always update
1.28 casties 561: if self.debug_data:
562: self.logger.debug("update: %s = %s"%(id_val, args))
1.23 casties 563: args.append(id_val) # last arg is id
564: SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)
1.8 casties 565:
1.5 casties 566: else:
567: # create new row
1.28 casties 568: if self.debug_data:
569: self.logger.debug("insert: %s"%args)
1.8 casties 570: SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)
1.5 casties 571:
1.15 casties 572: #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
573: if (self.rowcnt % 100) == 0:
574: self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
1.5 casties 575: self.dbCon.commit()
576:
577: return
578:
1.34 dwinter 579: def handle_end_col(self):
580:
581:
1.5 casties 582: self.colIdx += 1
583: return
584:
1.34 dwinter 585:
586: def handle_data_tag(self, attrs):
1.5 casties 587: #print "START DATA"
1.34 dwinter 588: self.content = u''
589: # yield None
590: # # gather child elements
591: # while not self.event == end_condition:
592: # if self.event[0] == saxtools.CHARACTER_DATA:
593: # content += self.params
594: # yield None
595: # #Element closed. Wrap up
596: # fn = self.xml_field_names[self.colIdx]
597: # self.xml_data[fn] = content
1.5 casties 598: return
599:
600:
def importFMPXML(options):
    """import FileMaker XML file (FMPXMLRESULT format) into the table.
    @param options: dict of options
    @param options.dsn: database connection string
    @param options.table: name of the table the xml shall be imported into
    @param options.filename: xmlfile filename
    @param options.update_fields: (optional) list of fields to update; default is to create all fields
    @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
    @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
    @param options.lc_names: (optional) lower case and clean up field names from XML
    @param options.keep_fields: (optional) don't add fields to SQL database
    @param options.ascii_db: (optional) assume ascii encoding in db
    @param options.replace_table: (optional) delete and re-insert data
    @param options.backup_table: (optional) create backup of old table
    """
    if getattr(options, 'update_fields', None):
        # turn the comma separated "name" or "name:type" list
        # into a dict of TableColumn objects
        uf = {}
        for entry in options.update_fields.split(','):
            if entry.find(':') > 0:
                (n, t) = entry.split(':')
            else:
                n = entry
                t = None
            uf[n] = TableColumn(n, t)

        options.update_fields = uf

    if getattr(options, 'id_field', None) and getattr(options, 'replace_table', None):
        logging.error("ABORT: sorry, you can't do both sync (id_field) and replace")
        return

    # the SAX handler does all the work while parsing the file
    parser = sax.make_parser()
    consumer = xml_handler(options)
    parser.setContentHandler(consumer)
    parser.parse(options.filename)
642:
1.1 casties 643:
if __name__ == "__main__":
    from optparse import OptionParser

    # build the command line interface
    opars = OptionParser()
    opars.add_option("-f", "--file",
                     dest="filename",
                     help="FMPXML file name", metavar="FILE")
    opars.add_option("-c", "--dsn",
                     dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table",
                     dest="table",
                     help="database table name")
    opars.add_option("--fields", default=None,
                     dest="update_fields",
                     help="list of fields to update (comma separated, sql-names)", metavar="LIST")
    opars.add_option("--id-field", default=None,
                     dest="id_field",
                     help="name of id field for synchronisation (only appends data otherwise, sql-name)", metavar="NAME")
    opars.add_option("--sync", "--sync-mode", default=False, action="store_true",
                     dest="sync_mode",
                     help="do full sync based on id field (remove unmatched fields from db)")
    opars.add_option("--lc-names", default=False, action="store_true",
                     dest="lc_names",
                     help="clean and lower case field names from XML")
    opars.add_option("--keep-fields", default=False, action="store_true",
                     dest="keep_fields",
                     help="don't add fields from XML to SQL table")
    opars.add_option("--ascii-db", default=False, action="store_true",
                     dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true",
                     dest="replace_table",
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true",
                     dest="backup_table",
                     help="create backup of old table")
    opars.add_option("--read-before-update", default=False, action="store_true",
                     dest="read_before_update",
                     help="read all data to check if it really changed")
    opars.add_option("-d", "--debug", default=False, action="store_true",
                     dest="debug",
                     help="debug mode (more output)")
    opars.add_option("--debug-data", default=False, action="store_true",
                     dest="debug_data",
                     help="debug mode for data (even more output)")

    (options, args) = opars.parse_args()

    # file and dsn are mandatory; print usage and exit otherwise
    if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
        print "importFMPXML "+version_string
        opars.print_help()
        sys.exit(1)

    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    # configure the root logger used by the importer
    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    importFMPXML(options)
708:
1.9 dwinter 709:
710:
1.1 casties 711:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>