Annotation of ZSQLExtend/importFMPXML.py, revision 1.34
1.1 casties 1: #!/usr/local/bin/python
2: #
3:
4: import string
5: import logging
6: import sys
1.7 dwinter 7: import types
1.8 casties 8: import time
1.33 casties 9: import re
1.1 casties 10:
1.5 casties 11: from xml import sax
1.34 ! dwinter 12: from xml.sax.handler import ContentHandler
! 13: #from amara import saxtools
1.5 casties 14:
1.2 casties 15: try:
16: import psycopg2 as psycopg
1.23 casties 17: import psycopg2.extensions
18: # switch to unicode
19: psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
1.2 casties 20: psyco = 2
21: except:
22: import psycopg
23: psyco = 1
24:
# XML namespace used by FileMaker FMPXMLRESULT documents
fm_ns = 'http://www.filemaker.com/fmpxmlresult'

# version string printed by the command line interface
version_string = "V0.6.7 ROC 21.6.2011"
1.20 casties 28:
def unicodify(text, withNone=False):
    """decode str (utf-8 or latin-1 representation) into unicode object

    @param text: byte string to decode; unicode and non-string values are
        passed through unchanged
    @param withNone: if True, None is returned as None instead of u""
    @return: unicode object (or None, see withNone)
    """
    if withNone and text is None:
        return None
    if not text:
        return u""
    if isinstance(text, str):
        try:
            return text.decode('utf-8')
        # only catch decode failures (was a bare except that also swallowed
        # KeyboardInterrupt/SystemExit); latin-1 decodes any byte sequence,
        # so the fallback cannot fail
        except UnicodeDecodeError:
            return text.decode('latin-1')
    else:
        return text
1.20 casties 42:
def utf8ify(text, withNone=False):
    """encode unicode object or string into byte string in utf-8 representation

    @param text: unicode object to encode; byte strings and other values are
        passed through unchanged
    @param withNone: if True, None is returned as None instead of ""
    @return: utf-8 encoded byte string (or None, see withNone)
    """
    if text is None and withNone:
        return None
    if not text:
        return ""
    # only unicode objects need encoding; byte strings go through as-is
    return text.encode('utf-8') if isinstance(text, unicode) else text
1.8 casties 53:
def getTextFromNode(nodename):
    """get the cdata content of a node

    Concatenates the data of all direct TEXT_NODE children of nodename.
    @param nodename: DOM node (may be None)
    @return: concatenated text content, "" for a None node
    """
    if nodename is None:
        return ""
    pieces = []
    for child in nodename.childNodes:
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
64:
def sql_quote(v):
    """quote and escape v as an SQL string literal

    Doubles single quotes and backslashes, then wraps the result in single
    quotes. The two replacements are independent (neither replacement text
    contains the other key), so the application order does not matter.
    @param v: string to quote
    @return: quoted string literal including the surrounding quotes
    """
    # quote dictionary
    quote_dict = {"\'": "''", "\\": "\\\\"}
    # str.replace is the idiomatic (and py3-safe) form of the old
    # string.find/string.split/string.join dance, with identical results
    for dkey, replacement in quote_dict.items():
        v = v.replace(dkey, replacement)
    return "'%s'" % v
1.1 casties 72:
def sqlName(s, lc=True, more=''):
    """returns restricted ASCII-only version of string

    Strips single quotes, replaces every character outside
    [A-Za-z0-9_] (plus any characters in more) with "_" and
    optionally lower-cases the result.
    @param s: input string (None yields "")
    @param lc: lower-case the result if True
    @param more: extra characters to allow unchanged
    """
    if s is None:
        return ""

    # drop quotes entirely, then map everything disallowed to "_"
    cleaned = re.sub('[^A-Za-z0-9_' + more + ']', '_', s.replace("'", ""))
    return cleaned.lower() if lc else cleaned
86:
def SimpleSearch(curs, query, args=None, ascii=False):
    """execute sql query and return data

    @param curs: database cursor
    @param query: SQL query string (with %s placeholders for args)
    @param args: optional sequence of query parameters
    @param ascii: if True, query and args are utf-8 encoded before execution
    @return: result rows from fetchall(), or None if fetching fails
        (e.g. for statements that return no result set)
    """
    #logger.debug("executing: "+query)
    if ascii:
        # encode all in UTF-8
        query = utf8ify(query)
        if args is not None:
            args = [utf8ify(arg, withNone=True) for arg in args]

    curs.execute(query, args)
    #logger.debug("sql done")
    try:
        return curs.fetchall()
    except:
        # statement produced no result set
        return None
1.1 casties 106:
107:
class TableColumn:
    """simple type for storing sql column name and type"""

    def __init__(self, name, type=None):
        """create a column description.

        @param name: column name
        @param type: SQL type name; None means "use the default" (see getType)
        """
        self.name = name
        self.type = type

    def getName(self):
        """return the column name"""
        return self.name

    def getType(self):
        """return the column type, falling back to 'text' when unset"""
        return self.type if self.type is not None else "text"

    def __str__(self):
        # a column prints as its bare name (used when building SQL strings)
        return self.name
127:
128:
class xml_handler(ContentHandler):
    """SAX handler that reads a FileMaker FMPXMLRESULT document and writes
    its rows into a PostgreSQL table (insert, update by id, or full sync)."""

    def __init__(self,options):
        """SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
        @param options: dict of options
        @param options.dsn: database connection string
        @param options.table: name of the table the xml shall be imported into
        @param options.filename: xmlfile filename
        @param options.update_fields: (optional) list of fields to update; default is to create all fields
        @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
        @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
        @param options.lc_names: (optional) lower case and clean up field names from XML
        @param options.keep_fields: (optional) don't add fields to SQL database
        @param options.ascii_db: (optional) assume ascii encoding in db
        @param options.replace_table: (optional) delete and re-insert data
        @param options.backup_table: (optional) create backup of old table (breaks indices)
        @param options.use_logger_instance: (optional) use this instance of a logger
        """

        # set up logger
        if hasattr(options, 'use_logger_instance'):
            self.logger = options.use_logger_instance
        else:
            self.logger = logging.getLogger('db.import.fmpxml')


        # set up parser
        self.result={}
        self.event = None

        # (dispatcher left over from the old amara/saxtools implementation)
        # self.top_dispatcher = {
        #     (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
        #     self.handle_meta_fields,
        #     (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
        #     self.handle_data_fields,
        #     }

        # connect database
        self.dbCon = psycopg.connect(options.dsn)
        logging.debug("DB encoding: %s"%getattr(self.dbCon, 'encoding', 'UNKNOWN'))
        self.db = self.dbCon.cursor()
        assert self.db, "AIIEE no db cursor for %s!!"%options.dsn

        # copy options into attributes; all optional ones default to None
        self.table = getattr(options,"table",None)
        self.update_fields = getattr(options,"update_fields",None)
        self.id_field = getattr(options,"id_field",None)
        self.sync_mode = getattr(options,"sync_mode",None)
        self.lc_names = getattr(options,"lc_names",None)
        self.keep_fields = getattr(options,"keep_fields",None)
        self.ascii_db = getattr(options,"ascii_db",None)
        self.replace_table = getattr(options,"replace_table",None)
        self.backup_table = getattr(options,"backup_table",None)
        self.read_before_update = getattr(options,"read_before_update",None)
        self.debug_data = getattr(options,"debug_data",None)

        self.logger.debug("dsn: "+repr(getattr(options,"dsn",None)))
        self.logger.debug("table: "+repr(self.table))
        self.logger.debug("update_fields: "+repr(self.update_fields))
        self.logger.debug("id_field: "+repr(self.id_field))
        self.logger.debug("sync_mode: "+repr(self.sync_mode))
        self.logger.debug("lc_names: "+repr(self.lc_names))
        self.logger.debug("keep_fields: "+repr(self.keep_fields))
        self.logger.debug("ascii_db: "+repr(self.ascii_db))
        self.logger.debug("replace_table: "+repr(self.replace_table))
        self.logger.debug("backup_table: "+repr(self.backup_table))
        self.logger.debug("read_before_update: "+repr(self.read_before_update))
        self.logger.debug("debug_data: "+repr(self.debug_data))

        # maps id value -> number of times seen in the XML (0 = not updated)
        self.dbIDs = {}
        self.rowcnt = 0

        # name of the FIELD element currently being read (set in handle_meta_field)
        self.currentName = None

        if self.id_field is not None:
            # prepare a list of ids for sync mode
            qstr="select %s from %s"%(self.id_field,self.table)
            for id in SimpleSearch(self.db, qstr):
                # value 0: not updated
                self.dbIDs[id[0]] = 0;
                self.rowcnt += 1

        self.logger.info("%d entries in DB to sync"%self.rowcnt)

        # names of fields in XML file
        self.xml_field_names = []
        # map XML field names to SQL field names
        self.xml_field_map = {}
        # and vice versa
        self.sql_field_map = {}

        return

    def startElement(self, name, attrs):
        """SAX callback: dispatch on the (case-insensitive) element name."""
        logging.debug(name)
        if (name.lower() == "field") :
            self.handle_meta_field(attrs)
        if (name.lower() == "row") :
            logging.debug("handleROW")
            self.handle_row(attrs)
        if (name.lower()=="resultset"):
            self.handle_data_fields(attrs)

        if (name.lower()=="data"):
            self.handle_data_tag(attrs);

    def endElement(self,name):
        """SAX callback: dispatch end-of-element processing."""
        if (name.lower() == "resultset") :
            self.currentTag=""
            self.handle_end_data_fields()
        if (name.lower() == "field") :
            self.handle_end_meta_field()
        if (name.lower() == "metadata"):
            self.handle_end_meta_fields()
        if (name.lower() == "row") :
            logging.debug("handleROW")
            self.handle_end_row()

        if (name.lower() == "col") :
            self.handle_end_col()

    def characters(self,content):
        """SAX callback: store character data for the current column.

        NOTE(review): SAX may deliver one element's text in several chunks;
        this assignment keeps only the last chunk received -- confirm that
        DATA values are always delivered in a single callback.
        """

        try:
            fn = self.xml_field_names[self.colIdx]
            self.xml_data[fn] = content
        except:
            # character data outside a known column context -- just log it
            logging.debug(content)
            pass

        # if self.currentName is not None:
        #     logging.debug(self.currentName+" "+content)
        #     self.currentRow[self.currentName]=content;
        #
    def handle_end_meta_fields(self):
        """Called at </METADATA>: set up backup/replace tables, read the SQL
        schema, decide which fields to update and prepare the SQL statements
        (self.updQuery, self.selQuery, self.addQuery)."""
        # dispatcher = {
        #     (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
        #     self.handle_meta_field,
        #     }
        #First round through the generator corresponds to the
        #start element event
        # self.logger.info("reading metadata...")
        # if self.debug_data:
        #     self.logger.debug("START METADATA")
        # #yield None

        #delegate is a generator that handles all the events "within"
        #this element
        # delegate = None
        # while not self.event == end_condition:
        #     delegate = saxtools.tenorsax.event_loop_body(
        #         dispatcher, delegate, self.event)
        #     yield None
        #
        # #Element closed. Wrap up
        if self.debug_data:
            self.logger.debug("END METADATA")

        # rename table for backup
        if self.backup_table:
            self.orig_table = self.table
            self.tmp_table = self.table + "_tmp"
            backup_name = "%s_%s"%(self.table,time.strftime('%Y_%m_%d_%H_%M_%S'))

            # remove old temp table
            qstr = "DROP TABLE %s"%(self.tmp_table)
            try:
                self.db.execute(qstr)
            except:
                # temp table may not exist -- ignore
                pass

            self.dbCon.commit()

            if self.id_field:
                # sync mode -- copy backup table, update current table
                self.logger.info("copy table %s to %s"%(self.table,backup_name))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(backup_name,self.table)

            else:
                # replace mode -- create empty tmp table, insert into tmp table
                self.table = self.tmp_table
                self.logger.info("create empty table %s"%(self.table))
                qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table)

            self.db.execute(qstr)
            self.dbCon.commit()

        # delete data from table for replace
        if self.replace_table:
            self.logger.info("delete data from table %s"%(self.table))
            qstr = "TRUNCATE TABLE %s"%(self.table)
            self.db.execute(qstr)
            self.dbCon.commit()

        # try to match date style with XML
        self.db.execute("set datestyle to 'german'")

        #self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names))
        # get list of fields and types of db table
        qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'"
        self.sql_fields={}
        for f in SimpleSearch(self.db, qstr%self.table):
            fn = f[0]
            ft = f[1]
            #print "SQL fields: %s (%s)"%(n,t)
            self.sql_fields[fn] = TableColumn(fn,ft)

        # translate id_field (SQL-name) to XML-name
        self.xml_id = self.sql_field_map.get(self.id_field, None)
        # get type of id_field
        if self.id_field:
            self.id_type = self.sql_fields[self.id_field].getType()
        else:
            self.id_type = None

        # check fields to update
        if self.update_fields is None:
            if self.keep_fields:
                # update all existing fields from sql (when they are in the xml file)
                self.update_fields = {}
                for f in self.sql_fields.keys():
                    if self.sql_field_map.has_key(f):
                        xf = self.sql_field_map[f]
                        self.update_fields[f] = self.xml_field_map[xf]

            else:
                # update all fields
                if self.lc_names:
                    # create dict with sql names
                    self.update_fields = {}
                    for f in self.xml_field_map.values():
                        self.update_fields[f.getName()] = f

                else:
                    self.update_fields = self.xml_field_map

        # and translate to list of xml fields
        if self.lc_names:
            self.xml_update_list = [self.sql_field_map[x] for x in self.update_fields]
        else:
            self.xml_update_list = self.update_fields.keys()

        if not self.keep_fields:
            # adjust db table to fields in XML and update_fields
            for f in self.xml_field_map.values():
                self.logger.debug("sync-fieldname: %s"%f.getName())
                sf = self.sql_fields.get(f.getName(), None)
                uf = self.update_fields.get(f.getName(), None)
                if sf is not None:
                    # name in db -- check type
                    if f.getType() != sf.getType():
                        # type mismatch is only logged, not fixed
                        self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType()))
                elif uf is not None:
                    # add field to table
                    fn = uf.getName()
                    ft = uf.getType()
                    qstr="alter table %s add \"%s\" %s"%(self.table,fn,ft)
                    self.logger.info("db add field:"+qstr)

                    if self.ascii_db and type(qstr)==types.UnicodeType:
                        qstr=qstr.encode('utf-8')

                    self.db.execute(qstr)
                    self.dbCon.commit()
                    # add field to field list
                    self.sql_fields[fn] = TableColumn(fn, ft)

        # prepare sql statements for update (do not update id_field)
        setStr=string.join(["\"%s\" = %%s"%self.xml_field_map[f] for f in self.xml_update_list if f != self.xml_id], ', ')
        self.updQuery="UPDATE %s SET %s WHERE \"%s\" = %%s"%(self.table,setStr,self.id_field)
        # and select (for update check)
        selStr=string.join([self.xml_field_map[f].getName() for f in self.xml_update_list if f != self.xml_id], ', ')
        self.selQuery="SELECT %s FROM %s WHERE \"%s\" = %%s"%(selStr,self.table,self.id_field)
        # and insert
        fields=string.join(["\"%s\""%self.xml_field_map[x].getName() for x in self.xml_update_list], ',')
        values=string.join(['%s' for f in self.xml_update_list], ',')
        self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values)
        self.logger.debug("update-query: "+self.updQuery)
        self.logger.debug("sel-query: "+self.selQuery)
        self.logger.debug("add-query: "+self.addQuery)
        return

    def handle_meta_field(self, attrs):
        """Called at <FIELD>: remember the field name from the NAME attribute."""
        self.currentName = attrs.get('NAME')
        #yield None
        return
    def handle_end_meta_field(self):
        """Called at </FIELD>: register the field in the XML/SQL name maps."""
        #Element closed. Wrap up
        name = self.currentName
        if self.lc_names:
            # clean name
            sqlname = sqlName(name)
        else:
            sqlname = name
        self.xml_field_names.append(name)
        # map to sql name and default text type
        self.xml_field_map[name] = TableColumn(sqlname, 'text')
        self.sql_field_map[sqlname] = name
        self.logger.debug("FIELD name: "+name)
        return

    def handle_data_fields(self, attrs):
        """Called at <RESULTSET>: start reading data rows."""

        #First round through the generator corresponds to the
        #start element event
        self.logger.info("reading data...")
        if self.debug_data:
            self.logger.debug("START RESULTSET")
        self.rowcnt = 0
        return

    def handle_end_data_fields(self):
        """Called at </RESULTSET>: commit, delete unmatched rows (sync mode)
        and swap in the temporary table (backup/replace mode)."""
        #delegate is a generator that handles all the events "within"
        #this element

        #Element closed. Wrap up
        if self.debug_data:
            self.logger.debug("END RESULTSET")
        self.dbCon.commit()

        if self.sync_mode:
            # delete unmatched entries in db
            if self.rowcnt > 0:
                self.logger.info("deleting unmatched rows from db")
                delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field)
                for id in self.dbIDs.keys():
                    # find all not-updated fields
                    if self.dbIDs[id] == 0:
                        self.logger.info(" delete: %s"%id)
                        SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db)

                    elif self.dbIDs[id] > 1:
                        self.logger.info(" sync: ID %s used more than once?"%id)

                self.dbCon.commit()

            else:
                # safety in case we had an empty file
                self.logger.warning("no rows read! not deleting unmatched rows!")

        # reinstate backup tables
        if self.backup_table and not self.id_field:
            backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S'))
            self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name)
            self.db.execute(qstr)
            self.logger.info("rename working table %s to %s"%(self.table,self.orig_table))
            qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table)
            self.db.execute(qstr)
            self.dbCon.commit()

        self.logger.info("Done (%s rows)"%self.rowcnt)
        return

    def handle_row(self, end_condition):
        """Called at <ROW>: reset the per-row data buffer.

        NOTE(review): the parameter receives the SAX attrs object (see
        startElement) but keeps the name end_condition from the old
        generator-based implementation -- it is unused here.
        """

        if self.debug_data:
            self.logger.debug("START ROW")
        self.xml_data = {}
        self.colIdx = 0

        return

    def handle_end_row(self):
        """Called at </ROW>: write the collected row to the database
        (update if the id is already known, insert otherwise)."""
        #delegate is a generator that handles all the events "within"
        #this element

        #Element closed. Wrap up
        if self.debug_data:
            self.logger.debug("END ROW")
        self.rowcnt += 1
        # process collected row data
        update=False
        id_val=''
        # synchronize by id_field
        if self.id_field:
            if self.id_type == 'integer':
                try:
                    id_val = int(self.xml_data[self.xml_id])
                except:
                    # missing or non-numeric id -- id_val stays '' and is
                    # caught by the check below
                    pass
            else:
                id_val = self.xml_data[self.xml_id]

            if not id_val:
                # abort update ('emtpy' typo is in the runtime message itself)
                self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt)
                return

            if id_val in self.dbIDs:
                self.dbIDs[id_val] += 1
                update=True

        # collect all values
        args = []
        for fn in self.xml_update_list:
            # do not update id_field
            if update and fn == self.xml_id:
                continue

            f = self.xml_field_map[fn]
            val = self.xml_data.get(fn,None)
            # NOTE: 'type' shadows the builtin here
            type = self.sql_fields[f.getName()].getType()
            # NOTE(review): val may be None when the column had no DATA
            # element; val.strip()/len(val) would raise then -- confirm every
            # column always delivers a DATA element
            if type == "date" and len(val.strip()) == 0:
                # empty date field
                val = None

            elif type == "integer" and len(val) == 0:
                # empty int field
                val = None

            args.append(val)

        if update:
            # update existing row (by id_field)
            if self.read_before_update:
                # read data
                if self.debug_data:
                    self.logger.debug("update check: %s = %s"%(id_val, args))
                oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db)
                #i = 0
                #for v in oldrow[0]:
                #    logging.debug("v: %s = %s (%s)"%(v,args[i],v==args[i]))
                #    i += 1
                if tuple(oldrow[0]) != tuple(args):
                    # data has changed -- update
                    if self.debug_data:
                        self.logger.debug("really update: %s = %s"%(id_val, args))
                    args.append(id_val) # last arg is id
                    SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)

            else:
                # always update
                if self.debug_data:
                    self.logger.debug("update: %s = %s"%(id_val, args))
                args.append(id_val) # last arg is id
                SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db)

        else:
            # create new row
            if self.debug_data:
                self.logger.debug("insert: %s"%args)
            SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db)

        #self.logger.info(" row:"+"%d (%s)"%(self.rowcnt,id_val))
        # commit (and report progress) every 100 rows
        if (self.rowcnt % 100) == 0:
            self.logger.info(" row:"+"%d (id:%s)"%(self.rowcnt,id_val))
            self.dbCon.commit()

        return

    def handle_end_col(self):
        """Called at </COL>: advance to the next column of the current row."""

        self.colIdx += 1
        return


    def handle_data_tag(self, attrs):
        """Called at <DATA>: reset the character buffer (the actual value is
        captured by the characters() callback)."""
        #print "START DATA"
        self.content = u''
        # yield None
        # # gather child elements
        # while not self.event == end_condition:
        #     if self.event[0] == saxtools.CHARACTER_DATA:
        #         content += self.params
        #     yield None
        # #Element closed. Wrap up
        # fn = self.xml_field_names[self.colIdx]
        # self.xml_data[fn] = content
        return
597:
598:
def importFMPXML(options):
    """import FileMaker XML file (FMPXMLRESULT format) into the table.
    @param options: dict of options
    @param options.dsn: database connection string
    @param options.table: name of the table the xml shall be imported into
    @param options.filename: xmlfile filename
    @param options.update_fields: (optional) list of fields to update; default is to create all fields
    @param options.id_field: (optional) field which uniquely identifies an entry for updating purposes.
    @param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file
    @param options.lc_names: (optional) lower case and clean up field names from XML
    @param options.keep_fields: (optional) don't add fields to SQL database
    @param options.ascii_db: (optional) assume ascii encoding in db
    @param options.replace_table: (optional) delete and re-insert data
    @param options.backup_table: (optional) create backup of old table
    """

    # turn the comma separated "name[:type]" option string into a dict
    # of TableColumn objects keyed by field name
    fieldspec = getattr(options, 'update_fields', None)
    if fieldspec:
        parsed_fields = {}
        for spec in fieldspec.split(','):
            if spec.find(':') > 0:
                (fname, ftype) = spec.split(':')
            else:
                fname = spec
                ftype = None
            parsed_fields[fname] = TableColumn(fname, ftype)

        options.update_fields = parsed_fields

    # sync and replace are mutually exclusive
    if getattr(options, 'id_field', None) and getattr(options, 'replace_table', None):
        logging.error("ABORT: sorry, you can't do both sync (id_field) and replace")
        return

    sax_parser = sax.make_parser()
    #The "consumer" is our own handler
    consumer = xml_handler(options)
    sax_parser.setContentHandler(consumer)
    #parser.setFeature(sax.handler.feature_namespaces, 1)
    sax_parser.parse(options.filename)
640:
1.1 casties 641:
if __name__ == "__main__":
    from optparse import OptionParser

    # build the command line interface
    opars = OptionParser()
    opars.add_option("-f", "--file",
                     dest="filename",
                     help="FMPXML file name", metavar="FILE")
    opars.add_option("-c", "--dsn",
                     dest="dsn",
                     help="database connection string")
    opars.add_option("-t", "--table",
                     dest="table",
                     help="database table name")
    opars.add_option("--fields", default=None,
                     dest="update_fields",
                     help="list of fields to update (comma separated, sql-names)", metavar="LIST")
    opars.add_option("--id-field", default=None,
                     dest="id_field",
                     help="name of id field for synchronisation (only appends data otherwise, sql-name)", metavar="NAME")
    opars.add_option("--sync", "--sync-mode", default=False, action="store_true",
                     dest="sync_mode",
                     help="do full sync based on id field (remove unmatched fields from db)")
    opars.add_option("--lc-names", default=False, action="store_true",
                     dest="lc_names",
                     help="clean and lower case field names from XML")
    opars.add_option("--keep-fields", default=False, action="store_true",
                     dest="keep_fields",
                     help="don't add fields from XML to SQL table")
    opars.add_option("--ascii-db", default=False, action="store_true",
                     dest="ascii_db",
                     help="the SQL database stores ASCII instead of unicode")
    opars.add_option("--replace", default=False, action="store_true",
                     dest="replace_table",
                     help="replace table i.e. delete and re-insert data")
    opars.add_option("--backup", default=False, action="store_true",
                     dest="backup_table",
                     help="create backup of old table")
    opars.add_option("--read-before-update", default=False, action="store_true",
                     dest="read_before_update",
                     help="read all data to check if it really changed")
    opars.add_option("-d", "--debug", default=False, action="store_true",
                     dest="debug",
                     help="debug mode (more output)")
    opars.add_option("--debug-data", default=False, action="store_true",
                     dest="debug_data",
                     help="debug mode for data (even more output)")

    (options, args) = opars.parse_args()

    # filename and dsn are mandatory; show usage and exit otherwise
    if len(sys.argv) < 2 or options.filename is None or options.dsn is None:
        print "importFMPXML "+version_string
        opars.print_help()
        sys.exit(1)

    if options.debug:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    # log with timestamps; level depends on --debug
    logging.basicConfig(level=loglevel,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    importFMPXML(options)
706:
1.9 dwinter 707:
708:
1.1 casties 709:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>