version 1.15, 2007/08/09 15:09:27
|
version 1.33, 2011/06/23 09:14:53
|
Line 6 import logging
|
Line 6 import logging
|
import sys |
import sys |
import types |
import types |
import time |
import time |
|
import re |
|
|
from xml import sax |
from xml import sax |
from amara import saxtools |
from amara import saxtools |
|
|
try: |
try: |
import psycopg2 as psycopg |
import psycopg2 as psycopg |
|
import psycopg2.extensions |
|
# switch to unicode |
|
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) |
psyco = 2 |
psyco = 2 |
except: |
except: |
import psycopg |
import psycopg |
Line 19 except:
|
Line 23 except:
|
|
|
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
|
|
version_string = "V0.4.1 ROC 9.8.2007" |
version_string = "V0.6.7 ROC 21.6.2011" |
|
|
|
def unicodify(text, withNone=False):
    """Decode a byte string (UTF-8, else Latin-1) into a unicode object.

    text     -- value to convert; only ``str`` instances are decoded
    withNone -- if true, a ``None`` input is returned as ``None`` instead
                of being turned into an empty string

    Returns ``u""`` for any falsy input (unless the ``withNone`` case
    applies), the decoded text for ``str`` input, and every other object
    unchanged.
    """
    if withNone and text is None:
        return None

    if not text:
        return u""

    if not isinstance(text, str):
        # anything that is not a str instance is passed through untouched
        return text

    try:
        return text.decode('utf-8')
    except UnicodeDecodeError:
        # not valid UTF-8 -- fall back to Latin-1; only decoding errors are
        # caught here so that unrelated exceptions are no longer swallowed
        return text.decode('latin-1')
|
|
|
def utf8ify(text, withNone=False):
    """Encode a unicode object into a UTF-8 byte string.

    text     -- value to convert; only ``unicode`` instances are encoded
    withNone -- if true, a ``None`` input is returned as ``None`` instead
                of being turned into an empty string

    Returns ``""`` for any falsy input (unless the ``withNone`` case
    applies), the UTF-8 encoding of ``unicode`` input, and every other
    object unchanged.
    """
    if text is None and withNone:
        return None

    if text and isinstance(text, unicode):
        return text.encode('utf-8')

    # falsy values collapse to the empty string, everything else passes through
    return text if text else ""
|
|
def getTextFromNode(nodename): |
def getTextFromNode(nodename): |
"""get the cdata content of a node""" |
"""get the cdata content of a node""" |
Line 40 def sql_quote(v):
|
Line 69 def sql_quote(v):
|
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
return "'%s'"%v |
return "'%s'"%v |
|
|
|
def sqlName(s, lc=True, more=''):
    """Return a restricted, ASCII-only version of the string ``s``.

    s    -- name to clean; ``None`` yields the empty string
    lc   -- if true (the default) the result is lower-cased
    more -- extra characters to keep; they are inserted verbatim into the
            regular-expression character class of allowed characters

    Apostrophes are removed; every other character outside
    ``A-Z a-z 0-9 _`` (plus ``more``) is replaced by an underscore.
    """
    if s is None:
        return ""

    # apostrophes are dropped entirely rather than replaced
    without_quotes = s.replace("'", "")

    # everything that is not explicitly allowed becomes "_"
    forbidden = '[^A-Za-z0-9_' + more + ']'
    cleaned = re.sub(forbidden, '_', without_quotes)

    return cleaned.lower() if lc else cleaned
|
|
def SimpleSearch(curs,query, args=None, ascii=False): |
def SimpleSearch(curs,query, args=None, ascii=False): |
"""execute sql query and return data""" |
"""execute sql query and return data""" |
#logger.debug("executing: "+query) |
#logger.debug("executing: "+query) |
if ascii: |
if ascii: |
# encode all in UTF-8 |
# encode all in UTF-8 |
query = query.encode("UTF-8") |
query = utf8ify(query) |
if args is not None: |
if args is not None: |
encargs = [] |
encargs = [] |
for a in args: |
for a in args: |
if a is not None: |
encargs.append(utf8ify(a, withNone=True)) |
a = a.encode("UTF-8") |
|
encargs.append(a) |
|
|
|
args = encargs |
args = encargs |
|
|
Line 120 class xml_handler:
|
Line 161 class xml_handler:
|
|
|
# connect database |
# connect database |
self.dbCon = psycopg.connect(options.dsn) |
self.dbCon = psycopg.connect(options.dsn) |
|
logging.debug("DB encoding: %s"%getattr(self.dbCon, 'encoding', 'UNKNOWN')) |
self.db = self.dbCon.cursor() |
self.db = self.dbCon.cursor() |
assert self.db, "AIIEE no db cursor for %s!!"%options.dsn |
assert self.db, "AIIEE no db cursor for %s!!"%options.dsn |
|
|
Line 132 class xml_handler:
|
Line 174 class xml_handler:
|
self.ascii_db = getattr(options,"ascii_db",None) |
self.ascii_db = getattr(options,"ascii_db",None) |
self.replace_table = getattr(options,"replace_table",None) |
self.replace_table = getattr(options,"replace_table",None) |
self.backup_table = getattr(options,"backup_table",None) |
self.backup_table = getattr(options,"backup_table",None) |
|
self.read_before_update = getattr(options,"read_before_update",None) |
|
self.debug_data = getattr(options,"debug_data",None) |
|
|
self.logger.debug("dsn: "+repr(getattr(options,"dsn",None))) |
self.logger.debug("dsn: "+repr(getattr(options,"dsn",None))) |
self.logger.debug("table: "+repr(self.table)) |
self.logger.debug("table: "+repr(self.table)) |
Line 143 class xml_handler:
|
Line 187 class xml_handler:
|
self.logger.debug("ascii_db: "+repr(self.ascii_db)) |
self.logger.debug("ascii_db: "+repr(self.ascii_db)) |
self.logger.debug("replace_table: "+repr(self.replace_table)) |
self.logger.debug("replace_table: "+repr(self.replace_table)) |
self.logger.debug("backup_table: "+repr(self.backup_table)) |
self.logger.debug("backup_table: "+repr(self.backup_table)) |
|
self.logger.debug("read_before_update: "+repr(self.read_before_update)) |
|
self.logger.debug("debug_data: "+repr(self.debug_data)) |
|
|
self.dbIDs = {} |
self.dbIDs = {} |
self.rowcnt = 0 |
self.rowcnt = 0 |
Line 174 class xml_handler:
|
Line 220 class xml_handler:
|
#First round through the generator corresponds to the |
#First round through the generator corresponds to the |
#start element event |
#start element event |
self.logger.info("reading metadata...") |
self.logger.info("reading metadata...") |
|
if self.debug_data: |
self.logger.debug("START METADATA") |
self.logger.debug("START METADATA") |
yield None |
yield None |
|
|
Line 186 class xml_handler:
|
Line 233 class xml_handler:
|
yield None |
yield None |
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END METADATA") |
self.logger.debug("END METADATA") |
|
|
# rename table for backup |
# rename table for backup |
if self.backup_table: |
if self.backup_table: |
self.orig_table = self.table |
self.orig_table = self.table |
self.table = self.table + "_tmp" |
self.tmp_table = self.table + "_tmp" |
|
backup_name = "%s_%s"%(self.table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
|
|
# remove old temp table |
# remove old temp table |
qstr = "DROP TABLE %s"%(self.table) |
qstr = "DROP TABLE %s"%(self.tmp_table) |
try: |
try: |
self.db.execute(qstr) |
self.db.execute(qstr) |
except: |
except: |
Line 202 class xml_handler:
|
Line 252 class xml_handler:
|
self.dbCon.commit() |
self.dbCon.commit() |
|
|
if self.id_field: |
if self.id_field: |
# sync mode -- copy table |
# sync mode -- copy backup table, update current table |
self.logger.info("copy table %s to %s"%(self.orig_table,self.table)) |
self.logger.info("copy table %s to %s"%(self.table,backup_name)) |
qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(self.table,self.orig_table) |
qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(backup_name,self.table) |
|
|
else: |
else: |
# rename table and create empty new one |
# replace mode -- create empty tmp table, insert into tmp table |
|
self.table = self.tmp_table |
self.logger.info("create empty table %s"%(self.table)) |
self.logger.info("create empty table %s"%(self.table)) |
qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table) |
qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table) |
|
|
Line 224 class xml_handler:
|
Line 275 class xml_handler:
|
# try to match date style with XML |
# try to match date style with XML |
self.db.execute("set datestyle to 'german'") |
self.db.execute("set datestyle to 'german'") |
|
|
# translate id_field (SQL-name) to XML-name |
|
self.xml_id = self.sql_field_map.get(self.id_field, None) |
|
|
|
#self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names)) |
#self.logger.debug("xml-fieldnames:"+repr(self.xml_field_names)) |
# get list of fields and types of db table |
# get list of fields and types of db table |
qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'" |
qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'" |
self.sql_fields={} |
self.sql_fields={} |
for f in SimpleSearch(self.db, qstr%self.table): |
for f in SimpleSearch(self.db, qstr%self.table): |
n = f[0] |
fn = f[0] |
t = f[1] |
ft = f[1] |
#print "SQL fields: %s (%s)"%(n,t) |
#print "SQL fields: %s (%s)"%(n,t) |
self.sql_fields[n] = TableColumn(n,t) |
self.sql_fields[fn] = TableColumn(fn,ft) |
|
|
|
# translate id_field (SQL-name) to XML-name |
|
self.xml_id = self.sql_field_map.get(self.id_field, None) |
|
# get type of id_field |
|
if self.id_field: |
|
self.id_type = self.sql_fields[self.id_field].getType() |
|
else: |
|
self.id_type = None |
|
|
# check fields to update |
# check fields to update |
if self.update_fields is None: |
if self.update_fields is None: |
Line 276 class xml_handler:
|
Line 332 class xml_handler:
|
self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType())) |
self.logger.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType())) |
elif uf is not None: |
elif uf is not None: |
# add field to table |
# add field to table |
qstr="alter table %s add %s %s"%(self.table,uf.getName(),uf.getType()) |
fn = uf.getName() |
|
ft = uf.getType() |
|
qstr="alter table %s add \"%s\" %s"%(self.table,fn,ft) |
self.logger.info("db add field:"+qstr) |
self.logger.info("db add field:"+qstr) |
|
|
if self.ascii_db and type(qstr)==types.UnicodeType: |
if self.ascii_db and type(qstr)==types.UnicodeType: |
Line 284 class xml_handler:
|
Line 342 class xml_handler:
|
|
|
self.db.execute(qstr) |
self.db.execute(qstr) |
self.dbCon.commit() |
self.dbCon.commit() |
|
# add field to field list |
|
self.sql_fields[fn] = TableColumn(fn, ft) |
|
|
# prepare sql statements for update |
# prepare sql statements for update (do not update id_field) |
setStr=string.join(["%s = %%s"%self.xml_field_map[f] for f in self.xml_update_list], ', ') |
setStr=string.join(["\"%s\" = %%s"%self.xml_field_map[f] for f in self.xml_update_list if f != self.xml_id], ', ') |
self.updQuery="UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field) |
self.updQuery="UPDATE %s SET %s WHERE \"%s\" = %%s"%(self.table,setStr,self.id_field) |
|
# and select (for update check) |
|
selStr=string.join([self.xml_field_map[f].getName() for f in self.xml_update_list if f != self.xml_id], ', ') |
|
self.selQuery="SELECT %s FROM %s WHERE \"%s\" = %%s"%(selStr,self.table,self.id_field) |
# and insert |
# and insert |
fields=string.join([self.xml_field_map[x].getName() for x in self.xml_update_list], ',') |
fields=string.join(["\"%s\""%self.xml_field_map[x].getName() for x in self.xml_update_list], ',') |
values=string.join(['%s' for f in self.xml_update_list], ',') |
values=string.join(['%s' for f in self.xml_update_list], ',') |
self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values) |
self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values) |
self.logger.debug("update-query: "+self.updQuery) |
self.logger.debug("update-query: "+self.updQuery) |
|
self.logger.debug("sel-query: "+self.selQuery) |
self.logger.debug("add-query: "+self.addQuery) |
self.logger.debug("add-query: "+self.addQuery) |
return |
return |
|
|
Line 302 class xml_handler:
|
Line 366 class xml_handler:
|
#Element closed. Wrap up |
#Element closed. Wrap up |
if self.lc_names: |
if self.lc_names: |
# clean name |
# clean name |
sqlname = name.replace(" ","_").lower() |
sqlname = sqlName(name) |
else: |
else: |
sqlname = name |
sqlname = name |
self.xml_field_names.append(name) |
self.xml_field_names.append(name) |
Line 320 class xml_handler:
|
Line 384 class xml_handler:
|
#First round through the generator corresponds to the |
#First round through the generator corresponds to the |
#start element event |
#start element event |
self.logger.info("reading data...") |
self.logger.info("reading data...") |
|
if self.debug_data: |
self.logger.debug("START RESULTSET") |
self.logger.debug("START RESULTSET") |
self.rowcnt = 0 |
self.rowcnt = 0 |
yield None |
yield None |
Line 333 class xml_handler:
|
Line 398 class xml_handler:
|
yield None |
yield None |
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END RESULTSET") |
self.logger.debug("END RESULTSET") |
self.dbCon.commit() |
self.dbCon.commit() |
|
|
if self.sync_mode: |
if self.sync_mode: |
# delete unmatched entries in db |
# delete unmatched entries in db |
|
if self.rowcnt > 0: |
self.logger.info("deleting unmatched rows from db") |
self.logger.info("deleting unmatched rows from db") |
delQuery = "DELETE FROM %s WHERE %s = %%s"%(self.table,self.id_field) |
delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) |
for id in self.dbIDs.keys(): |
for id in self.dbIDs.keys(): |
# find all not-updated fields |
# find all not-updated fields |
if self.dbIDs[id] == 0: |
if self.dbIDs[id] == 0: |
self.logger.info(" delete:"+id) |
self.logger.info(" delete: %s"%id) |
SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) |
SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) |
sys.exit(1) |
|
|
|
elif self.dbIDs[id] > 1: |
elif self.dbIDs[id] > 1: |
self.logger.info(" sync: ID %s used more than once?"%id) |
self.logger.info(" sync: ID %s used more than once?"%id) |
|
|
self.dbCon.commit() |
self.dbCon.commit() |
|
|
|
else: |
|
# safety in case we had an empty file |
|
self.logger.warning("no rows read! not deleting unmatched rows!") |
|
|
# reinstate backup tables |
# reinstate backup tables |
if self.backup_table: |
if self.backup_table and not self.id_field: |
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name)) |
self.logger.info("rename backup table %s to %s"%(self.orig_table,backup_name)) |
qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name) |
qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name) |
Line 363 class xml_handler:
|
Line 433 class xml_handler:
|
self.db.execute(qstr) |
self.db.execute(qstr) |
self.dbCon.commit() |
self.dbCon.commit() |
|
|
|
self.logger.info("Done (%s rows)"%self.rowcnt) |
return |
return |
|
|
def handle_row(self, end_condition): |
def handle_row(self, end_condition): |
Line 370 class xml_handler:
|
Line 441 class xml_handler:
|
(saxtools.START_ELEMENT, fm_ns, u'COL'): |
(saxtools.START_ELEMENT, fm_ns, u'COL'): |
self.handle_col, |
self.handle_col, |
} |
} |
|
if self.debug_data: |
self.logger.debug("START ROW") |
self.logger.debug("START ROW") |
self.xml_data = {} |
self.xml_data = {} |
self.colIdx = 0 |
self.colIdx = 0 |
Line 384 class xml_handler:
|
Line 456 class xml_handler:
|
yield None |
yield None |
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END ROW") |
self.logger.debug("END ROW") |
self.rowcnt += 1 |
self.rowcnt += 1 |
# process collected row data |
# process collected row data |
Line 391 class xml_handler:
|
Line 464 class xml_handler:
|
id_val='' |
id_val='' |
# synchronize by id_field |
# synchronize by id_field |
if self.id_field: |
if self.id_field: |
|
if self.id_type == 'integer': |
|
try: |
|
id_val = int(self.xml_data[self.xml_id]) |
|
except: |
|
pass |
|
else: |
id_val = self.xml_data[self.xml_id] |
id_val = self.xml_data[self.xml_id] |
|
|
|
if not id_val: |
|
# abort update |
|
self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt) |
|
return |
|
|
if id_val in self.dbIDs: |
if id_val in self.dbIDs: |
self.dbIDs[id_val] += 1 |
self.dbIDs[id_val] += 1 |
update=True |
update=True |
Line 399 class xml_handler:
|
Line 484 class xml_handler:
|
# collect all values |
# collect all values |
args = [] |
args = [] |
for fn in self.xml_update_list: |
for fn in self.xml_update_list: |
|
# do not update id_field |
|
if update and fn == self.xml_id: |
|
continue |
|
|
f = self.xml_field_map[fn] |
f = self.xml_field_map[fn] |
val = self.xml_data[fn] |
val = self.xml_data[fn] |
type = self.sql_fields[f.getName()].getType() |
type = self.sql_fields[f.getName()].getType() |
if type == "date" and len(val) == 0: |
if type == "date" and len(val.strip()) == 0: |
# empty date field |
# empty date field |
val = None |
val = None |
|
|
Line 414 class xml_handler:
|
Line 503 class xml_handler:
|
|
|
if update: |
if update: |
# update existing row (by id_field) |
# update existing row (by id_field) |
# last argument is ID match |
if self.read_before_update: |
args.append(id_val) |
# read data |
|
if self.debug_data: |
|
self.logger.debug("update check: %s = %s"%(id_val, args)) |
|
oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db) |
|
#i = 0 |
|
#for v in oldrow[0]: |
|
# logging.debug("v: %s = %s (%s)"%(v,args[i],v==args[i])) |
|
# i += 1 |
|
if tuple(oldrow[0]) != tuple(args): |
|
# data has changed -- update |
|
if self.debug_data: |
|
self.logger.debug("really update: %s = %s"%(id_val, args)) |
|
args.append(id_val) # last arg is id |
|
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
|
|
|
else: |
|
# always update |
|
if self.debug_data: |
self.logger.debug("update: %s = %s"%(id_val, args)) |
self.logger.debug("update: %s = %s"%(id_val, args)) |
|
args.append(id_val) # last arg is id |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
|
|
else: |
else: |
# create new row |
# create new row |
|
if self.debug_data: |
self.logger.debug("insert: %s"%args) |
self.logger.debug("insert: %s"%args) |
SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) |
SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) |
|
|
Line 478 def importFMPXML(options):
|
Line 586 def importFMPXML(options):
|
@param options.keep_fields: (optional) don't add fields to SQL database |
@param options.keep_fields: (optional) don't add fields to SQL database |
@param options.ascii_db: (optional) assume ascii encoding in db |
@param options.ascii_db: (optional) assume ascii encoding in db |
@param options.replace_table: (optional) delete and re-insert data |
@param options.replace_table: (optional) delete and re-insert data |
@param options.backup_table: (optional) create backup of old table (breaks indices) |
@param options.backup_table: (optional) create backup of old table |
""" |
""" |
|
|
if getattr(options,'update_fields',None): |
if getattr(options,'update_fields',None): |
Line 544 if __name__ == "__main__":
|
Line 652 if __name__ == "__main__":
|
help="replace table i.e. delete and re-insert data") |
help="replace table i.e. delete and re-insert data") |
opars.add_option("--backup", default=False, action="store_true", |
opars.add_option("--backup", default=False, action="store_true", |
dest="backup_table", |
dest="backup_table", |
help="create backup of old table (breaks indices)") |
help="create backup of old table") |
|
opars.add_option("--read-before-update", default=False, action="store_true", |
|
dest="read_before_update", |
|
help="read all data to check if it really changed") |
opars.add_option("-d", "--debug", default=False, action="store_true", |
opars.add_option("-d", "--debug", default=False, action="store_true", |
dest="debug", |
dest="debug", |
help="debug mode (more output)") |
help="debug mode (more output)") |
|
opars.add_option("--debug-data", default=False, action="store_true", |
|
dest="debug_data", |
|
help="debug mode for data (even more output)") |
|
|
(options, args) = opars.parse_args() |
(options, args) = opars.parse_args() |
|
|