version 1.7, 2007/02/20 16:00:03
|
version 1.8, 2007/03/29 18:31:32
|
Line 5 import string
|
Line 5 import string
|
import logging |
import logging |
import sys |
import sys |
import types |
import types |
|
import time |
|
|
from xml import sax |
from xml import sax |
from amara import saxtools |
from amara import saxtools |
Line 18 except:
|
Line 19 except:
|
|
|
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
|
|
|
version_string = "V0.4 ROC 29.3.2007" |
|
|
def getTextFromNode(nodename): |
def getTextFromNode(nodename): |
"""get the cdata content of a node""" |
"""get the cdata content of a node""" |
if nodename is None: |
if nodename is None: |
Line 37 def sql_quote(v):
|
Line 40 def sql_quote(v):
|
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
return "'%s'"%v |
return "'%s'"%v |
|
|
def SimpleSearch(curs, query, args=None, ascii=False):
    """Execute an SQL query on a cursor and return the fetched rows.

    @param curs: cursor object providing execute() and fetchall()
    @param query: SQL statement, optionally containing placeholders
    @param args: (optional) sequence of values for the placeholders
    @param ascii: (optional) if true, encode the query and every unicode
        argument as UTF-8 byte strings before executing
    @return: the result of curs.fetchall(), or None if fetching raises
    """
    if ascii:
        # type(u"") is `unicode` on Python 2 and `str` on Python 3
        text_type = type(u"")
        query = query.encode("UTF-8")
        if args is not None:
            # only text values can be encoded; None and non-text values
            # (e.g. numbers) are passed through untouched
            args = [a.encode("UTF-8") if isinstance(a, text_type) else a
                    for a in args]

    curs.execute(query, args)
    try:
        return curs.fetchall()
    except Exception:
        # This function is also used for UPDATE/INSERT/DELETE statements;
        # presumably the driver raises when there is no result set to fetch,
        # so that case is reported as None (best effort, as before).
        return None
|
|
|
|
|
class TableColumn:
    """Simple type for storing an SQL column name and type."""

    def __init__(self, name, type=None):
        """
        @param name: column name
        @param type: (optional) column type; getType() reports "text"
            when this is None
        """
        self.name = name
        self.type = type

    def getName(self):
        """Return the column name."""
        return self.name

    def getType(self):
        """Return the column type, defaulting to "text" if none was set."""
        if self.type is None:
            return "text"
        return self.type

    def __str__(self):
        # the column name alone, so an instance can be formatted with %s
        return self.name

    def __repr__(self):
        # readable form for debug output of containers holding columns
        return "TableColumn(%r, %r)" % (self.name, self.type)
|
|
|
|
class xml_handler: |
class xml_handler: |
|
|
def __init__(self,dsn,table,update_fields=None,id_field=None,sync_mode=False): |
def __init__(self,options): |
''' |
''' |
SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. |
SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. |
@param dsn: database connection string |
@param options: dict of options |
@param table: name of the table the xml shall be imported into |
@param options.dsn: database connection string |
@param filename: xmlfile filename |
@param options.table: name of the table the xml shall be imported into |
@param update_fields: (optional) list of fields to update; default is to create all fields |
@param options.filename: xmlfile filename |
@param id_field: (optional) field which uniquely identifies an entry for updating purposes. |
@param options.update_fields: (optional) list of fields to update; default is to create all fields |
@param sync_mode: (optional) really synchronise, i.e. delete entries not in XML file |
@param options.id_field: (optional) field which uniquely identifies an entry for updating purposes. |
|
@param options.sync_mode: (optional) really synchronise, i.e. delete entries not in XML file |
|
@param options.lc_names: (optional) lower case and clean up field names from XML |
|
@param options.keep_fields: (optional) don't add fields to SQL database |
|
@param options.ascii_db: (optional) assume ascii encoding in db |
|
@param options.replace_table: (optional) delete and re-insert data |
''' |
''' |
# set up parser |
# set up parser |
self.event = None |
self.event = None |
Line 74 class xml_handler:
|
Line 107 class xml_handler:
|
(saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
(saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
self.handle_meta_fields, |
self.handle_meta_fields, |
(saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
(saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
self.handle_data, |
self.handle_data_fields, |
} |
} |
|
|
# connect database |
# connect database |
self.dbCon = psycopg.connect(dsn) |
self.dbCon = psycopg.connect(options.dsn) |
self.db = self.dbCon.cursor() |
self.db = self.dbCon.cursor() |
assert self.db, "AIIEE no db cursor for %s!!"%dsn |
assert self.db, "AIIEE no db cursor for %s!!"%options.dsn |
|
|
logging.debug("dsn: "+repr(dsn)) |
self.table = options.table |
logging.debug("table: "+repr(table)) |
self.update_fields = options.update_fields |
logging.debug("update_fields: "+repr(update_fields)) |
self.id_field = options.id_field |
logging.debug("id_field: "+repr(id_field)) |
self.sync_mode = options.sync_mode |
logging.debug("sync_mode: "+repr(sync_mode)) |
self.lc_names = options.lc_names |
|
self.keep_fields = options.keep_fields |
self.table = table |
self.ascii_db = options.ascii_db |
self.update_fields = update_fields |
self.replace_table = options.replace_table |
self.id_field = id_field |
self.backup_table = options.backup_table |
self.sync_mode = sync_mode |
|
|
logging.debug("dsn: "+repr(options.dsn)) |
|
logging.debug("table: "+repr(self.table)) |
|
logging.debug("update_fields: "+repr(self.update_fields)) |
|
logging.debug("id_field: "+repr(self.id_field)) |
|
logging.debug("sync_mode: "+repr(self.sync_mode)) |
|
logging.debug("lc_names: "+repr(self.lc_names)) |
|
logging.debug("keep_fields: "+repr(self.keep_fields)) |
|
logging.debug("ascii_db: "+repr(self.ascii_db)) |
|
logging.debug("replace_table: "+repr(self.replace_table)) |
|
|
self.dbIDs = {} |
self.dbIDs = {} |
self.rowcnt = 0 |
self.rowcnt = 0 |
|
|
self.db.execute("set datestyle to 'german'") |
if self.id_field is not None: |
if id_field is not None: |
|
# prepare a list of ids for sync mode |
# prepare a list of ids for sync mode |
qstr="select %s from %s"%(id_field,table) |
qstr="select %s from %s"%(self.id_field,self.table) |
for id in SimpleSearch(self.db, qstr): |
for id in SimpleSearch(self.db, qstr): |
# value 0: not updated |
# value 0: not updated |
self.dbIDs[id[0]] = 0; |
self.dbIDs[id[0]] = 0; |
Line 107 class xml_handler:
|
Line 148 class xml_handler:
|
|
|
logging.info("%d entries in DB to sync"%self.rowcnt) |
logging.info("%d entries in DB to sync"%self.rowcnt) |
|
|
self.fieldNames = [] |
# names of fields in XML file |
|
self.xml_field_names = [] |
|
# map XML field names to SQL field names |
|
self.xml_field_map = {} |
|
# and vice versa |
|
self.sql_field_map = {} |
|
|
return |
return |
|
|
Line 131 class xml_handler:
|
Line 177 class xml_handler:
|
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
logging.debug("END METADATA") |
logging.debug("END METADATA") |
|
|
|
# rename table for backup |
|
if self.backup_table: |
|
self.orig_table = self.table |
|
self.table = self.table + "_tmp" |
|
# remove old temp table |
|
qstr = "DROP TABLE %s"%(self.table) |
|
try: |
|
self.db.execute(qstr) |
|
except: |
|
pass |
|
|
|
self.dbCon.commit() |
|
|
|
if self.id_field: |
|
# sync mode -- copy table |
|
logging.info("copy table %s to %s"%(self.orig_table,self.table)) |
|
qstr = "CREATE TABLE %s AS (SELECT * FROM %s)"%(self.table,self.orig_table) |
|
|
|
else: |
|
# rename table and create empty new one |
|
logging.info("create empty table %s"%(self.table)) |
|
qstr = "CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=0)"%(self.table,self.orig_table) |
|
|
|
self.db.execute(qstr) |
|
self.dbCon.commit() |
|
|
|
# delete data from table for replace |
|
if self.replace_table: |
|
logging.info("delete data from table %s"%(self.table)) |
|
qstr = "TRUNCATE TABLE %s"%(self.table) |
|
self.db.execute(qstr) |
|
self.dbCon.commit() |
|
|
|
# try to match date style with XML |
|
self.db.execute("set datestyle to 'german'") |
|
|
|
# translate id_field (SQL-name) to XML-name |
|
self.xml_id = self.sql_field_map.get(self.id_field, None) |
|
|
|
#logging.debug("xml-fieldnames:"+repr(self.xml_field_names)) |
|
# get list of fields and types of db table |
|
qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0 and relname = '%s'" |
|
self.sql_fields={} |
|
for f in SimpleSearch(self.db, qstr%self.table): |
|
n = f[0] |
|
t = f[1] |
|
#print "SQL fields: %s (%s)"%(n,t) |
|
self.sql_fields[n] = TableColumn(n,t) |
|
|
|
# check fields to update |
if self.update_fields is None: |
if self.update_fields is None: |
|
if self.keep_fields: |
|
# update existing fields |
|
self.update_fields = self.sql_fields |
|
|
|
else: |
# update all fields |
# update all fields |
self.update_fields = self.fieldNames |
if self.lc_names: |
|
# create dict with sql names |
|
self.update_fields = {} |
|
for f in self.xml_field_map.values(): |
|
self.update_fields[f.getName()] = f |
|
|
|
else: |
|
self.update_fields = self.xml_field_map |
|
|
|
# and translate to list of xml fields |
|
if self.lc_names: |
|
self.xml_update_list = [self.sql_field_map[x] for x in self.update_fields] |
|
else: |
|
self.xml_update_list = self.update_fields.keys() |
|
|
logging.debug("xml-fieldnames:"+repr(self.fieldNames)) |
if not self.keep_fields: |
# get list of fields in db table |
# adjust db table to fields in XML and update_fields |
qstr="""select attname from pg_attribute, pg_class where attrelid = pg_class.oid and relname = '%s'""" |
for f in self.xml_field_map.values(): |
columns=[x[0] for x in SimpleSearch(self.db, qstr%self.table)] |
logging.debug("sync-fieldname: %s"%f.getName()) |
|
sf = self.sql_fields.get(f.getName(), None) |
# adjust db table to fields in XML and fieldlist |
uf = self.update_fields.get(f.getName(), None) |
for fieldName in self.fieldNames: |
if sf is not None: |
logging.debug("db-fieldname:"+repr(fieldName)) |
# name in db -- check type |
fieldName=fieldName.replace(" ","_") # repair _ |
if f.getType() != sf.getType(): |
if (fieldName.lower() not in columns) and (fieldName in self.update_fields): |
logging.debug("field %s has different type (%s vs %s)"%(f,f.getType(),sf.getType())) |
qstr="alter table %s add %s %s"%(self.table,fieldName,'text') |
elif uf is not None: |
|
# add field to table |
|
qstr="alter table %s add %s %s"%(self.table,uf.getName(),uf.getType()) |
logging.info("db add field:"+qstr) |
logging.info("db add field:"+qstr) |
|
|
if type(qstr)==types.UnicodeType: |
if self.ascii_db and type(qstr)==types.UnicodeType: |
qstr=qstr.encode('utf-8') |
qstr=qstr.encode('utf-8') |
|
|
self.db.execute(qstr) |
self.db.execute(qstr) |
self.dbCon.commit() |
self.dbCon.commit() |
|
|
# prepare sql statements for update |
# prepare sql statements for update |
setStr=string.join(["%s = %%s"%f for f in self.update_fields], ', ') |
setStr=string.join(["%s = %%s"%self.xml_field_map[f] for f in self.xml_update_list], ', ') |
self.updQuery="UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field) |
self.updQuery="UPDATE %s SET %s WHERE %s = %%s"%(self.table,setStr,self.id_field) |
# and insert |
# and insert |
fields=string.join(self.update_fields, ',') |
fields=string.join([self.xml_field_map[x].getName() for x in self.xml_update_list], ',') |
values=string.join(['%s' for f in self.update_fields], ',') |
values=string.join(['%s' for f in self.xml_update_list], ',') |
self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values) |
self.addQuery="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,values) |
#print "upQ: ", self.updQuery |
logging.debug("update-query: "+self.updQuery) |
#print "adQ: ", self.addQuery |
logging.debug("add-query: "+self.addQuery) |
|
|
return |
return |
|
|
def handle_meta_field(self, end_condition):
    """Register one XML field name and its SQL column mapping.

    Generator-style handler: the NAME attribute is read from self.params
    before the yield; per the existing comment, the code after the yield
    runs once the element is closed.

    Appends the XML name to self.xml_field_names, stores a text-typed
    TableColumn for it in self.xml_field_map and the reverse mapping in
    self.sql_field_map.

    @param end_condition: not used in this handler
    @raise ValueError: if the element has no NAME attribute
    """
    name = self.params.get((None, u'NAME'))
    yield None
    # Element closed. Wrap up
    if name is None:
        # fail with a clear message instead of an obscure error further down
        raise ValueError("FIELD element without NAME attribute")

    if self.lc_names:
        # clean name: no spaces, lower case
        sqlname = name.replace(" ", "_").lower()
    else:
        sqlname = name

    self.xml_field_names.append(name)
    # map to sql name and default text type
    self.xml_field_map[name] = TableColumn(sqlname, 'text')
    self.sql_field_map[sqlname] = name
    logging.debug("FIELD name: " + name)
    return
|
|
def handle_data(self, end_condition): |
def handle_data_fields(self, end_condition): |
dispatcher = { |
dispatcher = { |
(saxtools.START_ELEMENT, fm_ns, u'ROW'): |
(saxtools.START_ELEMENT, fm_ns, u'ROW'): |
self.handle_row, |
self.handle_row, |
Line 199 class xml_handler:
|
Line 323 class xml_handler:
|
|
|
if self.sync_mode: |
if self.sync_mode: |
# delete unmatched entries in db |
# delete unmatched entries in db |
|
logging.info("deleting unmatched rows from db") |
delQuery = "DELETE FROM %s WHERE %s = %%s"%(self.table,self.id_field) |
delQuery = "DELETE FROM %s WHERE %s = %%s"%(self.table,self.id_field) |
for id in self.dbIDs.keys(): |
for id in self.dbIDs.keys(): |
# find all not-updated fields |
# find all not-updated fields |
if self.dbIDs[id] == 0: |
if self.dbIDs[id] == 0: |
logging.info(" delete:"+id) |
logging.info(" delete:"+id) |
SimpleSearch(self.db, delQuery, [id]) |
SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) |
sys.exit(1) |
sys.exit(1) |
|
|
elif self.dbIDs[id] > 1: |
elif self.dbIDs[id] > 1: |
logging.info(" sync:"+"id used more than once?"+id) |
logging.info(" sync: ID %s used more than once?"%id) |
|
|
self.dbCon.commit() |
self.dbCon.commit() |
|
|
|
# reinstate backup tables |
|
if self.backup_table: |
|
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
|
logging.info("rename backup table %s to %s"%(self.orig_table,backup_name)) |
|
qstr = "ALTER TABLE %s RENAME TO %s"%(self.orig_table,backup_name) |
|
self.db.execute(qstr) |
|
logging.info("rename working table %s to %s"%(self.table,self.orig_table)) |
|
qstr = "ALTER TABLE %s RENAME TO %s"%(self.table,self.orig_table) |
|
self.db.execute(qstr) |
|
self.dbCon.commit() |
|
|
return |
return |
|
|
def handle_row(self, end_condition): |
def handle_row(self, end_condition): |
Line 220 class xml_handler:
|
Line 356 class xml_handler:
|
self.handle_col, |
self.handle_col, |
} |
} |
logging.debug("START ROW") |
logging.debug("START ROW") |
self.dataSet = {} |
self.xml_data = {} |
self.colIdx = 0 |
self.colIdx = 0 |
yield None |
yield None |
|
|
Line 240 class xml_handler:
|
Line 376 class xml_handler:
|
id_val='' |
id_val='' |
# synchronize by id_field |
# synchronize by id_field |
if self.id_field: |
if self.id_field: |
id_val=self.dataSet[self.id_field.lower()] |
id_val = self.xml_data[self.xml_id] |
if id_val in self.dbIDs: |
if id_val in self.dbIDs: |
self.dbIDs[id_val] += 1 |
self.dbIDs[id_val] += 1 |
update=True |
update=True |
|
|
|
# collect all values |
|
args = [] |
|
for fn in self.xml_update_list: |
|
f = self.xml_field_map[fn] |
|
val = self.xml_data[fn] |
|
type = self.sql_fields[f.getName()].getType() |
|
if type == "date" and len(val) == 0: |
|
# empty date field |
|
val = None |
|
|
|
elif type == "integer" and len(val) == 0: |
|
# empty int field |
|
val = None |
|
|
|
args.append(val) |
|
|
if update: |
if update: |
# update existing row (by id_field) |
# update existing row (by id_field) |
#setvals=[] |
# last argument is ID match |
#for fieldName in self.update_fields: |
|
# setvals.append("%s = %s"%(fieldName,sql_quote(self.dataSet[fieldName]))) |
|
#setStr=string.join(setvals, ',') |
|
id_val=self.dataSet[self.id_field.lower()] |
|
#qstr="UPDATE %s SET %s WHERE %s = '%s' "%(self.table,setStr,self.id_field,id_val) |
|
args = [self.dataSet[f.lower()] for f in self.update_fields] |
|
args.append(id_val) |
args.append(id_val) |
SimpleSearch(self.db, self.updQuery, args) |
logging.debug("update: %s = %s"%(id_val, args)) |
logging.debug("update: %s"%id_val) |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
|
|
else: |
else: |
# create new row |
# create new row |
#fields=string.join(update_fields, ',') |
logging.debug("insert: %s"%args) |
#values=string.join([" %s "%sql_quote(self.dataSet[x]) for x in self.update_fields], ',') |
SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) |
#qstr="INSERT INTO %s (%s) VALUES (%s)"%(self.table,fields,self.values) |
|
args=[] |
|
for f in self.update_fields: |
|
value=self.dataSet[f.lower()].encode('utf-8') |
|
if value=="": #hack DW |
|
value=None |
|
|
|
args.append(value) |
|
|
|
#args = [self.dataSet[f.lower()].encode('utf-8') for f in self.update_fields] |
|
logging.debug(args) |
|
SimpleSearch(self.db, self.addQuery, args) |
|
logging.debug("add: %s"%self.dataSet.get(self.id_field, self.rowcnt)) |
|
|
|
#logging.info(" row:"+"%d (%s)"%(self.rowcnt,id_val)) |
#logging.info(" row:"+"%d (%s)"%(self.rowcnt,id_val)) |
if (self.rowcnt % 10) == 0: |
if (self.rowcnt % 10) == 0: |
Line 311 class xml_handler:
|
Line 445 class xml_handler:
|
content += self.params |
content += self.params |
yield None |
yield None |
#Element closed. Wrap up |
#Element closed. Wrap up |
field = self.fieldNames[self.colIdx] |
fn = self.xml_field_names[self.colIdx] |
self.dataSet[field.lower()] = content |
self.xml_data[fn] = content |
#print " DATA(", field, ") ", repr(content) |
|
return |
return |
|
|
|
|
Line 338 opars.add_option("-t", "--table",
|
Line 471 opars.add_option("-t", "--table",
|
help="database table name") |
help="database table name") |
opars.add_option("--fields", default=None, |
opars.add_option("--fields", default=None, |
dest="update_fields", |
dest="update_fields", |
help="list of fields to update (comma separated)", metavar="LIST") |
help="list of fields to update (comma separated, sql-names)", metavar="LIST") |
opars.add_option("--id-field", default=None, |
opars.add_option("--id-field", default=None, |
dest="id_field", |
dest="id_field", |
help="name of id field for synchronisation (only appends data otherwise)", metavar="NAME") |
help="name of id field for synchronisation (only appends data otherwise, sql-name)", metavar="NAME") |
opars.add_option("--sync-mode", default=False, action="store_true", |
opars.add_option("--sync", "--sync-mode", default=False, action="store_true", |
dest="sync_mode", |
dest="sync_mode", |
help="do full sync based on id field (remove unmatched fields from db)") |
help="do full sync based on id field (remove unmatched fields from db)") |
|
opars.add_option("--lc-names", default=False, action="store_true", |
|
dest="lc_names", |
|
help="clean and lower case field names from XML") |
|
opars.add_option("--keep-fields", default=False, action="store_true", |
|
dest="keep_fields", |
|
help="don't add fields from XML to SQL table") |
|
opars.add_option("--ascii-db", default=False, action="store_true", |
|
dest="ascii_db", |
|
help="the SQL database stores ASCII instead of unicode") |
|
opars.add_option("--replace", default=False, action="store_true", |
|
dest="replace_table", |
|
help="replace table i.e. delete and re-insert data") |
|
opars.add_option("--backup", default=False, action="store_true", |
|
dest="backup_table", |
|
help="create backup of old table (breaks indices)") |
opars.add_option("-d", "--debug", default=False, action="store_true", |
opars.add_option("-d", "--debug", default=False, action="store_true", |
dest="debug", |
dest="debug", |
help="debug mode (more output)") |
help="debug mode (more output)") |
Line 352 opars.add_option("-d", "--debug", defaul
|
Line 500 opars.add_option("-d", "--debug", defaul
|
(options, args) = opars.parse_args() |
(options, args) = opars.parse_args() |
|
|
if len(sys.argv) < 2 or options.filename is None or options.dsn is None: |
if len(sys.argv) < 2 or options.filename is None or options.dsn is None: |
|
print "importFMPXML "+version_string |
opars.print_help() |
opars.print_help() |
sys.exit(1) |
sys.exit(1) |
|
|
Line 367 logging.basicConfig(level=loglevel,
|
Line 516 logging.basicConfig(level=loglevel,
|
update_fields = None |
update_fields = None |
|
|
if options.update_fields: |
if options.update_fields: |
update_fields = [string.strip(s) for s in options.update_fields.split(',')] |
uf = {} |
|
for f in options.update_fields.split(','): |
|
(n,t) = f.split(':') |
|
uf[n] = TableColumn(n,t) |
|
|
|
options.update_fields = uf |
|
|
|
if options.id_field and options.replace_table: |
|
logging.error("ABORT: sorry, you can't do both sync (id_field) and replace") |
|
sys.exit(1) |
|
|
parser = sax.make_parser() |
parser = sax.make_parser() |
#The "consumer" is our own handler |
#The "consumer" is our own handler |
consumer = xml_handler(dsn=options.dsn,table=options.table, |
consumer = xml_handler(options) |
update_fields=update_fields,id_field=options.id_field, |
|
sync_mode=options.sync_mode) |
|
#Initialize Tenorsax with handler |
#Initialize Tenorsax with handler |
handler = saxtools.tenorsax(consumer) |
handler = saxtools.tenorsax(consumer) |
#Resulting tenorsax instance is the SAX handler |
#Resulting tenorsax instance is the SAX handler |