version 1.25, 2008/07/02 11:58:45
|
version 1.35, 2012/02/15 08:41:01
|
Line 6 import logging
|
Line 6 import logging
|
import sys |
import sys |
import types |
import types |
import time |
import time |
|
import re |
|
|
from xml import sax |
from xml import sax |
from amara import saxtools |
from xml.sax.handler import ContentHandler |
|
#from amara import saxtools |
|
|
try: |
try: |
import psycopg2 as psycopg |
import psycopg2 as psycopg |
Line 22 except:
|
Line 24 except:
|
|
|
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
|
|
version_string = "V0.6.1 ROC .2008" |
version_string = "V0.6.7 ROC 21.6.2011" |
|
|
def unicodify(text, withNone=False): |
def unicodify(text, withNone=False): |
"""decode str (utf-8 or latin-1 representation) into unicode object""" |
"""decode str (utf-8 or latin-1 representation) into unicode object""" |
Line 68 def sql_quote(v):
|
Line 70 def sql_quote(v):
|
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
v=string.join(string.split(v,dkey),quote_dict[dkey]) |
return "'%s'"%v |
return "'%s'"%v |
|
|
|
def sqlName(s, lc=True, more=''):
    """Return a restricted, ASCII-only version of a string.

    Apostrophes are removed; every other character outside ``A-Z``,
    ``a-z``, ``0-9``, ``_`` and the characters listed in *more* is
    replaced by ``_``.

    @param s: string to clean; None yields the empty string
    @param lc: if true, the result is lower-cased
    @param more: additional characters to keep; each one is taken
        literally, so "-", "]", "^" or "\\" are safe to pass
    @return: the cleaned string
    """
    if s is None:
        return ""

    # remove '
    s = s.replace("'", "")
    # Escape the extra characters so that regex metacharacters cannot
    # form a range, close the character class early or negate it.
    allowed = 'A-Za-z0-9_' + re.escape(more)
    # all else -> "_"
    s = re.sub('[^' + allowed + ']', '_', s)
    if lc:
        return s.lower()

    return s
|
|
def SimpleSearch(curs,query, args=None, ascii=False): |
def SimpleSearch(curs,query, args=None, ascii=False): |
"""execute sql query and return data""" |
"""execute sql query and return data""" |
#logger.debug("executing: "+query) |
#logger.debug("executing: "+query) |
Line 110 class TableColumn:
|
Line 126 class TableColumn:
|
return self.name |
return self.name |
|
|
|
|
class xml_handler: |
class xml_handler(ContentHandler): |
def __init__(self,options): |
def __init__(self,options): |
"""SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. |
"""SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. |
@param options: dict of options |
@param options: dict of options |
Line 136 class xml_handler:
|
Line 152 class xml_handler:
|
|
|
|
|
# set up parser |
# set up parser |
|
self.result={} |
self.event = None |
self.event = None |
self.top_dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
# self.top_dispatcher = { |
self.handle_meta_fields, |
# (saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
(saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
# self.handle_meta_fields, |
self.handle_data_fields, |
# (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
} |
# self.handle_data_fields, |
|
# } |
|
|
# connect database |
# connect database |
self.dbCon = psycopg.connect(options.dsn) |
self.dbCon = psycopg.connect(options.dsn) |
Line 160 class xml_handler:
|
Line 178 class xml_handler:
|
self.replace_table = getattr(options,"replace_table",None) |
self.replace_table = getattr(options,"replace_table",None) |
self.backup_table = getattr(options,"backup_table",None) |
self.backup_table = getattr(options,"backup_table",None) |
self.read_before_update = getattr(options,"read_before_update",None) |
self.read_before_update = getattr(options,"read_before_update",None) |
|
self.debug_data = getattr(options,"debug_data",None) |
|
|
self.logger.debug("dsn: "+repr(getattr(options,"dsn",None))) |
self.logger.debug("dsn: "+repr(getattr(options,"dsn",None))) |
self.logger.debug("table: "+repr(self.table)) |
self.logger.debug("table: "+repr(self.table)) |
Line 172 class xml_handler:
|
Line 191 class xml_handler:
|
self.logger.debug("replace_table: "+repr(self.replace_table)) |
self.logger.debug("replace_table: "+repr(self.replace_table)) |
self.logger.debug("backup_table: "+repr(self.backup_table)) |
self.logger.debug("backup_table: "+repr(self.backup_table)) |
self.logger.debug("read_before_update: "+repr(self.read_before_update)) |
self.logger.debug("read_before_update: "+repr(self.read_before_update)) |
|
self.logger.debug("debug_data: "+repr(self.debug_data)) |
|
|
self.dbIDs = {} |
self.dbIDs = {} |
self.rowcnt = 0 |
self.rowcnt = 0 |
|
|
|
self.currentName = None |
|
|
if self.id_field is not None: |
if self.id_field is not None: |
# prepare a list of ids for sync mode |
# prepare a list of ids for sync mode |
qstr="select %s from %s"%(self.id_field,self.table) |
qstr="select %s from %s"%(self.id_field,self.table) |
Line 195 class xml_handler:
|
Line 217 class xml_handler:
|
|
|
return |
return |
|
|
def handle_meta_fields(self, end_condition): |
def startElement(self, name, attrs): |
dispatcher = { |
logging.debug(name) |
(saxtools.START_ELEMENT, fm_ns, u'FIELD'): |
if (name.lower() == "field") : |
self.handle_meta_field, |
self.handle_meta_field(attrs) |
} |
if (name.lower() == "row") : |
|
logging.debug("handleROW") |
|
self.handle_row(attrs) |
|
if (name.lower()=="resultset"): |
|
self.handle_data_fields(attrs) |
|
|
|
if (name.lower()=="data"): |
|
self.handle_data_tag(attrs); |
|
|
|
def endElement(self, name):
    """SAX callback for a closing tag; forwards to the matching end handler.

    The tag name is compared case-insensitively.  Closing tags other
    than RESULTSET, FIELD, METADATA, ROW and COL are ignored.

    @param name: name of the element that was closed
    """
    tag = name.lower()

    # work that has to happen before the end handler itself runs
    if tag == "resultset":
        self.currentTag = ""
    elif tag == "row":
        logging.debug("handleROW")

    # closing tag -> name of the handler method to call
    end_handlers = {
        "resultset": "handle_end_data_fields",
        "field": "handle_end_meta_field",
        "metadata": "handle_end_meta_fields",
        "row": "handle_end_row",
        "col": "handle_end_col",
    }
    handler_name = end_handlers.get(tag)
    if handler_name is not None:
        getattr(self, handler_name)()
|
def characters(self, content):
    """SAX callback for character data; collects the text of the current column.

    The text is appended to whatever is already stored for the field at
    index ``self.colIdx``, because the parser may deliver the contents
    of a single tag in several calls.  Text that cannot be assigned to
    a field is only written to the debug log.

    @param content: chunk of character data delivered by the parser
    """
    try:
        fn = self.xml_field_names[self.colIdx]
        # if there is content already, append to it (the characters
        # handler may be called more than once within one tag)
        self.xml_data[fn] = self.xml_data.get(fn, '') + content
    except Exception:
        # Best effort: text that does not belong to a known column is
        # logged and dropped.  Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.debug(content)
|
def handle_end_meta_fields(self): |
|
# dispatcher = { |
|
# (saxtools.START_ELEMENT, fm_ns, u'FIELD'): |
|
# self.handle_meta_field, |
|
# } |
#First round through the generator corresponds to the |
#First round through the generator corresponds to the |
#start element event |
#start element event |
self.logger.info("reading metadata...") |
# self.logger.info("reading metadata...") |
self.logger.debug("START METADATA") |
# if self.debug_data: |
yield None |
# self.logger.debug("START METADATA") |
|
# #yield None |
|
|
#delegate is a generator that handles all the events "within" |
#delegate is a generator that handles all the events "within" |
#this element |
#this element |
delegate = None |
# delegate = None |
while not self.event == end_condition: |
# while not self.event == end_condition: |
delegate = saxtools.tenorsax.event_loop_body( |
# delegate = saxtools.tenorsax.event_loop_body( |
dispatcher, delegate, self.event) |
# dispatcher, delegate, self.event) |
yield None |
# yield None |
|
# |
#Element closed. Wrap up |
# #Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END METADATA") |
self.logger.debug("END METADATA") |
|
|
# rename table for backup |
# rename table for backup |
Line 341 class xml_handler:
|
Line 407 class xml_handler:
|
self.logger.debug("add-query: "+self.addQuery) |
self.logger.debug("add-query: "+self.addQuery) |
return |
return |
|
|
def handle_meta_field(self, end_condition): |
def handle_meta_field(self, attrs): |
name = self.params.get((None, u'NAME')) |
self.currentName = attrs.get('NAME') |
yield None |
#yield None |
|
return |
|
def handle_end_meta_field(self): |
#Element closed. Wrap up |
#Element closed. Wrap up |
|
name = self.currentName |
if self.lc_names: |
if self.lc_names: |
# clean name |
# clean name |
sqlname = name.replace(" ","_").lower() |
sqlname = sqlName(name) |
else: |
else: |
sqlname = name |
sqlname = name |
self.xml_field_names.append(name) |
self.xml_field_names.append(name) |
Line 357 class xml_handler:
|
Line 426 class xml_handler:
|
self.logger.debug("FIELD name: "+name) |
self.logger.debug("FIELD name: "+name) |
return |
return |
|
|
def handle_data_fields(self, end_condition): |
def handle_data_fields(self, attrs): |
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'ROW'): |
|
self.handle_row, |
|
} |
|
#First round through the generator corresponds to the |
#First round through the generator corresponds to the |
#start element event |
#start element event |
self.logger.info("reading data...") |
self.logger.info("reading data...") |
|
if self.debug_data: |
self.logger.debug("START RESULTSET") |
self.logger.debug("START RESULTSET") |
self.rowcnt = 0 |
self.rowcnt = 0 |
yield None |
return |
|
|
|
def handle_end_data_fields(self): |
#delegate is a generator that handles all the events "within" |
#delegate is a generator that handles all the events "within" |
#this element |
#this element |
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END RESULTSET") |
self.logger.debug("END RESULTSET") |
self.dbCon.commit() |
self.dbCon.commit() |
|
|
if self.sync_mode: |
if self.sync_mode: |
# delete unmatched entries in db |
# delete unmatched entries in db |
|
if self.rowcnt > 0: |
self.logger.info("deleting unmatched rows from db") |
self.logger.info("deleting unmatched rows from db") |
delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) |
delQuery = "DELETE FROM %s WHERE \"%s\" = %%s"%(self.table,self.id_field) |
for id in self.dbIDs.keys(): |
for id in self.dbIDs.keys(): |
# find all not-updated fields |
# find all not-updated fields |
if self.dbIDs[id] == 0: |
if self.dbIDs[id] == 0: |
self.logger.info(" delete:"+id) |
self.logger.info(" delete: %s"%id) |
SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) |
SimpleSearch(self.db, delQuery, [id], ascii=self.ascii_db) |
|
|
elif self.dbIDs[id] > 1: |
elif self.dbIDs[id] > 1: |
Line 396 class xml_handler:
|
Line 461 class xml_handler:
|
|
|
self.dbCon.commit() |
self.dbCon.commit() |
|
|
|
else: |
|
# safety in case we had an empty file |
|
self.logger.warning("no rows read! not deleting unmatched rows!") |
|
|
# reinstate backup tables |
# reinstate backup tables |
if self.backup_table and not self.id_field: |
if self.backup_table and not self.id_field: |
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
backup_name = "%s_%s"%(self.orig_table,time.strftime('%Y_%m_%d_%H_%M_%S')) |
Line 407 class xml_handler:
|
Line 476 class xml_handler:
|
self.db.execute(qstr) |
self.db.execute(qstr) |
self.dbCon.commit() |
self.dbCon.commit() |
|
|
|
self.logger.info("Done (%s rows)"%self.rowcnt) |
return |
return |
|
|
def handle_row(self, end_condition): |
def handle_row(self, end_condition): |
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'COL'): |
if self.debug_data: |
self.handle_col, |
|
} |
|
self.logger.debug("START ROW") |
self.logger.debug("START ROW") |
self.xml_data = {} |
self.xml_data = {} |
self.colIdx = 0 |
self.colIdx = 0 |
yield None |
|
|
|
|
return |
|
|
|
def handle_end_row(self): |
#delegate is a generator that handles all the events "within" |
#delegate is a generator that handles all the events "within" |
#this element |
#this element |
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
#Element closed. Wrap up |
|
if self.debug_data: |
self.logger.debug("END ROW") |
self.logger.debug("END ROW") |
self.rowcnt += 1 |
self.rowcnt += 1 |
# process collected row data |
# process collected row data |
Line 436 class xml_handler:
|
Line 502 class xml_handler:
|
# synchronize by id_field |
# synchronize by id_field |
if self.id_field: |
if self.id_field: |
if self.id_type == 'integer': |
if self.id_type == 'integer': |
|
try: |
id_val = int(self.xml_data[self.xml_id]) |
id_val = int(self.xml_data[self.xml_id]) |
|
except: |
|
pass |
else: |
else: |
id_val = self.xml_data[self.xml_id] |
id_val = self.xml_data[self.xml_id] |
|
|
|
if not id_val: |
|
# abort update |
|
self.logger.error("ERROR: unable to sync! emtpy id in row %s"%self.rowcnt) |
|
return |
|
|
if id_val in self.dbIDs: |
if id_val in self.dbIDs: |
self.dbIDs[id_val] += 1 |
self.dbIDs[id_val] += 1 |
update=True |
update=True |
Line 452 class xml_handler:
|
Line 526 class xml_handler:
|
continue |
continue |
|
|
f = self.xml_field_map[fn] |
f = self.xml_field_map[fn] |
val = self.xml_data[fn] |
val = self.xml_data.get(fn,None) |
type = self.sql_fields[f.getName()].getType() |
type = self.sql_fields[f.getName()].getType() |
if type == "date" and len(val) == 0: |
if type == "date" and len(val.strip()) == 0: |
# empty date field |
# empty date field |
val = None |
val = None |
|
|
Line 468 class xml_handler:
|
Line 542 class xml_handler:
|
# update existing row (by id_field) |
# update existing row (by id_field) |
if self.read_before_update: |
if self.read_before_update: |
# read data |
# read data |
|
if self.debug_data: |
self.logger.debug("update check: %s = %s"%(id_val, args)) |
self.logger.debug("update check: %s = %s"%(id_val, args)) |
oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db) |
oldrow = SimpleSearch(self.db, self.selQuery, [id_val], ascii=self.ascii_db) |
#i = 0 |
#i = 0 |
Line 476 class xml_handler:
|
Line 551 class xml_handler:
|
# i += 1 |
# i += 1 |
if tuple(oldrow[0]) != tuple(args): |
if tuple(oldrow[0]) != tuple(args): |
# data has changed -- update |
# data has changed -- update |
|
if self.debug_data: |
self.logger.debug("really update: %s = %s"%(id_val, args)) |
self.logger.debug("really update: %s = %s"%(id_val, args)) |
args.append(id_val) # last arg is id |
args.append(id_val) # last arg is id |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
|
|
else: |
else: |
# always update |
# always update |
|
if self.debug_data: |
self.logger.debug("update: %s = %s"%(id_val, args)) |
self.logger.debug("update: %s = %s"%(id_val, args)) |
args.append(id_val) # last arg is id |
args.append(id_val) # last arg is id |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
SimpleSearch(self.db, self.updQuery, args, ascii=self.ascii_db) |
|
|
else: |
else: |
# create new row |
# create new row |
|
if self.debug_data: |
self.logger.debug("insert: %s"%args) |
self.logger.debug("insert: %s"%args) |
SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) |
SimpleSearch(self.db, self.addQuery, args, ascii=self.ascii_db) |
|
|
Line 498 class xml_handler:
|
Line 576 class xml_handler:
|
|
|
return |
return |
|
|
def handle_col(self, end_condition): |
def handle_end_col(self): |
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'DATA'): |
|
self.handle_data_tag, |
|
} |
|
#print "START COL" |
|
yield None |
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
#Element closed. Wrap up |
|
#print "END COL" |
|
self.colIdx += 1 |
self.colIdx += 1 |
return |
return |
|
|
def handle_data_tag(self, end_condition): |
|
|
def handle_data_tag(self, attrs): |
#print "START DATA" |
#print "START DATA" |
content = u'' |
self.content = u'' |
yield None |
# yield None |
# gather child elements |
# # gather child elements |
while not self.event == end_condition: |
# while not self.event == end_condition: |
if self.event[0] == saxtools.CHARACTER_DATA: |
# if self.event[0] == saxtools.CHARACTER_DATA: |
content += self.params |
# content += self.params |
yield None |
# yield None |
#Element closed. Wrap up |
# #Element closed. Wrap up |
fn = self.xml_field_names[self.colIdx] |
# fn = self.xml_field_names[self.colIdx] |
self.xml_data[fn] = content |
# self.xml_data[fn] = content |
return |
return |
|
|
|
|
Line 568 def importFMPXML(options):
|
Line 634 def importFMPXML(options):
|
#The "consumer" is our own handler |
#The "consumer" is our own handler |
consumer = xml_handler(options) |
consumer = xml_handler(options) |
#Initialize Tenorsax with handler |
#Initialize Tenorsax with handler |
handler = saxtools.tenorsax(consumer) |
#handler = saxtools.tenorsax(consumer) |
#Resulting tenorsax instance is the SAX handler |
#Resulting tenorsax instance is the SAX handler |
parser.setContentHandler(handler) |
parser.setContentHandler(consumer) |
parser.setFeature(sax.handler.feature_namespaces, 1) |
#parser.setFeature(sax.handler.feature_namespaces, 1) |
parser.parse(options.filename) |
parser.parse(options.filename) |
|
|
|
|
Line 618 if __name__ == "__main__":
|
Line 684 if __name__ == "__main__":
|
opars.add_option("-d", "--debug", default=False, action="store_true", |
opars.add_option("-d", "--debug", default=False, action="store_true", |
dest="debug", |
dest="debug", |
help="debug mode (more output)") |
help="debug mode (more output)") |
|
opars.add_option("--debug-data", default=False, action="store_true", |
|
dest="debug_data", |
|
help="debug mode for data (even more output)") |
|
|
(options, args) = opars.parse_args() |
(options, args) = opars.parse_args() |
|
|