version 1.1, 2007/04/18 10:03:13
|
version 1.1.2.5, 2009/04/28 13:30:38
|
Line 0
|
Line 1
|
|
try: |
|
import psycopg2 as psycopg |
|
psyco = 2 |
|
except: |
|
import psycopg |
|
psyco = 1 |
|
|
|
import logging |
|
|
|
from xml import sax |
|
from amara import saxtools |
|
|
|
# namespace for FileMaker8 |
|
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
|
|
|
# list of fields that are taken from XML and checked against DB as conflicts |
|
#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] |
|
checkFields=['key','first_name','last_name','title_new','home_inst','e_mail2'] |
|
|
|
def sql_quote(v): |
|
# quote dictionary |
|
quote_dict = {"\'": "''", "\\": "\\\\"} |
|
for dkey in quote_dict.keys(): |
|
if v.find(dkey) >= 0: |
|
v=quote_dict[dkey].join(v.split(dkey)) |
|
return "'%s'"%v |
|
|
|
def SimpleSearch(curs,query, args=None): |
|
"""execute sql query and return data""" |
|
logging.debug("executing: "+query) |
|
if psyco == 1: |
|
query = query.encode("UTF-8") |
|
#if args is not None: |
|
# args = [ sql_quote(a) for a in args ] |
|
#logging.debug(query) |
|
#logging.debug(args) |
|
|
|
curs.execute(query, args) |
|
logging.debug("sql done") |
|
try: |
|
return curs.fetchall() |
|
except: |
|
return None |
|
|
|
class xml_handler: |
|
|
|
def __init__(self): |
|
''' |
|
SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table. |
|
@param dsn: database connection string |
|
@param table: name of the table the xml shall be imported into |
|
''' |
|
|
|
# set up parser |
|
self.result={} |
|
self.event = None |
|
self.top_dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
|
self.handle_meta_fields, |
|
(saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
|
self.handle_data, |
|
} |
|
|
|
# connect database |
|
|
|
|
|
|
|
|
|
self.dbIDs = {} |
|
self.rowcnt = 0 |
|
|
|
|
|
|
|
self.newDataset = [] |
|
self.conflicts = [] |
|
self.ok = [] |
|
self.fieldNames=[] |
|
return |
|
|
|
def handle_meta_fields(self, end_condition): |
|
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'FIELD'): |
|
self.handle_meta_field, |
|
} |
|
#First round through the generator corresponds to the |
|
#start element event |
|
logging.debug("START METADATA") |
|
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
|
logging.debug("END METADATA") |
|
|
|
self.update_fields = self.fieldNames |
|
|
|
logging.debug("xml-fieldnames:"+repr(self.fieldNames)) |
|
# get list of fields in db table |
|
|
|
#print "upQ: ", self.updQuery |
|
#print "adQ: ", self.addQuery |
|
|
|
return |
|
|
|
def handle_meta_field(self, end_condition): |
|
name = self.params.get((None, u'NAME')) |
|
yield None |
|
#Element closed. Wrap up |
|
name=name.replace(" ","_")# make sure no spaces |
|
self.fieldNames.append(name) |
|
logging.debug("FIELD name: "+name) |
|
return |
|
|
|
def handle_data(self, end_condition): |
|
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'ROW'): |
|
self.handle_row, |
|
} |
|
#First round through the generator corresponds to the |
|
#start element event |
|
logging.debug("START RESULTSET") |
|
self.rowcnt = 0 |
|
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
|
logging.debug("END RESULTSET") |
|
|
|
|
|
|
|
return |
|
|
|
def handle_row(self, end_condition): |
|
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'COL'): |
|
self.handle_col, |
|
} |
|
logging.debug("START ROW") |
|
self.dataSet = {} |
|
self.colIdx = 0 |
|
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
|
logging.debug("END ROW") |
|
self.rowcnt += 1 |
|
# process collected row data |
|
update=False |
|
id_val='' |
|
|
|
if self.result.has_key(self.dataSet['key']): |
|
logging.error("Key %s not unique"%self.dataSet['key']) |
|
|
|
self.result[self.dataSet['key']]=self.dataSet |
|
|
|
|
|
return |
|
|
|
def handle_col(self, end_condition): |
|
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'DATA'): |
|
self.handle_data_tag, |
|
} |
|
#print "START COL" |
|
yield None |
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
#Element closed. Wrap up |
|
#print "END COL" |
|
self.colIdx += 1 |
|
return |
|
|
|
def handle_data_tag(self, end_condition): |
|
#print "START DATA" |
|
content = u'' |
|
yield None |
|
# gather child elements |
|
while not self.event == end_condition: |
|
if self.event[0] == saxtools.CHARACTER_DATA: |
|
content += self.params |
|
yield None |
|
#Element closed. Wrap up |
|
field = self.fieldNames[self.colIdx] |
|
self.dataSet[field.lower()] = content |
|
#print " DATA(", field, ") ", repr(content) |
|
return |
|
|
|
|
|
def checkImport(dsn,resultSet): |
|
#now connect to the database |
|
logging.info("dsn: %s"%dsn) |
|
dbCon = psycopg.connect(dsn) |
|
db = dbCon.cursor() |
|
|
|
|
|
qstr="select key from personal_www" |
|
|
|
results=SimpleSearch(db,qstr) |
|
|
|
keys=[] |
|
for x in results: |
|
if x[0]: |
|
keys.append(x[0].decode('utf-8')) |
|
|
|
|
|
#first step detect new entries and conflicts |
|
new=[] |
|
conflicts={} |
|
|
|
for x in resultSet.iterkeys(): |
|
|
|
if x not in keys: |
|
|
|
new.append(x) |
|
|
|
else: |
|
|
|
conflict,ret=checkForConflicts(db,resultSet[x],x) |
|
if conflict: |
|
conflicts[x]=ret |
|
|
|
return new,conflicts |
|
|
|
def importFMPXML(filename): |
|
''' |
|
method to import FileMaker XML file (FMPXMLRESULT format) into the table. |
|
@param filename: xmlfile filename |
|
|
|
''' |
|
|
|
parser = sax.make_parser() |
|
#The "consumer" is our own handler |
|
consumer = xml_handler() |
|
#Initialize Tenorsax with handler |
|
handler = saxtools.tenorsax(consumer) |
|
#Resulting tenorsax instance is the SAX handler |
|
parser.setContentHandler(handler) |
|
parser.setFeature(sax.handler.feature_namespaces, 1) |
|
parser.parse(filename) |
|
resultSet=consumer.result # xml now transformed into an dictionary |
|
|
|
return resultSet |
|
|
|
|
|
|
|
def checkForConflicts(cursor,dataSet,key): |
|
|
|
ret=[] |
|
fields=",".join(checkFields) |
|
|
|
qstr="select %s from personal_www where key='%s'"%(fields,key) |
|
|
|
|
|
sr=SimpleSearch(cursor,qstr) |
|
|
|
if not sr: |
|
return True, None |
|
|
|
i=0 |
|
retValue=False |
|
|
|
for checkField in checkFields: |
|
dbValueR=sr[0][i] |
|
if dbValueR: |
|
dbValue=dbValueR.decode('utf-8') |
|
else: |
|
dbValue="" |
|
|
|
setValue=dataSet[checkField] |
|
logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) |
|
if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): |
|
ret.append((checkField,dbValue,setValue)) |
|
retValue=True |
|
i+=1 |
|
|
|
return retValue,ret |
|
|
|
|
|
## |
|
## public static int main() |
|
## |
|
|
|
if __name__ == "__main__": |
|
|
|
|
|
|
|
loglevel = logging.DEBUG |
|
|
|
|
|
logging.basicConfig(level=loglevel, |
|
format='%(asctime)s %(levelname)s %(message)s', |
|
datefmt='%H:%M:%S') |
|
|
|
resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") |
|
news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet) |
|
|
|
|
|
print "new" |
|
print len(news),news |
|
print "-----------" |
|
print "conflicts" |
|
print conflicts |
|
|
|
# update_fields = None |
|
# |
|
# if options.update_fields: |
|
# update_fields = [string.strip(s) for s in options.update_fields.split(',')] |
|
# |
|
# parser = sax.make_parser() |
|
# #The "consumer" is our own handler |
|
# consumer = xml_handler(dsn=options.dsn,table=options.table, |
|
# update_fields=update_fields,id_field=options.id_field, |
|
# sync_mode=options.sync_mode) |
|
# #Initialize Tenorsax with handler |
|
# handler = saxtools.tenorsax(consumer) |
|
# #Resulting tenorsax instance is the SAX handler |
|
# parser.setContentHandler(handler) |
|
# parser.setFeature(sax.handler.feature_namespaces, 1) |
|
# parser.parse(options.filename) |
|
# |
|
# |
|
# print "DONE!" |
|
|
|
|
|
|