version 1.1.2.2, 2007/04/19 12:18:50
|
version 1.1.2.15, 2012/02/15 11:51:48
|
Line 1
|
Line 1
|
try: |
|
import psycopg2 as psycopg |
import psycopg2 as psycopg |
psyco = 2 |
psyco = 2 |
except: |
|
import psycopg |
|
psyco = 1 |
|
|
|
import logging |
import logging |
|
from MPIWGHelper import unicodify, utf8ify |
|
|
from xml import sax |
from xml import sax |
from amara import saxtools |
from xml.sax.handler import ContentHandler |
|
|
|
|
|
# namespace for FileMaker8 |
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
fm_ns = 'http://www.filemaker.com/fmpxmlresult' |
|
|
|
# list of fields that are taken from XML and checked against DB as conflicts |
#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] |
#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2'] |
checkFields=['key','first_name','last_name','title','home_inst','e_mail2'] |
checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work'] |
|
|
|
|
def sql_quote(v): |
def sql_quote(v): |
# quote dictionary |
# quote dictionary |
Line 40 def SimpleSearch(curs,query, args=None):
|
Line 40 def SimpleSearch(curs,query, args=None):
|
except: |
except: |
return None |
return None |
|
|
class xml_handler: |
class xml_handler(ContentHandler): |
|
|
def __init__(self): |
def __init__(self): |
''' |
''' |
Line 52 class xml_handler:
|
Line 52 class xml_handler:
|
# set up parser |
# set up parser |
self.result={} |
self.result={} |
self.event = None |
self.event = None |
self.top_dispatcher = { |
# self.top_dispatcher = { |
(saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
# (saxtools.START_ELEMENT, fm_ns, u'METADATA'): |
self.handle_meta_fields, |
# self.handle_meta_fields, |
(saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
# (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): |
self.handle_data, |
# self.handle_data, |
} |
# } |
|
|
# connect database |
# connect database |
|
|
Line 67 class xml_handler:
|
Line 67 class xml_handler:
|
self.dbIDs = {} |
self.dbIDs = {} |
self.rowcnt = 0 |
self.rowcnt = 0 |
|
|
|
self.currentName=None |
|
|
self.newDataset = [] |
self.newDataset = [] |
self.conflicts = [] |
self.conflicts = [] |
self.ok = [] |
self.ok = [] |
self.fieldNames=[] |
self.fieldNames=[] |
|
self.currentRow={} |
|
self.currentTag="" |
return |
return |
|
|
def handle_meta_fields(self, end_condition): |
def startElement(self, name, attrs): |
dispatcher = { |
logging.debug(name) |
(saxtools.START_ELEMENT, fm_ns, u'FIELD'): |
if (name.lower() == "field") : |
self.handle_meta_field, |
self.handle_meta_fields(attrs) |
} |
if (name.lower() == "row") : |
#First round through the generator corresponds to the |
logging.debug("handleROW") |
#start element event |
self.currentRow={} # new Row |
logging.debug("START METADATA") |
self.currentData=0 |
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
if (name.lower()=="data"): |
logging.debug("END METADATA") |
|
|
|
self.update_fields = self.fieldNames |
self.currentName=self.fieldNames[self.currentData] |
|
self.currentData+=1 |
|
self.currentTag="data" |
|
logging.debug("currentData"+str(self.currentData)) |
|
logging.debug("currentName"+str(self.currentName)) |
|
self.currentRow[self.currentName]="" #anlegen des eintrages |
|
|
logging.debug("xml-fieldnames:"+repr(self.fieldNames)) |
def endElement(self,name): |
# get list of fields in db table |
if (name.lower() == "data") : |
|
self.currentTag="" |
|
if (name.lower() == "row"): |
|
self.handle_end_row() |
|
|
#print "upQ: ", self.updQuery |
def characters(self,content): |
#print "adQ: ", self.addQuery |
|
|
|
return |
if self.currentName is not None: |
|
logging.debug(self.currentName+" "+content) |
|
contentTmp = self.currentRow.get(self.currentName,'') #gibt es schon einen Inhalt, dann dieses hinzufuegen (in einem Tag kann u.U. der characters handler mehrfach aufgerufen werden.) |
|
self.currentRow[self.currentName]=contentTmp+content; |
|
|
def handle_meta_field(self, end_condition): |
|
name = self.params.get((None, u'NAME')) |
|
yield None |
|
#Element closed. Wrap up |
|
name=name.replace(" ","_")# make sure no spaces |
|
self.fieldNames.append(name) |
|
logging.debug("FIELD name: "+name) |
|
return |
|
|
|
def handle_data(self, end_condition): |
def handle_end_row(self): |
dispatcher = { |
|
(saxtools.START_ELEMENT, fm_ns, u'ROW'): |
|
self.handle_row, |
|
} |
|
#First round through the generator corresponds to the |
|
#start element event |
|
logging.debug("START RESULTSET") |
|
self.rowcnt = 0 |
|
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
logging.debug("edd ROW") |
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
if self.result.has_key(self.currentRow['key']): |
logging.debug("END RESULTSET") |
logging.error("Key %s not unique"%self.currentRow['key']) |
|
|
|
logging.debug("currentrow:"+self.currentName) |
|
logging.debug("currentname:"+self.currentRow['key']) |
|
|
|
if self.currentName is not None: |
|
self.result[self.currentRow['key']]=self.currentRow.copy() |
|
# |
|
# |
|
# return |
|
|
return |
def handle_meta_fields(self,attrs): |
|
|
def handle_row(self, end_condition): |
#First round through the generator corresponds to the |
dispatcher = { |
#start element event |
(saxtools.START_ELEMENT, fm_ns, u'COL'): |
logging.debug("START -FIELD") |
self.handle_col, |
name = attrs.get('NAME') |
} |
name=name.replace(" ","_")# make sure no spaces |
logging.debug("START ROW") |
self.fieldNames.append(name) |
self.dataSet = {} |
|
self.colIdx = 0 |
|
yield None |
|
|
|
#delegate is a generator that handles all the events "within" |
|
#this element |
|
delegate = None |
|
while not self.event == end_condition: |
|
delegate = saxtools.tenorsax.event_loop_body( |
|
dispatcher, delegate, self.event) |
|
yield None |
|
|
|
#Element closed. Wrap up |
|
logging.debug("END ROW") |
|
self.rowcnt += 1 |
|
# process collected row data |
|
update=False |
|
id_val='' |
|
|
|
if self.result.has_key(self.dataSet['key']): |
self.update_fields = self.fieldNames |
logging.error("Key %s not unique"%self.dataSet['key']) |
|
|
|
self.result[self.dataSet['key']]=self.dataSet |
logging.debug("xml-fieldnames:"+repr(self.fieldNames)) |
|
# get list of fields in db table |
|
|
|
#print "upQ: ", self.updQuery |
|
#print "adQ: ", self.addQuery |
|
|
return |
return |
|
|
def handle_col(self, end_condition): |
# def handle_meta_field(self, end_condition): |
dispatcher = { |
# name = self.params.get((None, u'NAME')) |
(saxtools.START_ELEMENT, fm_ns, u'DATA'): |
# yield None |
self.handle_data_tag, |
# #Element closed. Wrap up |
} |
# name=name.replace(" ","_")# make sure no spaces |
#print "START COL" |
# self.fieldNames.append(name) |
yield None |
# logging.debug("FIELD name: "+name) |
#delegate is a generator that handles all the events "within" |
# return |
#this element |
|
delegate = None |
# def handle_data(self, end_condition): |
while not self.event == end_condition: |
# dispatcher = { |
delegate = saxtools.tenorsax.event_loop_body( |
# (saxtools.START_ELEMENT, fm_ns, u'ROW'): |
dispatcher, delegate, self.event) |
# self.handle_row, |
yield None |
# } |
#Element closed. Wrap up |
# #First round through the generator corresponds to the |
#print "END COL" |
# #start element event |
self.colIdx += 1 |
# logging.debug("START RESULTSET") |
return |
# self.rowcnt = 0 |
|
# yield None |
|
# |
|
# #delegate is a generator that handles all the events "within" |
|
# #this element |
|
# delegate = None |
|
# while not self.event == end_condition: |
|
# delegate = saxtools.tenorsax.event_loop_body( |
|
# dispatcher, delegate, self.event) |
|
# yield None |
|
# |
|
# #Element closed. Wrap up |
|
# logging.debug("END RESULTSET") |
|
# |
|
# |
|
# |
|
# return |
|
|
def handle_data_tag(self, end_condition): |
# def handle_row(self, end_condition): |
#print "START DATA" |
# dispatcher = { |
content = u'' |
# (saxtools.START_ELEMENT, fm_ns, u'COL'): |
yield None |
# self.handle_col, |
# gather child elements |
# } |
while not self.event == end_condition: |
# logging.debug("START ROW") |
if self.event[0] == saxtools.CHARACTER_DATA: |
# self.dataSet = {} |
content += self.params |
# self.colIdx = 0 |
yield None |
# yield None |
#Element closed. Wrap up |
# |
field = self.fieldNames[self.colIdx] |
# #delegate is a generator that handles all the events "within" |
self.dataSet[field.lower()] = content |
# #this element |
#print " DATA(", field, ") ", repr(content) |
# delegate = None |
return |
# while not self.event == end_condition: |
|
# delegate = saxtools.tenorsax.event_loop_body( |
|
# dispatcher, delegate, self.event) |
|
# yield None |
|
# |
|
# #Element closed. Wrap up |
|
# logging.debug("END ROW") |
|
# self.rowcnt += 1 |
|
# # process collected row data |
|
# update=False |
|
# id_val='' |
|
# |
|
# if self.result.has_key(self.dataSet['key']): |
|
# logging.error("Key %s not unique"%self.dataSet['key']) |
|
# |
|
# self.result[self.dataSet['key']]=self.dataSet |
|
# |
|
# |
|
# return |
|
|
|
# def handle_col(self, end_condition): |
|
# dispatcher = { |
|
# (saxtools.START_ELEMENT, fm_ns, u'DATA'): |
|
# self.handle_data_tag, |
|
# } |
|
# #print "START COL" |
|
# yield None |
|
# #delegate is a generator that handles all the events "within" |
|
# #this element |
|
# delegate = None |
|
# while not self.event == end_condition: |
|
# delegate = saxtools.tenorsax.event_loop_body( |
|
# dispatcher, delegate, self.event) |
|
# yield None |
|
# #Element closed. Wrap up |
|
# #print "END COL" |
|
# self.colIdx += 1 |
|
# return |
|
# |
|
# def handle_data_tag(self, end_condition): |
|
# #print "START DATA" |
|
# content = u'' |
|
# yield None |
|
# # gather child elements |
|
# while not self.event == end_condition: |
|
# if self.event[0] == saxtools.CHARACTER_DATA: |
|
# content += self.params |
|
# yield None |
|
# #Element closed. Wrap up |
|
# field = self.fieldNames[self.colIdx] |
|
# self.dataSet[field.lower()] = content |
|
# #print " DATA(", field, ") ", repr(content) |
|
# return |
|
|
|
|
def checkImport(dsn,resultSet): |
def checkImport(dsn,resultSet): |
#now connect to the database |
#now connect to the database |
|
logging.info("dsn: %s"%dsn) |
dbCon = psycopg.connect(dsn) |
dbCon = psycopg.connect(dsn) |
db = dbCon.cursor() |
db = dbCon.cursor() |
|
|
Line 222 def checkImport(dsn,resultSet):
|
Line 262 def checkImport(dsn,resultSet):
|
keys=[] |
keys=[] |
for x in results: |
for x in results: |
if x[0]: |
if x[0]: |
keys.append(x[0].decode('utf-8')) |
keys.append(unicodify(x[0])) |
|
|
|
|
#first step detect new entries and conflicts |
#first step detect new entries and conflicts |
Line 254 def importFMPXML(filename):
|
Line 294 def importFMPXML(filename):
|
#The "consumer" is our own handler |
#The "consumer" is our own handler |
consumer = xml_handler() |
consumer = xml_handler() |
#Initialize Tenorsax with handler |
#Initialize Tenorsax with handler |
handler = saxtools.tenorsax(consumer) |
#handler = saxtools.tenorsax(consumer) |
#Resulting tenorsax instance is the SAX handler |
#Resulting tenorsax instance is the SAX handler |
parser.setContentHandler(handler) |
parser.setContentHandler(consumer) |
parser.setFeature(sax.handler.feature_namespaces, 1) |
#parser.setFeature(sax.handler.feature_namespaces, 1) |
parser.parse(filename) |
parser.parse(filename) |
resultSet=consumer.result # xml now transformed into an dictionary |
resultSet=consumer.result # xml now transformed into an dictionary |
|
|
Line 284 def checkForConflicts(cursor,dataSet,key
|
Line 324 def checkForConflicts(cursor,dataSet,key
|
for checkField in checkFields: |
for checkField in checkFields: |
dbValueR=sr[0][i] |
dbValueR=sr[0][i] |
if dbValueR: |
if dbValueR: |
dbValue=dbValueR.decode('utf-8') |
dbValue=unicodify(dbValueR) |
else: |
else: |
dbValue="" |
dbValue="" |
|
|
|
if checkField in dataSet: |
setValue=dataSet[checkField] |
setValue=dataSet[checkField] |
logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) |
logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue))) |
if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): |
if dbValue.strip().rstrip()!=setValue.lstrip().rstrip(): |
ret.append((checkField,dbValue,setValue)) |
ret.append((checkField,dbValue,setValue)) |
retValue=True |
retValue=True |
|
|
|
else: |
|
logging.warning("unknown field %s in data file!"%checkField) |
|
|
i+=1 |
i+=1 |
|
|
return retValue,ret |
return retValue,ret |
Line 314 if __name__ == "__main__":
|
Line 359 if __name__ == "__main__":
|
datefmt='%H:%M:%S') |
datefmt='%H:%M:%S') |
|
|
resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") |
resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") |
news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet) |
news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet) |
|
|
|
|
print "new" |
print "new" |