--- MPIWGWeb/Attic/updatePersonalWWW.py 2012/01/09 07:33:31 1.1.2.11 +++ MPIWGWeb/Attic/updatePersonalWWW.py 2012/01/10 12:37:03 1.1.2.12 @@ -5,7 +5,8 @@ import logging from MPIWGHelper import unicodify, utf8ify from xml import sax -from amara import saxtools +from xml.sax.handler import ContentHandler +#from amara import saxtools # namespace for FileMaker8 fm_ns = 'http://www.filemaker.com/fmpxmlresult' @@ -40,7 +41,7 @@ def SimpleSearch(curs,query, args=None): except: return None -class xml_handler: +class xml_handler(ContentHandler): def __init__(self): ''' @@ -52,12 +53,12 @@ class xml_handler: # set up parser self.result={} self.event = None - self.top_dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'METADATA'): - self.handle_meta_fields, - (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): - self.handle_data, - } +# self.top_dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'METADATA'): +# self.handle_meta_fields, +# (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'): +# self.handle_data, +# } # connect database @@ -67,34 +68,66 @@ class xml_handler: self.dbIDs = {} self.rowcnt = 0 - + self.currentName=None self.newDataset = [] self.conflicts = [] self.ok = [] self.fieldNames=[] + self.currentRow={} + self.currentTag="" return - def handle_meta_fields(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'FIELD'): - self.handle_meta_field, - } + def startElement(self, name, attrs): + logging.debug(name) + if (name.lower() == "field") : + self.handle_meta_fields(attrs) + if (name.lower() == "row") : + logging.debug("handleROW") + self.currentRow={} # new Row + self.currentData=0 + + + if (name.lower()=="data"): + + self.currentName=self.fieldNames[self.currentData] + self.currentData+=1 + self.currentTag="data" + + def endElement(self,name): + if (name.lower() == "data") : + self.currentTag="" + if (name.lower() == "row"): + self.handle_end_row() + + def characters(self,content): + + if self.currentName is not None: + logging.debug(self.currentName+" "+content) + self.currentRow[self.currentName]=content; + + + def handle_end_row(self): + + logging.debug("edd ROW") + + if self.result.has_key(self.currentRow['key']): + logging.error("Key %s not unique"%self.currentRow['key']) + + if self.currentName is not None: + self.result[self.currentRow['key']]=self.currentRow.copy() +# +# +# return + + def handle_meta_fields(self,attrs): + #First round through the generator corresponds to the #start element event - logging.debug("START METADATA") - yield None - - #delegate is a generator that handles all the events "within" - #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - - #Element closed. Wrap up - logging.debug("END METADATA") + logging.debug("START -FIELD") + name = attrs.get('NAME') + name=name.replace(" ","_")# make sure no spaces + self.fieldNames.append(name) self.update_fields = self.fieldNames @@ -106,107 +139,107 @@ class xml_handler: return - def handle_meta_field(self, end_condition): - name = self.params.get((None, u'NAME')) - yield None - #Element closed. Wrap up - name=name.replace(" ","_")# make sure no spaces - self.fieldNames.append(name) - logging.debug("FIELD name: "+name) - return - - def handle_data(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'ROW'): - self.handle_row, - } - #First round through the generator corresponds to the - #start element event - logging.debug("START RESULTSET") - self.rowcnt = 0 - yield None - - #delegate is a generator that handles all the events "within" - #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - - #Element closed. Wrap up - logging.debug("END RESULTSET") - - - - return - - def handle_row(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'COL'): - self.handle_col, - } - logging.debug("START ROW") - self.dataSet = {} - self.colIdx = 0 - yield None - - #delegate is a generator that handles all the events "within" - #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - - #Element closed. Wrap up - logging.debug("END ROW") - self.rowcnt += 1 - # process collected row data - update=False - id_val='' - - if self.result.has_key(self.dataSet['key']): - logging.error("Key %s not unique"%self.dataSet['key']) - - self.result[self.dataSet['key']]=self.dataSet - - - return - - def handle_col(self, end_condition): - dispatcher = { - (saxtools.START_ELEMENT, fm_ns, u'DATA'): - self.handle_data_tag, - } - #print "START COL" - yield None - #delegate is a generator that handles all the events "within" - #this element - delegate = None - while not self.event == end_condition: - delegate = saxtools.tenorsax.event_loop_body( - dispatcher, delegate, self.event) - yield None - #Element closed. Wrap up - #print "END COL" - self.colIdx += 1 - return - - def handle_data_tag(self, end_condition): - #print "START DATA" - content = u'' - yield None - # gather child elements - while not self.event == end_condition: - if self.event[0] == saxtools.CHARACTER_DATA: - content += self.params - yield None - #Element closed. Wrap up - field = self.fieldNames[self.colIdx] - self.dataSet[field.lower()] = content - #print " DATA(", field, ") ", repr(content) - return +# def handle_meta_field(self, end_condition): +# name = self.params.get((None, u'NAME')) +# yield None +# #Element closed. Wrap up +# name=name.replace(" ","_")# make sure no spaces +# self.fieldNames.append(name) +# logging.debug("FIELD name: "+name) +# return + +# def handle_data(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'ROW'): +# self.handle_row, +# } +# #First round through the generator corresponds to the +# #start element event +# logging.debug("START RESULTSET") +# self.rowcnt = 0 +# yield None +# +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# +# #Element closed. Wrap up +# logging.debug("END RESULTSET") +# +# +# +# return + +# def handle_row(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'COL'): +# self.handle_col, +# } +# logging.debug("START ROW") +# self.dataSet = {} +# self.colIdx = 0 +# yield None +# +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# +# #Element closed. Wrap up +# logging.debug("END ROW") +# self.rowcnt += 1 +# # process collected row data +# update=False +# id_val='' +# +# if self.result.has_key(self.dataSet['key']): +# logging.error("Key %s not unique"%self.dataSet['key']) +# +# self.result[self.dataSet['key']]=self.dataSet +# +# +# return + +# def handle_col(self, end_condition): +# dispatcher = { +# (saxtools.START_ELEMENT, fm_ns, u'DATA'): +# self.handle_data_tag, +# } +# #print "START COL" +# yield None +# #delegate is a generator that handles all the events "within" +# #this element +# delegate = None +# while not self.event == end_condition: +# delegate = saxtools.tenorsax.event_loop_body( +# dispatcher, delegate, self.event) +# yield None +# #Element closed. Wrap up +# #print "END COL" +# self.colIdx += 1 +# return +# +# def handle_data_tag(self, end_condition): +# #print "START DATA" +# content = u'' +# yield None +# # gather child elements +# while not self.event == end_condition: +# if self.event[0] == saxtools.CHARACTER_DATA: +# content += self.params +# yield None +# #Element closed. Wrap up +# field = self.fieldNames[self.colIdx] +# self.dataSet[field.lower()] = content +# #print " DATA(", field, ") ", repr(content) +# return def checkImport(dsn,resultSet): @@ -255,10 +288,10 @@ def importFMPXML(filename): #The "consumer" is our own handler consumer = xml_handler() #Initialize Tenorsax with handler - handler = saxtools.tenorsax(consumer) + #handler = saxtools.tenorsax(consumer) #Resulting tenorsax instance is the SAX handler - parser.setContentHandler(handler) - parser.setFeature(sax.handler.feature_namespaces, 1) + parser.setContentHandler(consumer) + #parser.setFeature(sax.handler.feature_namespaces, 1) parser.parse(filename) resultSet=consumer.result # xml now transformed into an dictionary @@ -320,7 +353,7 @@ if __name__ == "__main__": datefmt='%H:%M:%S') resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml") - news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet) + news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet) print "new"