File:  [Repository] / MPIWGWeb / Attic / updatePersonalWWW.py
Revision 1.1.2.6: download - view: text, annotated - select for diffs - revision graph
Tue Apr 28 13:35:17 2009 UTC (15 years, 1 month ago) by casties
Branches: r2
typo

try:
    import psycopg2 as psycopg
    psyco = 2
except:
    import psycopg
    psyco = 1
    
import logging

from xml import sax
from amara import saxtools

# XML namespace of the FileMaker 8 FMPXMLRESULT export format
fm_ns = 'http://www.filemaker.com/fmpxmlresult'

# Fields that are taken from the XML and compared against the database;
# a differing value is reported as a conflict.
# Earlier field set, kept for reference:
#checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
checkFields = [
    'key',
    'first_name',
    'last_name',
    'titles_new',
    'home_inst',
    'e_mail2',
]

def sql_quote(v):
    """Return v as a single-quoted SQL literal.

    Every single quote in v is doubled and every backslash in v is doubled
    before the value is wrapped in single quotes.
    """
    # The two substitutions never produce each other's character, so the
    # order in which they are applied does not matter.
    escaped = v.replace("\\", "\\\\").replace("'", "''")
    return "'" + escaped + "'"

def SimpleSearch(curs,query, args=None):
    """Execute an SQL query on a cursor and return all result rows.

    @param curs: database cursor the query is executed on
    @param query: SQL statement; placeholders are filled from args by the driver
    @param args: optional query parameters handed to curs.execute()
    @return: the rows from curs.fetchall(), or None if the driver reports
             a database error while fetching (e.g. no result set available)
    """
    logging.debug("executing: %s", query)
    if psyco == 1:
        # the old psycopg (version 1) module gets the statement UTF-8 encoded
        query = query.encode("UTF-8")

    curs.execute(query, args)
    logging.debug("sql done")
    try:
        return curs.fetchall()
    except psycopg.Error:
        # Only database errors are treated as "no rows"; anything else
        # (KeyboardInterrupt, programming mistakes) must not be swallowed.
        logging.debug("no rows fetched", exc_info=True)
        return None
    
class xml_handler:
    """
    Event consumer that turns a FileMaker XML export (FMPXMLRESULT format)
    into a dictionary of rows (self.result).

    The handle_* methods are generators. They are registered in dispatcher
    dictionaries keyed by (event type, namespace, element name), read the
    current event from self.event and its payload from self.params, and
    finish once self.event equals the end condition they were given.

    NOTE(review): self.params is never assigned in this class and self.event
    is only initialised here; presumably saxtools.tenorsax sets both on the
    consumer before resuming a generator -- confirm against amara.
    """

    def __init__(self):
        '''
        Set up the dispatch table and the empty result containers.
        '''
        # collected rows: value of the row's 'key' field -> row dictionary
        self.result={}
        # event currently being processed
        self.event = None
        # entry points for the two top level sections of the export
        self.top_dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
            self.handle_meta_fields,
            (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
            self.handle_data,
            }

        # number of ROW elements processed so far
        self.rowcnt = 0
        # field names from the METADATA section, in column order
        self.fieldNames=[]

        # initialised here but not used by any method of this class
        self.dbIDs = {}
        self.newDataset = []
        self.conflicts = []
        self.ok = []

    def handle_meta_fields(self, end_condition):
        '''
        Generator handling the METADATA element; its FIELD children are
        delegated to handle_meta_field.
        @param end_condition: event that marks the end of this element
        '''
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
            self.handle_meta_field,
            }
        #First round through the generator corresponds to the
        #start element event
        logging.debug("START METADATA")
        yield None

        #delegate is a generator that handles all the events "within"
        #this element
        delegate = None
        while not self.event == end_condition:
            delegate = saxtools.tenorsax.event_loop_body(
                dispatcher, delegate, self.event)
            yield None

        #Element closed. Wrap up
        logging.debug("END METADATA")

        # update_fields refers to the same list object as fieldNames
        self.update_fields = self.fieldNames

        logging.debug("xml-fieldnames:"+repr(self.fieldNames))

    def handle_meta_field(self, end_condition):
        '''
        Generator handling one FIELD element: appends its NAME attribute,
        with spaces replaced by underscores, to self.fieldNames.
        @param end_condition: event that marks the end of this element (unused)
        '''
        # self.params is read before the first yield, i.e. while the start
        # element event is still current
        name = self.params.get((None, u'NAME'))
        yield None
        #Element closed.  Wrap up
        name=name.replace(" ","_")# make sure no spaces
        self.fieldNames.append(name)
        logging.debug("FIELD name: "+name)

    def handle_data(self, end_condition):
        '''
        Generator handling the RESULTSET element; resets the row counter and
        delegates the ROW children to handle_row.
        @param end_condition: event that marks the end of this element
        '''
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'ROW'):
            self.handle_row,
            }
        #First round through the generator corresponds to the
        #start element event
        logging.debug("START RESULTSET")
        self.rowcnt = 0
        yield None

        #delegate is a generator that handles all the events "within"
        #this element
        delegate = None
        while not self.event == end_condition:
            delegate = saxtools.tenorsax.event_loop_body(
                dispatcher, delegate, self.event)
            yield None

        #Element closed.  Wrap up
        logging.debug("END RESULTSET")

    def handle_row(self, end_condition):
        '''
        Generator handling one ROW element: collects its columns in
        self.dataSet and stores the finished row in self.result under the
        value of its 'key' field.
        @param end_condition: event that marks the end of this element
        '''
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'COL'):
            self.handle_col,
            }
        logging.debug("START ROW")
        # fresh row dictionary and column position for every ROW
        self.dataSet = {}
        self.colIdx = 0
        yield None

        #delegate is a generator that handles all the events "within"
        #this element
        delegate = None
        while not self.event == end_condition:
            delegate = saxtools.tenorsax.event_loop_body(
                dispatcher, delegate, self.event)
            yield None

        #Element closed.  Wrap up
        logging.debug("END ROW")
        self.rowcnt += 1
        # process collected row data
        key = self.dataSet['key']
        if key in self.result:
            # a duplicate is only reported; the later row replaces the earlier
            logging.error("Key %s not unique", key)

        self.result[key]=self.dataSet

    def handle_col(self, end_condition):
        '''
        Generator handling one COL element; delegates its DATA children to
        handle_data_tag and advances the column index when the element ends.
        @param end_condition: event that marks the end of this element
        '''
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'DATA'):
            self.handle_data_tag,
            }
        yield None
        #delegate is a generator that handles all the events "within"
        #this element
        delegate = None
        while not self.event == end_condition:
            delegate = saxtools.tenorsax.event_loop_body(
                dispatcher, delegate, self.event)
            yield None
        #Element closed.  Wrap up
        self.colIdx += 1

    def handle_data_tag(self, end_condition):
        '''
        Generator handling one DATA element: concatenates its character data
        and stores it in self.dataSet under the lower-cased name of the
        current column's field.
        @param end_condition: event that marks the end of this element
        '''
        content = u''
        yield None
        # gather child elements
        while not self.event == end_condition:
            if self.event[0] == saxtools.CHARACTER_DATA:
                # for character data events self.params is the text itself
                content += self.params
            yield None
        #Element closed.  Wrap up
        field = self.fieldNames[self.colIdx]
        self.dataSet[field.lower()] = content


def checkImport(dsn,resultSet):
    """Compare imported rows with the personal_www table.

    @param dsn: database connection string handed to psycopg.connect()
    @param resultSet: dictionary key -> row dictionary (see importFMPXML)
    @return: tuple (new, conflicts); new is the list of keys from resultSet
             that are not in the table, conflicts maps every key that is in
             the table and for which checkForConflicts reports a conflict to
             the value returned by checkForConflicts
    """
    #now connect to the database
    # NOTE(review): this writes the complete DSN, including any password in
    # it, to the log.
    logging.info("dsn: %s"%dsn)
    dbCon = psycopg.connect(dsn)
    try:
        db = dbCon.cursor()
        try:
            results = SimpleSearch(db, "select key from personal_www")

            # a set keeps the membership test below cheap for large tables
            known = set()
            for row in results:
                if row[0]:
                    known.add(row[0].decode('utf-8'))

            #first step detect new entries and conflicts
            new = []
            conflicts = {}
            for key in resultSet:
                if key not in known:
                    new.append(key)
                    continue
                conflict, ret = checkForConflicts(db, resultSet[key], key)
                if conflict:
                    conflicts[key] = ret

            return new, conflicts
        finally:
            db.close()
    finally:
        # only SELECTs were issued, so there is nothing to commit
        dbCon.close()

def importFMPXML(filename):
    '''
    Parse a FileMaker XML file (FMPXMLRESULT format) into a dictionary.
    @param filename: xmlfile filename
    @return: the result dictionary filled by the xml_handler
    '''
    reader = sax.make_parser()
    # our own handler collects the rows; tenorsax wraps it so that it can
    # be installed as the SAX content handler
    collector = xml_handler()
    reader.setFeature(sax.handler.feature_namespaces, 1)
    reader.setContentHandler(saxtools.tenorsax(collector))
    reader.parse(filename)
    # the xml is now transformed into a dictionary
    return collector.result
  
    

def checkForConflicts(cursor,dataSet,key):
    """Compare one imported row with its row in the personal_www table.

    @param cursor: database cursor used for the lookup
    @param dataSet: row dictionary from the XML, field name -> value
    @param key: value of the key column identifying the row
    @return: tuple (conflict, details). (True, None) if the lookup returns
             no row; otherwise conflict says whether any field in
             checkFields differs and details is a list of
             (field, database value, xml value) tuples for those fields.
    """
    # The column list comes from the module constant; the key comes from the
    # imported file and is therefore passed as a bound parameter instead of
    # being pasted into the statement.
    qstr = "select %s from personal_www where key=%%s" % ",".join(checkFields)

    keyArg = key
    if psyco == 1:
        # same treatment SimpleSearch gives the statement for psycopg 1
        keyArg = key.encode("UTF-8")

    sr = SimpleSearch(cursor, qstr, (keyArg,))
    if not sr:
        return True, None

    ret = []
    # the selected columns are in the same order as checkFields
    for checkField, dbValueR in zip(checkFields, sr[0]):
        if dbValueR:
            dbValue = dbValueR.decode('utf-8')
        else:
            # NULL and empty columns both compare as the empty string
            dbValue = ""

        setValue = dataSet[checkField]
        logging.debug("             %s %s %s %s",
                      repr(key), checkField, repr(dbValue), repr(setValue))
        # surrounding whitespace is not considered a difference
        if dbValue.strip() != setValue.strip():
            ret.append((checkField, dbValue, setValue))

    return bool(ret), ret
    
    
##
## public static int main()
##

if __name__ == "__main__":
    # Usage: updatePersonalWWW.py [xmlfile [dsn]]
    # Both arguments are optional; without them the script behaves as before.
    import sys

    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    filename = "/Users/dwinter/Desktop/personalwww.xml"
    # NOTE(review): this default DSN embeds a database password in the
    # source; it is kept only so that existing invocations keep working.
    dsn = "dbname=personalwww host=xserve02a user=mysql password=e1nste1n"

    cliArgs = sys.argv[1:]
    if len(cliArgs) > 0:
        filename = cliArgs[0]
    if len(cliArgs) > 1:
        dsn = cliArgs[1]

    resultSet = importFMPXML(filename=filename)
    news, conflicts = checkImport(dsn=dsn, resultSet=resultSet)

    # single-argument print(...) gives the same output on Python 2 and 3
    print("new")
    print("%d %s" % (len(news), news))
    print("-----------")
    print("conflicts")
    print(conflicts)
    
#    update_fields = None
#    
#    if options.update_fields:
#        update_fields = [string.strip(s) for s in options.update_fields.split(',')]
#    
#    parser = sax.make_parser()
#    #The "consumer" is our own handler
#    consumer = xml_handler(dsn=options.dsn,table=options.table,
#                 update_fields=update_fields,id_field=options.id_field,
#                 sync_mode=options.sync_mode)
#    #Initialize Tenorsax with handler
#    handler = saxtools.tenorsax(consumer)
#    #Resulting tenorsax instance is the SAX handler 
#    parser.setContentHandler(handler)
#    parser.setFeature(sax.handler.feature_namespaces, 1)
#    parser.parse(options.filename)  
#    
#    
#    print "DONE!"

  

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>