Changeset 4:e9085ba2bb51 in documentViewer for extraFunction.py


Ignore:
Timestamp:
Jun 16, 2010, 4:38:17 PM (14 years ago)
Author:
casties
Branch:
modularisierung
Message:
  • first fix for timeout issues
  • cleaned logging calls
File:
1 edited

Legend:

Unmodified
Added
Removed
  • extraFunction.py

    r3 r4  
    5454        self.id=id
    5555        self.title=title
     56       
     57    def getHttpData(self, url, data=None, num_tries=3, timeout=40):
     58        """returns result from url+data HTTP request"""
     59        # we do GET (by appending data to url)
     60        if isinstance(data, str) or isinstance(data, unicode):
     61            # if data is string then append
     62            url = "%s?%s"%(url,data)
     63        else:
     64            # we assume it's a dict
     65            url = "%s?%s"%(url,urllib.urlencode(data))
     66       
     67        response = None
     68        errmsg = None
     69        for cnt in range(num_tries):
     70            try:
     71                logging.debug("getHttpData(%s) url=%s"%(cnt+1,url))
     72                if sys.version_info < (2, 6):
     73                    # set timeout on socket -- ugly :-(
     74                    import socket
     75                    socket.setdefaulttimeout(timeout)
     76                    response = urllib2.urlopen(url)
     77                else:
     78                    response = urllib2.urlopen(url,timeout=timeout)
     79                # check result?
     80                break
     81            except urllib2.HTTPError, e:
     82                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
     83                errmsg = str(e)
     84                # stop trying
     85                break
     86            except urllib2.URLError, e:
     87                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
     88                errmsg = str(e)
     88                # do not stop here: retry until num_tries is exhausted
     90                #break
     91
     92        if response is not None:
     93            data = response.read()
     94            response.close()
     95            return data
     96       
     97        raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
     98        #return None
     99
     100               
    56101   
    57102    def getSearch(self, pn=1, pageinfo=None,  docinfo=None, query=None, queryType=None, lemma=None):
     
    72117        selfurl = self.absolute_url()
    73118       
    74         page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
     119        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
    75120        #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False)               
    76         data = page.read()
    77         page.close()
     121        #data = page.read()
     122        #page.close()
    78123       
    79124        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
     
    139184        """get list of pages from fulltext and put in docinfo"""
    140185        xquery = '//pb'
    141         text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
     186        text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    142187        #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    143188        docinfo['numPages'] = text.count("<pb ")
     
    162207            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)           
    163208       
    164         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)
     209        pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)
    165210        """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)"""
    166211       
     
    227272    def getTranslate(self, query=None, language=None):
    228273        """translate into another language"""
    229         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
     274        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
    230275        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
    231         data = pagexml.read()
    232         pagexml.close()
     276        #data = pagexml.read()
     277        #pagexml.close()
    233278        return data
    234279   
    235280    def getLemma(self, lemma=None, language=None):
    236281        """similar words lemma """
    237         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
     282        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
    238283        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
    239         data = pagexml.read()
    240         pagexml.close()
     284        #data = pagexml.read()
     285        #pagexml.close()
    241286        return data
    242287   
    243288    def getLemmaNew(self, query=None, language=None):
    244289        """similar words lemma """
    245        
    246         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
     290        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
    247291        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
    248         data = pagexml.read()
    249         pagexml.close()
     292        #data = pagexml.read()
     293        #pagexml.close()
    250294        return data
    251295
     
    260304         tocDiv = None
    261305         
    262          pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
     306         pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
    263307         #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
    264308         pagedom = Parse(pagexml)
     
    290334        tocDiv = None
    291335       
    292         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
     336        pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
    293337        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
    294338        # post-processing downloaded xml
     
    318362        tocPN = pageinfo['tocPN'] 
    319363       
    320         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 
    321         data = pagexml.read()
    322         pagexml.close()
     364        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 
     365        #data = pagexml.read()
     366        #pagexml.close()
    323367
    324368        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
Note: See TracChangeset for help on using the changeset viewer.