Changeset 4:e9085ba2bb51 in documentViewer


Ignore:
Timestamp:
Jun 16, 2010, 4:38:17 PM (14 years ago)
Author:
casties
Branch:
modularisierung
Message:
  • first fix for timeout issues
  • cleaned logging calls
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • documentViewer.py

    r2 r4  
    267267               
    268268        # quote values and assemble into query string
    269         logging.info("XYXXXXX: %s"%repr(params.items()))
     269        logging.debug("XYXXXXX: %s"%repr(params.items()))
    270270        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
    271271        url=self.REQUEST['URL1']+"?"+ps
     
    286286        """returns if access to the resource is granted"""
    287287        access = docinfo.get('accessType', None)
    288         logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
     288        logging.debug("documentViewer (accessOK) access type %s"%access)
    289289        if access is not None and access == 'free':
    290             logger("documentViewer (accessOK)", logging.INFO, "access is free")
     290            logging.debug("documentViewer (accessOK) access is free")
    291291            return True
    292292        elif access is None or access in self.authgroups:
     
    299299                return False
    300300       
    301         logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
     301        logging.debug("documentViewer (accessOK) unknown access type %s"%access)
    302302        return False
    303303   
     
    315315        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    316316   
    317         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))
     317        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
    318318       
    319319        for cnt in range(num_retries):
     
    324324                break
    325325            except:
    326                 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
     326                logging.error("documentViewer (getdirinfofromdigilib) error reading %s (try %d)"%(infoUrl,cnt))
    327327        else:
    328328            raise IOError("Unable to get dir-info from %s"%(infoUrl))
    329329       
    330330        sizes=dom.xpath("//dir/size")
    331         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)
     331        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
    332332       
    333333        if sizes:
     
    364364                break
    365365            except:
    366                 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
     366                logging.error("ERROR documentViewer (getIndexMeta) %s (%s)"%sys.exc_info()[0:2])
    367367               
    368368        if dom is None:
     
    392392                break
    393393            except:
    394                 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
     394                logging.error("ERROR documentViewer (getPresentationInfoXML) %s (%s)"%sys.exc_info()[0:2])
    395395               
    396396        if dom is None:
     
    402402    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    403403        """gets authorization info from the index.meta file at path or given by dom"""
    404         logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))
     404        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
    405405       
    406406        access = None
     
    478478    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    479479        """parse texttool tag in index meta"""
    480         logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
     480        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    481481        if docinfo is None:
    482482           docinfo = {}
     
    493493            archiveName = getTextFromNode(archiveNames[0])
    494494        else:
    495             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))
     495            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
    496496       
    497497        archivePaths = dom.xpath("//resource/archive-path")
     
    505505        else:
    506506            # try to get archive-path from url
    507             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
     507            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
    508508            if (not url.startswith('http')):
    509509                archivePath = url.replace('index.meta', '')
     
    597597    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    598598        """path ist the path to the images it assumes that the index.meta file is one level higher."""
    599         logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
     599        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    600600        if docinfo is None:
    601601            docinfo = {}
     
    607607        for x in range(cut):       
    608608                path=getParentDir(path)
    609         logging.error("PATH:"+path)
     609        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    610610        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    611611        docinfo['imageURL'] = imageUrl
     
    619619    def getDocinfo(self, mode, url):
    620620        """returns docinfo depending on mode"""
    621         logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
     621        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    622622        # look for cached docinfo in session
    623623        if self.REQUEST.SESSION.has_key('docinfo'):
     
    625625            # check if its still current
    626626            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
    627                 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)
     627                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
    628628                return docinfo
    629629        # new docinfo
     
    636636            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    637637        else:
    638             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
     638            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
    639639            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
    640640                       
    641         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
     641        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    642642        self.REQUEST.SESSION['docinfo'] = docinfo
    643643        return docinfo
  • extraFunction.py

    r3 r4  
    5454        self.id=id
    5555        self.title=title
     56       
     57    def getHttpData(self, url, data=None, num_tries=3, timeout=40):
     58        """returns result from url+data HTTP request"""
     59        # we do GET (by appending data to url)
     60        if isinstance(data, str) or isinstance(data, unicode):
     61            # if data is string then append
     62            url = "%s?%s"%(url,data)
     63        else:
     64            # we assume its a dict
     65            url = "%s?%s"%(url,urllib.urlencode(data))
     66       
     67        response = None
     68        errmsg = None
     69        for cnt in range(num_tries):
     70            try:
     71                logging.debug("getHttpData(%s) url=%s"%(cnt+1,url))
     72                if sys.version_info < (2, 6):
     73                    # set timeout on socket -- ugly :-(
     74                    import socket
     75                    socket.setdefaulttimeout(timeout)
     76                    response = urllib2.urlopen(url)
     77                else:
     78                    response = urllib2.urlopen(url,timeout=timeout)
     79                # check result?
     80                break
     81            except urllib2.HTTPError, e:
     82                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
     83                errmsg = str(e)
     84                # stop trying
     85                break
     86            except urllib2.URLError, e:
     87                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
     88                errmsg = str(e)
     89                # stop trying
     90                #break
     91
     92        if response is not None:
     93            data = response.read()
     94            response.close()
     95            return data
     96       
     97        raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
     98        #return None
     99
     100               
    56101   
    57102    def getSearch(self, pn=1, pageinfo=None,  docinfo=None, query=None, queryType=None, lemma=None):
     
    72117        selfurl = self.absolute_url()
    73118       
    74         page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
     119        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
    75120        #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False)               
    76         data = page.read()
    77         page.close()
     121        #data = page.read()
     122        #page.close()
    78123       
    79124        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
     
    139184        """get list of pages from fulltext and put in docinfo"""
    140185        xquery = '//pb'
    141         text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
     186        text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    142187        #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    143188        docinfo['numPages'] = text.count("<pb ")
     
    162207            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)           
    163208       
    164         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)
     209        pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)
    165210        """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)"""
    166211       
     
    227272    def getTranslate(self, query=None, language=None):
    228273        """translate into another languages"""
    229         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
     274        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
    230275        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
    231         data = pagexml.read()
    232         pagexml.close()
     276        #data = pagexml.read()
     277        #pagexml.close()
    233278        return data
    234279   
    235280    def getLemma(self, lemma=None, language=None):
    236281        """simular words lemma """
    237         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
     282        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
    238283        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
    239         data = pagexml.read()
    240         pagexml.close()
     284        #data = pagexml.read()
     285        #pagexml.close()
    241286        return data
    242287   
    243288    def getLemmaNew(self, query=None, language=None):
    244289        """simular words lemma """
    245        
    246         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
     290        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
    247291        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
    248         data = pagexml.read()
    249         pagexml.close()
     292        #data = pagexml.read()
     293        #pagexml.close()
    250294        return data
    251295
     
    260304         tocDiv = None
    261305         
    262          pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
     306         pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
    263307         #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
    264308         pagedom = Parse(pagexml)
     
    290334        tocDiv = None
    291335       
    292         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
     336        pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
    293337        #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
    294338        # post-processing downloaded xml
     
    318362        tocPN = pageinfo['tocPN'] 
    319363       
    320         pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 
    321         data = pagexml.read()
    322         pagexml.close()
     364        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 
     365        #data = pagexml.read()
     366        #pagexml.close()
    323367
    324368        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
Note: See TracChangeset for help on using the changeset viewer.