Mercurial > hg > documentViewer
changeset 4:e9085ba2bb51 modularisierung
- first fix for timeout issues
- cleaned logging calls
author | casties |
---|---|
date | Wed, 16 Jun 2010 18:38:17 +0200 |
parents | 3ba8479c7aba |
children | 7d10acbad6c0 |
files | documentViewer.py extraFunction.py |
diffstat | 2 files changed, 83 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/documentViewer.py Wed Jun 16 11:16:02 2010 +0200 +++ b/documentViewer.py Wed Jun 16 18:38:17 2010 +0200 @@ -266,7 +266,7 @@ params[param] = str(val) # quote values and assemble into query string - logging.info("XYXXXXX: %s"%repr(params.items())) + logging.debug("XYXXXXX: %s"%repr(params.items())) ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) url=self.REQUEST['URL1']+"?"+ps return url @@ -285,9 +285,9 @@ def isAccessible(self, docinfo): """returns if access to the resource is granted""" access = docinfo.get('accessType', None) - logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) + logging.debug("documentViewer (accessOK) access type %s"%access) if access is not None and access == 'free': - logger("documentViewer (accessOK)", logging.INFO, "access is free") + logging.debug("documentViewer (accessOK) access is free") return True elif access is None or access in self.authgroups: # only local access -- only logged in users @@ -298,7 +298,7 @@ else: return False - logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access) + logging.debug("documentViewer (accessOK) unknown access type %s"%access) return False @@ -314,7 +314,7 @@ infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path - logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl)) + logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) for cnt in range(num_retries): try: @@ -323,12 +323,12 @@ dom = Parse(txt) break except: - logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt)) + logging.error("documentViewer (getdirinfofromdigilib) error reading %s (try %d)"%(infoUrl,cnt)) else: raise IOError("Unable to get dir-info from %s"%(infoUrl)) sizes=dom.xpath("//dir/size") - logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes) + logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) if sizes: docinfo['numPages'] = int(getTextFromNode(sizes[0])) @@ -363,7 +363,7 @@ dom = Parse(txt) break except: - logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) + logging.error("ERROR documentViewer (getIndexMeta) %s (%s)"%sys.exc_info()[0:2]) if dom is None: raise IOError("Unable to read index meta from %s"%(url)) @@ -391,7 +391,7 @@ dom = Parse(txt) break except: - logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) + logging.error("ERROR documentViewer (getPresentationInfoXML) %s (%s)"%sys.exc_info()[0:2]) if dom is None: raise IOError("Unable to read infoXMLfrom %s"%(url)) @@ -401,7 +401,7 @@ def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets authorization info from the index.meta file at path or given by dom""" - logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path)) + logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) access = None @@ -477,7 +477,7 @@ def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): """parse texttool tag in index meta""" - logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) + logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) if docinfo is None: docinfo = {} if docinfo.get('lang', None) is None: @@ -492,7 +492,7 @@ if archiveNames and (len(archiveNames) > 0): archiveName = getTextFromNode(archiveNames[0]) else: - logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) + logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) archivePaths = dom.xpath("//resource/archive-path") if archivePaths and (len(archivePaths) > 0): @@ -504,7 +504,7 @@ archivePath += "/" + archiveName else: # try to get archive-path from url - logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) + logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) if (not url.startswith('http')): archivePath = url.replace('index.meta', '') @@ -596,7 +596,7 @@ def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): """path ist the path to the images it assumes that the index.meta file is one level higher.""" - logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path)) + logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) if docinfo is None: docinfo = {} path=path.replace("/mpiwg/online","") @@ -606,7 +606,7 @@ pathorig=path for x in range(cut): path=getParentDir(path) - logging.error("PATH:"+path) + logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path docinfo['imageURL'] = imageUrl @@ -618,13 +618,13 @@ def getDocinfo(self, mode, url): """returns docinfo depending on mode""" - logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url)) + logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) # look for cached docinfo in session if self.REQUEST.SESSION.has_key('docinfo'): docinfo = self.REQUEST.SESSION['docinfo'] # check if its still current if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: - logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo) + logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) return docinfo # new docinfo docinfo = {'mode': mode, 'url': url} @@ -635,10 +635,10 @@ elif mode=="filepath": docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) else: - logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") + logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) - logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) + logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo
--- a/extraFunction.py Wed Jun 16 11:16:02 2010 +0200 +++ b/extraFunction.py Wed Jun 16 18:38:17 2010 +0200 @@ -53,6 +53,51 @@ self.id=id self.title=title + + def getHttpData(self, url, data=None, num_tries=3, timeout=40): + """returns result from url+data HTTP request""" + # we do GET (by appending data to url) + if isinstance(data, str) or isinstance(data, unicode): + # if data is string then append + url = "%s?%s"%(url,data) + else: + # we assume its a dict + url = "%s?%s"%(url,urllib.urlencode(data)) + + response = None + errmsg = None + for cnt in range(num_tries): + try: + logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) + if sys.version_info < (2, 6): + # set timeout on socket -- ugly :-( + import socket + socket.setdefaulttimeout(timeout) + response = urllib2.urlopen(url) + else: + response = urllib2.urlopen(url,timeout=timeout) + # check result? + break + except urllib2.HTTPError, e: + logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) + errmsg = str(e) + # stop trying + break + except urllib2.URLError, e: + logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) + errmsg = str(e) + # stop trying + #break + + if response is not None: + data = response.read() + response.close() + return data + + raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) + #return None + + def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): """get search list""" @@ -71,10 +116,10 @@ tocPN = pageinfo['tocPN'] selfurl = self.absolute_url() - page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) - data = page.read() - page.close() + #data = page.read() + #page.close() pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) pagedom = Parse(pagexml) @@ -138,7 +183,7 @@ def getNumPages(self,docinfo=None): """get list of pages from fulltext and put in docinfo""" xquery = '//pb' - text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) + text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) docinfo['numPages'] = text.count("<pb ") return docinfo @@ -161,7 +206,7 @@ if highlightQuery is not None: textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) + pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" pagedom = Parse(pagexml) @@ -226,27 +271,26 @@ def getTranslate(self, query=None, language=None): """translate into another languages""" - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) - data = pagexml.read() - pagexml.close() + #data = pagexml.read() + #pagexml.close() return data def getLemma(self, lemma=None, language=None): """simular words lemma """ - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) - data = pagexml.read() - pagexml.close() + #data = pagexml.read() + #pagexml.close() return data def getLemmaNew(self, query=None, language=None): """simular words lemma """ - - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) - data = pagexml.read() - pagexml.close() + #data = pagexml.read() + #pagexml.close() return data def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): @@ -259,7 +303,7 @@ tocSearch = 0 tocDiv = None - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) + pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) pagedom = Parse(pagexml) numdivs = pagedom.xpath("//div[@class='queryResultHits']") @@ -289,7 +333,7 @@ tocSize = 0 tocDiv = None - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) + pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) # post-processing downloaded xml pagedom = Parse(pagexml) @@ -317,9 +361,9 @@ tocMode = pageinfo['tocMode'] tocPN = pageinfo['tocPN'] - pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) - data = pagexml.read() - pagexml.close() + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) + #data = pagexml.read() + #pagexml.close() page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) text = page.replace('mode=image','mode=texttool')