Changeset 4:e9085ba2bb51 in documentViewer
- Timestamp: Jun 16, 2010, 4:38:17 PM (14 years ago)
- Branch: modularisierung
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
documentViewer.py
r2 r4 267 267 268 268 # quote values and assemble into query string 269 logging. info("XYXXXXX: %s"%repr(params.items()))269 logging.debug("XYXXXXX: %s"%repr(params.items())) 270 270 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 271 271 url=self.REQUEST['URL1']+"?"+ps … … 286 286 """returns if access to the resource is granted""" 287 287 access = docinfo.get('accessType', None) 288 logg er("documentViewer (accessOK)", logging.INFO, "access type %s"%access)288 logging.debug("documentViewer (accessOK) access type %s"%access) 289 289 if access is not None and access == 'free': 290 logg er("documentViewer (accessOK)", logging.INFO, "access is free")290 logging.debug("documentViewer (accessOK) access is free") 291 291 return True 292 292 elif access is None or access in self.authgroups: … … 299 299 return False 300 300 301 logg er("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)301 logging.debug("documentViewer (accessOK) unknown access type %s"%access) 302 302 return False 303 303 … … 315 315 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 316 316 317 logg er("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))317 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 318 318 319 319 for cnt in range(num_retries): … … 324 324 break 325 325 except: 326 logg er("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))326 logging.error("documentViewer (getdirinfofromdigilib) error reading %s (try %d)"%(infoUrl,cnt)) 327 327 else: 328 328 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 329 329 330 330 sizes=dom.xpath("//dir/size") 331 logg er("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)331 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) 332 332 333 333 if sizes: … … 364 364 break 365 365 except: 366 logg er("ERROR documentViewer 
(getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])366 logging.error("ERROR documentViewer (getIndexMeta) %s (%s)"%sys.exc_info()[0:2]) 367 367 368 368 if dom is None: … … 392 392 break 393 393 except: 394 logg er("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])394 logging.error("ERROR documentViewer (getPresentationInfoXML) %s (%s)"%sys.exc_info()[0:2]) 395 395 396 396 if dom is None: … … 402 402 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 403 403 """gets authorization info from the index.meta file at path or given by dom""" 404 logg er("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))404 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) 405 405 406 406 access = None … … 478 478 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 479 479 """parse texttool tag in index meta""" 480 logg er("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))480 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) 481 481 if docinfo is None: 482 482 docinfo = {} … … 493 493 archiveName = getTextFromNode(archiveNames[0]) 494 494 else: 495 logg er("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))495 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 496 496 497 497 archivePaths = dom.xpath("//resource/archive-path") … … 505 505 else: 506 506 # try to get archive-path from url 507 logg er("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))507 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) 508 508 if (not url.startswith('http')): 509 509 archivePath = url.replace('index.meta', '') … … 597 597 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 598 598 """path ist the path to the images it assumes 
that the index.meta file is one level higher.""" 599 logg er("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))599 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) 600 600 if docinfo is None: 601 601 docinfo = {} … … 607 607 for x in range(cut): 608 608 path=getParentDir(path) 609 logging. error("PATH:"+path)609 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 610 610 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 611 611 docinfo['imageURL'] = imageUrl … … 619 619 def getDocinfo(self, mode, url): 620 620 """returns docinfo depending on mode""" 621 logg er("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))621 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) 622 622 # look for cached docinfo in session 623 623 if self.REQUEST.SESSION.has_key('docinfo'): … … 625 625 # check if its still current 626 626 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 627 logg er("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)627 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) 628 628 return docinfo 629 629 # new docinfo … … 636 636 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 637 637 else: 638 logg er("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")638 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 639 639 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 640 640 641 logg er("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)641 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 642 642 self.REQUEST.SESSION['docinfo'] = docinfo 643 643 return docinfo -
extraFunction.py
r3 r4 54 54 self.id=id 55 55 self.title=title 56 57 def getHttpData(self, url, data=None, num_tries=3, timeout=40): 58 """returns result from url+data HTTP request""" 59 # we do GET (by appending data to url) 60 if isinstance(data, str) or isinstance(data, unicode): 61 # if data is string then append 62 url = "%s?%s"%(url,data) 63 else: 64 # we assume its a dict 65 url = "%s?%s"%(url,urllib.urlencode(data)) 66 67 response = None 68 errmsg = None 69 for cnt in range(num_tries): 70 try: 71 logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) 72 if sys.version_info < (2, 6): 73 # set timeout on socket -- ugly :-( 74 import socket 75 socket.setdefaulttimeout(timeout) 76 response = urllib2.urlopen(url) 77 else: 78 response = urllib2.urlopen(url,timeout=timeout) 79 # check result? 80 break 81 except urllib2.HTTPError, e: 82 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) 83 errmsg = str(e) 84 # stop trying 85 break 86 except urllib2.URLError, e: 87 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) 88 errmsg = str(e) 89 # stop trying 90 #break 91 92 if response is not None: 93 data = response.read() 94 response.close() 95 return data 96 97 raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) 98 #return None 99 100 56 101 57 102 def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): … … 72 117 selfurl = self.absolute_url() 73 118 74 page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))119 data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, 
viewMode,highlightQuery)) 75 120 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 76 data = page.read()77 page.close()121 #data = page.read() 122 #page.close() 78 123 79 124 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) … … 139 184 """get list of pages from fulltext and put in docinfo""" 140 185 xquery = '//pb' 141 text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))186 text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 142 187 #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 143 188 docinfo['numPages'] = text.count("<pb ") … … 162 207 textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) 163 208 164 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)209 pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) 165 210 """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" 166 211 … … 227 272 def getTranslate(self, query=None, language=None): 228 273 """translate into another languages""" 229 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))274 data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) 230 275 
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) 231 data = pagexml.read()232 pagexml.close()276 #data = pagexml.read() 277 #pagexml.close() 233 278 return data 234 279 235 280 def getLemma(self, lemma=None, language=None): 236 281 """simular words lemma """ 237 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))282 data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) 238 283 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) 239 data = pagexml.read()240 pagexml.close()284 #data = pagexml.read() 285 #pagexml.close() 241 286 return data 242 287 243 288 def getLemmaNew(self, query=None, language=None): 244 289 """simular words lemma """ 245 246 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 290 data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 247 291 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 248 data = pagexml.read()249 pagexml.close()292 #data = pagexml.read() 293 #pagexml.close() 250 294 return data 251 295 … … 260 304 tocDiv = None 261 305 262 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))306 pagexml = 
self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) 263 307 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) 264 308 pagedom = Parse(pagexml) … … 290 334 tocDiv = None 291 335 292 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))336 pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 293 337 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 294 338 # post-processing downloaded xml … … 318 362 tocPN = pageinfo['tocPN'] 319 363 320 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))321 data = pagexml.read()322 pagexml.close()364 data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 365 #data = pagexml.read() 366 #pagexml.close() 323 367 324 368 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
Note: See TracChangeset for help on using the changeset viewer.