Mercurial > hg > documentViewer
comparison documentViewer.py @ 166:ffb5c62bd459
characterNormalization
author | abukhman |
---|---|
date | Tue, 24 Aug 2010 14:34:32 +0200 |
parents | 820a2a4b23c3 |
children | 7e2b97941a66 |
comparison
legend: equal | deleted | inserted | replaced
165:820a2a4b23c3 | 166:ffb5c62bd459 |
---|---|
66 | 66 |
67 response = None | 67 response = None |
68 errmsg = None | 68 errmsg = None |
69 for cnt in range(num_tries): | 69 for cnt in range(num_tries): |
70 try: | 70 try: |
71 logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) | 71 #logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) |
72 if sys.version_info < (2, 6): | 72 if sys.version_info < (2, 6): |
73 # set timeout on socket -- ugly :-( | 73 # set timeout on socket -- ugly :-( |
74 import socket | 74 import socket |
75 socket.setdefaulttimeout(float(timeout)) | 75 socket.setdefaulttimeout(float(timeout)) |
76 response = urllib2.urlopen(url) | 76 response = urllib2.urlopen(url) |
77 else: | 77 else: |
78 response = urllib2.urlopen(url,timeout=float(timeout)) | 78 response = urllib2.urlopen(url,timeout=float(timeout)) |
79 # check result? | 79 # check result? |
80 break | 80 break |
81 except urllib2.HTTPError, e: | 81 except urllib2.HTTPError, e: |
82 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) | 82 #logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) |
83 errmsg = str(e) | 83 errmsg = str(e) |
84 # stop trying | 84 # stop trying |
85 break | 85 break |
86 except urllib2.URLError, e: | 86 except urllib2.URLError, e: |
87 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) | 87 #logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) |
88 errmsg = str(e) | 88 errmsg = str(e) |
89 # stop trying | 89 # stop trying |
90 #break | 90 #break |
91 | 91 |
92 if response is not None: | 92 if response is not None: |
201 @param mode: defines how to access the document behind url | 201 @param mode: defines how to access the document behind url |
202 @param url: url which contains display information | 202 @param url: url which contains display information |
203 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 203 @param viewMode: if images display images, if text display text, default is images (text,images or auto) |
204 | 204 |
205 ''' | 205 ''' |
206 logging.debug("HHHHHHHHHHHHHH:load the rss") | 206 #logging.debug("HHHHHHHHHHHHHH:load the rss") |
207 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 207 #logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
208 | 208 |
209 if not hasattr(self, 'template'): | 209 if not hasattr(self, 'template'): |
210 # create template folder if it doesn't exist | 210 # create template folder if it doesn't exist |
211 self.manage_addFolder('template') | 211 self.manage_addFolder('template') |
212 | 212 |
235 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) | 235 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
236 @param characterNormalization type of text display (reg, norm, none) | 236 @param characterNormalization type of text display (reg, norm, none) |
237 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) | 237 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) |
238 ''' | 238 ''' |
239 | 239 |
240 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 240 #logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
241 | 241 |
242 if not hasattr(self, 'template'): | 242 if not hasattr(self, 'template'): |
243 # this won't work | 243 # this won't work |
244 logging.error("template folder missing!") | 244 logging.error("template folder missing!") |
245 return "ERROR: template folder missing!" | 245 return "ERROR: template folder missing!" |
321 del params[param] | 321 del params[param] |
322 else: | 322 else: |
323 params[param] = str(val) | 323 params[param] = str(val) |
324 | 324 |
325 # quote values and assemble into query string | 325 # quote values and assemble into query string |
326 logging.debug("XYXXXXX: %s"%repr(params.items())) | 326 #logging.debug("XYXXXXX: %s"%repr(params.items())) |
327 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) | 327 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) |
328 url=self.REQUEST['URL1']+"?"+ps | 328 url=self.REQUEST['URL1']+"?"+ps |
329 return url | 329 return url |
330 | 330 |
331 def getInfo_xml(self,url,mode): | 331 def getInfo_xml(self,url,mode): |
340 | 340 |
341 | 341 |
342 def isAccessible(self, docinfo): | 342 def isAccessible(self, docinfo): |
343 """returns if access to the resource is granted""" | 343 """returns if access to the resource is granted""" |
344 access = docinfo.get('accessType', None) | 344 access = docinfo.get('accessType', None) |
345 logging.debug("documentViewer (accessOK) access type %s"%access) | 345 #logging.debug("documentViewer (accessOK) access type %s"%access) |
346 if access is not None and access == 'free': | 346 if access is not None and access == 'free': |
347 logging.debug("documentViewer (accessOK) access is free") | 347 #logging.debug("documentViewer (accessOK) access is free") |
348 return True | 348 return True |
349 elif access is None or access in self.authgroups: | 349 elif access is None or access in self.authgroups: |
350 # only local access -- only logged in users | 350 # only local access -- only logged in users |
351 user = getSecurityManager().getUser() | 351 user = getSecurityManager().getUser() |
352 logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) | 352 #logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) |
353 if user is not None: | 353 if user is not None: |
354 #print "user: ", user | 354 #print "user: ", user |
355 return (user.getUserName() != "Anonymous User") | 355 return (user.getUserName() != "Anonymous User") |
356 else: | 356 else: |
357 return False | 357 return False |
358 | 358 |
359 logging.error("documentViewer (accessOK) unknown access type %s"%access) | 359 #logging.error("documentViewer (accessOK) unknown access type %s"%access) |
360 return False | 360 return False |
361 | 361 |
362 | 362 |
363 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): | 363 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): |
364 """gibt param von dlInfo aus""" | 364 """gibt param von dlInfo aus""" |
369 | 369 |
370 path=getParentDir(path) | 370 path=getParentDir(path) |
371 | 371 |
372 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | 372 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path |
373 | 373 |
374 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) | 374 #logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) |
375 | 375 |
376 txt = getHttpData(infoUrl) | 376 txt = getHttpData(infoUrl) |
377 if txt is None: | 377 if txt is None: |
378 raise IOError("Unable to get dir-info from %s"%(infoUrl)) | 378 raise IOError("Unable to get dir-info from %s"%(infoUrl)) |
379 | 379 |
380 dom = Parse(txt) | 380 dom = Parse(txt) |
381 sizes=dom.xpath("//dir/size") | 381 sizes=dom.xpath("//dir/size") |
382 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) | 382 #logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) |
383 | 383 |
384 if sizes: | 384 if sizes: |
385 docinfo['numPages'] = int(getTextFromNode(sizes[0])) | 385 docinfo['numPages'] = int(getTextFromNode(sizes[0])) |
386 else: | 386 else: |
387 docinfo['numPages'] = 0 | 387 docinfo['numPages'] = 0 |
403 server=self.digilibBaseUrl+"/servlet/Texter?fn=" | 403 server=self.digilibBaseUrl+"/servlet/Texter?fn=" |
404 metaUrl=server+url.replace("/mpiwg/online","") | 404 metaUrl=server+url.replace("/mpiwg/online","") |
405 if not metaUrl.endswith("index.meta"): | 405 if not metaUrl.endswith("index.meta"): |
406 metaUrl += "/index.meta" | 406 metaUrl += "/index.meta" |
407 | 407 |
408 logging.debug("(getIndexMeta): METAURL: %s"%metaUrl) | 408 #logging.debug("(getIndexMeta): METAURL: %s"%metaUrl) |
409 txt=getHttpData(metaUrl) | 409 txt=getHttpData(metaUrl) |
410 if txt is None: | 410 if txt is None: |
411 raise IOError("Unable to read index meta from %s"%(url)) | 411 raise IOError("Unable to read index meta from %s"%(url)) |
412 | 412 |
413 dom = Parse(txt) | 413 dom = Parse(txt) |
433 return dom | 433 return dom |
434 | 434 |
435 | 435 |
436 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | 436 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): |
437 """gets authorization info from the index.meta file at path or given by dom""" | 437 """gets authorization info from the index.meta file at path or given by dom""" |
438 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) | 438 #logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) |
439 | 439 |
440 access = None | 440 access = None |
441 | 441 |
442 if docinfo is None: | 442 if docinfo is None: |
443 docinfo = {} | 443 docinfo = {} |
457 return docinfo | 457 return docinfo |
458 | 458 |
459 | 459 |
460 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | 460 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): |
461 """gets bibliographical info from the index.meta file at path or given by dom""" | 461 """gets bibliographical info from the index.meta file at path or given by dom""" |
462 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) | 462 #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) |
463 | 463 |
464 if docinfo is None: | 464 if docinfo is None: |
465 docinfo = {} | 465 docinfo = {} |
466 | 466 |
467 if dom is None: | 467 if dom is None: |
468 for x in range(cut): | 468 for x in range(cut): |
469 path=getParentDir(path) | 469 path=getParentDir(path) |
470 dom = self.getIndexMeta(path) | 470 dom = self.getIndexMeta(path) |
471 | 471 |
472 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) | 472 #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) |
473 # put in all raw bib fields as dict "bib" | 473 # put in all raw bib fields as dict "bib" |
474 bib = dom.xpath("//bib/*") | 474 bib = dom.xpath("//bib/*") |
475 if bib and len(bib)>0: | 475 if bib and len(bib)>0: |
476 bibinfo = {} | 476 bibinfo = {} |
477 for e in bib: | 477 for e in bib: |
498 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) | 498 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) |
499 except: pass | 499 except: pass |
500 try: | 500 try: |
501 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) | 501 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) |
502 except: pass | 502 except: pass |
503 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) | 503 #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) |
504 try: | 504 try: |
505 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) | 505 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) |
506 except: | 506 except: |
507 docinfo['lang']='' | 507 docinfo['lang']='' |
508 | 508 |
509 return docinfo | 509 return docinfo |
510 | 510 |
511 | 511 |
512 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): | 512 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): |
513 """parse texttool tag in index meta""" | 513 """parse texttool tag in index meta""" |
514 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) | 514 #logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) |
515 if docinfo is None: | 515 if docinfo is None: |
516 docinfo = {} | 516 docinfo = {} |
517 if docinfo.get('lang', None) is None: | 517 if docinfo.get('lang', None) is None: |
518 docinfo['lang'] = '' # default keine Sprache gesetzt | 518 docinfo['lang'] = '' # default keine Sprache gesetzt |
519 if dom is None: | 519 if dom is None: |
536 archivePath = '/' + archivePath | 536 archivePath = '/' + archivePath |
537 if archiveName and (not archivePath.endswith(archiveName)): | 537 if archiveName and (not archivePath.endswith(archiveName)): |
538 archivePath += "/" + archiveName | 538 archivePath += "/" + archiveName |
539 else: | 539 else: |
540 # try to get archive-path from url | 540 # try to get archive-path from url |
541 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) | 541 #logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) |
542 if (not url.startswith('http')): | 542 if (not url.startswith('http')): |
543 archivePath = url.replace('index.meta', '') | 543 archivePath = url.replace('index.meta', '') |
544 | 544 |
545 if archivePath is None: | 545 if archivePath is None: |
546 # we balk without archive-path | 546 # we balk without archive-path |
630 pass | 630 pass |
631 return docinfo | 631 return docinfo |
632 | 632 |
633 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): | 633 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): |
634 """path ist the path to the images it assumes that the index.meta file is one level higher.""" | 634 """path ist the path to the images it assumes that the index.meta file is one level higher.""" |
635 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) | 635 #logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) |
636 if docinfo is None: | 636 if docinfo is None: |
637 docinfo = {} | 637 docinfo = {} |
638 path=path.replace("/mpiwg/online","") | 638 path=path.replace("/mpiwg/online","") |
639 docinfo['imagePath'] = path | 639 docinfo['imagePath'] = path |
640 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) | 640 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) |
641 | 641 |
642 pathorig=path | 642 pathorig=path |
643 for x in range(cut): | 643 for x in range(cut): |
644 path=getParentDir(path) | 644 path=getParentDir(path) |
645 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) | 645 #logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) |
646 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path | 646 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path |
647 docinfo['imageURL'] = imageUrl | 647 docinfo['imageURL'] = imageUrl |
648 | 648 |
649 #path ist the path to the images it assumes that the index.meta file is one level higher. | 649 #path ist the path to the images it assumes that the index.meta file is one level higher. |
650 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | 650 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) |
652 return docinfo | 652 return docinfo |
653 | 653 |
654 | 654 |
655 def getDocinfo(self, mode, url): | 655 def getDocinfo(self, mode, url): |
656 """returns docinfo depending on mode""" | 656 """returns docinfo depending on mode""" |
657 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) | 657 #logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) |
658 # look for cached docinfo in session | 658 # look for cached docinfo in session |
659 if self.REQUEST.SESSION.has_key('docinfo'): | 659 if self.REQUEST.SESSION.has_key('docinfo'): |
660 docinfo = self.REQUEST.SESSION['docinfo'] | 660 docinfo = self.REQUEST.SESSION['docinfo'] |
661 # check if its still current | 661 # check if its still current |
662 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: | 662 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: |
663 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) | 663 #logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) |
664 return docinfo | 664 return docinfo |
665 # new docinfo | 665 # new docinfo |
666 docinfo = {'mode': mode, 'url': url} | 666 docinfo = {'mode': mode, 'url': url} |
667 if mode=="texttool": #index.meta with texttool information | 667 if mode=="texttool": #index.meta with texttool information |
668 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) | 668 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) |
669 elif mode=="imagepath": | 669 elif mode=="imagepath": |
670 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) | 670 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) |
671 elif mode=="filepath": | 671 elif mode=="filepath": |
672 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) | 672 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) |
673 else: | 673 else: |
674 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) | 674 #logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) |
675 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) | 675 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) |
676 | 676 |
677 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) | 677 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) |
678 self.REQUEST.SESSION['docinfo'] = docinfo | 678 self.REQUEST.SESSION['docinfo'] = docinfo |
679 return docinfo | 679 return docinfo |
680 | 680 |
681 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): | 681 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
682 """returns pageinfo with the given parameters""" | 682 """returns pageinfo with the given parameters""" |
765 "Add a Page Template with optional file content." | 765 "Add a Page Template with optional file content." |
766 | 766 |
767 self._setObject(id, DocumentViewerTemplate(id)) | 767 self._setObject(id, DocumentViewerTemplate(id)) |
768 ob = getattr(self, id) | 768 ob = getattr(self, id) |
769 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() | 769 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() |
770 logging.info("txt %s:"%txt) | 770 #logging.info("txt %s:"%txt) |
771 ob.pt_edit(txt,"text/html") | 771 ob.pt_edit(txt,"text/html") |
772 if title: | 772 if title: |
773 ob.pt_setTitle(title) | 773 ob.pt_setTitle(title) |
774 try: | 774 try: |
775 u = self.DestinationURL() | 775 u = self.DestinationURL() |