Changeset 84:a6e4f9b6729a in documentViewer
- Timestamp:
- Mar 19, 2010, 11:42:40 AM (15 years ago)
- Branch:
- default
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentViewer.py
r83 r84 13 13 14 14 import Ft.Xml.XPath 15 import cStringIO 15 16 import xmlrpclib 16 17 import os.path … … 23 24 import urlparse 24 25 from types import * 26 25 27 def logger(txt,method,txt2): 26 28 """logging""" … … 46 48 return rc 47 49 50 def serializeNode(node, encoding='utf-8'): 51 """returns a string containing node as XML""" 52 buf = cStringIO.StringIO() 53 Print(node, stream=buf, encoding=encoding) 54 s = buf.getvalue() 55 buf.close() 56 return s 57 48 58 49 59 def getParentDir(path): … … 79 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 80 90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) 81 image_main = PageTemplateFile('zpt/image_main', globals()) 91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! 92 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 93 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 82 94 head_main = PageTemplateFile('zpt/head_main', globals()) 83 95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) … … 89 101 90 102 91 def __init__(self,id,image ViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):103 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): 92 104 """init document viewer""" 93 105 self.id=id 94 106 self.title=title 95 self.imageViewerUrl=imageViewerUrl96 self.textViewerUrl=textViewerUrl97 98 if not digilibBaseUrl:99 self.digilibBaseUrl = self.findDigilibUrl()100 else:101 self.digilibBaseUrl = digilibBaseUrl102 107 self.thumbcols = thumbcols 103 108 self.thumbrows = thumbrows 104 109 # authgroups is list of authorized groups (delimited by ,) 105 110 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 106 # add template folder so we can always use template.something 107 self.manage_addFolder('template') 108 111 # create template folder so we can always use template.something 112 113 templateFolder = Folder('template') 114 #self['template'] = templateFolder # Zope-2.12 style 115 self._setObject('template',templateFolder) # old style 116 try: 117 from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy 118 xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) 119 #templateFolder['fulltextclient'] = xmlRpcClient 120 templateFolder._setObject('fulltextclient',xmlRpcClient) 121 except Exception, e: 122 logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) 123 try: 124 from Products.zogiLib.zogiLib import zogiLib 125 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 126 #templateFolder['zogilib'] = zogilib 127 templateFolder._setObject('zogilib',zogilib) 128 except Exception, e: 129 logging.error("Unable to create zogiLib for zogilib: "+str(e)) 130 109 131 110 132 security.declareProtected('View','thumbs_rss') … … 117 139 118 140 ''' 119 logging. info("HHHHHHHHHHHHHH:load the rss")141 logging.debug("HHHHHHHHHHHHHH:load the rss") 120 142 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 121 143 … … 140 162 141 163 security.declareProtected('View','index_html') 142 def index_html(self, mode,url,viewMode="auto",start=None,pn=1,mk=None):164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): 143 165 ''' 144 166 view it … … 149 171 ''' 150 172 151 logg er("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 152 174 153 175 if not hasattr(self, 'template'): 154 # create template folder if it doesn't exist 155 self.manage_addFolder('template') 156 157 if not self.digilibBaseUrl: 176 # this won't work 177 logging.error("template folder missing!") 178 return "ERROR: template folder missing!" 179 180 if not getattr(self, 'digilibBaseUrl', None): 158 181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 159 182 … … 163 186 164 187 if viewMode=="auto": # automodus gewaehlt 165 if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert 166 189 viewMode="text" 167 190 else: … … 180 203 ret+="mk=%s"%m 181 204 return ret 205 206 def findDigilibUrl(self): 207 """try to get the digilib URL from zogilib""" 208 url = self.template.zogilib.getDLBaseUrl() 209 return url 210 211 def getStyle(self, idx, selected, style=""): 212 """returns a string with the given style and append 'sel' if path == selected.""" 213 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 214 if idx == selected: 215 return style + 'sel' 216 else: 217 return style 182 218 183 219 def getLink(self,param=None,val=None): … … 190 226 else: 191 227 params[param] = str(val) 192 if params["mode"] == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 228 229 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 193 230 params["mode"] = "imagepath" 194 231 params["url"] = getParentDir(params["url"]) … … 226 263 227 264 228 def getStyle(self, idx, selected, style=""):229 """returns a string with the given style and append 'sel' if path == selected."""230 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))231 if idx == selected:232 return style + 'sel'233 else:234 return style235 236 def getTextLanguage(self,url,docinfo):237 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])238 lang = urlencode({'':docinfo['lang']})239 return lang240 241 242 265 def isAccessible(self, docinfo): 243 266 """returns if access to the resource is granted""" … … 292 315 else: 293 316 docinfo['numPages'] = 0 317 318 # TODO: produce and keep list of image names and numbers 294 319 295 320 return docinfo … … 432 457 433 458 434 def getNumPages(self, xquery, docinfo=None): #New Method 24.02.2010 435 text=self.viewerTemplates.query.eval("/mpdl/interface/xquery.xql","document="+ docinfo['textURLPath'] +"&xquery="+str(xquery)) 436 docinfo['numPages'] = text.count("<pb ") 437 return docinfo 438 439 440 def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): 441 """parse texttool tag in index meta""" 442 logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url)) 443 if docinfo is None: 459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 460 """parse texttool tag in index meta""" 461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) 462 if docinfo is None: 444 463 docinfo = {} 445 446 if docinfo.get('lang',None) is None: 447 docinfo['lang']='' # default keine Sprache gesetzt 448 if dom is None: 449 dom = self.getIndexMeta(url) 450 451 archivePath = None 452 archiveName = None 453 454 archiveNames=dom.xpath("//resource/name") 455 if archiveNames and (len(archiveNames)>0): 456 archiveName=getTextFromNode(archiveNames[0]) 457 else: 458 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url)) 459 460 archivePaths=dom.xpath("//resource/archive-path") 461 if archivePaths and (len(archivePaths)>0): 462 archivePath=getTextFromNode(archivePaths[0]) 463 # clean up archive path 464 if archivePath[0] != '/': 465 archivePath = '/' + archivePath 466 if archiveName and (not archivePath.endswith(archiveName)): 467 archivePath += "/" + archiveName 468 else: 469 # try to get archive-path from url 470 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url)) 471 if (not url.startswith('http')): 472 archivePath = url.replace('index.meta', '') 473 474 if archivePath is None: 475 # we balk without archive-path 476 raise IOError("Missing archive-path (for text-tool) in %s"%(url)) 477 478 imageDirs=dom.xpath("//texttool/image") 479 if imageDirs and (len(imageDirs)>0): 480 imageDir=getTextFromNode(imageDirs[0]) 481 482 else: 483 # we balk with no image tag / not necessary anymore because textmode is now standard 484 #raise IOError("No text-tool info in %s"%(url)) 485 imageDir="" 486 #xquery="//pb" 487 docinfo['imagePath'] = "" # keine Bilder 488 docinfo['imageURL'] = "" 489 490 if imageDir and archivePath: 491 #print "image: ", imageDir, " archivepath: ", archivePath 492 imageDir=os.path.join(archivePath,imageDir) 493 imageDir=imageDir.replace("/mpiwg/online",'') 494 docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) 495 docinfo['imagePath'] = imageDir 496 497 docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir 498 499 viewerUrls=dom.xpath("//texttool/digiliburlprefix") 500 if viewerUrls and (len(viewerUrls)>0): 501 viewerUrl=getTextFromNode(viewerUrls[0]) 502 docinfo['viewerURL'] = viewerUrl 503 504 textUrls=dom.xpath("//texttool/text") 505 if textUrls and (len(textUrls)>0): 506 textUrl=getTextFromNode(textUrls[0]) 507 if urlparse.urlparse(textUrl)[0]=="": #keine url 508 textUrl=os.path.join(archivePath,textUrl) 509 # fix URLs starting with /mpiwg/online 510 if textUrl.startswith("/mpiwg/online"): 511 textUrl = textUrl.replace("/mpiwg/online",'',1) 512 513 docinfo['textURL'] = textUrl 514 515 textUrls=dom.xpath("//texttool/text-url-path") 516 if textUrls and (len(textUrls)>0): 517 textUrl=getTextFromNode(textUrls[0]) 518 docinfo['textURLPath'] = textUrl 519 520 presentationUrls=dom.xpath("//texttool/presentation") 521 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get info von bib tag 522 523 if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen 524 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 525 # durch den relativen Pfad auf die presentation infos 526 presentationPath = getTextFromNode(presentationUrls[0]) 527 if url.endswith("index.meta"): 528 presentationUrl=url.replace('index.meta',presentationPath) 529 else: 530 presentationUrl=url + "/" + presentationPath 531 docinfo=self.getNumPages('//pb', docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 532 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom) 533 534 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get access info 535 536 return docinfo 464 465 if docinfo.get('lang', None) is None: 466 docinfo['lang'] = '' # default keine Sprache gesetzt 467 if dom is None: 468 dom = self.getIndexMeta(url) 469 470 archivePath = None 471 archiveName = None 472 473 archiveNames = dom.xpath("//resource/name") 474 if archiveNames and (len(archiveNames) > 0): 475 archiveName = getTextFromNode(archiveNames[0]) 476 else: 477 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) 478 479 archivePaths = dom.xpath("//resource/archive-path") 480 if archivePaths and (len(archivePaths) > 0): 481 archivePath = getTextFromNode(archivePaths[0]) 482 # clean up archive path 483 if archivePath[0] != '/': 484 archivePath = '/' + archivePath 485 if archiveName and (not archivePath.endswith(archiveName)): 486 archivePath += "/" + archiveName 487 else: 488 # try to get archive-path from url 489 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) 490 if (not url.startswith('http')): 491 archivePath = url.replace('index.meta', '') 492 493 if archivePath is None: 494 # we balk without archive-path 495 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 496 497 imageDirs = dom.xpath("//texttool/image") 498 if imageDirs and (len(imageDirs) > 0): 499 imageDir = getTextFromNode(imageDirs[0]) 500 501 else: 502 # we balk with no image tag / not necessary anymore because textmode is now standard 503 #raise IOError("No text-tool info in %s"%(url)) 504 imageDir = "" 505 #xquery="//pb" 506 docinfo['imagePath'] = "" # keine Bilder 507 docinfo['imageURL'] = "" 508 509 if imageDir and archivePath: 510 #print "image: ", imageDir, " archivepath: ", archivePath 511 imageDir = os.path.join(archivePath, imageDir) 512 imageDir = imageDir.replace("/mpiwg/online", '') 513 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 514 docinfo['imagePath'] = imageDir 515 516 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 517 518 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 519 if viewerUrls and (len(viewerUrls) > 0): 520 viewerUrl = getTextFromNode(viewerUrls[0]) 521 docinfo['viewerURL'] = viewerUrl 522 523 textUrls = dom.xpath("//texttool/text") 524 if textUrls and (len(textUrls) > 0): 525 textUrl = getTextFromNode(textUrls[0]) 526 if urlparse.urlparse(textUrl)[0] == "": #keine url 527 textUrl = os.path.join(archivePath, textUrl) 528 # fix URLs starting with /mpiwg/online 529 if textUrl.startswith("/mpiwg/online"): 530 textUrl = textUrl.replace("/mpiwg/online", '', 1) 531 532 docinfo['textURL'] = textUrl 533 534 textUrls = dom.xpath("//texttool/text-url-path") 535 if textUrls and (len(textUrls) > 0): 536 textUrl = getTextFromNode(textUrls[0]) 537 docinfo['textURLPath'] = textUrl 538 539 presentationUrls = dom.xpath("//texttool/presentation") 540 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 541 542 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 543 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 544 # durch den relativen Pfad auf die presentation infos 545 presentationPath = getTextFromNode(presentationUrls[0]) 546 if url.endswith("index.meta"): 547 presentationUrl = url.replace('index.meta', presentationPath) 548 else: 549 presentationUrl = url + "/" + presentationPath 550 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 552 553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 554 555 return docinfo 556 557 558 537 559 538 560 … … 620 642 pageinfo['end'] = start + grpsize 621 643 if docinfo is not None: 622 623 644 np = int(docinfo['numPages']) 624 645 pageinfo['end'] = min(pageinfo['end'], np) … … 629 650 return pageinfo 630 651 631 def text(self,mode,url,pn): 632 """give text""" 633 if mode=="texttool": #index.meta with texttool information 634 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 635 636 #print textpath 637 try: 638 dom = NonvalidatingReader.parseUri(textpath) 639 except: 640 return None 641 642 list=[] 643 nodes=dom.xpath("//pb") 644 645 node=nodes[int(pn)-1] 646 647 p=node 648 649 while p.tagName!="p": 650 p=p.parentNode 651 652 653 endNode=nodes[int(pn)] 654 655 656 e=endNode 657 658 while e.tagName!="p": 659 e=e.parentNode 660 661 662 next=node.parentNode 663 664 #sammle s 665 while next and (next!=endNode.parentNode): 666 list.append(next) 667 next=next.nextSibling 668 list.append(endNode.parentNode) 669 670 if p==e:# beide im selben paragraphen 671 pass 672 # else: 673 # next=p 674 # while next!=e: 675 # print next,e 676 # list.append(next) 677 # next=next.nextSibling 678 # 679 # for x in list: 680 # PrettyPrint(x) 681 # 682 # return list 683 # 684 685 def findDigilibUrl(self): 686 """try to get the digilib URL from zogilib""" 687 url = self.imageViewerUrl[:-1] + "/getScalerUrl" 688 #print urlparse.urlparse(url)[0] 689 #print urlparse.urljoin(self.absolute_url(),url) 690 logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0]) 691 logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url)) 692 693 try: 694 if urlparse.urlparse(url)[0]=='': #relative path 695 url=urlparse.urljoin(self.absolute_url()+"/",url) 696 697 scaler = urlopen(url).read() 698 return scaler.replace("/servlet/Scaler?", "") 699 except: 700 return None 701 702 def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): 652 653 654 def getNumPages(self,docinfo=None): 655 """get list of pages from fulltext and put in docinfo""" 656 xquery = '//pb' 657 text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 658 # TODO: better processing of the page list. do we need the info somewhere else also? 659 docinfo['numPages'] = text.count("<pb ") 660 return docinfo 661 662 def getTextPage(self, mode="text", pn=1, docinfo=None): 663 """returns single page from fulltext""" 664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) 665 # post-processing downloaded xml 666 pagedom = Parse(pagexml) 667 # plain text mode 668 if mode == "text": 669 # first div contains text 670 pagedivs = pagedom.xpath("/div") 671 if len(pagedivs) > 0: 672 pagenode = pagedivs[0] 673 return serializeNode(pagenode) 674 675 # text-with-links mode 676 if mode == "textPollux": 677 # first div contains text 678 pagedivs = pagedom.xpath("/div") 679 if len(pagedivs) > 0: 680 pagenode = pagedivs[0] 681 # check all a-tags 682 links = pagenode.xpath("//a") 683 for l in links: 684 hrefNode = l.getAttributeNodeNS(None, u"href") 685 if hrefNode: 686 # is link with href 687 href = hrefNode.nodeValue 688 if href.startswith('lt/lex.xql'): 689 # is pollux link 690 selfurl = self.absolute_url() 691 # change href 692 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) 693 # add target 694 l.setAttributeNS(None, 'target', '_blank') 695 return serializeNode(pagenode) 696 697 return "no text here" 698 699 700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): 703 701 """init document viewer""" 704 702 self.title=title 705 self.imageViewerUrl=imageViewerUrl706 self.textViewerUrl=textViewerUrl707 703 self.digilibBaseUrl = digilibBaseUrl 708 704 self.thumbrows = thumbrows … … 714 710 715 711 716 717 # security.declareProtected('View management screens','renameImageForm')718 719 712 def manage_AddDocumentViewerForm(self): 720 713 """add the viewer form""" … … 722 715 return pt() 723 716 724 def manage_AddDocumentViewer(self,id,image ViewerUrl="",textViewerUrl="",title="",RESPONSE=None):717 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 725 718 """add the viewer""" 726 newObj=documentViewer(id,image ViewerUrl,title=title,textViewerUrl=textViewerUrl)719 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 727 720 self._setObject(id,newObj) 728 721
Note: See TracChangeset
for help on using the changeset viewer.