comparison documentViewer.py @ 90:6a4a72033d58

new version with new full-text infrastructure and some more changed templates
author casties
date Thu, 08 Apr 2010 13:04:51 +0200
parents a6e4f9b6729a
children db6d594aa4d9
comparison
equal deleted inserted replaced
89:3d95ba1bf535 90:6a4a72033d58
85 {'label':'main config','action':'changeDocumentViewerForm'}, 85 {'label':'main config','action':'changeDocumentViewerForm'},
86 ) 86 )
87 87
88 # templates and forms 88 # templates and forms
89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals())
90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) 90 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! 91 toc_text = PageTemplateFile('zpt/toc_text', globals())
92 toc_figures = PageTemplateFile('zpt/toc_figures', globals())
92 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 93 page_main_images = PageTemplateFile('zpt/page_main_images', globals())
93 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 94 page_main_text = PageTemplateFile('zpt/page_main_text', globals())
95 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
94 head_main = PageTemplateFile('zpt/head_main', globals()) 96 head_main = PageTemplateFile('zpt/head_main', globals())
95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 97 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
96 info_xml = PageTemplateFile('zpt/info_xml', globals()) 98 info_xml = PageTemplateFile('zpt/info_xml', globals())
97 99
98 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 100 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
159 viewMode="images" 161 viewMode="images"
160 162
161 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 163 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
162 164
163 security.declareProtected('View','index_html') 165 security.declareProtected('View','index_html')
164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): 166 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
165 ''' 167 '''
166 view it 168 view it
167 @param mode: defines how to access the document behind url 169 @param mode: defines how to access the document behind url
168 @param url: url which contains display information 170 @param url: url which contains display information
169 @param viewMode: if images display images, if text display text, default is images (text,images or auto) 171 @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
170 172 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures)
171 ''' 173 '''
172 174
173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 175 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
174 176
175 if not hasattr(self, 'template'): 177 if not hasattr(self, 'template'):
179 181
180 if not getattr(self, 'digilibBaseUrl', None): 182 if not getattr(self, 'digilibBaseUrl', None):
181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 183 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
182 184
183 docinfo = self.getDocinfo(mode=mode,url=url) 185 docinfo = self.getDocinfo(mode=mode,url=url)
184 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 186 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
185 pt = getattr(self.template, 'viewer_main') 187 if tocMode != "thumbs":
186 188 # get table of contents
189 docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
190
187 if viewMode=="auto": # automodus gewaehlt 191 if viewMode=="auto": # automodus gewaehlt
188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert 192 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
189 viewMode="text" 193 viewMode="text"
190 else: 194 else:
191 viewMode="images" 195 viewMode="images"
192 196
197 pt = getattr(self.template, 'viewer_main')
193 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 198 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
194 199
195 def generateMarks(self,mk): 200 def generateMarks(self,mk):
196 ret="" 201 ret=""
197 if mk is None: 202 if mk is None:
198 return "" 203 return ""
199 204 if type(mk) is not ListType:
200 if type(mk) is not ListType: 205 mk=[mk]
201 mk=[mk]
202 for m in mk: 206 for m in mk:
203 ret+="mk=%s"%m 207 ret+="mk=%s"%m
204 return ret 208 return ret
209
205 210
206 def findDigilibUrl(self): 211 def findDigilibUrl(self):
207 """try to get the digilib URL from zogilib""" 212 """try to get the digilib URL from zogilib"""
208 url = self.template.zogilib.getDLBaseUrl() 213 url = self.template.zogilib.getDLBaseUrl()
209 return url 214 return url
341 # dom = NonvalidatingReader.parseUri(metaUrl) 346 # dom = NonvalidatingReader.parseUri(metaUrl)
342 txt=urllib.urlopen(metaUrl).read() 347 txt=urllib.urlopen(metaUrl).read()
343 dom = Parse(txt) 348 dom = Parse(txt)
344 break 349 break
345 except: 350 except:
346 logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 351 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
347 352
348 if dom is None: 353 if dom is None:
349 raise IOError("Unable to read index meta from %s"%(url)) 354 raise IOError("Unable to read index meta from %s"%(url))
350 355
351 return dom 356 return dom
360 metaUrl = url 365 metaUrl = url
361 else: 366 else:
362 # online path 367 # online path
363 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 368 server=self.digilibBaseUrl+"/servlet/Texter?fn="
364 metaUrl=server+url.replace("/mpiwg/online","") 369 metaUrl=server+url.replace("/mpiwg/online","")
365
366 370
367 for cnt in range(num_retries): 371 for cnt in range(num_retries):
368 try: 372 try:
369 # patch dirk encoding fehler treten dann nicht mehr auf 373 # patch dirk encoding fehler treten dann nicht mehr auf
370 # dom = NonvalidatingReader.parseUri(metaUrl) 374 # dom = NonvalidatingReader.parseUri(metaUrl)
459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 463 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
460 """parse texttool tag in index meta""" 464 """parse texttool tag in index meta"""
461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) 465 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
462 if docinfo is None: 466 if docinfo is None:
463 docinfo = {} 467 docinfo = {}
464
465 if docinfo.get('lang', None) is None: 468 if docinfo.get('lang', None) is None:
466 docinfo['lang'] = '' # default keine Sprache gesetzt 469 docinfo['lang'] = '' # default keine Sprache gesetzt
467 if dom is None: 470 if dom is None:
468 dom = self.getIndexMeta(url) 471 dom = self.getIndexMeta(url)
469 472
551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 554 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
552 555
553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 556 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
554 557
555 return docinfo 558 return docinfo
556
557
558
559 559
560 560
561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
562 """gets the bibliographical information from the presentation entry in texttools 562 """gets the bibliographical information from the presentation entry in texttools
563 """ 563 """
616 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 616 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
617 elif mode=="filepath": 617 elif mode=="filepath":
618 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 618 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
619 else: 619 else:
620 logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") 620 logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
621 raise ValueError("Unknown mode %s"%(mode)) 621 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
622 622
623 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) 623 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
624 self.REQUEST.SESSION['docinfo'] = docinfo 624 self.REQUEST.SESSION['docinfo'] = docinfo
625 return docinfo 625 return docinfo
626 626
627 627
628 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None): 628 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
629 """returns pageinfo with the given parameters""" 629 """returns pageinfo with the given parameters"""
630 pageinfo = {} 630 pageinfo = {}
631 current = getInt(current) 631 current = getInt(current)
632 pageinfo['current'] = current 632 pageinfo['current'] = current
633 rows = int(rows or self.thumbrows) 633 rows = int(rows or self.thumbrows)
638 pageinfo['groupsize'] = grpsize 638 pageinfo['groupsize'] = grpsize
639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
640 # int(current / grpsize) * grpsize +1)) 640 # int(current / grpsize) * grpsize +1))
641 pageinfo['start'] = start 641 pageinfo['start'] = start
642 pageinfo['end'] = start + grpsize 642 pageinfo['end'] = start + grpsize
643 if docinfo is not None: 643 if (docinfo is not None) and ('numPages' in docinfo):
644 np = int(docinfo['numPages']) 644 np = int(docinfo['numPages'])
645 pageinfo['end'] = min(pageinfo['end'], np) 645 pageinfo['end'] = min(pageinfo['end'], np)
646 pageinfo['numgroups'] = int(np / grpsize) 646 pageinfo['numgroups'] = int(np / grpsize)
647 if np % grpsize > 0: 647 if np % grpsize > 0:
648 pageinfo['numgroups'] += 1 648 pageinfo['numgroups'] += 1
649
650 pageinfo['viewMode'] = viewMode
651 pageinfo['tocMode'] = tocMode
652 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10')
653 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
649 654
650 return pageinfo 655 return pageinfo
651 656
652 657
653 658
659 docinfo['numPages'] = text.count("<pb ") 664 docinfo['numPages'] = text.count("<pb ")
660 return docinfo 665 return docinfo
661 666
662 def getTextPage(self, mode="text", pn=1, docinfo=None): 667 def getTextPage(self, mode="text", pn=1, docinfo=None):
663 """returns single page from fulltext""" 668 """returns single page from fulltext"""
664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) 669 docpath = docinfo['textURLPath']
670 if mode == "text_dict":
671 textmode = "textPollux"
672 else:
673 textmode = mode
674
675 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)
665 # post-processing downloaded xml 676 # post-processing downloaded xml
666 pagedom = Parse(pagexml) 677 pagedom = Parse(pagexml)
667 # plain text mode 678 # plain text mode
668 if mode == "text": 679 if mode == "text":
669 # first div contains text 680 # first div contains text
671 if len(pagedivs) > 0: 682 if len(pagedivs) > 0:
672 pagenode = pagedivs[0] 683 pagenode = pagedivs[0]
673 return serializeNode(pagenode) 684 return serializeNode(pagenode)
674 685
675 # text-with-links mode 686 # text-with-links mode
676 if mode == "textPollux": 687 if mode == "text_dict":
677 # first div contains text 688 # first div contains text
678 pagedivs = pagedom.xpath("/div") 689 pagedivs = pagedom.xpath("/div")
679 if len(pagedivs) > 0: 690 if len(pagedivs) > 0:
680 pagenode = pagedivs[0] 691 pagenode = pagedivs[0]
681 # check all a-tags 692 # check all a-tags
694 l.setAttributeNS(None, 'target', '_blank') 705 l.setAttributeNS(None, 'target', '_blank')
695 return serializeNode(pagenode) 706 return serializeNode(pagenode)
696 707
697 return "no text here" 708 return "no text here"
698 709
710 def getToc(self, mode="text", docinfo=None):
711 """loads table of contents and stores in docinfo"""
712 logging.debug("documentViewer (gettoc) mode: %s"%(mode))
713 if 'tocSize_%s'%mode in docinfo:
714 # cached toc
715 return docinfo
716
717 docpath = docinfo['textURLPath']
718 # we need to set a result set size
719 pagesize = 1000
720 pn = 1
721 if mode == "text":
722 queryType = "toc"
723 else:
724 queryType = mode
725 # number of entries in toc
726 tocSize = 0
727 tocDiv = None
728 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
729 # post-processing downloaded xml
730 pagedom = Parse(pagexml)
731 # get number of entries
732 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
733 if len(numdivs) > 0:
734 tocSize = int(getTextFromNode(numdivs[0]))
735 # div contains text
736 #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
737 #if len(pagedivs) > 0:
738 # tocDiv = pagedivs[0]
739
740 docinfo['tocSize_%s'%mode] = tocSize
741 #docinfo['tocDiv_%s'%mode] = tocDiv
742 return docinfo
743
744 def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None):
745 """returns single page from the table of contents"""
746 # TODO: this should use the cached TOC
747 if mode == "text":
748 queryType = "toc"
749 else:
750 queryType = mode
751 docpath = docinfo['textURLPath']
752 pagesize = pageinfo['tocPageSize']
753 pn = pageinfo['tocPN']
754 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
755 # post-processing downloaded xml
756 pagedom = Parse(pagexml)
757 # div contains text
758 pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
759 if len(pagedivs) > 0:
760 pagenode = pagedivs[0]
761 return serializeNode(pagenode)
762 else:
763 return "No TOC!"
764
699 765
700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): 766 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
701 """init document viewer""" 767 """init document viewer"""
702 self.title=title 768 self.title=title
703 self.digilibBaseUrl = digilibBaseUrl 769 self.digilibBaseUrl = digilibBaseUrl