Mercurial > hg > documentViewer
comparison documentViewer.py @ 90:6a4a72033d58
new version with new full-text infrastructure and some more changed templates
author | casties |
---|---|
date | Thu, 08 Apr 2010 13:04:51 +0200 |
parents | a6e4f9b6729a |
children | db6d594aa4d9 |
comparison
equal
deleted
inserted
replaced
89:3d95ba1bf535 | 90:6a4a72033d58 |
---|---|
85 {'label':'main config','action':'changeDocumentViewerForm'}, | 85 {'label':'main config','action':'changeDocumentViewerForm'}, |
86 ) | 86 ) |
87 | 87 |
88 # templates and forms | 88 # templates and forms |
89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) | 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) |
90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) | 90 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) |
91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! | 91 toc_text = PageTemplateFile('zpt/toc_text', globals()) |
92 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) | |
92 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) | 93 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) |
93 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) | 94 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) |
95 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) | |
94 head_main = PageTemplateFile('zpt/head_main', globals()) | 96 head_main = PageTemplateFile('zpt/head_main', globals()) |
95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) | 97 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) |
96 info_xml = PageTemplateFile('zpt/info_xml', globals()) | 98 info_xml = PageTemplateFile('zpt/info_xml', globals()) |
97 | 99 |
98 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) | 100 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) |
159 viewMode="images" | 161 viewMode="images" |
160 | 162 |
161 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | 163 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
162 | 164 |
163 security.declareProtected('View','index_html') | 165 security.declareProtected('View','index_html') |
164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): | 166 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
165 ''' | 167 ''' |
166 view it | 168 view it |
167 @param mode: defines how to access the document behind url | 169 @param mode: defines how to access the document behind url |
168 @param url: url which contains display information | 170 @param url: url which contains display information |
169 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 171 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) |
170 | 172 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures) |
171 ''' | 173 ''' |
172 | 174 |
173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 175 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
174 | 176 |
175 if not hasattr(self, 'template'): | 177 if not hasattr(self, 'template'): |
179 | 181 |
180 if not getattr(self, 'digilibBaseUrl', None): | 182 if not getattr(self, 'digilibBaseUrl', None): |
181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | 183 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
182 | 184 |
183 docinfo = self.getDocinfo(mode=mode,url=url) | 185 docinfo = self.getDocinfo(mode=mode,url=url) |
184 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) | 186 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) |
185 pt = getattr(self.template, 'viewer_main') | 187 if tocMode != "thumbs": |
186 | 188 # get table of contents |
189 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) | |
190 | |
187 if viewMode=="auto": # automodus gewaehlt | 191 if viewMode=="auto": # automodus gewaehlt |
188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert | 192 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert |
189 viewMode="text" | 193 viewMode="text" |
190 else: | 194 else: |
191 viewMode="images" | 195 viewMode="images" |
192 | 196 |
197 pt = getattr(self.template, 'viewer_main') | |
193 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) | 198 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) |
194 | 199 |
195 def generateMarks(self,mk): | 200 def generateMarks(self,mk): |
196 ret="" | 201 ret="" |
197 if mk is None: | 202 if mk is None: |
198 return "" | 203 return "" |
199 | 204 if type(mk) is not ListType: |
200 if type(mk) is not ListType: | 205 mk=[mk] |
201 mk=[mk] | |
202 for m in mk: | 206 for m in mk: |
203 ret+="mk=%s"%m | 207 ret+="mk=%s"%m |
204 return ret | 208 return ret |
209 | |
205 | 210 |
206 def findDigilibUrl(self): | 211 def findDigilibUrl(self): |
207 """try to get the digilib URL from zogilib""" | 212 """try to get the digilib URL from zogilib""" |
208 url = self.template.zogilib.getDLBaseUrl() | 213 url = self.template.zogilib.getDLBaseUrl() |
209 return url | 214 return url |
341 # dom = NonvalidatingReader.parseUri(metaUrl) | 346 # dom = NonvalidatingReader.parseUri(metaUrl) |
342 txt=urllib.urlopen(metaUrl).read() | 347 txt=urllib.urlopen(metaUrl).read() |
343 dom = Parse(txt) | 348 dom = Parse(txt) |
344 break | 349 break |
345 except: | 350 except: |
346 logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) | 351 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) |
347 | 352 |
348 if dom is None: | 353 if dom is None: |
349 raise IOError("Unable to read index meta from %s"%(url)) | 354 raise IOError("Unable to read index meta from %s"%(url)) |
350 | 355 |
351 return dom | 356 return dom |
360 metaUrl = url | 365 metaUrl = url |
361 else: | 366 else: |
362 # online path | 367 # online path |
363 server=self.digilibBaseUrl+"/servlet/Texter?fn=" | 368 server=self.digilibBaseUrl+"/servlet/Texter?fn=" |
364 metaUrl=server+url.replace("/mpiwg/online","") | 369 metaUrl=server+url.replace("/mpiwg/online","") |
365 | |
366 | 370 |
367 for cnt in range(num_retries): | 371 for cnt in range(num_retries): |
368 try: | 372 try: |
369 # patch dirk encoding fehler treten dann nicht mehr auf | 373 # patch dirk encoding fehler treten dann nicht mehr auf |
370 # dom = NonvalidatingReader.parseUri(metaUrl) | 374 # dom = NonvalidatingReader.parseUri(metaUrl) |
459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): | 463 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): |
460 """parse texttool tag in index meta""" | 464 """parse texttool tag in index meta""" |
461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) | 465 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) |
462 if docinfo is None: | 466 if docinfo is None: |
463 docinfo = {} | 467 docinfo = {} |
464 | |
465 if docinfo.get('lang', None) is None: | 468 if docinfo.get('lang', None) is None: |
466 docinfo['lang'] = '' # default keine Sprache gesetzt | 469 docinfo['lang'] = '' # default keine Sprache gesetzt |
467 if dom is None: | 470 if dom is None: |
468 dom = self.getIndexMeta(url) | 471 dom = self.getIndexMeta(url) |
469 | 472 |
551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) | 554 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) |
552 | 555 |
553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info | 556 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info |
554 | 557 |
555 return docinfo | 558 return docinfo |
556 | |
557 | |
558 | |
559 | 559 |
560 | 560 |
561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | 561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): |
562 """gets the bibliographical information from the presentation entry in texttools | 562 """gets the bibliographical information from the presentation entry in texttools |
563 """ | 563 """ |
616 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) | 616 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) |
617 elif mode=="filepath": | 617 elif mode=="filepath": |
618 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) | 618 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) |
619 else: | 619 else: |
620 logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") | 620 logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") |
621 raise ValueError("Unknown mode %s"%(mode)) | 621 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) |
622 | 622 |
623 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) | 623 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) |
624 self.REQUEST.SESSION['docinfo'] = docinfo | 624 self.REQUEST.SESSION['docinfo'] = docinfo |
625 return docinfo | 625 return docinfo |
626 | 626 |
627 | 627 |
628 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None): | 628 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
629 """returns pageinfo with the given parameters""" | 629 """returns pageinfo with the given parameters""" |
630 pageinfo = {} | 630 pageinfo = {} |
631 current = getInt(current) | 631 current = getInt(current) |
632 pageinfo['current'] = current | 632 pageinfo['current'] = current |
633 rows = int(rows or self.thumbrows) | 633 rows = int(rows or self.thumbrows) |
638 pageinfo['groupsize'] = grpsize | 638 pageinfo['groupsize'] = grpsize |
639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) | 639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) |
640 # int(current / grpsize) * grpsize +1)) | 640 # int(current / grpsize) * grpsize +1)) |
641 pageinfo['start'] = start | 641 pageinfo['start'] = start |
642 pageinfo['end'] = start + grpsize | 642 pageinfo['end'] = start + grpsize |
643 if docinfo is not None: | 643 if (docinfo is not None) and ('numPages' in docinfo): |
644 np = int(docinfo['numPages']) | 644 np = int(docinfo['numPages']) |
645 pageinfo['end'] = min(pageinfo['end'], np) | 645 pageinfo['end'] = min(pageinfo['end'], np) |
646 pageinfo['numgroups'] = int(np / grpsize) | 646 pageinfo['numgroups'] = int(np / grpsize) |
647 if np % grpsize > 0: | 647 if np % grpsize > 0: |
648 pageinfo['numgroups'] += 1 | 648 pageinfo['numgroups'] += 1 |
649 | |
650 pageinfo['viewMode'] = viewMode | |
651 pageinfo['tocMode'] = tocMode | |
652 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10') | |
653 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') | |
649 | 654 |
650 return pageinfo | 655 return pageinfo |
651 | 656 |
652 | 657 |
653 | 658 |
659 docinfo['numPages'] = text.count("<pb ") | 664 docinfo['numPages'] = text.count("<pb ") |
660 return docinfo | 665 return docinfo |
661 | 666 |
662 def getTextPage(self, mode="text", pn=1, docinfo=None): | 667 def getTextPage(self, mode="text", pn=1, docinfo=None): |
663 """returns single page from fulltext""" | 668 """returns single page from fulltext""" |
664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) | 669 docpath = docinfo['textURLPath'] |
670 if mode == "text_dict": | |
671 textmode = "textPollux" | |
672 else: | |
673 textmode = mode | |
674 | |
675 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False) | |
665 # post-processing downloaded xml | 676 # post-processing downloaded xml |
666 pagedom = Parse(pagexml) | 677 pagedom = Parse(pagexml) |
667 # plain text mode | 678 # plain text mode |
668 if mode == "text": | 679 if mode == "text": |
669 # first div contains text | 680 # first div contains text |
671 if len(pagedivs) > 0: | 682 if len(pagedivs) > 0: |
672 pagenode = pagedivs[0] | 683 pagenode = pagedivs[0] |
673 return serializeNode(pagenode) | 684 return serializeNode(pagenode) |
674 | 685 |
675 # text-with-links mode | 686 # text-with-links mode |
676 if mode == "textPollux": | 687 if mode == "text_dict": |
677 # first div contains text | 688 # first div contains text |
678 pagedivs = pagedom.xpath("/div") | 689 pagedivs = pagedom.xpath("/div") |
679 if len(pagedivs) > 0: | 690 if len(pagedivs) > 0: |
680 pagenode = pagedivs[0] | 691 pagenode = pagedivs[0] |
681 # check all a-tags | 692 # check all a-tags |
694 l.setAttributeNS(None, 'target', '_blank') | 705 l.setAttributeNS(None, 'target', '_blank') |
695 return serializeNode(pagenode) | 706 return serializeNode(pagenode) |
696 | 707 |
697 return "no text here" | 708 return "no text here" |
698 | 709 |
710 def getToc(self, mode="text", docinfo=None): | |
711 """loads table of contents and stores in docinfo""" | |
712 logging.debug("documentViewer (gettoc) mode: %s"%(mode)) | |
713 if 'tocSize_%s'%mode in docinfo: | |
714 # cached toc | |
715 return docinfo | |
716 | |
717 docpath = docinfo['textURLPath'] | |
718 # we need to set a result set size | |
719 pagesize = 1000 | |
720 pn = 1 | |
721 if mode == "text": | |
722 queryType = "toc" | |
723 else: | |
724 queryType = mode | |
725 # number of entries in toc | |
726 tocSize = 0 | |
727 tocDiv = None | |
728 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | |
729 # post-processing downloaded xml | |
730 pagedom = Parse(pagexml) | |
731 # get number of entries | |
732 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | |
733 if len(numdivs) > 0: | |
734 tocSize = int(getTextFromNode(numdivs[0])) | |
735 # div contains text | |
736 #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") | |
737 #if len(pagedivs) > 0: | |
738 # tocDiv = pagedivs[0] | |
739 | |
740 docinfo['tocSize_%s'%mode] = tocSize | |
741 #docinfo['tocDiv_%s'%mode] = tocDiv | |
742 return docinfo | |
743 | |
744 def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None): | |
745 """returns single page from the table of contents""" | |
746 # TODO: this should use the cached TOC | |
747 if mode == "text": | |
748 queryType = "toc" | |
749 else: | |
750 queryType = mode | |
751 docpath = docinfo['textURLPath'] | |
752 pagesize = pageinfo['tocPageSize'] | |
753 pn = pageinfo['tocPN'] | |
754 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | |
755 # post-processing downloaded xml | |
756 pagedom = Parse(pagexml) | |
757 # div contains text | |
758 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") | |
759 if len(pagedivs) > 0: | |
760 pagenode = pagedivs[0] | |
761 return serializeNode(pagenode) | |
762 else: | |
763 return "No TOC!" | |
764 | |
699 | 765 |
700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): | 766 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): |
701 """init document viewer""" | 767 """init document viewer""" |
702 self.title=title | 768 self.title=title |
703 self.digilibBaseUrl = digilibBaseUrl | 769 self.digilibBaseUrl = digilibBaseUrl |