Mercurial > hg > documentViewer
changeset 90:6a4a72033d58
new version with new full-text infrastructure and some more changed templates
author | casties |
---|---|
date | Thu, 08 Apr 2010 13:04:51 +0200 |
parents | 3d95ba1bf535 |
children | b8c491e52ebc |
files | documentViewer.py zpt/page_main_text.zpt zpt/page_main_text_dict.zpt zpt/thumbs_main.zpt |
diffstat | 4 files changed, 96 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/documentViewer.py Fri Mar 19 12:42:40 2010 +0100 +++ b/documentViewer.py Thu Apr 08 13:04:51 2010 +0200 @@ -87,10 +87,12 @@ # templates and forms viewer_main = PageTemplateFile('zpt/viewer_main', globals()) - thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) - image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! + toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) + toc_text = PageTemplateFile('zpt/toc_text', globals()) + toc_figures = PageTemplateFile('zpt/toc_figures', globals()) page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) + page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) @@ -161,13 +163,13 @@ return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) security.declareProtected('View','index_html') - def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): + def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information - @param viewMode: if images display images, if text display text, default is images (text,images or auto) - + @param viewMode: if images display images, if text display text, default is auto (text,images or auto) + @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures) ''' logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) @@ -181,28 +183,31 @@ self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) - pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) - pt = getattr(self.template, 'viewer_main') - + pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + if tocMode != "thumbs": + # get table of contents + docinfo = self.getToc(mode=tocMode, docinfo=docinfo) + if viewMode=="auto": # automodus gewaehlt if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert viewMode="text" else: viewMode="images" - + + pt = getattr(self.template, 'viewer_main') return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) def generateMarks(self,mk): ret="" - if mk is None: - return "" - - if type(mk) is not ListType: - mk=[mk] + if mk is None: + return "" + if type(mk) is not ListType: + mk=[mk] for m in mk: ret+="mk=%s"%m return ret + def findDigilibUrl(self): """try to get the digilib URL from zogilib""" url = self.template.zogilib.getDLBaseUrl() @@ -343,7 +348,7 @@ dom = Parse(txt) break except: - logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) + logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) if dom is None: raise IOError("Unable to read index meta from %s"%(url)) @@ -362,7 +367,6 @@ # online path server=self.digilibBaseUrl+"/servlet/Texter?fn=" metaUrl=server+url.replace("/mpiwg/online","") - for cnt in range(num_retries): try: @@ -461,7 +465,6 @@ logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) if docinfo is None: docinfo = {} - if docinfo.get('lang', None) is None: docinfo['lang'] = '' # default keine Sprache gesetzt if dom is None: @@ -553,9 +556,6 @@ docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info return docinfo - - - def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): @@ -618,14 +618,14 @@ docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) else: logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") - raise ValueError("Unknown mode %s"%(mode)) + raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo - def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None): + def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): """returns pageinfo with the given parameters""" pageinfo = {} current = getInt(current) @@ -640,12 +640,17 @@ # int(current / grpsize) * grpsize +1)) pageinfo['start'] = start pageinfo['end'] = start + grpsize - if docinfo is not None: + if (docinfo is not None) and ('numPages' in docinfo): np = int(docinfo['numPages']) pageinfo['end'] = min(pageinfo['end'], np) pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 + + pageinfo['viewMode'] = viewMode + pageinfo['tocMode'] = tocMode + pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10') + pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') return pageinfo @@ -661,7 +666,13 @@ def getTextPage(self, mode="text", pn=1, docinfo=None): """returns single page from fulltext""" - pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) + docpath = docinfo['textURLPath'] + if mode == "text_dict": + textmode = "textPollux" + else: + textmode = mode + + pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False) # post-processing downloaded xml pagedom = Parse(pagexml) # plain text mode @@ -673,7 +684,7 @@ return serializeNode(pagenode) # text-with-links mode - if mode == "textPollux": + if mode == "text_dict": # first div contains text pagedivs = pagedom.xpath("/div") if len(pagedivs) > 0: @@ -696,6 +707,61 @@ return "no text here" + def getToc(self, mode="text", docinfo=None): + """loads table of contents and stores in docinfo""" + logging.debug("documentViewer (gettoc) mode: %s"%(mode)) + if 'tocSize_%s'%mode in docinfo: + # cached toc + return docinfo + + docpath = docinfo['textURLPath'] + # we need to set a result set size + pagesize = 1000 + pn = 1 + if mode == "text": + queryType = "toc" + else: + queryType = mode + # number of entries in toc + tocSize = 0 + tocDiv = None + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) + # post-processing downloaded xml + pagedom = Parse(pagexml) + # get number of entries + numdivs = pagedom.xpath("//div[@class='queryResultHits']") + if len(numdivs) > 0: + tocSize = int(getTextFromNode(numdivs[0])) + # div contains text + #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + #if len(pagedivs) > 0: + # tocDiv = pagedivs[0] + + docinfo['tocSize_%s'%mode] = tocSize + #docinfo['tocDiv_%s'%mode] = tocDiv + return docinfo + + def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None): + """returns single page from the table of contents""" + # TODO: this should use the cached TOC + if mode == "text": + queryType = "toc" + else: + queryType = mode + docpath = docinfo['textURLPath'] + pagesize = pageinfo['tocPageSize'] + pn = pageinfo['tocPN'] + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) + # post-processing downloaded xml + pagedom = Parse(pagexml) + # div contains text + pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + if len(pagedivs) > 0: + pagenode = pagedivs[0] + return serializeNode(pagenode) + else: + return "No TOC!" + def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): """init document viewer"""
--- a/zpt/page_main_text.zpt Fri Mar 19 12:42:40 2010 +0100 +++ b/zpt/page_main_text.zpt Thu Apr 08 13:04:51 2010 +0200 @@ -1,3 +1,3 @@ -<tal:block tal:define="mode python:options.get('viewMode','text'); pageinfo python:options.get('pageinfo',''); docinfo python:options.get('docinfo','')"> - <div tal:content="structure python:here.getTextPage(mode=mode,pn=pageinfo['current'],docinfo=docinfo)"></div> +<tal:block tal:define="pageinfo python:options.get('pageinfo',None); docinfo python:options.get('docinfo',None)"> + <div tal:content="structure python:here.getTextPage(mode='text',pn=pageinfo['current'],docinfo=docinfo)"></div> </tal:block> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/page_main_text_dict.zpt Thu Apr 08 13:04:51 2010 +0200 @@ -0,0 +1,3 @@ +<tal:block tal:define="pageinfo python:options.get('pageinfo',None); docinfo python:options.get('docinfo',None)"> + <div tal:content="structure python:here.getTextPage(mode='text_dict',pn=pageinfo['current'],docinfo=docinfo)"></div> +</tal:block> \ No newline at end of file
--- a/zpt/thumbs_main.zpt Fri Mar 19 12:42:40 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -<div tal:define="docinfo options/docinfo; pageinfo options/pageinfo; - start pageinfo/start; end pageinfo/end; rows pageinfo/rows; cols pageinfo/cols; - current pageinfo/current; grpsize pageinfo/groupsize"> - -<div class="thumbruler"> - <span tal:condition="python:(start>1)"> - <a tal:attributes="href python:here.getLink(param='start',val=max(start-grpsize,1))"><</a> - </span> - <select tal:attributes="onChange python:'location.href=\''+here.getLink(param='start',val=None)+'&start=\'+this.options[this.selectedIndex].value'"> - <option tal:repeat="grp python:range(pageinfo['numgroups'])" - tal:attributes="selected python:(start==grp*grpsize+1); value python:(grp*grpsize+1)" - tal:content="python:(grp*grpsize+1)"/> - </select> - <span tal:condition="python:(start+grpsize<int(docinfo['numPages']))"> - <a tal:attributes="href python:here.getLink(param='start',val=start+grpsize)">></a> - </span> -</div> -<table> - <tr tal:repeat="row python:range(rows)"> - <tal:block tal:repeat="idx python:range(start+row*cols,start+(row+1)*cols)"> - <td align="center" tal:condition="python:(idx<=end)" - tal:attributes="class python:here.getStyle(idx,current,'thumb')"> - <a tal:attributes="href python:here.getLink(param='pn',val=idx)"> - <img class="thumbimg" border="0" tal:attributes="src string:${docinfo/imageURL}&pn=$idx&dw=100&dh=100"/> - <div class="thumbcap" tal:content="idx"/> - </a> - </td> - </tal:block> - </tr> -</table> -</div>