changeset 90:6a4a72033d58

new version with new full-text infrastructure and some more changed templates
author casties
date Thu, 08 Apr 2010 13:04:51 +0200
parents 3d95ba1bf535
children b8c491e52ebc
files documentViewer.py zpt/page_main_text.zpt zpt/page_main_text_dict.zpt zpt/thumbs_main.zpt
diffstat 4 files changed, 96 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/documentViewer.py	Fri Mar 19 12:42:40 2010 +0100
+++ b/documentViewer.py	Thu Apr 08 13:04:51 2010 +0200
@@ -87,10 +87,12 @@
 
     # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())
-    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
-    image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete!
+    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
+    toc_text = PageTemplateFile('zpt/toc_text', globals())
+    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())
+    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
     info_xml = PageTemplateFile('zpt/info_xml', globals())
@@ -161,13 +163,13 @@
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
   
     security.declareProtected('View','index_html')
-    def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None):
+    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         '''
         view it
         @param mode: defines how to access the document behind url 
         @param url: url which contains display information
-        @param viewMode: if images display images, if text display text, default is images (text,images or auto)
-        
+        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
+        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures)
         '''
         
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
@@ -181,28 +183,31 @@
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
             
         docinfo = self.getDocinfo(mode=mode,url=url)
-        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
-        pt = getattr(self.template, 'viewer_main')
-        
+        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
+        if tocMode != "thumbs":
+            # get table of contents
+            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
+
         if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"
             else:
                 viewMode="images"
-               
+                
+        pt = getattr(self.template, 'viewer_main')               
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
   
     def generateMarks(self,mk):
         ret=""
-	if mk is None:
-		return ""
-	
-	if type(mk) is not ListType:
-		mk=[mk]
+        if mk is None:
+            return ""
+    	if type(mk) is not ListType:
+    		mk=[mk]
         for m in mk:
             ret+="mk=%s"%m
         return ret
 
+
     def findDigilibUrl(self):
         """try to get the digilib URL from zogilib"""
         url = self.template.zogilib.getDLBaseUrl()
@@ -343,7 +348,7 @@
                 dom = Parse(txt)
                 break
             except:
-                logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
+                logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
                 
         if dom is None:
             raise IOError("Unable to read index meta from %s"%(url))
@@ -362,7 +367,6 @@
             # online path
             server=self.digilibBaseUrl+"/servlet/Texter?fn="
             metaUrl=server+url.replace("/mpiwg/online","")
-           
         
         for cnt in range(num_retries):
             try:
@@ -461,7 +465,6 @@
         logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
         if docinfo is None:
            docinfo = {}
-            
         if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default keine Sprache gesetzt
         if dom is None:
@@ -553,9 +556,6 @@
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
         
         return docinfo
-
-
-
    
    
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
@@ -618,14 +618,14 @@
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:
             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
-            raise ValueError("Unknown mode %s"%(mode))
+            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                         
         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo
         
         
-    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
+    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""
         pageinfo = {}
         current = getInt(current)
@@ -640,12 +640,17 @@
         # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start
         pageinfo['end'] = start + grpsize
-        if docinfo is not None:
+        if (docinfo is not None) and ('numPages' in docinfo):
             np = int(docinfo['numPages'])
             pageinfo['end'] = min(pageinfo['end'], np)
             pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:
                 pageinfo['numgroups'] += 1
+                
+        pageinfo['viewMode'] = viewMode
+        pageinfo['tocMode'] = tocMode
+        pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10')
+        pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
 
         return pageinfo
                 
@@ -661,7 +666,13 @@
        
     def getTextPage(self, mode="text", pn=1, docinfo=None):
         """returns single page from fulltext"""
-        pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False)
+        docpath = docinfo['textURLPath']
+        if mode == "text_dict":
+            textmode = "textPollux"
+        else:
+            textmode = mode
+            
+        pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)
         # post-processing downloaded xml
         pagedom = Parse(pagexml)
         # plain text mode
@@ -673,7 +684,7 @@
                 return serializeNode(pagenode)
 
         # text-with-links mode
-        if mode == "textPollux":
+        if mode == "text_dict":
             # first div contains text
             pagedivs = pagedom.xpath("/div")
             if len(pagedivs) > 0:
@@ -696,6 +707,61 @@
         
         return "no text here"
 
+    def getToc(self, mode="text", docinfo=None):
+        """queries the number of table-of-contents entries for mode and caches it in docinfo['tocSize_<mode>']"""
+        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
+        if 'tocSize_%s'%mode in docinfo:
+            # cached toc
+            return docinfo
+        
+        docpath = docinfo['textURLPath']
+        # we need to set a result set size
+        pagesize = 1000
+        pn = 1
+        if mode == "text":
+            queryType = "toc"
+        else:
+            queryType = mode
+        # number of entries in toc
+        tocSize = 0
+        tocDiv = None
+        pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
+        # post-processing downloaded xml
+        pagedom = Parse(pagexml)
+        # get number of entries
+        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
+        if len(numdivs) > 0:
+            tocSize = int(getTextFromNode(numdivs[0]))
+            # div contains text
+            #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
+            #if len(pagedivs) > 0:
+            #    tocDiv = pagedivs[0]
+
+        docinfo['tocSize_%s'%mode] = tocSize
+        #docinfo['tocDiv_%s'%mode] = tocDiv
+        return docinfo
+    
+    def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None):
+        """returns single page from the table of contents"""
+        # TODO: this should use the cached TOC
+        if mode == "text":
+            queryType = "toc"
+        else:
+            queryType = mode
+        docpath = docinfo['textURLPath']
+        pagesize = pageinfo['tocPageSize']
+        pn = pageinfo['tocPN']
+        pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
+        # post-processing downloaded xml
+        pagedom = Parse(pagexml)
+        # div contains text
+        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
+        if len(pagedivs) > 0:
+            pagenode = pagedivs[0]
+            return serializeNode(pagenode)
+        else:
+            return "No TOC!"
+
     
     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""
--- a/zpt/page_main_text.zpt	Fri Mar 19 12:42:40 2010 +0100
+++ b/zpt/page_main_text.zpt	Thu Apr 08 13:04:51 2010 +0200
@@ -1,3 +1,3 @@
-<tal:block tal:define="mode python:options.get('viewMode','text'); pageinfo python:options.get('pageinfo',''); docinfo python:options.get('docinfo','')">
-  <div tal:content="structure python:here.getTextPage(mode=mode,pn=pageinfo['current'],docinfo=docinfo)"></div>
+<tal:block tal:define="pageinfo python:options.get('pageinfo',None); docinfo python:options.get('docinfo',None)">
+  <div tal:content="structure python:here.getTextPage(mode='text',pn=pageinfo['current'],docinfo=docinfo)"></div>
 </tal:block>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/zpt/page_main_text_dict.zpt	Thu Apr 08 13:04:51 2010 +0200
@@ -0,0 +1,3 @@
+<tal:block tal:define="pageinfo python:options.get('pageinfo',None); docinfo python:options.get('docinfo',None)">
+  <div tal:content="structure python:here.getTextPage(mode='text_dict',pn=pageinfo['current'],docinfo=docinfo)"></div>
+</tal:block>
\ No newline at end of file
--- a/zpt/thumbs_main.zpt	Fri Mar 19 12:42:40 2010 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-<div tal:define="docinfo options/docinfo; pageinfo options/pageinfo;  
-  start pageinfo/start; end pageinfo/end; rows pageinfo/rows; cols pageinfo/cols;
-  current pageinfo/current; grpsize pageinfo/groupsize">
-
-<div class="thumbruler">
-  <span tal:condition="python:(start>1)">
-    <a tal:attributes="href python:here.getLink(param='start',val=max(start-grpsize,1))">&lt;</a>
-  </span>
-  <select tal:attributes="onChange python:'location.href=\''+here.getLink(param='start',val=None)+'&start=\'+this.options[this.selectedIndex].value'">
-    <option tal:repeat="grp python:range(pageinfo['numgroups'])" 
-      tal:attributes="selected python:(start==grp*grpsize+1); value python:(grp*grpsize+1)"
-      tal:content="python:(grp*grpsize+1)"/>
-  </select>
-  <span tal:condition="python:(start+grpsize<int(docinfo['numPages']))">
-    <a tal:attributes="href python:here.getLink(param='start',val=start+grpsize)">&gt;</a>
-  </span>
-</div>
-<table>
-  <tr tal:repeat="row python:range(rows)">
-    <tal:block tal:repeat="idx python:range(start+row*cols,start+(row+1)*cols)">
-      <td align="center" tal:condition="python:(idx<=end)"
-		  	tal:attributes="class python:here.getStyle(idx,current,'thumb')">
-		<a tal:attributes="href python:here.getLink(param='pn',val=idx)">
-		  <img class="thumbimg" border="0" tal:attributes="src string:${docinfo/imageURL}&pn=$idx&dw=100&dh=100"/>
-		  <div class="thumbcap" tal:content="idx"/>
-		</a>
-	  </td>
-    </tal:block>
-  </tr>
-</table>
-</div>