Diff for /documentViewer/documentViewer.py between versions 1.47 and 1.90

version 1.47, 2010/04/27 12:58:31 version 1.90, 2010/08/24 09:38:45
Line 6  from AccessControl import ClassSecurityI Line 6  from AccessControl import ClassSecurityI
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  
 from Ft.Xml.Domlette import PrettyPrint, Print  
 from Ft.Xml import EMPTY_NAMESPACE, Parse  from Ft.Xml import EMPTY_NAMESPACE, Parse
   import Ft.Xml.Domlette
 from xml.dom.minidom import parse, parseString  
   
   
   
 import Ft.Xml.XPath  
 import cStringIO  
 import xmlrpclib  
 import os.path  import os.path
 import sys  import sys
 import cgi  
 import urllib  import urllib
   import urllib2
 import logging  import logging
 import math  import math
   
 import urlparse   import urlparse 
 from types import *  import cStringIO
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
Line 53  def getTextFromNode(nodename): Line 43  def getTextFromNode(nodename):
 def serializeNode(node, encoding='utf-8'):  def serializeNode(node, encoding='utf-8'):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     buf = cStringIO.StringIO()      buf = cStringIO.StringIO()
     Print(node, stream=buf, encoding=encoding)      Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
     s = buf.getvalue()      s = buf.getvalue()
     buf.close()      buf.close()
     return s      return s
Line 64  def getParentDir(path): Line 54  def getParentDir(path):
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   
   def getHttpData(url, data=None, num_tries=3, timeout=10):
       """returns result from url+data HTTP request"""
       # we do GET (by appending data to url)
       if isinstance(data, str) or isinstance(data, unicode):
           # if data is string then append
           url = "%s?%s"%(url,data)
       elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
           # urlencode
           url = "%s?%s"%(url,urllib.urlencode(data))
       
       response = None
       errmsg = None
       for cnt in range(num_tries):
           try:
               logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
               if sys.version_info < (2, 6):
                   # set timeout on socket -- ugly :-(
 import socket  import socket
                   socket.setdefaulttimeout(float(timeout))
                   response = urllib2.urlopen(url)
               else:
                   response = urllib2.urlopen(url,timeout=float(timeout))
               # check result?
               break
           except urllib2.HTTPError, e:
               logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
               errmsg = str(e)
               # stop trying
               break
           except urllib2.URLError, e:
               logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
               errmsg = str(e)
               # stop trying
               #break
   
       if response is not None:
           data = response.read()
           response.close()
           return data
       
       raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
       #return None
   
 def urlopen(url,timeout=2):  
         """urlopen mit timeout"""  
         socket.setdefaulttimeout(timeout)  
         ret=urllib.urlopen(url)  
         socket.setdefaulttimeout(5)  
         return ret  
   
   
 ##  ##
Line 79  def urlopen(url,timeout=2): Line 104  def urlopen(url,timeout=2):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
     #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"  
       
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 96  class documentViewer(Folder): Line 119  class documentViewer(Folder):
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())      page_main_images = PageTemplateFile('zpt/page_main_images', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())      page_main_text = PageTemplateFile('zpt/page_main_text', globals())
     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())      page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
     info_xml = PageTemplateFile('zpt/info_xml', globals())      info_xml = PageTemplateFile('zpt/info_xml', globals())
   
       
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')          security.declareProtected('View management screens','changeDocumentViewerForm')    
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
Line 119  class documentViewer(Folder): Line 145  class documentViewer(Folder):
         #self['template'] = templateFolder # Zope-2.12 style          #self['template'] = templateFolder # Zope-2.12 style
         self._setObject('template',templateFolder) # old style          self._setObject('template',templateFolder) # old style
         try:          try:
             from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy              import MpdlXmlTextServer
             xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)              textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
             #templateFolder['fulltextclient'] = xmlRpcClient              #templateFolder['fulltextclient'] = xmlRpcClient
             templateFolder._setObject('fulltextclient',xmlRpcClient)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 134  class documentViewer(Folder): Line 160  class documentViewer(Folder):
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
   
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getQuery(**args)
   
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
   
       def getNumPages(self, docinfo):
           """get numpages"""
           return self.template.fulltextclient.getNumPages(docinfo)
   
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
     security.declareProtected('View','thumbs_rss')      security.declareProtected('View','thumbs_rss')
     def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):      def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
Line 158  class documentViewer(Folder): Line 218  class documentViewer(Folder):
         pt = getattr(self.template, 'thumbs_main_rss')          pt = getattr(self.template, 'thumbs_main_rss')
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 172  class documentViewer(Folder): Line 232  class documentViewer(Folder):
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, search)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph)          @param characterNormalization type of text display (reg, norm, none)
           @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''          '''
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
Line 184  class documentViewer(Folder): Line 245  class documentViewer(Folder):
             return "ERROR: template folder missing!"              return "ERROR: template folder missing!"
                           
         if not getattr(self, 'digilibBaseUrl', None):          if not getattr(self, 'digilibBaseUrl', None):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
                   
           
         if tocMode != "thumbs":          if tocMode != "thumbs":
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
                           
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)  
           
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
           pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
           
         pt = getattr(self.template, 'viewer_main')                         pt = getattr(self.template, 'viewer_main')               
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
       
Line 208  class documentViewer(Folder): Line 268  class documentViewer(Folder):
         ret=""          ret=""
         if mk is None:          if mk is None:
             return ""              return ""
         if type(mk) is not ListType:          if not isinstance(mk, list):
             mk=[mk]              mk=[mk]
         for m in mk:          for m in mk:
             ret+="mk=%s"%m              ret+="mk=%s"%m
Line 220  class documentViewer(Folder): Line 280  class documentViewer(Folder):
         url = self.template.zogilib.getDLBaseUrl()          url = self.template.zogilib.getDLBaseUrl()
         return url          return url
           
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
       
     def getStyle(self, idx, selected, style=""):      def getStyle(self, idx, selected, style=""):
         """returns a string with the given style and append 'sel' if path == selected."""          """returns a string with the given style and append 'sel' if path == selected."""
         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
Line 243  class documentViewer(Folder): Line 307  class documentViewer(Folder):
                 params["url"] = getParentDir(params["url"])                  params["url"] = getParentDir(params["url"])
                                   
         # quote values and assemble into query string          # quote values and assemble into query string
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
           ps = urllib.urlencode(params)
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
   
Line 258  class documentViewer(Folder): Line 323  class documentViewer(Folder):
                 params[param] = str(val)                  params[param] = str(val)
                                   
         # quote values and assemble into query string          # quote values and assemble into query string
         logging.info("XYXXXXX: %s"%repr(params.items()))          logging.debug("XYXXXXX: %s"%repr(params.items()))
         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
Line 277  class documentViewer(Folder): Line 342  class documentViewer(Folder):
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access is not None and access == 'free':
             logger("documentViewer (accessOK)", logging.INFO, "access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:              if user is not None:
                 #print "user: ", user                  #print "user: ", user
                 return (user.getUserName() != "Anonymous User")                  return (user.getUserName() != "Anonymous User")
             else:              else:
                 return False                  return False
                   
         logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):      def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
         """gibt param von dlInfo aus"""          """gibt param von dlInfo aus"""
         num_retries = 3  
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                   
Line 306  class documentViewer(Folder): Line 371  class documentViewer(Folder):
                 
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))          logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
                   
         for cnt in range(num_retries):          txt = getHttpData(infoUrl)
             try:          if txt is None:
                 # dom = NonvalidatingReader.parseUri(imageUrl)  
                 txt=urllib.urlopen(infoUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))  
         else:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
                   
           dom = Parse(txt)
         sizes=dom.xpath("//dir/size")          sizes=dom.xpath("//dir/size")
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)          logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
                   
         if sizes:          if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))              docinfo['numPages'] = int(getTextFromNode(sizes[0]))
Line 334  class documentViewer(Folder): Line 393  class documentViewer(Folder):
                           
     def getIndexMeta(self, url):      def getIndexMeta(self, url):
         """returns dom of index.meta document at url"""          """returns dom of index.meta document at url"""
         num_retries = 3  
         dom = None          dom = None
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
Line 346  class documentViewer(Folder): Line 404  class documentViewer(Folder):
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
         logging.debug("METAURL: %s"%metaUrl)  
         for cnt in range(num_retries):  
             try:  
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                                   
         if dom is None:          logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
           txt=getHttpData(metaUrl)
           if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                                     
           dom = Parse(txt)
         return dom          return dom
           
     def getPresentationInfoXML(self, url):      def getPresentationInfoXML(self, url):
         """returns dom of info.xml document at url"""          """returns dom of info.xml document at url"""
         num_retries = 3  
         dom = None          dom = None
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
Line 375  class documentViewer(Folder): Line 425  class documentViewer(Folder):
             server=self.digilibBaseUrl+"/servlet/Texter?fn="              server=self.digilibBaseUrl+"/servlet/Texter?fn="
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
                   
         for cnt in range(num_retries):          txt=getHttpData(metaUrl)
             try:          if txt is None:
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                   
         if dom is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))              raise IOError("Unable to read infoXMLfrom %s"%(url))
                                     
           dom = Parse(txt)
         return dom          return dom
                                                   
                   
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""          """gets authorization info from the index.meta file at path or given by dom"""
         logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
                   
         access = None          access = None
                   
Line 469  class documentViewer(Folder): Line 511  class documentViewer(Folder):
           
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:          if docinfo is None:
            docinfo = {}             docinfo = {}
         if docinfo.get('lang', None) is None:          if docinfo.get('lang', None) is None:
Line 484  class documentViewer(Folder): Line 526  class documentViewer(Folder):
         if archiveNames and (len(archiveNames) > 0):          if archiveNames and (len(archiveNames) > 0):
             archiveName = getTextFromNode(archiveNames[0])              archiveName = getTextFromNode(archiveNames[0])
         else:          else:
             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
                   
         archivePaths = dom.xpath("//resource/archive-path")          archivePaths = dom.xpath("//resource/archive-path")
         if archivePaths and (len(archivePaths) > 0):          if archivePaths and (len(archivePaths) > 0):
Line 496  class documentViewer(Folder): Line 538  class documentViewer(Folder):
                 archivePath += "/" + archiveName                  archivePath += "/" + archiveName
         else:          else:
             # try to get archive-path from url              # try to get archive-path from url
             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):              if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')                  archivePath = url.replace('index.meta', '')
                                   
Line 530  class documentViewer(Folder): Line 572  class documentViewer(Folder):
             viewerUrl = getTextFromNode(viewerUrls[0])              viewerUrl = getTextFromNode(viewerUrls[0])
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                                         
           # old style text URL
         textUrls = dom.xpath("//texttool/text")          textUrls = dom.xpath("//texttool/text")
         if textUrls and (len(textUrls) > 0):          if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])              textUrl = getTextFromNode(textUrls[0])
Line 541  class documentViewer(Folder): Line 584  class documentViewer(Folder):
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
           # new style text-url-path
         textUrls = dom.xpath("//texttool/text-url-path")          textUrls = dom.xpath("//texttool/text-url-path")
         if textUrls and (len(textUrls) > 0):          if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])              textUrl = getTextFromNode(textUrls[0])
             docinfo['textURLPath'] = textUrl                 docinfo['textURLPath'] = textUrl   
               if not docinfo['imagePath']:
                   # text-only, no page images
                   docinfo = self.getNumPages(docinfo)
                     
         presentationUrls = dom.xpath("//texttool/presentation")          presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
Line 557  class documentViewer(Folder): Line 604  class documentViewer(Folder):
                 presentationUrl = url.replace('index.meta', presentationPath)                  presentationUrl = url.replace('index.meta', presentationPath)
             else:              else:
                 presentationUrl = url + "/" + presentationPath                  presentationUrl = url + "/" + presentationPath
             docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht                      
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
           
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
Line 585  class documentViewer(Folder): Line 632  class documentViewer(Folder):
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          """path ist the path to the images it assumes that the index.meta file is one level higher."""
         logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
         path=path.replace("/mpiwg/online","")          path=path.replace("/mpiwg/online","")
Line 595  class documentViewer(Folder): Line 642  class documentViewer(Folder):
         pathorig=path          pathorig=path
         for x in range(cut):                 for x in range(cut):       
                 path=getParentDir(path)                  path=getParentDir(path)
         logging.error("PATH:"+path)          logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
Line 607  class documentViewer(Folder): Line 654  class documentViewer(Folder):
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo                  return docinfo
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
Line 624  class documentViewer(Folder): Line 671  class documentViewer(Folder):
         elif mode=="filepath":          elif mode=="filepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
               def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None, characterNormalization=None):
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):  
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
Line 653  class documentViewer(Folder): Line 699  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
         pageinfo['query'] = self.REQUEST.get('query',' ')          pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')          pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
           pageinfo['textPN'] = self.REQUEST.get('textPN','1')
           pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '20')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
         #if 'tocSize_%s'%tocMode in docinfo:          toc = int (pageinfo['tocPN'])
             # cached toc          pageinfo['textPages'] =int (toc)
           # pageinfo['tocPN'] = min (int (docinfo['tocSize_%s'%tocMode])/int(pageinfo['tocPageSize']),int(pageinfo['tocPN']))  
                  
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')  
         pageinfo['sn'] =self.REQUEST.get('sn','1')  
   
         return pageinfo  
                                   
     def getSearch(self, pn=1, pageinfo=None,  docinfo=None, query=None, queryType=None):  
         """get search list"""  
         docpath = docinfo['textURLPath']   
         pagesize = pageinfo['queryPageSize']  
         pn = pageinfo['searchPN']  
         sn = pageinfo['sn']  
         query =pageinfo['query']  
         queryType =pageinfo['queryType']  
         viewMode=  pageinfo['viewMode']  
         tocMode = pageinfo['tocMode']  
         tocPN = pageinfo['tocPN']  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn) ,outputUnicode=False)             
         pagedom = Parse(pagexml)  
         pagedivs = pagedom.xpath("//div[@class='queryResultPage']")  
           
         selfurl = self.absolute_url()  
           
         page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s&query=%s&queryType=%s'%(selfurl, viewMode, tocMode, tocPN, query, queryType))  
         text =page.replace('mode=text','mode=texttool')  
         href = text.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)  
         lemma= href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)  
         #logging.debug("documentViewer (gettoc) lemma: %s"%(lemma))  
           
         return lemma  
                          
       
         #if len(pagedivs) > 0:  
         #    pagenode = pagedom[0]  
         #    return serializeNode(pagenode)  
         #else:  
         #    return "xaxa"  
   
     def getNumPages(self,docinfo=None):  
         """get list of pages from fulltext and put in docinfo"""  
         xquery = '//pb'  
         text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))  
         # TODO: better processing of the page list. do we need the info somewhere else also?  
         docinfo['numPages'] = text.count("<pb ")  
         return docinfo  
                 
     def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None,):          if 'tocSize_%s'%tocMode in docinfo:
         """returns single page from fulltext"""              tocSize = int(docinfo['tocSize_%s'%tocMode])
         docpath = docinfo['textURLPath']              tocPageSize = int(pageinfo['tocPageSize'])
         if mode == "text_dict":  
             textmode = "textPollux"  
         else:  
             textmode = mode  
               
         #selfurl = self.absolute_url()    
         #viewMode=  pageinfo['viewMode']  
         #tocMode = pageinfo['tocMode']  
         #tocPN = pageinfo['tocPN']  
     
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)  
         #######  
         #page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, viewMode, tocMode, tocPN))  
         #text =page.replace('mode=text','mode=texttool')  
         #######  
         # post-processing downloaded xml  
         pagedom = Parse(pagexml)  
         # plain text mode  
         if mode == "text":  
             # first div contains text  
             pagedivs = pagedom.xpath("/div")  
             #queryResultPage  
             if len(pagedivs) > 0:  
                 pagenode = pagedivs[0]  
                 return serializeNode(pagenode)  
   
         # text-with-links mode  
         if mode == "text_dict":  
             # first div contains text  
             pagedivs = pagedom.xpath("/div")  
             if len(pagedivs) > 0:  
                 pagenode = pagedivs[0]  
                 # check all a-tags  
                 links = pagenode.xpath("//a")  
                 for l in links:  
                     hrefNode = l.getAttributeNodeNS(None, u"href")  
                     if hrefNode:  
                         # is link with href  
                         href = hrefNode.nodeValue  
                         if href.startswith('lt/lex.xql'):  
                             # is pollux link  
                             selfurl = self.absolute_url()  
                             # change href  
                             hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)  
                             # add target  
                             l.setAttributeNS(None, 'target', '_blank')  
                               
                         if href.startswith('lt/lemma.xql'):      
                             selfurl = self.absolute_url()  
                             hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)  
                             l.setAttributeNS(None, 'target', '_blank')  
                 return serializeNode(pagenode)  
           
         return "no text here"  
   
     def getTranslate(self, query=None, language=None):  
         """translate into another languages"""  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","query=%s&language=%s"%(query,language),outputUnicode=False)  
         return pagexml  
       
     def getLemma(self, lemma=None, language=None):  
         """lemma"""  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","lemma=%s&language=%s"%(lemma,language),outputUnicode=False)  
         return pagexml  
   
     def getQuery (self,  docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):  
          """number of"""  
          docpath = docinfo['textURLPath']   
          pagesize = pageinfo['queryPageSize']  
          pn = pageinfo['searchPN']  
          query =pageinfo['query']  
          queryType =pageinfo['queryType']  
   
          tocSearch = 0  
          tocDiv = None  
          pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)  
            
          pagedom = Parse(pagexml)  
          numdivs = pagedom.xpath("//div[@class='queryResultHits']")  
          tocSearch = int(getTextFromNode(numdivs[0]))  
          tc=int((tocSearch/20)+1)  
          logging.debug("documentViewer (gettoc) tc: %s"%(tc))  
          return tc  
   
     def getToc(self, mode="text", docinfo=None):  
         """loads table of contents and stores in docinfo"""  
         logging.debug("documentViewer (gettoc) mode: %s"%(mode))  
         if 'tocSize_%s'%mode in docinfo:  
             # cached toc              # cached toc
             return docinfo               if tocSize%tocPageSize>0:
         docpath = docinfo['textURLPath']                  tocPages=tocSize/tocPageSize+1
         # we need to set a result set size              else:
         pagesize = 1000                  tocPages=tocSize/tocPageSize
         pn = 1              pageinfo['tocPN'] = min (tocPages,toc)                    
         if mode == "text":          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
             queryType = "toc"          pageinfo['sn'] =self.REQUEST.get('sn','')
         else:          return pageinfo
             queryType = mode  
         # number of entries in toc  
         tocSize = 0  
         tocDiv = None  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)  
         # post-processing downloaded xml  
         pagedom = Parse(pagexml)  
         # get number of entries  
         numdivs = pagedom.xpath("//div[@class='queryResultHits']")  
         if len(numdivs) > 0:  
             tocSize = int(getTextFromNode(numdivs[0]))  
             # div contains text  
             #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")  
             #if len(pagedivs) > 0:  
             #    tocDiv = pagedivs[0]  
   
         docinfo['tocSize_%s'%mode] = tocSize  
         #docinfo['tocDiv_%s'%mode] = tocDiv  
         return docinfo  
       
     def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):  
         """returns single page from the table of contents"""  
         # TODO: this should use the cached TOC  
         if mode == "text":  
             queryType = "toc"  
         else:  
             queryType = mode  
         docpath = docinfo['textURLPath']  
         pagesize = pageinfo['tocPageSize']  
         pn = pageinfo['tocPN']  
           
         selfurl = self.absolute_url()    
         viewMode=  pageinfo['viewMode']  
         tocMode = pageinfo['tocMode']  
         tocPN = pageinfo['tocPN']  
       
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False)  
         page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, viewMode, tocMode, tocPN))  
         text = page.replace('mode=image','mode=texttool')  
         return text  
         # post-processing downloaded xml  
         #pagedom = Parse(text)  
         # div contains text  
         #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")  
         #if len(pagedivs) > 0:  
         #    pagenode = pagedivs[0]  
         #    return serializeNode(pagenode)  
         #else:  
         #    return "No TOC!"  
   
           
     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
Line 872  class documentViewer(Folder): Line 738  class documentViewer(Folder):
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
Line 887  def manage_AddDocumentViewer(self,id,ima Line 751  def manage_AddDocumentViewer(self,id,ima
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"

Removed from v.1.47  
changed lines
  Added in v.1.90


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>