Diff for /documentViewer/documentViewer.py between versions 1.19 and 1.44

version 1.19, 2006/09/11 14:43:09 version 1.44, 2010/04/08 11:04:51
Line 1 Line 1
   
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
Line 11  from Ft.Xml.Domlette import Nonvalidatin Line 10  from Ft.Xml.Domlette import Nonvalidatin
 from Ft.Xml.Domlette import PrettyPrint, Print  from Ft.Xml.Domlette import PrettyPrint, Print
 from Ft.Xml import EMPTY_NAMESPACE, Parse  from Ft.Xml import EMPTY_NAMESPACE, Parse
   
 import Ft.Xml.XPath  
   
   import Ft.Xml.XPath
   import cStringIO
   import xmlrpclib
 import os.path  import os.path
 import sys  import sys
 import cgi  import cgi
 import urllib  import urllib
 import zLOG  import logging
   import math
   
 import urlparse   import urlparse 
   from types import *
   
   def logger(txt,method,txt2):
       """logging"""
       logging.info(txt+ txt2)
       
   
 def getInt(number, default=0):  def getInt(number, default=0):
     """returns always an int (0 in case of problems)"""      """returns always an int (0 in case of problems)"""
     try:      try:
         return int(number)          return int(number)
     except:      except:
         return default          return int(default)
   
 def getTextFromNode(nodename):  def getTextFromNode(nodename):
     """get the cdata content of a node"""      """get the cdata content of a node"""
Line 38  def getTextFromNode(nodename): Line 47  def getTextFromNode(nodename):
            rc = rc + node.data             rc = rc + node.data
     return rc      return rc
   
   def serializeNode(node, encoding='utf-8'):
       """returns a string containing node as XML"""
       buf = cStringIO.StringIO()
       Print(node, stream=buf, encoding=encoding)
       s = buf.getvalue()
       buf.close()
       return s
   
                   
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
Line 59  def urlopen(url,timeout=2): Line 76  def urlopen(url,timeout=2):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
     textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"      #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
           
     meta_type="Document viewer"      meta_type="Document viewer"
           
Line 70  class documentViewer(Folder): Line 87  class documentViewer(Folder):
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     image_main = PageTemplateFile('zpt/image_main', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
       info_xml = PageTemplateFile('zpt/info_xml', globals())
   
       thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')          security.declareProtected('View management screens','changeDocumentViewerForm')    
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
   
           
     def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.textViewerUrl=textViewerUrl  
           
         if not digilibBaseUrl:  
             self.digilibBaseUrl = self.findDigilibUrl()  
         else:  
             self.digilibBaseUrl = digilibBaseUrl  
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         # authgroups is list of authorized groups (delimited by ,)          # authgroups is list of authorized groups (delimited by ,)
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
         # add template folder so we can always use template.something          # create template folder so we can always use template.something
         self.manage_addFolder('template')  
   
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
           try:
               from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
               xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
               #templateFolder['fulltextclient'] = xmlRpcClient
               templateFolder._setObject('fulltextclient',xmlRpcClient)
           except Exception, e:
               logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))
           try:
               from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
   
     security.declareProtected('View','index_html')  
     def index_html(self,mode,url,viewMode="images",start=None,pn=1):      security.declareProtected('View','thumbs_rss')
       def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
         view it          view it
         @param mode: defines which type of document is behind url          @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is images          @param viewMode: if images display images, if text display text, default is images (text,images or auto)
                   
         '''          '''
                   logging.debug("HHHHHHHHHHHHHH:load the rss")
         zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 119  class documentViewer(Folder): Line 152  class documentViewer(Folder):
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
         pt = getattr(self.template, 'viewer_main')          pt = getattr(self.template, 'thumbs_main_rss')
           
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
               else:
                   viewMode="images"
                  
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
       security.declareProtected('View','index_html')
       def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
           '''
           view it
           @param mode: defines how to access the document behind url 
           @param url: url which contains display information
           @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
           @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures)
           '''
           
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
           
           if not hasattr(self, 'template'):
               # this won't work
               logging.error("template folder missing!")
               return "ERROR: template folder missing!"
               
           if not getattr(self, 'digilibBaseUrl', None):
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
               
           docinfo = self.getDocinfo(mode=mode,url=url)
           pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
           if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
               else:
                   viewMode="images"
                   
           pt = getattr(self.template, 'viewer_main')               
           return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
     
       def generateMarks(self,mk):
           ret=""
           if mk is None:
               return ""
           if type(mk) is not ListType:
               mk=[mk]
           for m in mk:
               ret+="mk=%s"%m
           return ret
   
   
       def findDigilibUrl(self):
           """try to get the digilib URL from zogilib"""
           url = self.template.zogilib.getDLBaseUrl()
           return url
       
       def getStyle(self, idx, selected, style=""):
           """returns a string with the given style and append 'sel' if path == selected."""
           #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
           if idx == selected:
               return style + 'sel'
           else:
               return style
       
     def getLink(self,param=None,val=None):      def getLink(self,param=None,val=None):
         """link to documentviewer with parameter param set to val"""          """link to documentviewer with parameter param set to val"""
Line 133  class documentViewer(Folder): Line 231  class documentViewer(Folder):
             else:              else:
                 params[param] = str(val)                  params[param] = str(val)
                                   
           if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                   params["mode"] = "imagepath"
                   params["url"] = getParentDir(params["url"])
                   
         # quote values and assemble into query string          # quote values and assemble into query string
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
   
           def getLinkAmp(self,param=None,val=None):
     def getStyle(self, idx, selected, style=""):          """link to documentviewer with parameter param set to val"""
         """returns a string with the given style and append 'sel' if path == selected."""          params=self.REQUEST.form.copy()
         #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          if param is not None:
         if idx == selected:              if val is None:
             return style + 'sel'                  if params.has_key(param):
                       del params[param]
         else:          else:
             return style                  params[param] = str(val)
                   
           # quote values and assemble into query string
           logging.info("XYXXXXX: %s"%repr(params.items()))
           ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
           url=self.REQUEST['URL1']+"?"+ps
           return url
       
       def getInfo_xml(self,url,mode):
           """returns info about the document as XML"""
   
           if not self.digilibBaseUrl:
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
           
           docinfo = self.getDocinfo(mode=mode,url=url)
           pt = getattr(self.template, 'info_xml')
           return pt(docinfo=docinfo)
                   
                   
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s"%access)          logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
         if access is not None and access == 'free':          if access is not None and access == 'free':
             zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")              logger("documentViewer (accessOK)", logging.INFO, "access is free")
             return True              return True
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
Line 164  class documentViewer(Folder): Line 283  class documentViewer(Folder):
             else:              else:
                 return False                  return False
                   
         zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)          logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None):      def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
         """gibt param von dlInfo aus"""          """gibt param von dlInfo aus"""
         num_retries = 3          num_retries = 3
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                           
           for x in range(cut):
                  
                   path=getParentDir(path)
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
         zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(infoUrl))          logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))
                   
         for cnt in range(num_retries):          for cnt in range(num_retries):
             try:              try:
Line 185  class documentViewer(Folder): Line 308  class documentViewer(Folder):
                 dom = Parse(txt)                  dom = Parse(txt)
                 break                  break
             except:              except:
                 zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))                  logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
         else:          else:
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
                   
         sizes=dom.xpath("//dir/size")          sizes=dom.xpath("//dir/size")
         zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size"%sizes)          logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)
                   
         if sizes:          if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))              docinfo['numPages'] = int(getTextFromNode(sizes[0]))
         else:          else:
             docinfo['numPages'] = 0              docinfo['numPages'] = 0
                                                   
           # TODO: produce and keep list of image names and numbers
                           
         return docinfo          return docinfo
           
                           
Line 214  class documentViewer(Folder): Line 339  class documentViewer(Folder):
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
         print metaUrl          logging.debug("METAURL: %s"%metaUrl)
         for cnt in range(num_retries):          for cnt in range(num_retries):
             try:              try:
                 # patch dirk encoding fehler treten dann nicht mehr auf                  # patch dirk encoding fehler treten dann nicht mehr auf
Line 223  class documentViewer(Folder): Line 348  class documentViewer(Folder):
                 dom = Parse(txt)                  dom = Parse(txt)
                 break                  break
             except:              except:
                 zLOG.LOG("ERROR documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])                  logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
                                   
         if dom is None:          if dom is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                                     
         return dom          return dom
                                                   
       def getPresentationInfoXML(self, url):
           """returns dom of info.xml document at url"""
           num_retries = 3
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url.replace("/mpiwg/online","")
           
           for cnt in range(num_retries):
               try:
                   # patch dirk encoding fehler treten dann nicht mehr auf
                   # dom = NonvalidatingReader.parseUri(metaUrl)
                   txt=urllib.urlopen(metaUrl).read()
                   dom = Parse(txt)
                   break
               except:
                   logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
                   
           if dom is None:
               raise IOError("Unable to read infoXMLfrom %s"%(url))
                    
           return dom
                           
                   
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):      def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""          """gets authorization info from the index.meta file at path or given by dom"""
         zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))          logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))
                   
         access = None          access = None
                   
Line 241  class documentViewer(Folder): Line 394  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                           
         if dom is None:          if dom is None:
             dom = self.getIndexMeta(getParentDir(path))              for x in range(cut):
                   path=getParentDir(path)
               dom = self.getIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acctype = dom.xpath("//access-conditions/access/@type")
         if acctype and (len(acctype)>0):          if acctype and (len(acctype)>0):
Line 253  class documentViewer(Folder): Line 408  class documentViewer(Folder):
         return docinfo          return docinfo
           
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):      def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets bibliographical info from the index.meta file at path or given by dom"""          """gets bibliographical info from the index.meta file at path or given by dom"""
         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
                   
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                           
         if dom is None:          if dom is None:
             dom = self.getIndexMeta(getParentDir(path))              for x in range(cut):
                   path=getParentDir(path)
               dom = self.getIndexMeta(path)
           
           logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
           # put in all raw bib fields as dict "bib"
           bib = dom.xpath("//bib/*")
           if bib and len(bib)>0:
               bibinfo = {}
               for e in bib:
                   bibinfo[e.localName] = getTextFromNode(e)
               docinfo['bib'] = bibinfo
                           
           # extract some fields (author, title, year) according to their mapping
         metaData=self.metadata.main.meta.bib          metaData=self.metadata.main.meta.bib
         bibtype=dom.xpath("//bib/@type")          bibtype=dom.xpath("//bib/@type")
         if bibtype and (len(bibtype)>0):          if bibtype and (len(bibtype)>0):
             bibtype=bibtype[0].value              bibtype=bibtype[0].value
         else:          else:
             bibtype="generic"              bibtype="generic"
               
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
           docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          bibmap=metaData.generateMappingForType(bibtype)
         #print "bibmap: ", bibmap, " for: ", bibtype  
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:          if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
               try:
             docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])              docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
               except: pass
               try:
             docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])              docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
               except: pass
               try:
             docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])              docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
               except: pass
               logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
               try:
                   docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
               except:
                   docinfo['lang']=''
                   
         return docinfo          return docinfo
   
                   
     def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):      def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """parse texttool tag in index meta"""         """parse texttool tag in index meta"""
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))          logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
        if docinfo is None:         if docinfo is None:
            docinfo = {}             docinfo = {}
                      if docinfo.get('lang', None) is None:
               docinfo['lang'] = '' # default keine Sprache gesetzt
        if dom is None:         if dom is None:
            dom = self.getIndexMeta(url)             dom = self.getIndexMeta(url)
                 
Line 297  class documentViewer(Folder): Line 477  class documentViewer(Folder):
        if archiveNames and (len(archiveNames)>0):         if archiveNames and (len(archiveNames)>0):
            archiveName=getTextFromNode(archiveNames[0])             archiveName=getTextFromNode(archiveNames[0])
        else:         else:
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/name missing in: %s"%(url))              logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))
                 
        archivePaths=dom.xpath("//resource/archive-path")         archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths and (len(archivePaths)>0):         if archivePaths and (len(archivePaths)>0):
Line 309  class documentViewer(Folder): Line 489  class documentViewer(Folder):
                archivePath += "/" + archiveName                 archivePath += "/" + archiveName
        else:         else:
            # try to get archive-path from url             # try to get archive-path from url
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/archive-path missing in: %s"%(url))              logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):             if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')                 archivePath = url.replace('index.meta', '')
                                 
Line 320  class documentViewer(Folder): Line 500  class documentViewer(Folder):
        imageDirs=dom.xpath("//texttool/image")         imageDirs=dom.xpath("//texttool/image")
        if imageDirs and (len(imageDirs)>0):         if imageDirs and (len(imageDirs)>0):
            imageDir=getTextFromNode(imageDirs[0])             imageDir=getTextFromNode(imageDirs[0])
               
        else:         else:
            # we balk with no image tag              # we balk with no image tag / not necessary anymore because textmode is now standard
            raise IOError("No text-tool info in %s"%(url))              #raise IOError("No text-tool info in %s"%(url))
               imageDir = ""
               #xquery="//pb"  
               docinfo['imagePath'] = "" # keine Bilder
               docinfo['imageURL'] = ""
                         
        if imageDir and archivePath:         if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath             #print "image: ", imageDir, " archivepath: ", archivePath
Line 330  class documentViewer(Folder): Line 515  class documentViewer(Folder):
            imageDir=imageDir.replace("/mpiwg/online",'')             imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)             docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir             docinfo['imagePath'] = imageDir
               
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir             docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir
                         
        viewerUrls=dom.xpath("//texttool/digiliburlprefix")         viewerUrls=dom.xpath("//texttool/digiliburlprefix")
Line 342  class documentViewer(Folder): Line 528  class documentViewer(Folder):
            textUrl=getTextFromNode(textUrls[0])             textUrl=getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0]=="": #keine url             if urlparse.urlparse(textUrl)[0]=="": #keine url
                textUrl=os.path.join(archivePath,textUrl)                  textUrl=os.path.join(archivePath,textUrl) 
               # fix URLs starting with /mpiwg/online
               if textUrl.startswith("/mpiwg/online"):
                   textUrl = textUrl.replace("/mpiwg/online", '', 1)
   
            docinfo['textURL'] = textUrl             docinfo['textURL'] = textUrl
                                             
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)          textUrls = dom.xpath("//texttool/text-url-path")
        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)          if textUrls and (len(textUrls) > 0):
               textUrl = getTextFromNode(textUrls[0])
               docinfo['textURLPath'] = textUrl   
            
           presentationUrls = dom.xpath("//texttool/presentation")
           docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           
           if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
                # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
                # durch den relativen Pfad auf die presentation infos
               presentationPath = getTextFromNode(presentationUrls[0])
               if url.endswith("index.meta"): 
                   presentationUrl = url.replace('index.meta', presentationPath)
               else:
                   presentationUrl = url + "/" + presentationPath
               docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht    
               docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
       
           docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
           
        return docinfo         return docinfo
         
   
     def getDocinfoFromImagePath(self,path,docinfo=None):      def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
           """gets the bibliographical information from the preseantion entry in texttools
           """
           dom=self.getPresentationInfoXML(url)
           try:
               docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
           except:
               pass
           try:
               docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
           except:
               pass
           try:
               docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
           except:
               pass
           return docinfo
       
       def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          """path ist the path to the images it assumes that the index.meta file is one level higher."""
         zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))          logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
         path=path.replace("/mpiwg/online","")          path=path.replace("/mpiwg/online","")
         docinfo['imagePath'] = path          docinfo['imagePath'] = path
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)          docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
           
           pathorig=path
           for x in range(cut):       
                   path=getParentDir(path)
           logging.error("PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         return docinfo          return docinfo
           
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))          logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)                  logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)
                 return docinfo                  return docinfo
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
Line 382  class documentViewer(Folder): Line 614  class documentViewer(Folder):
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
           elif mode=="filepath":
               docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")              logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
             raise ValueError("Unknown mode %s"%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)          logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
                   
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
Line 402  class documentViewer(Folder): Line 636  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
         start = getInt(start, default=(int(current / grpsize) * grpsize +1))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
           # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
         pageinfo['end'] = start + grpsize          pageinfo['end'] = start + grpsize
         if docinfo is not None:          if (docinfo is not None) and ('numPages' in docinfo):
             np = int(docinfo['numPages'])              np = int(docinfo['numPages'])
             pageinfo['end'] = min(pageinfo['end'], np)              pageinfo['end'] = min(pageinfo['end'], np)
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
                                   
         return pageinfo          pageinfo['viewMode'] = viewMode
                           pageinfo['tocMode'] = tocMode
     def text(self,mode,url,pn):          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10')
         """give text"""          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
         if mode=="texttool": #index.meta with texttool information  
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)  
           
         #print textpath  
         try:  
             dom = NonvalidatingReader.parseUri(textpath)  
         except:  
             return None  
       
         list=[]  
         nodes=dom.xpath("//pb")  
   
         node=nodes[int(pn)-1]  
           
         p=node  
           
         while p.tagName!="p":  
             p=p.parentNode  
           
           
         endNode=nodes[int(pn)]  
                   
           return pageinfo
                   
         e=endNode  
                   
         while e.tagName!="p":  
             e=e.parentNode  
                   
       def getNumPages(self,docinfo=None):
           """get list of pages from fulltext and put in docinfo"""
           xquery = '//pb'
           text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
           # TODO: better processing of the page list. do we need the info somewhere else also?
           docinfo['numPages'] = text.count("<pb ")
           return docinfo
                   
         next=node.parentNode      def getTextPage(self, mode="text", pn=1, docinfo=None):
           """returns single page from fulltext"""
           docpath = docinfo['textURLPath']
           if mode == "text_dict":
               textmode = "textPollux"
           else:
               textmode = mode
               
           pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)
           # post-processing downloaded xml
           pagedom = Parse(pagexml)
           # plain text mode
           if mode == "text":
               # first div contains text
               pagedivs = pagedom.xpath("/div")
               if len(pagedivs) > 0:
                   pagenode = pagedivs[0]
                   return serializeNode(pagenode)
   
           # text-with-links mode
           if mode == "text_dict":
               # first div contains text
               pagedivs = pagedom.xpath("/div")
               if len(pagedivs) > 0:
                   pagenode = pagedivs[0]
                   # check all a-tags
                   links = pagenode.xpath("//a")
                   for l in links:
                       hrefNode = l.getAttributeNodeNS(None, u"href")
                       if hrefNode:
                           # is link with href
                           href = hrefNode.nodeValue
                           if href.startswith('lt/lex.xql'):
                               # is pollux link
                               selfurl = self.absolute_url()
                               # change href
                               hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
                               # add target
                               l.setAttributeNS(None, 'target', '_blank')
                   return serializeNode(pagenode)
           
           return "no text here"
   
       def getToc(self, mode="text", docinfo=None):
           """loads table of contents and stores in docinfo"""
           logging.debug("documentViewer (gettoc) mode: %s"%(mode))
           if 'tocSize_%s'%mode in docinfo:
               # cached toc
               return docinfo
                   
         #sammle s          docpath = docinfo['textURLPath']
         while next and (next!=endNode.parentNode):          # we need to set a result set size
             list.append(next)              pagesize = 1000
             next=next.nextSibling              pn = 1
         list.append(endNode.parentNode)          if mode == "text":
               queryType = "toc"
           else:
               queryType = mode
           # number of entries in toc
           tocSize = 0
           tocDiv = None
           pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
           # post-processing downloaded xml
           pagedom = Parse(pagexml)
           # get number of entries
           numdivs = pagedom.xpath("//div[@class='queryResultHits']")
           if len(numdivs) > 0:
               tocSize = int(getTextFromNode(numdivs[0]))
               # div contains text
               #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
               #if len(pagedivs) > 0:
               #    tocDiv = pagedivs[0]
                   
         if p==e:# beide im selben paragraphen          docinfo['tocSize_%s'%mode] = tocSize
             pass          #docinfo['tocDiv_%s'%mode] = tocDiv
 #    else:          return docinfo
 #            next=p  
 #            while next!=e:  
 #                print next,e  
 #                list.append(next)  
 #                next=next.nextSibling  
 #              
 #        for x in list:  
 #            PrettyPrint(x)  
 #  
 #        return list  
 #  
   
     def findDigilibUrl(self):      def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None):
         """try to get the digilib URL from zogilib"""          """returns single page from the table of contents"""
         url = self.imageViewerUrl[:-1] + "/getScalerUrl"          # TODO: this should use the cached TOC
         print urlparse.urlparse(url)[0]          if mode == "text":
         print urlparse.urljoin(self.absolute_url(),url)              queryType = "toc"
         try:          else:
             if urlparse.urlparse(url)[0]=='': #relative path              queryType = mode
                 url=urlparse.urljoin(self.absolute_url()+"/",url)          docpath = docinfo['textURLPath']
           pagesize = pageinfo['tocPageSize']
           pn = pageinfo['tocPN']
           pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
           # post-processing downloaded xml
           pagedom = Parse(pagexml)
           # div contains text
           pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
           if len(pagedivs) > 0:
               pagenode = pagedivs[0]
               return serializeNode(pagenode)
           else:
               return "No TOC!"
                                   
             scaler = urlopen(url).read()  
             return scaler.replace("/servlet/Scaler?", "")  
         except:  
             return None  
           
     def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.textViewerUrl=textViewerUrl  
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
Line 496  class documentViewer(Folder): Line 775  class documentViewer(Folder):
           
           
                   
           
 #    security.declareProtected('View management screens','renameImageForm')  
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
Line 532  def manage_addDocumentViewerTemplate(sel Line 808  def manage_addDocumentViewerTemplate(sel
   
     self._setObject(id, DocumentViewerTemplate(id))      self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)      ob = getattr(self, id)
     ob.pt_edit(open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt')).read(),None)      txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
       logging.info("txt %s:"%txt)
       ob.pt_edit(txt,"text/html")
     if title:      if title:
         ob.pt_setTitle(title)          ob.pt_setTitle(title)
     try:      try:

Removed from v.1.19  
changed lines
  Added in v.1.44


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>