Diff for /documentViewer/documentViewer.py between versions 1.37 and 1.125

version 1.37, 2008/11/04 22:03:56 version 1.125, 2010/10/18 09:32:22
Line 1 Line 1
   
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   from Products.zogiLib.zogiLib import browserCheck
   
 from Ft.Xml.Domlette import NonvalidatingReader  
 from Ft.Xml.Domlette import PrettyPrint, Print  
 from Ft.Xml import EMPTY_NAMESPACE, Parse  from Ft.Xml import EMPTY_NAMESPACE, Parse
   import Ft.Xml.Domlette
 import Ft.Xml.XPath  
   
 import os.path  import os.path
 import sys  import sys
 import cgi  
 import urllib  import urllib
   import urllib2
 import logging  import logging
 import math  import math
   
 import urlparse   import urlparse 
 from types import *  import cStringIO
   import re
   
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
Line 45  def getTextFromNode(nodename): Line 43  def getTextFromNode(nodename):
            rc = rc + node.data             rc = rc + node.data
     return rc      return rc
   
   def serializeNode(node, encoding='utf-8'):
       """returns a string containing node as XML"""
       buf = cStringIO.StringIO()
       Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
       s = buf.getvalue()
       buf.close()
       return s
   
   def getBrowserType(self):
           """get browser type object"""
           if self.REQUEST.SESSION.has_key('browserType'):
               return self.REQUEST.SESSION['browserType']
           else:
               bt = browserCheck(self)
               self.REQUEST.SESSION.set('browserType', bt)    
               logging.debug("documentViewer (BROWSER TYPE) bt %s"%bt)                    
               return bt
   
                   
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   
   def getHttpData(url, data=None, num_tries=3, timeout=10):
       """returns result from url+data HTTP request"""
       # we do GET (by appending data to url)
       if isinstance(data, str) or isinstance(data, unicode):
           # if data is string then append
           url = "%s?%s"%(url,data)
       elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
           # urlencode
           url = "%s?%s"%(url,urllib.urlencode(data))
       
       response = None
       errmsg = None
       for cnt in range(num_tries):
           try:
               logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
               if sys.version_info < (2, 6):
                   # set timeout on socket -- ugly :-(
 import socket  import socket
                   socket.setdefaulttimeout(float(timeout))
                   response = urllib2.urlopen(url)
               else:
                   response = urllib2.urlopen(url,timeout=float(timeout))
               # check result?
               break
           except urllib2.HTTPError, e:
               logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
               errmsg = str(e)
               # stop trying
               break
           except urllib2.URLError, e:
               logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
               errmsg = str(e)
               # stop trying
               #break
   
       if response is not None:
           data = response.read()
           response.close()
           return data
       
       raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
       #return None
   
 def urlopen(url,timeout=2):  
         """urlopen mit timeout"""  
         socket.setdefaulttimeout(timeout)  
         ret=urllib.urlopen(url)  
         socket.setdefaulttimeout(5)  
         return ret  
   
   
 ##  ##
Line 66  def urlopen(url,timeout=2): Line 117  def urlopen(url,timeout=2):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
     #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"  
       
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 77  class documentViewer(Folder): Line 126  class documentViewer(Folder):
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     image_main = PageTemplateFile('zpt/image_main', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
     info_xml = PageTemplateFile('zpt/info_xml', globals())      info_xml = PageTemplateFile('zpt/info_xml', globals())
   
       
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')          security.declareProtected('View management screens','changeDocumentViewerForm')    
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
   
           
     def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.textViewerUrl=textViewerUrl  
           
         if not digilibBaseUrl:  
             self.digilibBaseUrl = self.findDigilibUrl()  
         else:  
             self.digilibBaseUrl = digilibBaseUrl  
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         # authgroups is list of authorized groups (delimited by ,)          # authgroups is list of authorized groups (delimited by ,)
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
         # add template folder so we can always use template.something          # create template folder so we can always use template.something
         self.manage_addFolder('template')          
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
           try:
               import MpdlXmlTextServer
               textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
               #templateFolder['fulltextclient'] = xmlRpcClient
               templateFolder._setObject('fulltextclient',textServer)
           except Exception, e:
               logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
           try:
               from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
           
           
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getQuery(**args)
   
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
       
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
       
       def getOrigPages(self, **args):
           """get original page number """
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getNumPages(self, docinfo):
           """get numpages"""
           return self.template.fulltextclient.getNumPages(docinfo)
      
       def getNumTextPages(self, docinfo):
           """get numpages text"""
           return self.template.fulltextclient.getNumTextPages(docinfo)
      
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
   
     security.declareProtected('View','thumbs_rss')      security.declareProtected('View','thumbs_rss')
Line 116  class documentViewer(Folder): Line 232  class documentViewer(Folder):
         @param viewMode: if images display images, if text display text, default is images (text,images or auto)          @param viewMode: if images display images, if text display text, default is images (text,images or auto)
                   
         '''          '''
         logging.info("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
Line 131  class documentViewer(Folder): Line 247  class documentViewer(Folder):
         pt = getattr(self.template, 'thumbs_main_rss')          pt = getattr(self.template, 'thumbs_main_rss')
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 139  class documentViewer(Folder): Line 255  class documentViewer(Folder):
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,mode,url,viewMode="auto",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
         '''          '''
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is images (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
                   @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           @param characterNormalization type of text display (reg, norm, none)
           @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''          '''
                   
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # this won't work
             self.manage_addFolder('template')              logging.error("template folder missing!")
               return "ERROR: template folder missing!"
                           
         if not self.digilibBaseUrl:          if not getattr(self, 'digilibBaseUrl', None):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          
         pt = getattr(self.template, 'viewer_main')          if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                 
           pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
           
           pt = getattr(self.template, 'viewer_main')               
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
         ret=""          ret=""
     if mk is None:      if mk is None:
         return ""          return ""
               if not isinstance(mk, list):
     if type(mk) is not ListType:  
         mk=[mk]          mk=[mk]
         for m in mk:          for m in mk:
             ret+="mk=%s"%m              ret+="mk=%s"%m
         return ret          return ret
           
   
       def findDigilibUrl(self):
           """try to get the digilib URL from zogilib"""
           url = self.template.zogilib.getDLBaseUrl()
           return url
   
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
       
       def getStyle(self, idx, selected, style=""):
           """returns a string with the given style and append 'sel' if path == selected."""
           #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
           if idx == selected:
               return style + 'sel'
           else:
               return style
       
     def getLink(self,param=None,val=None):      def getLink(self,param=None,val=None):
         """link to documentviewer with parameter param set to val"""          """link to documentviewer with parameter param set to val"""
         params=self.REQUEST.form.copy()          params=self.REQUEST.form.copy()
Line 190  class documentViewer(Folder): Line 331  class documentViewer(Folder):
             else:              else:
                 params[param] = str(val)                  params[param] = str(val)
                                   
           if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                   params["mode"] = "imagepath"
                   params["url"] = getParentDir(params["url"])
                   
         # quote values and assemble into query string          # quote values and assemble into query string
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
           ps = urllib.urlencode(params)
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
   
Line 206  class documentViewer(Folder): Line 352  class documentViewer(Folder):
                 params[param] = str(val)                  params[param] = str(val)
                                   
         # quote values and assemble into query string          # quote values and assemble into query string
         logging.info("XYXXXXX: %s"%repr(params.items()))          logging.debug("XYXXXXX: %s"%repr(params.items()))
         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
       
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
Line 221  class documentViewer(Folder): Line 368  class documentViewer(Folder):
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
           
     def getStyle(self, idx, selected, style=""):  
         """returns a string with the given style and append 'sel' if path == selected."""  
         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))  
         if idx == selected:  
             return style + 'sel'  
         else:  
             return style  
   
           
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access is not None and access == 'free':
             logger("documentViewer (accessOK)", logging.INFO, "access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:              if user is not None:
                 #print "user: ", user                  #print "user: ", user
                 return (user.getUserName() != "Anonymous User")                  return (user.getUserName() != "Anonymous User")
             else:              else:
                 return False                  return False
                   
         logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):      def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
         """gibt param von dlInfo aus"""          """gibt param von dlInfo aus"""
         num_retries = 3  
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                   
         for x in range(cut):          for x in range(cut):
                  
                 path=getParentDir(path)                  path=getParentDir(path)
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))          logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
                   
         for cnt in range(num_retries):          txt = getHttpData(infoUrl)
             try:          if txt is None:
                 # dom = NonvalidatingReader.parseUri(imageUrl)  
                 txt=urllib.urlopen(infoUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))  
         else:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
                   
           dom = Parse(txt)
         sizes=dom.xpath("//dir/size")          sizes=dom.xpath("//dir/size")
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)          logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
                   
         if sizes:          if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))              docinfo['numPages'] = int(getTextFromNode(sizes[0]))
         else:          else:
             docinfo['numPages'] = 0              docinfo['numPages'] = 0
                                                   
           # TODO: produce and keep list of image names and numbers
                           
         return docinfo          return docinfo
           
       def getIndexMetaPath(self,url):
           """gib nur den Pfad zurueck"""
           regexp = re.compile(r".*(experimental|permanent)/(.*)")
           regpath = regexp.match(url)
           if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
        
       
       
       def getIndexMetaUrl(self,url):
           """returns utr  of index.meta document at url"""
                           
     def getIndexMeta(self, url):  
         """returns dom of index.meta document at url"""  
         num_retries = 3  
         dom = None  
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
             # real URL              # real URL
Line 298  class documentViewer(Folder): Line 443  class documentViewer(Folder):
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
         logging.debug("METAURL: %s"%metaUrl)  
         for cnt in range(num_retries):  
             try:  
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                                   
         if dom is None:          return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
           txt=getHttpData(metaUrl)
           if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                                     
           dom = Parse(txt)
         return dom          return dom
           
     def getPresentationInfoXML(self, url):      def getPresentationInfoXML(self, url):
         """returns dom of info.xml document at url"""          """returns dom of info.xml document at url"""
         num_retries = 3  
         dom = None          dom = None
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
Line 327  class documentViewer(Folder): Line 471  class documentViewer(Folder):
             server=self.digilibBaseUrl+"/servlet/Texter?fn="              server=self.digilibBaseUrl+"/servlet/Texter?fn="
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
                         
                   txt=getHttpData(metaUrl)
         for cnt in range(num_retries):          if txt is None:
             try:  
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                   
         if dom is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))              raise IOError("Unable to read infoXMLfrom %s"%(url))
                                     
           dom = Parse(txt)
         return dom          return dom
                                                   
                   
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""          """gets authorization info from the index.meta file at path or given by dom"""
         logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
                   
         access = None          access = None
                   
Line 354  class documentViewer(Folder): Line 489  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                           
         if dom is None:          if dom is None:
             for x in range(cut+1):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acctype = dom.xpath("//access-conditions/access/@type")
         if acctype and (len(acctype)>0):          if acctype and (len(acctype)>0):
Line 376  class documentViewer(Folder): Line 511  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                           
         if dom is None:          if dom is None:
             for x in range(cut+1):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                           
           docinfo['indexMetaPath']=self.getIndexMetaPath(path);
           
           logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          # put in all raw bib fields as dict "bib"
         bib = dom.xpath("//bib/*")          bib = dom.xpath("//bib/*")
         if bib and len(bib)>0:          if bib and len(bib)>0:
Line 399  class documentViewer(Folder): Line 537  class documentViewer(Folder):
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          bibmap=metaData.generateMappingForType(bibtype)
           logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
           logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:          if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
             try:              try:
Line 419  class documentViewer(Folder): Line 559  class documentViewer(Folder):
         return docinfo          return docinfo
   
                   
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
       
     def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):      def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """parse texttool tag in index meta"""         """parse texttool tag in index meta"""
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:         if docinfo is None:
            docinfo = {}             docinfo = {}
              
        if docinfo.get('lang',None) is None:         if docinfo.get('lang',None) is None:
            docinfo['lang']='' # default keine Sprache gesetzt             docinfo['lang']='' # default keine Sprache gesetzt
        if dom is None:         if dom is None:
            dom = self.getIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                 
        archivePath = None         archivePath = None
        archiveName = None         archiveName = None
Line 437  class documentViewer(Folder): Line 590  class documentViewer(Folder):
        if archiveNames and (len(archiveNames)>0):         if archiveNames and (len(archiveNames)>0):
            archiveName=getTextFromNode(archiveNames[0])             archiveName=getTextFromNode(archiveNames[0])
        else:         else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
                 
        archivePaths=dom.xpath("//resource/archive-path")         archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths and (len(archivePaths)>0):         if archivePaths and (len(archivePaths)>0):
Line 449  class documentViewer(Folder): Line 602  class documentViewer(Folder):
                archivePath += "/" + archiveName                 archivePath += "/" + archiveName
        else:         else:
            # try to get archive-path from url             # try to get archive-path from url
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):             if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')                 archivePath = url.replace('index.meta', '')
                                 
Line 460  class documentViewer(Folder): Line 613  class documentViewer(Folder):
        imageDirs=dom.xpath("//texttool/image")         imageDirs=dom.xpath("//texttool/image")
        if imageDirs and (len(imageDirs)>0):         if imageDirs and (len(imageDirs)>0):
            imageDir=getTextFromNode(imageDirs[0])             imageDir=getTextFromNode(imageDirs[0])
               
        else:         else:
            # we balk with no image tag / not necessary anymore because textmode is now standard             # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))             #raise IOError("No text-tool info in %s"%(url))
            imageDir=""             imageDir=""
            docinfo['numPages']=1 # im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht              #xquery="//pb"  
          
            docinfo['imagePath'] = "" # keine Bilder             docinfo['imagePath'] = "" # keine Bilder
            docinfo['imageURL'] = ""             docinfo['imageURL'] = ""
   
Line 475  class documentViewer(Folder): Line 628  class documentViewer(Folder):
            imageDir=imageDir.replace("/mpiwg/online",'')             imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)             docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir             docinfo['imagePath'] = imageDir
               
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir             docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir
                         
        viewerUrls=dom.xpath("//texttool/digiliburlprefix")         viewerUrls=dom.xpath("//texttool/digiliburlprefix")
Line 482  class documentViewer(Folder): Line 636  class documentViewer(Folder):
            viewerUrl=getTextFromNode(viewerUrls[0])             viewerUrl=getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl             docinfo['viewerURL'] = viewerUrl
                                       
           # old style text URL
        textUrls=dom.xpath("//texttool/text")         textUrls=dom.xpath("//texttool/text")
        if textUrls and (len(textUrls)>0):         if textUrls and (len(textUrls)>0):
            textUrl=getTextFromNode(textUrls[0])             textUrl=getTextFromNode(textUrls[0])
Line 493  class documentViewer(Folder): Line 648  class documentViewer(Folder):
                         
            docinfo['textURL'] = textUrl             docinfo['textURL'] = textUrl
         
           # new style text-url-path
           textUrls = dom.xpath("//texttool/text-url-path")
           if textUrls and (len(textUrls) > 0):
               textUrl = getTextFromNode(textUrls[0])
               docinfo['textURLPath'] = textUrl
               if not docinfo['imagePath']:
                   # text-only, no page images
                   docinfo = self.getNumTextPages(docinfo)
            
        presentationUrls=dom.xpath("//texttool/presentation")         presentationUrls=dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get info von bib tag         docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get info von bib tag
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
                 
        if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen          if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen 
             # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
             # durch den relativen Pfad auf die presentation infos              # durch den relativen Pfad auf die presentation infos
            presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))              presentationPath = getTextFromNode(presentationUrls[0])
               if url.endswith("index.meta"): 
                   presentationUrl = url.replace('index.meta', presentationPath)
               else:
                   presentationUrl = url + "/" + presentationPath
                   
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)
   
        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get access info         docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get access info
           
        return docinfo         return docinfo
         
         
Line 526  class documentViewer(Folder): Line 697  class documentViewer(Folder):
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          """path ist the path to the images it assumes that the index.meta file is one level higher."""
         logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
         path=path.replace("/mpiwg/online","")          path=path.replace("/mpiwg/online","")
         docinfo['imagePath'] = path          docinfo['imagePath'] = path
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)          docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
           
           pathorig=path
           for x in range(cut):       
                   path=getParentDir(path)
           logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=cut)          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=cut)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         return docinfo          return docinfo
           
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo                  return docinfo
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
Line 559  class documentViewer(Folder): Line 736  class documentViewer(Folder):
         elif mode=="filepath":          elif mode=="filepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s"%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
               def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):  
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
           #pageinfo ['originalPage'] = originalPage
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
Line 582  class documentViewer(Folder): Line 759  class documentViewer(Folder):
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
         pageinfo['end'] = start + grpsize          pageinfo['end'] = start + grpsize
         if docinfo is not None:          if (docinfo is not None) and ('numPages' in docinfo):
             np = int(docinfo['numPages'])              np = int(docinfo['numPages'])
             pageinfo['end'] = min(pageinfo['end'], np)              pageinfo['end'] = min(pageinfo['end'], np)
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
         logging.debug("getPageInfo: %s"%repr(pageinfo))          pageinfo['viewMode'] = viewMode
         return pageinfo          pageinfo['tocMode'] = tocMode
                           #pageinfo['characterNormalization'] =characterNormalization
     def text(self,mode,url,pn):          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
         """give text"""          pageinfo['query'] = self.REQUEST.get('query',' ')
         if mode=="texttool": #index.meta with texttool information          pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
                   pageinfo['textPN'] = self.REQUEST.get('textPN','1')
         #print textpath          pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
         try:          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
             dom = NonvalidatingReader.parseUri(textpath)          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         except:          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
             return None          toc = int (pageinfo['tocPN'])
               pageinfo['textPages'] =int (toc)
         list=[]  
         nodes=dom.xpath("//pb")  
   
         node=nodes[int(pn)-1]  
           
         p=node  
           
         while p.tagName!="p":  
             p=p.parentNode  
           
           
         endNode=nodes[int(pn)]  
           
           
         e=endNode  
                   
         while e.tagName!="p":  
             e=e.parentNode  
                   
                   
         next=node.parentNode          if 'tocSize_%s'%tocMode in docinfo:
                       tocSize = int(docinfo['tocSize_%s'%tocMode])
         #sammle s              tocPageSize = int(pageinfo['tocPageSize'])
         while next and (next!=endNode.parentNode):              # cached toc           
             list.append(next)                  if tocSize%tocPageSize>0:
             next=next.nextSibling                      tocPages=tocSize/tocPageSize+1
         list.append(endNode.parentNode)              else:
                           tocPages=tocSize/tocPageSize
         if p==e:# beide im selben paragraphen              pageinfo['tocPN'] = min (tocPages,toc)                    
             pass          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
 #    else:          pageinfo['sn'] =self.REQUEST.get('sn','')
 #            next=p          return pageinfo
 #            while next!=e:  
 #                print next,e  
 #                list.append(next)  
 #                next=next.nextSibling  
 #              
 #        for x in list:  
 #            PrettyPrint(x)  
 #  
 #        return list  
 #  
   
     def findDigilibUrl(self):  
         """try to get the digilib URL from zogilib"""  
         url = self.imageViewerUrl[:-1] + "/getScalerUrl"  
         #print urlparse.urlparse(url)[0]  
         #print urlparse.urljoin(self.absolute_url(),url)  
         logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])  
         logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))  
           
         try:  
             if urlparse.urlparse(url)[0]=='': #relative path  
                 url=urlparse.urljoin(self.absolute_url()+"/",url)  
                   
             scaler = urlopen(url).read()  
             return scaler.replace("/servlet/Scaler?", "")  
         except:  
             return None  
           
     def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.textViewerUrl=textViewerUrl  
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
Line 674  class documentViewer(Folder): Line 805  class documentViewer(Folder):
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
           
 #    security.declareProtected('View management screens','renameImageForm')  
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"

Removed from v.1.37  
changed lines
  Added in v.1.125


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>