Diff for /documentViewer/documentViewer.py between versions 1.3 and 1.175.2.23

version 1.3, 2006/04/06 17:13:43 version 1.175.2.23, 2011/08/10 19:18:03
Line 1 Line 1
   
 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"  
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
   from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 from Ft.Xml.Domlette import PrettyPrint, Print  #import Ft.Xml.Domlette
 from Ft.Xml import EMPTY_NAMESPACE  
   
 import Ft.Xml.XPath  import xml.etree.ElementTree as ET
   
 import os.path  import os.path
 import cgi  import sys
 import urllib  import urllib
 import zLOG  import logging
   import math
 def getTextFromNode(nodename):  import urlparse 
     nodelist=nodename.childNodes  import re
     rc = ""  import string
     for node in nodelist:  
         if node.nodeType == node.TEXT_NODE:  from SrvTxtUtils import getInt, getText, getHttpData
            rc = rc + node.data  
     return rc  def logger(txt,method,txt2):
       """logging"""
 import socket      logging.info(txt+ txt2)
       
 def urlopen(url):      
         """urlopen mit timeout"""  def serializeNode(node, encoding="utf-8"):
         socket.setdefaulttimeout(2)      """returns a string containing node as XML"""
         ret=urllib.urlopen(url)      s = ET.tostring(node)
         socket.setdefaulttimeout(5)      
         return ret      # 4Suite:
           #    stream = cStringIO.StringIO()
 def getParamFromDigilib(path,param):      #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
     """gibt param von dlInfo aus"""      #    s = stream.getvalue()
     imageUrl=genericDigilib+"/dirInfo-xml.jsp?mo=dir&fn="+path      #    stream.close()
       return s
     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo (%s) from %s"%(param,imageUrl))  
   def browserCheck(self):
       """check the browsers request to find out the browser type"""
       bt = {}
       ua = self.REQUEST.get_header("HTTP_USER_AGENT")
       bt['ua'] = ua
       bt['isIE'] = False
       bt['isN4'] = False
       bt['versFirefox']=""
       bt['versIE']=""
       bt['versSafariChrome']=""
       bt['versOpera']=""
           
       if string.find(ua, 'MSIE') > -1:
           bt['isIE'] = True
       else:
           bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
       # Safari or Chrome identification
     try:      try:
         dom = NonvalidatingReader.parseUri(imageUrl)          nav = ua[string.find(ua, '('):]
     except:          nav1=ua[string.find(ua,')'):]
         return None          nav2=nav1[string.find(nav1,'('):]
               nav3=nav2[string.find(nav2,')'):]
     params=dom.xpath("//dir/%s"%param)          ie = string.split(nav, "; ")[1]
     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:%s"%params)          ie1 =string.split(nav1, " ")[2]
               ie2 =string.split(nav3, " ")[1]
     if params:          ie3 =string.split(nav3, " ")[2]
         return getTextFromNode(params[0])          if string.find(ie3, "Safari") >-1:
               bt['versSafariChrome']=string.split(ie2, "/")[1]
           except: pass
       # IE identification
       try:
           nav = ua[string.find(ua, '('):]
           ie = string.split(nav, "; ")[1]
           if string.find(ie, "MSIE") > -1:
               bt['versIE'] = string.split(ie, " ")[1]
       except:pass
       # Firefox identification
       try:
           nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
           if string.find(ie1, "Firefox") >-1:
               nav5= string.split(ie1, "/")[1]
               logging.debug("FIREFOX: %s"%(nav5))
               bt['versFirefox']=nav5[0:3]                   
       except:pass
       #Opera identification
       try:
           if string.find(ua,"Opera") >-1:
               nav = ua[string.find(ua, '('):]
               nav1=nav[string.find(nav,')'):]
               bt['versOpera']=string.split(nav1,"/")[2]
       except:pass
       
       bt['isMac'] = string.find(ua, 'Macintosh') > -1
       bt['isWin'] = string.find(ua, 'Windows') > -1
       bt['isIEWin'] = bt['isIE'] and bt['isWin']
       bt['isIEMac'] = bt['isIE'] and bt['isMac']
       bt['staticHTML'] = False
   
       return bt
   
   def getParentPath(path, cnt=1):
       """returns pathname shortened by cnt"""
       # make sure path doesn't end with /
       path = path.rstrip('/')
       # split by /, shorten, and reassemble
       return '/'.join(path.split('/')[0:-cnt])
   
   
 ##  ##
Line 60  def getParamFromDigilib(path,param): Line 111  def getParamFromDigilib(path,param):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
   
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 68  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     image_main = PageTemplateFile('zpt/image_main', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_double = PageTemplateFile('zpt/page_main_double', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
       page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
       info_xml = PageTemplateFile('zpt/info_xml', globals())
   
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
       thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
           
     def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None):      
       def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl          self.thumbcols = thumbcols
           self.thumbrows = thumbrows
           # authgroups is list of authorized groups (delimited by ,)
           self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           # create template folder so we can always use template.something
           
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
           try:
               import MpdlXmlTextServer
               textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
               #templateFolder['fulltextclient'] = xmlRpcClient
               templateFolder._setObject('fulltextclient',textServer)
           except Exception, e:
               logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
           try:
               from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
               
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
           if digilibBaseUrl is not None:
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
         if not self.digilibBaseUrl:  
             self.digilibBaseUrl = self.findDigilibUrl()  
         # add template folder so we can always use template.something  
         self.manage_addFolder('template')  
   
   
     security.declareProtected('View','index_html')      # proxy text server methods to fulltextclient
     def index_html(self,mode,url,start=0,pn=1):      def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getOrigPages(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getOrigPagesNorm(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPagesNorm(**args)
   
       def getQuery(self, **args):
           """get query in search"""
           return self.template.fulltextclient.getQuery(**args)
        
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
       
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
          
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getLemmaQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getLemmaQuery(**args)
   
       def getLex(self, **args):
           """get lex"""
           return self.template.fulltextclient.getLex(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
       security.declareProtected('View','thumbs_rss')
       def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
         view it          view it
         @param mode: defines which type of document is behind url          @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         '''          @param viewMode: if images display images, if text display text, default is images (text,images or auto)
                   
         zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          '''
         print "dlbaseurl:", self.digilibBaseUrl          logging.debug("HHHHHHHHHHHHHH:load the rss")
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
             print "no template folder -- creating"  
             self.manage_addFolder('template')              self.manage_addFolder('template')
                           
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         print "dlbaseurl:", self.digilibBaseUrl  
   
         docinfo = self.getDocinfo(mode=mode, url=url)          docinfo = self.getDocinfo(mode=mode, url=url)
         pageinfo = self.getPageinfo(start=start,current=pn)          #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
         pt = getattr(self.template, 'viewer_main')          pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
         return pt(docinfo=docinfo,pageinfo=pageinfo)          ''' ZDES '''
           pt = getattr(self.template, 'thumbs_main_rss')
           
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
               else:
                   viewMode="images"
       
           return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     def imageLink(self,nr):  
         """link hinter den images"""  
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])  
         params={}  
         for x in paramsTmp.iteritems():  
                 params[x[0]]=x[1][0]  
       
         params['pn']=nr  
         newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
         return newUrl  
                   
     def getStyle(self, idx, selected, style=""):      security.declareProtected('View','index_html')
         """returns a string with the given style + 'sel' if path == selected."""      def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
         #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          """
         if idx == selected:          view page
             return style + 'sel'          @param url: url which contains display information
         else:          @param mode: defines how to access the document behind url 
             return style              @param viewMode: 'images': display images, 'text': display text, default is 'auto'
           @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
           @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           """
   
           logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))
                           
     def thumbruler(self,cols,rows,start,maximum):          if not hasattr(self, 'template'):
         """ruler for thumbs"""              # this won't work
         ret=""              logging.error("template folder missing!")
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])              return "ERROR: template folder missing!"
         params={}  
         for x in paramsTmp.iteritems():  
   
             if not x[0]=="start":  
                 params[x[0]]=x[1][0]  
   
         newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params)      
         if start>0:  
             newStart=max(start-cols*rows,0)  
             params['start']=newStart  
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
             ret+="""<a href="%s">prev</a>"""%newUrl  
   
   
         ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value">"""%newUrlSelect  
         nr,rest=divmod(maximum,cols*rows)  
         if rest > 0:  
             nr+=1  
         for i in range(nr):  
             nr=i*cols*rows  
              
             if (start >= nr) and (start < nr+cols*rows):    
                 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr)  
             else:  
                 ret+="""<option value="%s">%s</option>"""%(nr,nr)  
         ret+="</select>"  
           
         if start<maximum:  
             newStart=min(start+cols*rows,maximum)  
             params['start']=newStart  
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
             ret+="""<a href="%s">next</a>"""%newUrl  
                   
         return ret          if not getattr(self, 'digilibBaseUrl', None):
               self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
           
           docinfo = self.getDocinfo(mode=mode,url=url)
                   
           if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
           # auto viewMode: text if there is a text else images
           if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   viewMode = "text"
                   viewType = "dict"
               else:
                   viewMode = "images"
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):          elif viewMode == "text_dict":
         """gets bibliographical info from the index.meta file at url or given by dom"""              # legacy fix
         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))              viewMode = "text"
                       viewType = "dict"
         if docinfo is None:              
             docinfo = {}          # stringify viewType
                       if isinstance(viewType, list):
         metaData=self.metadata.main.meta.bib              viewType = ','.join([t for t in viewType if t])
         if dom is None:                          
             server="http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn="          pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)
             path="/".join(path.split("/")[0:-1])                      
             metaUrl=server+path+"/index.meta"          # get template /template/viewer_$viewMode
             try:          pt = getattr(self.template, 'viewer_%s'%viewMode, None)
                 dom = NonvalidatingReader.parseUri(metaUrl)          if pt is None:
             except:              logging.error("No template for viewMode=%s!"%viewMode)
                 return docinfo              # TODO: error page?
               return "No template for viewMode=%s!"%viewMode
                   
         type=dom.xpath("//bib/@type")          # and execute with parameters
         if type and (len(type)>0):          return pt(docinfo=docinfo, pageinfo=pageinfo)
             type=type[0].value    
         else:      def generateMarks(self,mk):
             type="generic"          ret=""
         type=type.replace("-"," ")# wrong typesiin index meta "-" instead of " "          if mk is None:
         hash=metaData.generateMappingForType(type)              return ""
           if not isinstance(mk, list):
               mk=[mk]
           for m in mk:
               ret+="mk=%s"%m
           return ret
   
         docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%hash['author'][0])[0])  
         docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%hash['title'][0])[0])  
         docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%hash['year'][0])[0])  
                   
         return docinfo      def getBrowser(self):
           """getBrowser the version of browser """
           bt = browserCheck(self)
           logging.debug("BROWSER VERSION: %s"%(bt))
           return bt
   
       def findDigilibUrl(self):
           """try to get the digilib URL from zogilib"""
           url = self.template.zogilib.getDLBaseUrl()
           return url
                   
     def getDocinfoFromTextTool(self,url,docinfo=None):      def getDocumentViewerURL(self):
        """parse texttool tag in index meta"""          """returns the URL of this instance"""
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))          return self.absolute_url()
        if docinfo is None:  
            docinfo = {}  
                         
        try:      def getStyle(self, idx, selected, style=""):
            dom = NonvalidatingReader.parseUri(url)          """returns a string with the given style and append 'sel' if path == selected."""
        except:          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
            zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])          if idx == selected:
            return docinfo              return style + 'sel'
           else:
               return style
                 
        archivePaths=dom.xpath("//resource/archive-path")      def getParams(self, param=None, val=None, params=None):
           """returns dict with URL parameters.
                 
        if archivePaths and (len(archivePaths)>0):          Takes URL parameters and additionally param=val or dict params.
            archivePath=getTextFromNode(archivePaths[0])          Deletes key if value is None."""
           # copy existing request params
           newParams=self.REQUEST.form.copy()
           # change single param
           if param is not None:
               if val is None:
                   if newParams.has_key(param):
                       del newParams[param]
        else:         else:
            archivePath=None                  newParams[param] = str(val)
                 
        images=dom.xpath("//texttool/image")          # change more params
           if params is not None:
               for k in params.keys():
                   v = params[k]
                   if v is None:
                       # val=None removes param
                       if newParams.has_key(k):
                           del newParams[k]
                 
        if images and (len(images)>0):  
            image=getTextFromNode(images[0])  
        else:         else:
            image=None                      newParams[k] = v
                         
        if image and archivePath:          return newParams
            image=os.path.join(archivePath,image)  
            image=image.replace("/mpiwg/online",'')  
            pt=getParamFromDigilib(image,'size')  
            docinfo['imagePath'] = image  
            docinfo['numberOfPages'] = pt  
                         
        viewerUrls=dom.xpath("//texttool/digiliburlprefix")      def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           urlParams = self.getParams(param=param, val=val, params=params)
           # quote values and assemble into query string (not escaping '/')
           ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
           if baseUrl is None:
               baseUrl = self.getDocumentViewerURL()
                 
        if viewerUrls and (len(viewerUrls)>0):          url = "%s?%s"%(baseUrl, ps)
            viewerUrl=getTextFromNode(viewerUrls[0])          return url
            docinfo['imageURL'] = viewerURL  
                                       
        textUrls=dom.xpath("//texttool/text")      def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
           """link to documentviewer with parameter param set to val"""
           return self.getLink(param, val, params, baseUrl, '&amp;')
                 
        if textUrls and (len(textUrls)>0):  
            textUrl=getTextFromNode(textUrls[0])  
            docinfo['textURL'] = textURL  
                                             
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)      def getInfo_xml(self,url,mode):
        return docinfo          """returns info about the document as XML"""
           if not self.digilibBaseUrl:
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
           
           docinfo = self.getDocinfo(mode=mode,url=url)
           pt = getattr(self.template, 'info_xml')
           return pt(docinfo=docinfo)
         
       def isAccessible(self, docinfo):
           """returns if access to the resource is granted"""
           access = docinfo.get('accessType', None)
           logging.debug("documentViewer (accessOK) access type %s"%access)
           if access == 'free':
               logging.debug("documentViewer (accessOK) access is free")
               return True
           
           elif access is None or access in self.authgroups:
               # only local access -- only logged in users
               user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
               if user is not None:
                   #print "user: ", user
                   return (user.getUserName() != "Anonymous User")
               else:
                   return False
   
     def getDocinfoFromImagePath(self,path,docinfo=None):          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          return False
         zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))  
         if docinfo is None:  
             docinfo = {}  
         docinfo['imagePath'] = path  
         path=path.replace("/mpiwg/online","")  
         pt=getParamFromDigilib(path,'size')  
         docinfo['numberOfPages'] = pt  
         imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%path  
         docinfo['imageURL'] = imageUrl  
                   
         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)  
         return docinfo  
           
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))          logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)                  logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         if mode=="texttool": #index.meta with texttool information          # add self url
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)          docinfo['viewerUrl'] = self.getDocumentViewerURL()
           docinfo['digilibBaseUrl'] = self.digilibBaseUrl
           # get index.meta DOM
           docUrl = None
           metaDom = None
           if mode=="texttool": 
               # url points to document dir or index.meta
               metaDom = self.metadataService.getDomFromPathOrUrl(url)
               docUrl = url.replace('/index.meta', '')
               if metaDom is None:
                   raise IOError("Unable to find index.meta for mode=texttool!")
   
         elif mode=="imagepath":          elif mode=="imagepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              # url points to folder with images, index.meta optional
               # assume index.meta in parent dir
               docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           elif mode=="filepath":
               # url points to image file, index.meta optional
               # assume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           else:
               logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
           
           docinfo['documentUrl'] = docUrl
           # process index.meta contents
           if metaDom is not None and metaDom.tag == 'resource':
               # document directory name and path
               resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
         else:          else:
             zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")                  # no bib - try info.xml
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)                  docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool with url
               docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
               
   
           # number of images from digilib
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                   
     def getPageinfo(self, start, current):              if docName and (not docPath.endswith(docName)):
         """returns pageinfo with the given parameters"""                  docPath += "/" + docName
         pageinfo = {}  
         pageinfo['start'] = start  
         pageinfo['current'] = current  
         return pageinfo  
                                   
     def text(self,mode,url,pn):          else:
         """give text"""              # use docUrl as docPath
         if mode=="texttool": #index.meta with texttool information              docUrl = docinfo['documentURL']
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)              if not docUrl.startswith('http:'):
                   docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                   
         print textpath          docinfo['documentPath'] = docPath
           return docinfo
   
       def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(docPath, imageDir)
               imageDir = imageDir.replace('/mpiwg/online', '', 1)
               docinfo['imagePath'] = imageDir
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl and docPath:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(docPath, textUrl) 
               
               docinfo['textURL'] = textUrl
       
           # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
               
           # page flow
           docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
               
           # odd pages are left
           docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
               
           # number of title page (0: not defined)
           docinfo['titlePage'] = texttool.get('title-scan-no', 0)
               
           # old presentation stuff
           presentation = texttool.get('presentation', None)
           if presentation and docPath:
               if presentation.startswith('http:'):
                   docinfo['presentationUrl'] = presentation
               else:
                   docinfo['presentationUrl'] = os.path.join(docPath, presentation)
               
           
           return docinfo
   
       def getDocinfoFromBib(self, docinfo, bib):
           """reads contents of bib element into docinfo"""
           logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
           # put all raw bib fields in dict "bib"
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
           docinfo['bibType'] = bibtype
           # also store DC metadata for convenience
           dc = self.metadataService.getDCMappedData(bib)
           docinfo['creator'] = dc.get('creator',None)
           docinfo['title'] = dc.get('title',None)
           docinfo['date'] = dc.get('date',None)
           return docinfo
               
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
         try:          try:
             dom = NonvalidatingReader.parseUri(textpath)              acctype = acc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                   
                   docinfo['accessType'] = access
   
         except:          except:
             return None              pass
           
         list=[]          return docinfo
         nodes=dom.xpath("//pb")  
   
         node=nodes[int(pn)-1]      def getDocinfoFromDigilib(self, docinfo, path):
           infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           # fetch data
           txt = getHttpData(infoUrl)
           if not txt:
               logging.error("Unable to get dir-info from %s"%(infoUrl))
               return docinfo
                   
         p=node          dom = ET.fromstring(txt)
           size = getText(dom.find("size"))
           logging.debug("getDocinfoFromDigilib: size=%s"%size)
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
                   
         while p.tagName!="p":          # TODO: produce and keep list of image names and numbers
             p=p.parentNode          return docinfo
                   
                   
         endNode=nodes[int(pn)]      def getDocinfoFromPresentationInfoXml(self,docinfo):
           """gets DC-like bibliographical information from the presentation entry in texttools"""
           url = docinfo.get('presentationUrl', None)
           if not url:
               logging.error("getDocinfoFromPresentation: no URL!")
               return docinfo
                   
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
                   
         e=endNode              server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url
                   
         while e.tagName!="p":          txt=getHttpData(metaUrl)
             e=e.parentNode          if txt is None:
               logging.error("Unable to read info.xml from %s"%(url))
               return docinfo
                   
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
           return docinfo
                   
         next=node.parentNode  
                   
         #sammle s      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
         while next and (next!=endNode.parentNode):          """returns pageinfo with the given parameters"""
             list.append(next)              pageinfo = {}
             next=next.nextSibling              pageinfo['viewMode'] = viewMode
         list.append(endNode.parentNode)          pageinfo['viewType'] = viewType
           pageinfo['tocMode'] = tocMode
                   
         if p==e:# beide im selben paragraphen          current = getInt(current)
             pass          pageinfo['current'] = current
 #    else:          rows = int(rows or self.thumbrows)
 #            next=p          pageinfo['rows'] = rows
 #            while next!=e:          cols = int(cols or self.thumbcols)
 #                print next,e          pageinfo['cols'] = cols
 #                list.append(next)          grpsize = cols * rows
 #                next=next.nextSibling          pageinfo['groupsize'] = grpsize
 #                      # is start is empty use one around current
 #        for x in list:          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
 #            PrettyPrint(x)          # int(current / grpsize) * grpsize +1))
 #          pageinfo['start'] = start
 #        return list          pn = self.REQUEST.get('pn','1')
 #          pageinfo['pn'] = pn
           np = int(docinfo.get('numPages', 0))
     def image(self,mode,url,pn):          if np == 0:
         """give image out"""              # numPages unknown - maybe we can get it from text page
         if mode=="texttool": #index.meta with texttool information              if docinfo.get('textURLPath', None):
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)                  # cache text page as well
             if not viewerUrl:                  pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
                 viewerUrl=self.imageViewerUrl                  np = int(docinfo.get('numPages', 0))
             url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0])                  
             ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url          pageinfo['numgroups'] = int(np / grpsize)
             return url          if np % grpsize > 0:
         elif mode=="imagepath":              pageinfo['numgroups'] += 1
             url=url.replace("/mpiwg/online","")  
             url=self.imageViewerUrl+"pn=%s&fn=%s"%(pn,url)          pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
             ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url          oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
             return url          # add zeroth page for two columns
           pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
           pageinfo['pageZero'] = pageZero
           pageinfo['pageList'] = self.getPageList(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
                   
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
           pageinfo['query'] = self.REQUEST.get('query','') 
           pageinfo['queryType'] = self.REQUEST.get('queryType','')
           pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
           pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
           pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
           pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
           pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
           pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))
           
           # limit tocPN
           if 'tocSize_%s'%tocMode in docinfo:
               tocSize = docinfo['tocSize_%s'%tocMode]
               tocPageSize = pageinfo['tocPageSize']
               # cached toc           
               if tocSize%tocPageSize>0:
                   tocPages=tocSize/tocPageSize+1
               else:
                   tocPages=tocSize/tocPageSize
                           
               pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
                   
     def findDigilibUrl(self):          return pageinfo
         """try to get the digilib URL from zogilib"""  
         url = self.imageViewerUrl[:-1] + "/getScalerUrl"  
         try:      def getPageList(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
             scaler = urlopen(url).read()          """returns array of page informations for one screenfull of thumbnails"""
             return scaler.replace("/servlet/Scaler?", "")          if maxIdx == 0:
         except:              maxIdx = start + rows * cols
             return None  
           pages = []
           if pageZero and start == 1:
               # correct beginning
               idx = 0
           else:
               idx = start
           
     def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,RESPONSE=None):          for r in range(rows):
               row = []
               for c in range(cols):
                   if idx < minIdx or idx > maxIdx:
                       page = {'idx':None}
                   else:
                       page = {'idx':idx}
                       
                   idx += 1
                   if pageFlowLtr:
                       row.append(page)
                   else:
                       row.insert(0, page) 
                   
               pages.append(row)
               
           logging.debug("getPageList returns=%s"%(pages))
           return pages
           
   
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
       def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
           self.thumbrows = thumbrows
           self.thumbcols = thumbcols
           self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
                   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
           
 #    security.declareProtected('View management screens','renameImageForm')  
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"
Line 432  def manage_addDocumentViewerTemplate(sel Line 832  def manage_addDocumentViewerTemplate(sel
   
     self._setObject(id, DocumentViewerTemplate(id))      self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)      ob = getattr(self, id)
     ob.pt_edit(open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt')).read(),None)      txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
       logging.info("txt %s:"%txt)
       ob.pt_edit(txt,"text/html")
     if title:      if title:
         ob.pt_setTitle(title)          ob.pt_setTitle(title)
     try:      try:

Removed from v.1.3  
changed lines
  Added in v.1.175.2.23


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>