Diff for /documentViewer/documentViewer.py between versions 1.8 and 1.175.2.19

version 1.8, 2006/04/11 17:27:57 version 1.175.2.19, 2011/08/05 09:24:42
Line 5  from AccessControl import ClassSecurityI Line 5  from AccessControl import ClassSecurityI
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 from Ft.Xml.Domlette import PrettyPrint, Print  #import Ft.Xml.Domlette
 from Ft.Xml import EMPTY_NAMESPACE  
   
 import Ft.Xml.XPath  import xml.etree.ElementTree as ET
   
 import os.path  import os.path
 import sys  import sys
 import cgi  
 import urllib  import urllib
 import zLOG  import logging
   import math
   import urlparse 
   import re
   import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
   def logger(txt,method,txt2):
       """logging"""
       logging.info(txt+ txt2)
       
       
   def serializeNode(node, encoding="utf-8"):
       """returns a string containing node as XML"""
       s = ET.tostring(node)
       
       # 4Suite:
       #    stream = cStringIO.StringIO()
       #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
       #    s = stream.getvalue()
       #    stream.close()
       return s
   
   def browserCheck(self):
       """check the browsers request to find out the browser type"""
       bt = {}
       ua = self.REQUEST.get_header("HTTP_USER_AGENT")
       bt['ua'] = ua
       bt['isIE'] = False
       bt['isN4'] = False
       bt['versFirefox']=""
       bt['versIE']=""
       bt['versSafariChrome']=""
       bt['versOpera']=""
   
 def getInt(number, default=0):      if string.find(ua, 'MSIE') > -1:
     """returns always an int (0 in case of problems)"""          bt['isIE'] = True
       else:
           bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
       # Safari oder Chrome identification    
     try:      try:
         return int(number)          nav = ua[string.find(ua, '('):]
     except:          nav1=ua[string.find(ua,')'):]
         return default          nav2=nav1[string.find(nav1,'('):]
               nav3=nav2[string.find(nav2,')'):]
           ie = string.split(nav, "; ")[1]
 def getTextFromNode(nodename):          ie1 =string.split(nav1, " ")[2]
     if nodename is None:          ie2 =string.split(nav3, " ")[1]
         return ""          ie3 =string.split(nav3, " ")[2]
     nodelist=nodename.childNodes          if string.find(ie3, "Safari") >-1:
     rc = ""              bt['versSafariChrome']=string.split(ie2, "/")[1]
     for node in nodelist:      except: pass
         if node.nodeType == node.TEXT_NODE:      # IE identification
            rc = rc + node.data      try:
     return rc          nav = ua[string.find(ua, '('):]
           ie = string.split(nav, "; ")[1]
 import socket          if string.find(ie, "MSIE") > -1:
               bt['versIE'] = string.split(ie, " ")[1]
 def urlopen(url,timeout=2):      except:pass
         """urlopen mit timeout"""      # Firefox identification
         socket.setdefaulttimeout(timeout)      try:
         ret=urllib.urlopen(url)          nav = ua[string.find(ua, '('):]
         socket.setdefaulttimeout(5)          nav1=ua[string.find(ua,')'):]
         return ret          if string.find(ie1, "Firefox") >-1:
               nav5= string.split(ie1, "/")[1]
               logging.debug("FIREFOX: %s"%(nav5))
               bt['versFirefox']=nav5[0:3]                   
       except:pass
       #Opera identification
       try:
           if string.find(ua,"Opera") >-1:
               nav = ua[string.find(ua, '('):]
               nav1=nav[string.find(nav,')'):]
               bt['versOpera']=string.split(nav1,"/")[2]
       except:pass
       
       bt['isMac'] = string.find(ua, 'Macintosh') > -1
       bt['isWin'] = string.find(ua, 'Windows') > -1
       bt['isIEWin'] = bt['isIE'] and bt['isWin']
       bt['isIEMac'] = bt['isIE'] and bt['isMac']
       bt['staticHTML'] = False
   
       return bt
   
   def getParentPath(path, cnt=1):
       """returns pathname shortened by cnt"""
       # make sure path doesn't end with /
       path = path.rstrip('/')
       # split by /, shorten, and reassemble
       return '/'.join(path.split('/')[0:-cnt])
   
   
 ##  ##
Line 50  def urlopen(url,timeout=2): Line 111  def urlopen(url,timeout=2):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
   
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 58  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     image_main = PageTemplateFile('zpt/image_main', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_double = PageTemplateFile('zpt/page_main_double', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
       page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
       info_xml = PageTemplateFile('zpt/info_xml', globals())
   
     security.declareProtected('View management screens','changeDocumentViewerForm')          
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
   
           
     def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         if not digilibBaseUrl:  
             self.digilibBaseUrl = self.findDigilibUrl()  
         else:  
             self.digilibBaseUrl = digilibBaseUrl  
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         # authgroups is list of authorized groups (delimited by ,)          # authgroups is list of authorized groups (delimited by ,)
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
         # add template folder so we can always use template.something          # create template folder so we can always use template.something
         self.manage_addFolder('template')  
   
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
           try:
               import MpdlXmlTextServer
               textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
               #templateFolder['fulltextclient'] = xmlRpcClient
               templateFolder._setObject('fulltextclient',textServer)
           except Exception, e:
               logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
   
     security.declareProtected('View','index_html')          try:
     def index_html(self,mode,url,start=None,pn=1):              from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
               
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
           
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getOrigPages(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getOrigPagesNorm(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPagesNorm(**args)
   
       def getQuery(self, **args):
           """get query in search"""
           return self.template.fulltextclient.getQuery(**args)
        
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
       
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
          
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getLemmaQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getLemmaQuery(**args)
   
       def getLex(self, **args):
           """get lex"""
           return self.template.fulltextclient.getLex(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
       security.declareProtected('View','thumbs_rss')
       def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
         view it          view it
         @param mode: defines which type of document is behind url          @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         '''          @param viewMode: if images display images, if text display text, default is images (text,images or auto)
                   
         zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          '''
           logging.debug("HHHHHHHHHHHHHH:load the rss")
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 104  class documentViewer(Folder): Line 251  class documentViewer(Folder):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
           #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           ''' ZDES '''
           pt = getattr(self.template, 'thumbs_main_rss')
           
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
               else:
                   viewMode="images"
                  
           return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
   
     
       security.declareProtected('View','index_html')
       def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
           """
           view page
           @param url: url which contains display information
           @param mode: defines how to access the document behind url 
           @param viewMode: 'images': display images, 'text': display text, default is 'auto'
           @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
           @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           """
           
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
           
           if not hasattr(self, 'template'):
               # this won't work
               logging.error("template folder missing!")
               return "ERROR: template folder missing!"
               
           if not getattr(self, 'digilibBaseUrl', None):
               self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
               
           docinfo = self.getDocinfo(mode=mode,url=url)
           
           if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
           # auto viewMode: text if there is a text else images
           if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   viewMode = "text"
                   viewType = "dict"
               else:
                   viewMode = "images"
                   
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)
                       
           # get template /template/viewer_main
         pt = getattr(self.template, 'viewer_main')          pt = getattr(self.template, 'viewer_main')
           # and execute with parameters
         return pt(docinfo=docinfo,pageinfo=pageinfo)          return pt(docinfo=docinfo,pageinfo=pageinfo)
       
       def generateMarks(self,mk):
           ret=""
           if mk is None:
               return ""
           if not isinstance(mk, list):
               mk=[mk]
           for m in mk:
               ret+="mk=%s"%m
           return ret
       
     def getLink(self,param=None,val=None):  
         """link to documentviewer with parameter param set to val"""  
         params=cgi.parse_qs(self.REQUEST['QUERY_STRING'])  
         if param is not None:  
             if val is None:  
                 if params.has_key(param):  
                     del params[param]  
             else:  
                 params[param] = [str(val)]  
                                   
         ps = "&".join(["%s=%s"%(k,urllib.quote(v[0])) for (k, v) in params.items()])      def getBrowser(self):
         url=self.REQUEST['URL']+"?"+ps          """getBrowser the version of browser """
         #url=self.REQUEST['URL']+"?"+urllib.urlencode(params, doseq=True)          bt = browserCheck(self)
           logging.debug("BROWSER VERSION: %s"%(bt))
           return bt
           
       def findDigilibUrl(self):
           """try to get the digilib URL from zogilib"""
           url = self.template.zogilib.getDLBaseUrl()
         return url          return url
   
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
           
     def getStyle(self, idx, selected, style=""):      def getStyle(self, idx, selected, style=""):
         """returns a string with the given style and append 'sel' if path == selected."""          """returns a string with the given style and append 'sel' if path == selected."""
         #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
         if idx == selected:          if idx == selected:
             return style + 'sel'              return style + 'sel'
         else:          else:
             return style                  return style    
                   
     def accessOK(self, docinfo):      def getParams(self, param=None, val=None, params=None):
           """returns dict with URL parameters.
           
           Takes URL parameters and additionally param=val or dict params.
           Deletes key if value is None."""
           # copy existing request params
           newParams=self.REQUEST.form.copy()
           # change single param
           if param is not None:
               if val is None:
                   if newParams.has_key(param):
                       del newParams[param]
               else:
                   newParams[param] = str(val)
                   
           # change more params
           if params is not None:
               for k in params.keys():
                   v = params[k]
                   if v is None:
                       # val=None removes param
                       if newParams.has_key(k):
                           del newParams[k]
                           
                   else:
                       newParams[k] = v
                       
           return newParams
       
       def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           urlParams = self.getParams(param=param, val=val, params=params)
           # quote values and assemble into query string (not escaping '/')
           ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
           if baseUrl is None:
               baseUrl = self.getDocumentViewerURL()
               
           url = "%s?%s"%(baseUrl, ps)
           return url
   
       def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
           """link to documentviewer with parameter param set to val"""
           return self.getLink(param, val, params, baseUrl, '&')
       
       
       def getInfo_xml(self,url,mode):
           """returns info about the document as XML"""
           if not self.digilibBaseUrl:
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
           
           docinfo = self.getDocinfo(mode=mode,url=url)
           pt = getattr(self.template, 'info_xml')
           return pt(docinfo=docinfo)
   
       def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         if access is None:          logging.debug("documentViewer (accessOK) access type %s"%access)
             # no information - no access (not yet)          if access == 'free':
             return True              logging.debug("documentViewer (accessOK) access is free")
         elif access == 'free':  
             return True              return True
                   
         print "access: ", access, " authgroups: ", self.authgroups          elif access is None or access in self.authgroups:
         if access in self.authgroups:              # only local access -- only logged in users
             # local access OK              user = getSecurityManager().getUser()
             user = getSecurityManager().getUser().getUserName()              logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             print "user: ", user              if user is not None:
             return (user != "Anonymous User")                  #print "user: ", user
                   return (user.getUserName() != "Anonymous User")
               else:
                   return False
                   
         zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access group %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
                                   
                   
     def getDirinfoFromDigilib(self,path,docinfo=None):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
                           
         imageUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path      def getDocinfo(self, mode, url):
           """returns docinfo depending on mode"""
           logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if its still current
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                   return docinfo
           
         zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(imageUrl))          # new docinfo
           docinfo = {'mode': mode, 'url': url}
           # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           # get index.meta DOM
           docUrl = None
           metaDom = None
           if mode=="texttool": 
               # url points to document dir or index.meta
               metaDom = self.metadataService.getDomFromPathOrUrl(url)
               docUrl = url.replace('/index.meta', '')
               if metaDom is None:
                   raise IOError("Unable to find index.meta for mode=texttool!")
                   
         try:          elif mode=="imagepath":
             dom = NonvalidatingReader.parseUri(imageUrl)              # url points to folder with images, index.meta optional
         except:              # asssume index.meta in parent dir
             zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.ERROR, "error reading %s"%(imageUrl))              docUrl = getParentPath(url)
             raise IOError("Unable to get dirinfo from %s"%(imageUrl))              metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           elif mode=="filepath":
               # url points to image file, index.meta optional
               # asssume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
                   
         params=dom.xpath("//dir/size")          else:
         zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size"%params)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                   
         if params:          docinfo['documentUrl'] = docUrl
             docinfo['numPages'] = int(getTextFromNode(params[0]))          # process index.meta contents
           if metaDom is not None and metaDom.tag == 'resource':
               # document directory name and path
               resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
         else:          else:
             docinfo['numPages'] = 0                  # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                                                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool
               docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
           # number of images from digilib
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
           
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                           
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):              if docName and (not docPath.endswith(docName)):
         """gets authorization info from the index.meta file at url or given by dom"""                  docPath += "/" + docName
         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))  
                   
         access = None          else:
                       # use docUrl as docPath
         if docinfo is None:              docUrl = docinfo['documentURL']
             docinfo = {}              if not docUrl.startswith('http:'):
                   docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                           
         if dom is None:          docinfo['documentPath'] = docPath
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             path="/".join(path.split("/")[0:-1])  
             metaUrl=server+path+"/index.meta"  
             try:  
                 dom = NonvalidatingReader.parseUri(metaUrl)  
             except:  
                 return docinfo                  return docinfo
                           
         acctype = dom.xpath("//access-conditions/access/@type")      def getDocinfoFromTexttool(self, docinfo, texttool):
         if acctype and (len(acctype)>0):          """reads contents of texttool element into docinfo"""
             access=acctype[0].value          # image dir
             if access == 'group':          imageDir = texttool.get('image', None)
                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()          docPath = docinfo.get('documentPath', None)
                       if imageDir and docPath:
         docinfo['accessType'] = access              #print "image: ", imageDir, " archivepath: ", archivePath
         return docinfo              imageDir = os.path.join(docPath, imageDir)
               imageDir = imageDir.replace('/mpiwg/online', '', 1)
               docinfo['imagePath'] = imageDir
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl and docPath:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(docPath, textUrl) 
           
               docinfo['textURL'] = textUrl
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):          # new style text-url-path
         """gets bibliographical info from the index.meta file at url or given by dom"""          textUrl = texttool.get('text-url-path', None)
         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))          if textUrl:
               docinfo['textURLPath'] = textUrl
               
           # page flow
           docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
               
           # odd pages are left
           docinfo['oddPage'] = texttool.get('odd-scan-orientation', 'left')
               
           # number of title page (0: not defined)
           docinfo['titlePage'] = texttool.get('title-scan-no', 0)
               
           # old presentation stuff
           presentation = texttool.get('presentation', None)
           if presentation and docPath:
               if presentation.startswith('http:'):
                   docinfo['presentationUrl'] = presentation
               else:
                   docinfo['presentationUrl'] = os.path.join(docPath, presentation)
                   
         if docinfo is None:  
             docinfo = {}  
                           
         if dom is None:  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             path="/".join(path.split("/")[0:-1])  
             metaUrl=server+path+"/index.meta"  
             try:  
                 dom = NonvalidatingReader.parseUri(metaUrl)  
             except:  
                 return docinfo                  return docinfo
                   
         metaData=self.metadata.main.meta.bib      def getDocinfoFromBib(self, docinfo, bib):
         bibtype=dom.xpath("//bib/@type")          """reads contents of bib element into docinfo"""
         if bibtype and (len(bibtype)>0):          logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
             bibtype=bibtype[0].value          # put all raw bib fields in dict "bib"
         else:          docinfo['bib'] = bib
             bibtype="generic"          bibtype = bib.get('@type', None)
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          docinfo['bibType'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          # also store DC metadata for convenience
         print "bibmap: ", bibmap, " for: ", bibtype          dc = self.metadataService.getDCMappedData(bib)
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          docinfo['creator'] = dc.get('creator',None)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:          docinfo['title'] = dc.get('title',None)
             docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])          docinfo['date'] = dc.get('date',None)
             docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])  
             docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])  
           
         return docinfo          return docinfo
   
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
           try:
               acctype = acc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                   
     def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):                  docinfo['accessType'] = access
        """parse texttool tag in index meta"""  
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))  
        if docinfo is None:  
            docinfo = {}  
                         
        if dom is None:  
            try:  
                dom = NonvalidatingReader.parseUri(url)  
            except:             except:
                zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])              pass
                raise IOError("Unable to get texttool info from %s"%(url))  
          
        archiveNames=dom.xpath("//resource/name")  
        if archiveNames and (len(archiveNames)>0):  
            archiveName=getTextFromNode(archiveNames[0])  
          
        archivePaths=dom.xpath("//resource/archive-path")  
        if archivePaths and (len(archivePaths)>0):  
            archivePath=getTextFromNode(archivePaths[0])  
            # clean up archive path  
            if archivePath[0] != '/':  
                archivePath = '/' + archivePath  
            if not archivePath.endswith(archiveName):  
                archivePath += "/" + archiveName  
        else:  
            archivePath=None  
          
        images=dom.xpath("//texttool/image")  
        if images and (len(images)>0):  
            image=getTextFromNode(images[0])  
        else:  
            image=None  
              
        if image and archivePath:  
            print "image: ", image, " archivepath: ", archivePath  
            image=os.path.join(archivePath,image)  
            image=image.replace("/mpiwg/online",'')  
            docinfo=self.getDirinfoFromDigilib(image,docinfo=docinfo)  
            docinfo['imagePath'] = image  
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+image  
              
        viewerUrls=dom.xpath("//texttool/digiliburlprefix")  
        if viewerUrls and (len(viewerUrls)>0):  
            viewerUrl=getTextFromNode(viewerUrls[0])  
            docinfo['viewerURL'] = viewerUrl  
                     
        textUrls=dom.xpath("//texttool/text")  
        if textUrls and (len(textUrls)>0):  
            textUrl=getTextFromNode(textUrls[0])  
            docinfo['textURL'] = textUrl  
                                             
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)  
        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)  
        return docinfo         return docinfo
         
       def getDocinfoFromDigilib(self, docinfo, path):
           infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           # fetch data
           txt = getHttpData(infoUrl)
           if not txt:
               logging.error("Unable to get dir-info from %s"%(infoUrl))
               return docinfo
   
     def getDocinfoFromImagePath(self,path,docinfo=None):          dom = ET.fromstring(txt)
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          size = getText(dom.find("size"))
         zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))          logging.debug("getDocinfoFromDigilib: size=%s"%size)
         if docinfo is None:          if size:
             docinfo = {}              docinfo['numPages'] = int(size)
         path=path.replace("/mpiwg/online","")          else:
         docinfo['imagePath'] = path              docinfo['numPages'] = 0
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
                   
         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)          # TODO: produce and keep list of image names and numbers
         docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)  
         return docinfo          return docinfo
           
           
     def getDocinfo(self, mode, url):      def getDocinfoFromPresentationInfoXml(self,docinfo):
         """returns docinfo depending on mode"""          """gets DC-like bibliographical information from the presentation entry in texttools"""
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))          url = docinfo.get('presentationUrl', None)
         # look for cached docinfo in session          if not url:
         if self.REQUEST.SESSION.has_key('docinfo'):              logging.error("getDocinfoFromPresentation: no URL!")
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)  
                 return docinfo                  return docinfo
         # new docinfo          
         docinfo = {'mode': mode, 'url': url}          dom = None
         if mode=="texttool": #index.meta with texttool information          metaUrl = None
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)          if url.startswith("http://"):
         elif mode=="imagepath":              # real URL
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              metaUrl = url
         else:          else:
             zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")              # online path
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)              
         self.REQUEST.SESSION['docinfo'] = docinfo              server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url
           
           txt=getHttpData(metaUrl)
           if txt is None:
               logging.error("Unable to read info.xml from %s"%(url))
               return docinfo
               
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
         return docinfo          return docinfo
                   
                   
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
           pageinfo['viewMode'] = viewMode
           pageinfo['viewType'] = viewType
           pageinfo['tocMode'] = tocMode
   
         current = getInt(current)          current = getInt(current)
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
Line 348  class documentViewer(Folder): Line 681  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
         start = getInt(start, default=(int(current / grpsize) * grpsize +1))          # what does this do?
           start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
           # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
         pageinfo['end'] = start + grpsize          pageinfo['end'] = start + grpsize
         if docinfo is not None:          pn = self.REQUEST.get('pn','1')
             np = int(docinfo['numPages'])          pageinfo['pn'] = pn
           np = int(docinfo.get('numPages', 0))
           if np == 0:
               # numPages unknown - maybe we can get it from text page
               if docinfo.get('textURLPath', None):
                   # cache text page as well
                   pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
                   np = int(docinfo.get('numPages', 0))
                   
             pageinfo['end'] = min(pageinfo['end'], np)              pageinfo['end'] = min(pageinfo['end'], np)
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
                                   
         return pageinfo          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
                           pageinfo['query'] = self.REQUEST.get('query','') 
     def text(self,mode,url,pn):          pageinfo['queryType'] = self.REQUEST.get('queryType','')
         """give text"""          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
         if mode=="texttool": #index.meta with texttool information          pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
                   pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         print textpath          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
         try:          # WTF?:
             dom = NonvalidatingReader.parseUri(textpath)          toc = int(pageinfo['tocPN'])
         except:          pageinfo['textPages'] = int(toc)
             return None          
               # What does this do?
         list=[]          if 'tocSize_%s'%tocMode in docinfo:
         nodes=dom.xpath("//pb")              tocSize = int(docinfo['tocSize_%s'%tocMode])
               tocPageSize = int(pageinfo['tocPageSize'])
         node=nodes[int(pn)-1]              # cached toc           
                       if tocSize%tocPageSize>0:
         p=node                  tocPages=tocSize/tocPageSize+1
                       else:
         while p.tagName!="p":                  tocPages=tocSize/tocPageSize
             p=p.parentNode  
           
           
         endNode=nodes[int(pn)]  
           
           
         e=endNode  
           
         while e.tagName!="p":  
             e=e.parentNode  
                   
               pageinfo['tocPN'] = min(tocPages,toc)
                   
         next=node.parentNode          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
           return pageinfo
                   
         #sammle s  
         while next and (next!=endNode.parentNode):  
             list.append(next)      
             next=next.nextSibling      
         list.append(endNode.parentNode)  
                   
         if p==e:# beide im selben paragraphen      security.declareProtected('View management screens','changeDocumentViewerForm')    
             pass      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
 #    else:  
 #            next=p  
 #            while next!=e:  
 #                print next,e  
 #                list.append(next)  
 #                next=next.nextSibling  
 #              
 #        for x in list:  
 #            PrettyPrint(x)  
 #  
 #        return list  
 #  
   
     def findDigilibUrl(self):  
         """try to get the digilib URL from zogilib"""  
         url = self.imageViewerUrl[:-1] + "/getScalerUrl"  
         try:  
             scaler = urlopen(url).read()  
             return scaler.replace("/servlet/Scaler?", "")  
         except:  
             return None  
           
     def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
           
 #    security.declareProtected('View management screens','renameImageForm')  
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"
Line 472  def manage_addDocumentViewerTemplate(sel Line 778  def manage_addDocumentViewerTemplate(sel
   
     self._setObject(id, DocumentViewerTemplate(id))      self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)      ob = getattr(self, id)
     ob.pt_edit(open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt')).read(),None)      txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
       logging.info("txt %s:"%txt)
       ob.pt_edit(txt,"text/html")
     if title:      if title:
         ob.pt_setTitle(title)          ob.pt_setTitle(title)
     try:      try:

Removed from v.1.8  
changed lines
  Added in v.1.175.2.19


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>