Diff for /documentViewer/documentViewer.py between versions 1.3 and 1.175.2.10

version 1.3, 2006/04/06 17:13:43 version 1.175.2.10, 2011/07/28 13:00:07
Line 1 Line 1
   
 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"  
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
   from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 from Ft.Xml.Domlette import PrettyPrint, Print  #import Ft.Xml.Domlette
 from Ft.Xml import EMPTY_NAMESPACE  
   
 import Ft.Xml.XPath  import xml.etree.ElementTree as ET
   
 import os.path  import os.path
 import cgi  import sys
 import urllib  import urllib
 import zLOG  import logging
   import math
 def getTextFromNode(nodename):  import urlparse 
     nodelist=nodename.childNodes  import re
     rc = ""  import string
     for node in nodelist:  
         if node.nodeType == node.TEXT_NODE:  from SrvTxtUtils import getInt, getText, getHttpData
            rc = rc + node.data  
     return rc  def logger(txt,method,txt2):
       """logging"""
 import socket      logging.info(txt+ txt2)
       
 def urlopen(url):      
         """urlopen mit timeout"""  def serializeNode(node, encoding="utf-8"):
         socket.setdefaulttimeout(2)      """returns a string containing node as XML"""
         ret=urllib.urlopen(url)      s = ET.tostring(node)
         socket.setdefaulttimeout(5)      
         return ret      # 4Suite:
           #    stream = cStringIO.StringIO()
 def getParamFromDigilib(path,param):      #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
     """gibt param von dlInfo aus"""      #    s = stream.getvalue()
     imageUrl=genericDigilib+"/dirInfo-xml.jsp?mo=dir&fn="+path      #    stream.close()
       return s
     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo (%s) from %s"%(param,imageUrl))  
   def browserCheck(self):
       """check the browsers request to find out the browser type"""
       bt = {}
       ua = self.REQUEST.get_header("HTTP_USER_AGENT")
       bt['ua'] = ua
       bt['isIE'] = False
       bt['isN4'] = False
       bt['versFirefox']=""
       bt['versIE']=""
       bt['versSafariChrome']=""
       bt['versOpera']=""
           
       if string.find(ua, 'MSIE') > -1:
           bt['isIE'] = True
       else:
           bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
       # Safari oder Chrome identification    
     try:      try:
         dom = NonvalidatingReader.parseUri(imageUrl)          nav = ua[string.find(ua, '('):]
     except:          nav1=ua[string.find(ua,')'):]
         return None          nav2=nav1[string.find(nav1,'('):]
               nav3=nav2[string.find(nav2,')'):]
     params=dom.xpath("//dir/%s"%param)          ie = string.split(nav, "; ")[1]
     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:%s"%params)          ie1 =string.split(nav1, " ")[2]
               ie2 =string.split(nav3, " ")[1]
     if params:          ie3 =string.split(nav3, " ")[2]
         return getTextFromNode(params[0])          if string.find(ie3, "Safari") >-1:
               bt['versSafariChrome']=string.split(ie2, "/")[1]
           except: pass
       # IE identification
       try:
           nav = ua[string.find(ua, '('):]
           ie = string.split(nav, "; ")[1]
           if string.find(ie, "MSIE") > -1:
               bt['versIE'] = string.split(ie, " ")[1]
       except:pass
       # Firefox identification
       try:
           nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
           if string.find(ie1, "Firefox") >-1:
               nav5= string.split(ie1, "/")[1]
               logging.debug("FIREFOX: %s"%(nav5))
               bt['versFirefox']=nav5[0:3]                   
       except:pass
       #Opera identification
       try:
           if string.find(ua,"Opera") >-1:
               nav = ua[string.find(ua, '('):]
               nav1=nav[string.find(nav,')'):]
               bt['versOpera']=string.split(nav1,"/")[2]
       except:pass
       
       bt['isMac'] = string.find(ua, 'Macintosh') > -1
       bt['isWin'] = string.find(ua, 'Windows') > -1
       bt['isIEWin'] = bt['isIE'] and bt['isWin']
       bt['isIEMac'] = bt['isIE'] and bt['isMac']
       bt['staticHTML'] = False
   
       return bt
   
   def getParentDir(path):
       """returns pathname shortened by one"""
       return '/'.join(path.split('/')[0:-1])
   
   
 ##  ##
Line 60  def getParamFromDigilib(path,param): Line 108  def getParamFromDigilib(path,param):
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
   
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 68  class documentViewer(Folder): Line 115  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     image_main = PageTemplateFile('zpt/image_main', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_double = PageTemplateFile('zpt/page_main_double', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
       page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
       info_xml = PageTemplateFile('zpt/info_xml', globals())
   
     security.declareProtected('View management screens','changeDocumentViewerForm')          
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
   
           
     def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl          self.thumbcols = thumbcols
         self.digilibBaseUrl = digilibBaseUrl          self.thumbrows = thumbrows
         if not self.digilibBaseUrl:          # authgroups is list of authorized groups (delimited by ,)
             self.digilibBaseUrl = self.findDigilibUrl()          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
         # add template folder so we can always use template.something          # create template folder so we can always use template.something
         self.manage_addFolder('template')          
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
           try:
               import MpdlXmlTextServer
               textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
               #templateFolder['fulltextclient'] = xmlRpcClient
               templateFolder._setObject('fulltextclient',textServer)
           except Exception, e:
               logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
   
           try:
               from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
   
     security.declareProtected('View','index_html')          try:
     def index_html(self,mode,url,start=0,pn=1):              # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
           
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getOrigPages(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getOrigPagesNorm(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPagesNorm(**args)
   
       def getQuery(self, **args):
           """get query in search"""
           return self.template.fulltextclient.getQuery(**args)
        
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
       
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
          
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getLemmaQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getLemmaQuery(**args)
   
       def getLex(self, **args):
           """get lex"""
           return self.template.fulltextclient.getLex(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
       security.declareProtected('View','thumbs_rss')
       def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
         view it          view it
         @param mode: defines which type of document is behind url          @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         '''          @param viewMode: if images display images, if text display text, default is images (text,images or auto)
                   
         zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          '''
         print "dlbaseurl:", self.digilibBaseUrl          logging.debug("HHHHHHHHHHHHHH:load the rss")
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
             print "no template folder -- creating"  
             self.manage_addFolder('template')              self.manage_addFolder('template')
                           
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         print "dlbaseurl:", self.digilibBaseUrl          docinfo = self.getDocinfo(mode=mode,url=url)
           #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
           ''' ZDES '''
           pt = getattr(self.template, 'thumbs_main_rss')
           
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
               else:
                   viewMode="images"
                  
           return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
     
       security.declareProtected('View','index_html')
       def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
           '''
           view it
           @param mode: defines how to access the document behind url 
           @param url: url which contains display information
           @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
           @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           @param characterNormalization type of text display (reg, norm, none)
           @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
           '''
           
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
           
           if not hasattr(self, 'template'):
               # this won't work
               logging.error("template folder missing!")
               return "ERROR: template folder missing!"
               
           if not getattr(self, 'digilibBaseUrl', None):
               self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
   
         docinfo = self.getDocinfo(mode=mode, url=url)          docinfo = self.getDocinfo(mode=mode, url=url)
         pageinfo = self.getPageinfo(start=start,current=pn)          
           if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
           # auto viewMode: text_dict if text else images
           if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   #texturl gesetzt und textViewer konfiguriert
                   viewMode="text_dict"
               else:
                   viewMode="images"
                   
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
           
           if viewMode != 'images' and docinfo.get('textURLPath', None):
               # get full text page
               page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
               pageinfo['textPage'] = page
               
           # get template /template/viewer_main
         pt = getattr(self.template, 'viewer_main')          pt = getattr(self.template, 'viewer_main')
         return pt(docinfo=docinfo,pageinfo=pageinfo)          # and execute with parameters
           return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
     
       def generateMarks(self,mk):
           ret=""
           if mk is None:
               return ""
           if not isinstance(mk, list):
               mk=[mk]
           for m in mk:
               ret+="mk=%s"%m
           return ret
       
       
     def imageLink(self,nr):      def getBrowser(self):
         """link hinter den images"""          """getBrowser the version of browser """
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])          bt = browserCheck(self)
         params={}          logging.debug("BROWSER VERSION: %s"%(bt))
         for x in paramsTmp.iteritems():          return bt
                 params[x[0]]=x[1][0]          
           def findDigilibUrl(self):
         params['pn']=nr          """try to get the digilib URL from zogilib"""
         newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)          url = self.template.zogilib.getDLBaseUrl()
         return newUrl          return url
   
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
                   
     def getStyle(self, idx, selected, style=""):      def getStyle(self, idx, selected, style=""):
         """returns a string with the given style + 'sel' if path == selected."""          """returns a string with the given style and append 'sel' if path == selected."""
         #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
         if idx == selected:          if idx == selected:
             return style + 'sel'              return style + 'sel'
         else:          else:
             return style                  return style    
   
       def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           # copy existing request params
           urlParams=self.REQUEST.form.copy()
           # change single param
           if param is not None:
               if val is None:
                   if urlParams.has_key(param):
                       del urlParams[param]
               else:
                   urlParams[param] = str(val)
                           
     def thumbruler(self,cols,rows,start,maximum):          # change more params
         """ruler for thumbs"""          if params is not None:
         ret=""              for k in params.keys():
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])                  v = params[k]
         params={}                  if v is None:
         for x in paramsTmp.iteritems():                      # val=None removes param
                       if urlParams.has_key(k):
             if not x[0]=="start":                          del urlParams[k]
                 params[x[0]]=x[1][0]                          
                   else:
         newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params)                          urlParams[k] = v
         if start>0:  
             newStart=max(start-cols*rows,0)          # FIXME: does this belong here?
             params['start']=newStart          if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)                  urlParams["mode"] = "imagepath"
             ret+="""<a href="%s">prev</a>"""%newUrl                  urlParams["url"] = getParentDir(urlParams["url"])
                   
           # quote values and assemble into query string (not escaping '/')
         ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value">"""%newUrlSelect          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
         nr,rest=divmod(maximum,cols*rows)          #ps = urllib.urlencode(urlParams)
         if rest > 0:          if baseUrl is None:
             nr+=1              baseUrl = self.REQUEST['URL1']
         for i in range(nr):              
             nr=i*cols*rows          url = "%s?%s"%(baseUrl, ps)
                      return url
             if (start >= nr) and (start < nr+cols*rows):    
                 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr)  
             else:  
                 ret+="""<option value="%s">%s</option>"""%(nr,nr)  
         ret+="</select>"  
           
         if start<maximum:  
             newStart=min(start+cols*rows,maximum)  
             params['start']=newStart  
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
             ret+="""<a href="%s">next</a>"""%newUrl  
                   
         return ret  
           
       def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
           """link to documentviewer with parameter param set to val"""
           return self.getLink(param, val, params, baseUrl, '&amp;')
                   
       def getInfo_xml(self,url,mode):
           """returns info about the document as XML"""
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):          if not self.digilibBaseUrl:
         """gets bibliographical info from the index.meta file at url or given by dom"""              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))          
           docinfo = self.getDocinfo(mode=mode,url=url)
           pt = getattr(self.template, 'info_xml')
           return pt(docinfo=docinfo)
                   
       def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
           """returns new option state"""
           if not self.REQUEST.SESSION.has_key(optionName):
               # not in session -- initial
               opt = {'lastState': newState, 'state': initialState}
           else:
               opt = self.REQUEST.SESSION.get(optionName)
               if opt['lastState'] != newState:
                   # state in session has changed -- toggle
                   opt['state'] = not opt['state']
                   opt['lastState'] = newState
           
           self.REQUEST.SESSION[optionName] = opt
           return opt['state']
       
       def isAccessible(self, docinfo):
           """returns if access to the resource is granted"""
           access = docinfo.get('accessType', None)
           logging.debug("documentViewer (accessOK) access type %s"%access)
           if access is not None and access == 'free':
               logging.debug("documentViewer (accessOK) access is free")
               return True
           elif access is None or access in self.authgroups:
               # only local access -- only logged in users
               user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
               if user is not None:
                   #print "user: ", user
                   return (user.getUserName() != "Anonymous User")
               else:
                   return False
           
           logging.error("documentViewer (accessOK) unknown access type %s"%access)
           return False
       
                   
       def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
           """gibt param von dlInfo aus"""
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                           
         metaData=self.metadata.main.meta.bib          for x in range(cut):
         if dom is None:              path=getParentDir(path)
             server="http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn="         
             path="/".join(path.split("/")[0:-1])          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
             metaUrl=server+path+"/index.meta"      
             try:          logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
                 dom = NonvalidatingReader.parseUri(metaUrl)          
             except:          txt = getHttpData(infoUrl)
                 return docinfo          if txt is None:
               raise IOError("Unable to get dir-info from %s"%(infoUrl))
   
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
           size=getText(dom.find("size"))
           #sizes=dom.xpath("//dir/size")
           logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
                   
         type=dom.xpath("//bib/@type")          if size:
         if type and (len(type)>0):              docinfo['numPages'] = int(size)
             type=type[0].value          else:
         else:              docinfo['numPages'] = 0
             type="generic"              
         type=type.replace("-"," ")# wrong typesiin index meta "-" instead of " "          # TODO: produce and keep list of image names and numbers
         hash=metaData.generateMappingForType(type)  
   
         docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%hash['author'][0])[0])  
         docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%hash['title'][0])[0])  
         docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%hash['year'][0])[0])  
                   
         return docinfo          return docinfo
   
       def getIndexMetaPath(self,url):
           """gib nur den Pfad zurueck"""
           regexp = re.compile(r".*(experimental|permanent)/(.*)")
           regpath = regexp.match(url)
           if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
        
       
       
       def getIndexMetaUrl(self,url):
           """returns utr  of index.meta document at url"""
         
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url.replace("/mpiwg/online","")
               if not metaUrl.endswith("index.meta"):
                   metaUrl += "/index.meta"
           
           return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
           txt=getHttpData(metaUrl)
           if txt is None:
               raise IOError("Unable to read index meta from %s"%(url))
           
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
           return dom
       
       def getPresentationInfoXML(self, url):
           """returns dom of info.xml document at url"""
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url.replace("/mpiwg/online","")
           
           txt=getHttpData(metaUrl)
           if txt is None:
               raise IOError("Unable to read infoXMLfrom %s"%(url))
               
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
           return dom
                           
           
       def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets authorization info from the index.meta file at path or given by dom"""
           logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
           
           access = None
                   
     def getDocinfoFromTextTool(self,url,docinfo=None):  
        """parse texttool tag in index meta"""  
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))  
        if docinfo is None:         if docinfo is None:
            docinfo = {}             docinfo = {}
                         
        try:          if dom is None:
            dom = NonvalidatingReader.parseUri(url)              for x in range(cut):
        except:                  path=getParentDir(path)
            zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])              dom = self.getDomFromIndexMeta(path)
          
           acc = dom.find(".//access-conditions/access")
           if acc is not None:
               acctype = acc.get('type')
               #acctype = dom.xpath("//access-conditions/access/@type")
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = dom.find(".//access-conditions/access/name").text.lower()
               
           docinfo['accessType'] = access
            return docinfo             return docinfo
                 
        archivePaths=dom.xpath("//resource/archive-path")  
                 
        if archivePaths and (len(archivePaths)>0):      def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
            archivePath=getTextFromNode(archivePaths[0])          """gets bibliographical info from the index.meta file at path or given by dom"""
        else:          logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
            archivePath=None  
                 
        images=dom.xpath("//texttool/image")          if docinfo is None:
               docinfo = {}
                 
        if images and (len(images)>0):          if dom is None:
            image=getTextFromNode(images[0])              for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
           
           docinfo['indexMetaPath']=self.getIndexMetaPath(path);
           
           logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
           if self.metadataService is not None:
               # put all raw bib fields in dict "bib"
               bib = self.metadataService.getBibData(dom=dom)
               docinfo['bib'] = bib
               bibtype = bib.get('@type', None)
               docinfo['bib_type'] = bibtype
               # also store DC metadata for convenience
               dc = self.metadataService.getDCMappedData(bib)
               docinfo['creator'] = dc.get('creator',None)
               docinfo['title'] = dc.get('title',None)
               docinfo['date'] = dc.get('date',None)
        else:         else:
            image=None              logging.error("MetadataService not found!")
           return docinfo
       
       
       # TODO: is this needed?
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getText(dom.find("name"))
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
                         
        if image and archivePath:  
            image=os.path.join(archivePath,image)  
            image=image.replace("/mpiwg/online",'')  
            pt=getParamFromDigilib(image,'size')  
            docinfo['imagePath'] = image  
            docinfo['numberOfPages'] = pt  
                         
        viewerUrls=dom.xpath("//texttool/digiliburlprefix")      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
           """parse texttool tag in index meta"""
           logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
           if docinfo is None:
              docinfo = {}
           if docinfo.get('lang', None) is None:
               docinfo['lang'] = '' # default keine Sprache gesetzt
           if dom is None:
               dom = self.getDomFromIndexMeta(url)
                 
        if viewerUrls and (len(viewerUrls)>0):          texttool = self.metadata.getTexttoolData(dom=dom)
            viewerUrl=getTextFromNode(viewerUrls[0])          
            docinfo['imageURL'] = viewerURL          archivePath = None
           archiveName = None
       
           archiveName = getText(dom.find("name"))
           if not archiveName:
               logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
           
           archivePath = getText(dom.find("archive-path"))
           if archivePath:
               # clean up archive path
               if archivePath[0] != '/':
                   archivePath = '/' + archivePath
               if archiveName and (not archivePath.endswith(archiveName)):
                   archivePath += "/" + archiveName
           else:
               # try to get archive-path from url
               logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
               if (not url.startswith('http')):
                   archivePath = url.replace('index.meta', '')
                   
           if archivePath is None:
               # we balk without archive-path
               raise IOError("Missing archive-path (for text-tool) in %s" % (url))
           
           imageDir = texttool.get('image', None)
               
           if not imageDir:
               # we balk with no image tag / not necessary anymore because textmode is now standard
               #raise IOError("No text-tool info in %s"%(url))
               imageDir = ""
               #xquery="//pb"  
               docinfo['imagePath'] = "" # keine Bilder
               docinfo['imageURL'] = ""
               
           if imageDir and archivePath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(archivePath, imageDir)
               imageDir = imageDir.replace("/mpiwg/online", '')
               docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
               docinfo['imagePath'] = imageDir
               
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
               
           viewerUrl = texttool.get('digiliburlprefix', None)
           if viewerUrl:
               docinfo['viewerURL'] = viewerUrl
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(archivePath, textUrl) 
               # fix URLs starting with /mpiwg/online
               if textUrl.startswith("/mpiwg/online"):
                   textUrl = textUrl.replace("/mpiwg/online", '', 1)
               
               docinfo['textURL'] = textUrl
       
           # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
               textUrlkurz = string.split(textUrl, ".")[0]
               docinfo['textURLPathkurz'] = textUrlkurz
               #if not docinfo['imagePath']:
                   # text-only, no page images
                   #docinfo = self.getNumTextPages(docinfo)
                     
           # get bib info
           docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           # TODO: is this needed here?
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
           
           # TODO: what to do with presentation?
           presentationUrl = texttool.get('presentation', None)
           if presentationUrl: # ueberschreibe diese durch presentation informationen 
                # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
                # durch den relativen Pfad auf die presentation infos
               presentationPath = presentationUrl
               if url.endswith("index.meta"): 
                   presentationUrl = url.replace('index.meta', presentationPath)
               else:
                   presentationUrl = url + "/" + presentationPath
                                       
        textUrls=dom.xpath("//texttool/text")              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
                 
        if textUrls and (len(textUrls)>0):          # get authorization
            textUrl=getTextFromNode(textUrls[0])          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
            docinfo['textURL'] = textURL  
                                             
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)  
        return docinfo         return docinfo
         
   
     def getDocinfoFromImagePath(self,path,docinfo=None):      def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
           """gets the bibliographical information from the preseantion entry in texttools
           """
           dom=self.getPresentationInfoXML(url)
           docinfo['author']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['year']=getText(dom.find(".//date"))
           return docinfo
       
       def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          """path ist the path to the images it assumes that the index.meta file is one level higher."""
         zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))          logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
         docinfo['imagePath'] = path  
         path=path.replace("/mpiwg/online","")          path=path.replace("/mpiwg/online","")
         pt=getParamFromDigilib(path,'size')          docinfo['imagePath'] = path
         docinfo['numberOfPages'] = pt          docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
         imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%path          
           pathorig=path
           for x in range(cut):       
                   path=getParentDir(path)
           logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
           imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)          #TODO: use getDocinfoFromIndexMeta
           #path ist the path to the images it assumes that the index.meta file is one level higher.
           docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         return docinfo          return docinfo
           
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         if mode=="texttool": #index.meta with texttool information          # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           if mode=="texttool": 
               # index.meta with texttool information
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
               # folder with images, index.meta optional
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
           elif mode=="filepath":
               # filename
               docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
               def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
     def getPageinfo(self, start, current):  
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         pageinfo['start'] = start          current = getInt(current)
         pageinfo['current'] = current  
         return pageinfo  
                   
     def text(self,mode,url,pn):  
         """give text"""  
         if mode=="texttool": #index.meta with texttool information  
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)  
           
         print textpath  
         try:  
             dom = NonvalidatingReader.parseUri(textpath)  
         except:  
             return None  
       
         list=[]  
         nodes=dom.xpath("//pb")  
   
         node=nodes[int(pn)-1]  
           
         p=node  
           
         while p.tagName!="p":  
             p=p.parentNode  
           
           
         endNode=nodes[int(pn)]  
           
           
         e=endNode  
           
         while e.tagName!="p":  
             e=e.parentNode  
                   
           pageinfo['current'] = current
           rows = int(rows or self.thumbrows)
           pageinfo['rows'] = rows
           cols = int(cols or self.thumbcols)
           pageinfo['cols'] = cols
           grpsize = cols * rows
           pageinfo['groupsize'] = grpsize
           # what does this do?
           start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
           # int(current / grpsize) * grpsize +1))
           pageinfo['start'] = start
           pageinfo['end'] = start + grpsize
           if (docinfo is not None) and ('numPages' in docinfo):
               np = int(docinfo['numPages'])
               pageinfo['end'] = min(pageinfo['end'], np)
               pageinfo['numgroups'] = int(np / grpsize)
               if np % grpsize > 0:
                   pageinfo['numgroups'] += 1
                   
           pageinfo['viewMode'] = viewMode
           pageinfo['tocMode'] = tocMode
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
           #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
           pageinfo['query'] = self.REQUEST.get('query','') 
           pageinfo['queryType'] = self.REQUEST.get('queryType','')
           pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
           pageinfo['textPN'] = self.REQUEST.get('textPN','1')
           pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
           pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
           pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
           pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
           # WTF?:
           toc = int(pageinfo['tocPN'])
           pageinfo['textPages'] =int(toc)
           
           # What does this do?
           if 'tocSize_%s'%tocMode in docinfo:
               tocSize = int(docinfo['tocSize_%s'%tocMode])
               tocPageSize = int(pageinfo['tocPageSize'])
               # cached toc           
               if tocSize%tocPageSize>0:
                   tocPages=tocSize/tocPageSize+1
               else:
                   tocPages=tocSize/tocPageSize
                   
         next=node.parentNode              pageinfo['tocPN'] = min(tocPages,toc)
           
         #sammle s  
         while next and (next!=endNode.parentNode):  
             list.append(next)      
             next=next.nextSibling      
         list.append(endNode.parentNode)  
           
         if p==e:# beide im selben paragraphen  
             pass  
 #    else:  
 #            next=p  
 #            while next!=e:  
 #                print next,e  
 #                list.append(next)  
 #                next=next.nextSibling  
 #              
 #        for x in list:  
 #            PrettyPrint(x)  
 #  
 #        return list  
 #  
   
     def image(self,mode,url,pn):          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         """give image out"""          pageinfo['sn'] =self.REQUEST.get('sn','')
         if mode=="texttool": #index.meta with texttool information          return pageinfo
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)  
             if not viewerUrl:  
                 viewerUrl=self.imageViewerUrl  
             url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0])  
             ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url  
             return url  
         elif mode=="imagepath":  
             url=url.replace("/mpiwg/online","")  
             url=self.imageViewerUrl+"pn=%s&fn=%s"%(pn,url)  
             ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url  
             return url  
                           
                   
     def findDigilibUrl(self):      security.declareProtected('View management screens','changeDocumentViewerForm')    
         """try to get the digilib URL from zogilib"""      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
         url = self.imageViewerUrl[:-1] + "/getScalerUrl"  
         try:  
             scaler = urlopen(url).read()  
             return scaler.replace("/servlet/Scaler?", "")  
         except:  
             return None  
           
     def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.imageViewerUrl=imageViewerUrl  
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
           self.thumbrows = thumbrows
           self.thumbcols = thumbcols
           self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
                   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
           
 #    security.declareProtected('View management screens','renameImageForm')  
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"
Line 432  def manage_addDocumentViewerTemplate(sel Line 875  def manage_addDocumentViewerTemplate(sel
   
     self._setObject(id, DocumentViewerTemplate(id))      self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)      ob = getattr(self, id)
     ob.pt_edit(open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt')).read(),None)      txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
       logging.info("txt %s:"%txt)
       ob.pt_edit(txt,"text/html")
     if title:      if title:
         ob.pt_setTitle(title)          ob.pt_setTitle(title)
     try:      try:

Removed from v.1.3  
changed lines
  Added in v.1.175.2.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>