Diff for /documentViewer/documentViewer.py between versions 1.1 and 1.175.2.11

version 1.1, 2005/12/18 12:35:02 version 1.175.2.11, 2011/07/29 16:27:24
Line 1 Line 1
   
 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"  
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
   from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 from Ft.Xml.Domlette import PrettyPrint, Print  #import Ft.Xml.Domlette
 from Ft.Xml import EMPTY_NAMESPACE  
   
 import Ft.Xml.XPath  import xml.etree.ElementTree as ET
   
 import os.path  import os.path
 import cgi  import sys
 import urllib  import urllib
   import logging
   import math
   import urlparse 
   import re
   import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
   def logger(txt,method,txt2):
       """logging"""
       logging.info(txt+ txt2)
       
       
   def serializeNode(node, encoding="utf-8"):
       """returns a string containing node as XML"""
       s = ET.tostring(node)
       
       # 4Suite:
       #    stream = cStringIO.StringIO()
       #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
       #    s = stream.getvalue()
       #    stream.close()
       return s
   
   def browserCheck(self):
       """check the browsers request to find out the browser type"""
       bt = {}
       ua = self.REQUEST.get_header("HTTP_USER_AGENT")
       bt['ua'] = ua
       bt['isIE'] = False
       bt['isN4'] = False
       bt['versFirefox']=""
       bt['versIE']=""
       bt['versSafariChrome']=""
       bt['versOpera']=""
   
 def getTextFromNode(nodename):      if string.find(ua, 'MSIE') > -1:
     nodelist=nodename.childNodes          bt['isIE'] = True
     rc = ""      else:
     for node in nodelist:          bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
         if node.nodeType == node.TEXT_NODE:      # Safari oder Chrome identification    
            rc = rc + node.data      try:
     return rc          nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
 import socket          nav2=nav1[string.find(nav1,'('):]
           nav3=nav2[string.find(nav2,')'):]
 def urlopen(url):          ie = string.split(nav, "; ")[1]
         """urlopen mit timeout"""          ie1 =string.split(nav1, " ")[2]
         socket.setdefaulttimeout(2)          ie2 =string.split(nav3, " ")[1]
         ret=urllib.urlopen(url)          ie3 =string.split(nav3, " ")[2]
         socket.setdefaulttimeout(5)          if string.find(ie3, "Safari") >-1:
         return ret              bt['versSafariChrome']=string.split(ie2, "/")[1]
       except: pass
       # IE identification
       try:
           nav = ua[string.find(ua, '('):]
           ie = string.split(nav, "; ")[1]
           if string.find(ie, "MSIE") > -1:
               bt['versIE'] = string.split(ie, " ")[1]
       except:pass
       # Firefox identification
       try:
           nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
           if string.find(ie1, "Firefox") >-1:
               nav5= string.split(ie1, "/")[1]
               logging.debug("FIREFOX: %s"%(nav5))
               bt['versFirefox']=nav5[0:3]                   
       except:pass
       #Opera identification
       try:
           if string.find(ua,"Opera") >-1:
               nav = ua[string.find(ua, '('):]
               nav1=nav[string.find(nav,')'):]
               bt['versOpera']=string.split(nav1,"/")[2]
       except:pass
       
       bt['isMac'] = string.find(ua, 'Macintosh') > -1
       bt['isWin'] = string.find(ua, 'Windows') > -1
       bt['isIEWin'] = bt['isIE'] and bt['isWin']
       bt['isIEMac'] = bt['isIE'] and bt['isMac']
       bt['staticHTML'] = False
   
       return bt
   
   def getParentPath(path, cnt=1):
       """returns pathname shortened by cnt"""
       # make sure path doesn't end with /
       path = path.rstrip('/')
       # split by /, shorten, and reassemble
       return '/'.join(path.split('/')[0:-cnt])
   
   
   ##
   ## documentViewer class
   ##
   class documentViewer(Folder):
       """document viewer"""
       meta_type="Document viewer"
           
 def getParamFromDigilib(path,param):      security=ClassSecurityInfo()
     """gibt param von dlInfo aus"""      manage_options=Folder.manage_options+(
     imageUrl=genericDigilib+"/dlInfo-xml.jsp?fn="+path          {'label':'main config','action':'changeDocumentViewerForm'},
           )
   
     try:      metadataService = None
         dom = NonvalidatingReader.parseUri(imageUrl)      """MetaDataFolder instance"""
     except:  
         return None  
           
       # templates and forms
       viewer_main = PageTemplateFile('zpt/viewer_main', globals())
       toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
       toc_text = PageTemplateFile('zpt/toc_text', globals())
       toc_figures = PageTemplateFile('zpt/toc_figures', globals())
       page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_double = PageTemplateFile('zpt/page_main_double', globals())
       page_main_text = PageTemplateFile('zpt/page_main_text', globals())
       page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
       page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
       head_main = PageTemplateFile('zpt/head_main', globals())
       docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
       info_xml = PageTemplateFile('zpt/info_xml', globals())
           
     params=dom.xpath("//document-parameters/parameter[@name='%s']/@value"%param)  
           
     if params:      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
         return params[0].value  
           
 def parseUrlTextTool(url):  
    """parse index meta"""  
           
       def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
           """init document viewer"""
           self.id=id
           self.title=title
           self.thumbcols = thumbcols
           self.thumbrows = thumbrows
           # authgroups is list of authorized groups (delimited by ,)
           self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           # create template folder so we can always use template.something
           
           templateFolder = Folder('template')
           #self['template'] = templateFolder # Zope-2.12 style
           self._setObject('template',templateFolder) # old style
    try:     try:
        dom = NonvalidatingReader.parseUri(url)              import MpdlXmlTextServer
    except:              textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])              #templateFolder['fulltextclient'] = xmlRpcClient
        return (None,None,None)              templateFolder._setObject('fulltextclient',textServer)
           except Exception, e:
               logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
         
    archivePaths=dom.xpath("//resource/archive-path")          try:
               from Products.zogiLib.zogiLib import zogiLib
               zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
               #templateFolder['zogilib'] = zogilib
               templateFolder._setObject('zogilib',zogilib)
           except Exception, e:
               logging.error("Unable to create zogiLib for zogilib: "+str(e))
         
    if archivePaths and (len(archivePaths)>0):          try:
        archivePath=getTextFromNode(archivePaths[0])              # assume MetaDataFolder instance is called metadata 
    else:              self.metadataService = getattr(self, 'metadata')
        archivePath=None          except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
           
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getOrigPages(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getOrigPagesNorm(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPagesNorm(**args)
   
       def getQuery(self, **args):
           """get query in search"""
           return self.template.fulltextclient.getQuery(**args)
        
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
         
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
           
    images=dom.xpath("//texttool/image")      def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
         
    if images and (len(images)>0):      def getTranslate(self, **args):
        image=getTextFromNode(images[0])          """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getLemmaQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getLemmaQuery(**args)
   
       def getLex(self, **args):
           """get lex"""
           return self.template.fulltextclient.getLex(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
       security.declareProtected('View','thumbs_rss')
       def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
           '''
           view it
           @param mode: defines how to access the document behind url 
           @param url: url which contains display information
           @param viewMode: if images display images, if text display text, default is images (text,images or auto)
           
           '''
           logging.debug("HHHHHHHHHHHHHH:load the rss")
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
           
           if not hasattr(self, 'template'):
               # create template folder if it doesn't exist
               self.manage_addFolder('template')
               
           if not self.digilibBaseUrl:
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
               
           docinfo = self.getDocinfo(mode=mode,url=url)
           #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
           ''' ZDES '''
           pt = getattr(self.template, 'thumbs_main_rss')
           
           if viewMode=="auto": # automodus gewaehlt
               if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
                   viewMode="text"
    else:     else:
        image=None                  viewMode="images"
                 
    if image and archivePath:          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
        image=os.path.join(archivePath,image)  
        image=image.replace("/mpiwg/online",'')  
        pt=getParamFromDigilib(image,'pt')  
   
       security.declareProtected('View','index_html')
       def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
           """
           view it
           @param mode: defines how to access the document behind url 
           @param url: url which contains display information
           @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
           @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           """
           
           logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
           
           if not hasattr(self, 'template'):
               # this won't work
               logging.error("template folder missing!")
               return "ERROR: template folder missing!"
               
           if not getattr(self, 'digilibBaseUrl', None):
               self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
               
           docinfo = self.getDocinfo(mode=mode,url=url)
           
           if tocMode != "thumbs":
               # get table of contents
               docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
           # auto viewMode: text_dict if text else images
           if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   #texturl gesetzt und textViewer konfiguriert
                   viewMode="text_dict"
    else:     else:
        image=None                  viewMode="images"
                   
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
           
           if viewMode != 'images' and docinfo.get('textURLPath', None):
               # get full text page
               page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
               pageinfo['textPage'] = page
                 
    viewerUrls=dom.xpath("//texttool/digiliburlprefix")          # get template /template/viewer_main
           pt = getattr(self.template, 'viewer_main')
           # and execute with parameters
           return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
         
    if viewerUrls and (len(viewerUrls)>0):      def generateMarks(self,mk):
        viewerUrl=getTextFromNode(viewerUrls[0])          ret=""
           if mk is None:
               return ""
           if not isinstance(mk, list):
               mk=[mk]
           for m in mk:
               ret+="mk=%s"%m
           return ret
       
       
       def getBrowser(self):
           """getBrowser the version of browser """
           bt = browserCheck(self)
           logging.debug("BROWSER VERSION: %s"%(bt))
           return bt
           
       def findDigilibUrl(self):
           """try to get the digilib URL from zogilib"""
           url = self.template.zogilib.getDLBaseUrl()
           return url
   
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
       
       def getStyle(self, idx, selected, style=""):
           """returns a string with the given style and append 'sel' if path == selected."""
           #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
           if idx == selected:
               return style + 'sel'
    else:     else:
        viewerUrl=None              return style
         
       def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           # copy existing request params
           urlParams=self.REQUEST.form.copy()
           # change single param
           if param is not None:
               if val is None:
                   if urlParams.has_key(param):
                       del urlParams[param]
               else:
                   urlParams[param] = str(val)
         
    textUrls=dom.xpath("//texttool/text")          # change more params
           if params is not None:
               for k in params.keys():
                   v = params[k]
                   if v is None:
                       # val=None removes param
                       if urlParams.has_key(k):
                           del urlParams[k]
         
    if textUrls and (len(textUrls)>0):  
        textUrl=getTextFromNode(textUrls[0])  
    else:     else:
        textUrl=None                      urlParams[k] = v
    return viewerUrl,(image,pt),textUrl  
   
           # FIXME: does this belong here?
           if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                   urlParams["mode"] = "imagepath"
                   urlParams["url"] = getParentPath(urlParams["url"])
                   
           # quote values and assemble into query string (not escaping '/')
           ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
           #ps = urllib.urlencode(urlParams)
           if baseUrl is None:
               baseUrl = self.REQUEST['URL1']
   
 class documentViewer(ZopePageTemplate):          url = "%s?%s"%(baseUrl, ps)
     """document viewer"""          return url
   
     meta_type="Document viewer"  
           
     security=ClassSecurityInfo()      def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
     manage_options=ZopePageTemplate.manage_options+(          """link to documentviewer with parameter param set to val"""
         {'label':'main config','action':'changeDocumentViewerForm'},          return self.getLink(param, val, params, baseUrl, '&')
         )      
       def getInfo_xml(self,url,mode):
           """returns info about the document as XML"""
   
           if not self.digilibBaseUrl:
               self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
           
           docinfo = self.getDocinfo(mode=mode,url=url)
           pt = getattr(self.template, 'info_xml')
           return pt(docinfo=docinfo)
   
       def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
           """returns new option state"""
           if not self.REQUEST.SESSION.has_key(optionName):
               # not in session -- initial
               opt = {'lastState': newState, 'state': initialState}
           else:
               opt = self.REQUEST.SESSION.get(optionName)
               if opt['lastState'] != newState:
                   # state in session has changed -- toggle
                   opt['state'] = not opt['state']
                   opt['lastState'] = newState
           
           self.REQUEST.SESSION[optionName] = opt
           return opt['state']
       
       def isAccessible(self, docinfo):
           """returns if access to the resource is granted"""
           access = docinfo.get('accessType', None)
           logging.debug("documentViewer (accessOK) access type %s"%access)
           if access is not None and access == 'free':
               logging.debug("documentViewer (accessOK) access is free")
               return True
           elif access is None or access in self.authgroups:
               # only local access -- only logged in users
               user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
               if user is not None:
                   #print "user: ", user
                   return (user.getUserName() != "Anonymous User")
               else:
                   return False
   
     _default_content_fn = os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')          logging.error("documentViewer (accessOK) unknown access type %s"%access)
           return False
           
     def __init__(self,id,imageViewerUrl,title=""):  
         """init document viewer"""  
         self.id=id  
         self.title=title  
         self.imageViewerUrl=imageViewerUrl  
                   
     security.declareProtected('View management screens','changeDocumentViewerForm')          def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
     def changeDocumentViewerForm(self):          """gibt param von dlInfo aus"""
         """change it"""          if docinfo is None:
         pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeDocumentViewer.zpt')).__of__(self)              docinfo = {}
         return pt()  
       
           for x in range(cut):
               path=getParentPath(path)
       
     def changeDocumentViewer(self,imageViewerUrl,title="",RESPONSE=None):          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
         """init document viewer"""  
         self.title=title  
         self.imageViewerUrl=imageViewerUrl  
                   
         if RESPONSE is not None:          logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
             RESPONSE.redirect('manage_main')  
           
           txt = getHttpData(infoUrl)
           if txt is None:
               raise IOError("Unable to get dir-info from %s"%(infoUrl))
           
     def imageLink(self,nr):          dom = ET.fromstring(txt)
         """link hinter den images"""          #dom = Parse(txt)
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])          size=getText(dom.find("size"))
         params={}          #sizes=dom.xpath("//dir/size")
         for x in paramsTmp.iteritems():          logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
                 params[x[0]]=x[1][0]  
       
         params['pn']=nr  
         newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
         return newUrl  
                   
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
                   
     def thumbruler(self,cols,rows,start,maximum):          # TODO: produce and keep list of image names and numbers
         """ruler for thumbs"""  
         ret=""  
         paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])  
         params={}  
         for x in paramsTmp.iteritems():  
   
             if not x[0]=="start":  
                 params[x[0]]=x[1][0]  
   
         newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params)      
         if start>0:  
             newStart=max(start-cols*rows,0)  
             params['start']=newStart  
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
             ret+="""<a href="%s">prev</a>"""%newUrl  
   
   
         ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value" """%newUrlSelect  
         nr,rest=divmod(maximum,cols*rows)  
         if rest > 0:  
             nr+=1  
         for i in range(nr):  
             nr=i*cols*rows  
              
             if (start >= nr) and (start < nr+cols*rows):    
                 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr)  
             else:  
                 ret+="""<option value="%s">%s</option>"""%(nr,nr)  
         ret+="</select>"  
           
         if start<maximum:  
             newStart=min(start+cols*rows,maximum)  
             params['start']=newStart  
             newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)  
             ret+="""<a href="%s">next</a>"""%newUrl  
                   
         return ret          return docinfo
                   
     def textToolThumb(self,url,start=0):      def getIndexMetaPath(self,url):
         """understands the texttool format          """gib nur den Pfad zurueck"""
         @param url: url to index.meta with texttool tag          regexp = re.compile(r".*(experimental|permanent)/(.*)")
         """          regpath = regexp.match(url)
         (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)          if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
                   
         imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%imagepath[0]  
                   
         pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','thumbs.zpt')).__of__(self)  
         return pt(imageUrl=imageUrl,pt=imagepath[1],start=start)  
           
     def text(self,mode,url,pn):      def getIndexMetaUrl(self,url):
         """give text"""          """returns utr  of index.meta document at url"""
         if mode=="texttool": #index.meta with texttool information  
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)  
                   
         print textpath          metaUrl = None
         try:          if url.startswith("http://"):
             dom = NonvalidatingReader.parseUri(textpath)              # real URL
         except:              metaUrl = url
             return None          else:
               # online path
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url.replace("/mpiwg/online","")
               if not metaUrl.endswith("index.meta"):
                   metaUrl += "/index.meta"
           
           return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
           txt=getHttpData(metaUrl)
           if txt is None:
               raise IOError("Unable to read index meta from %s"%(url))
           
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
           return dom
       
       def getPresentationInfoXML(self, url):
           """returns dom of info.xml document at url"""
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url.replace("/mpiwg/online","")
           
           txt=getHttpData(metaUrl)
           if txt is None:
               raise IOError("Unable to read infoXMLfrom %s"%(url))
               
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
           return dom
                           
           
       def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets authorization info from the index.meta file at path or given by dom"""
           logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
           
           access = None
           
           if docinfo is None:
               docinfo = {}
               
           if dom is None:
               for x in range(cut):
                   path=getParentPath(path)
               dom = self.getDomFromIndexMeta(path)
          
           acc = dom.find(".//access-conditions/access")
           if acc is not None:
               acctype = acc.get('type')
               #acctype = dom.xpath("//access-conditions/access/@type")
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = dom.find(".//access-conditions/access/name").text.lower()
               
           docinfo['accessType'] = access
           return docinfo
       
           
       def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets bibliographical info from the index.meta file at path or given by dom"""
           logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
           
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
           
           docinfo['indexMetaPath']=self.getIndexMetaPath(path);
           
           logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
           if self.metadataService is not None:
               # put all raw bib fields in dict "bib"
               bib = self.metadataService.getBibData(dom=dom)
               docinfo['bib'] = bib
               bibtype = bib.get('@type', None)
               docinfo['bib_type'] = bibtype
               # also store DC metadata for convenience
               dc = self.metadataService.getDCMappedData(bib)
               docinfo['creator'] = dc.get('creator',None)
               docinfo['title'] = dc.get('title',None)
               docinfo['date'] = dc.get('date',None)
           else:
               logging.error("MetadataService not found!")
           return docinfo
           
         list=[]  
         nodes=dom.xpath("//pb")  
   
         node=nodes[int(pn)-1]      # TODO: is this needed?
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentPath(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getText(dom.find("name"))
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
   
       
       def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
           """parse texttool tag in index meta"""
           logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
           if docinfo is None:
              docinfo = {}
           if docinfo.get('lang', None) is None:
               docinfo['lang'] = '' # default keine Sprache gesetzt
           if dom is None:
               dom = self.getDomFromIndexMeta(url)
                   
         p=node          texttool = self.metadata.getTexttoolData(dom=dom)
                   
         while p.tagName!="p":          archivePath = None
             p=p.parentNode          archiveName = None
                   
           archiveName = getText(dom.find("name"))
           if not archiveName:
               logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
           
           archivePath = getText(dom.find("archive-path"))
           if archivePath:
               # clean up archive path
               if archivePath[0] != '/':
                   archivePath = '/' + archivePath
               if archiveName and (not archivePath.endswith(archiveName)):
                   archivePath += "/" + archiveName
           else:
               # try to get archive-path from url
               logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
               if (not url.startswith('http')):
                   archivePath = url.replace('index.meta', '')
                   
           if archivePath is None:
               # we balk without archive-path
               raise IOError("Missing archive-path (for text-tool) in %s" % (url))
           
           imageDir = texttool.get('image', None)
               
           if not imageDir:
               # we balk with no image tag / not necessary anymore because textmode is now standard
               #raise IOError("No text-tool info in %s"%(url))
               imageDir = ""
               #xquery="//pb"  
               docinfo['imagePath'] = "" # keine Bilder
               docinfo['imageURL'] = ""
               
           if imageDir and archivePath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(archivePath, imageDir)
               imageDir = imageDir.replace("/mpiwg/online", '')
               docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
               docinfo['imagePath'] = imageDir
               
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
               
           viewerUrl = texttool.get('digiliburlprefix', None)
           if viewerUrl:
               docinfo['viewerURL'] = viewerUrl
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(archivePath, textUrl) 
               # fix URLs starting with /mpiwg/online
               if textUrl.startswith("/mpiwg/online"):
                   textUrl = textUrl.replace("/mpiwg/online", '', 1)
               
               docinfo['textURL'] = textUrl
       
           # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
               textUrlkurz = string.split(textUrl, ".")[0]
               docinfo['textURLPathkurz'] = textUrlkurz
               #if not docinfo['imagePath']:
                   # text-only, no page images
                   #docinfo = self.getNumTextPages(docinfo)
                     
           # get bib info
           docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           # TODO: is this needed here?
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
           
           # TODO: what to do with presentation?
           presentationUrl = texttool.get('presentation', None)
           if presentationUrl: # ueberschreibe diese durch presentation informationen 
                # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
                # durch den relativen Pfad auf die presentation infos
               presentationPath = presentationUrl
               if url.endswith("index.meta"): 
                   presentationUrl = url.replace('index.meta', presentationPath)
               else:
                   presentationUrl = url + "/" + presentationPath
                   
         endNode=nodes[int(pn)]              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
                   
           # get authorization
           docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
                   
         e=endNode          return docinfo
                   
         while e.tagName!="p":  
             e=e.parentNode  
                   
       def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
           """gets the bibliographical information from the preseantion entry in texttools
           """
           dom=self.getPresentationInfoXML(url)
           docinfo['author']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['year']=getText(dom.find(".//date"))
           return docinfo
       
       def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
           """path ist the path to the images it assumes that the index.meta file is one level higher."""
           logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
           if docinfo is None:
               docinfo = {}
           path=path.replace("/mpiwg/online","")
           docinfo['imagePath'] = path
           docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
           
           pathorig=path
           for x in range(cut):       
                   path=getParentPath(path)
           logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
           imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
           docinfo['imageURL'] = imageUrl
           
           #TODO: use getDocinfoFromIndexMeta
           #path ist the path to the images it assumes that the index.meta file is one level higher.
           docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           return docinfo
       
       
       def OLDgetDocinfo(self, mode, url):
           """returns docinfo depending on mode"""
           logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if its still current
               if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                   logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                   return docinfo
               
           # new docinfo
           docinfo = {'mode': mode, 'url': url}
           # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           if mode=="texttool": 
               # index.meta with texttool information
               docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
           elif mode=="imagepath":
               # folder with images, index.meta optional
               docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
           elif mode=="filepath":
               # filename
               docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
           else:
               logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                   
         next=node.parentNode          logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
           return docinfo
   
   
       def getDocinfo(self, mode, url):
           """returns docinfo depending on mode"""
           logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if its still current
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                   return docinfo
               
           # new docinfo
           docinfo = {'mode': mode, 'url': url}
           # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           # get index.meta DOM
           docUrl = None
           metaDom = None
           if mode=="texttool": 
               # url points to document dir or index.meta
               metaDom = self.metadataService.getDomFromPathOrUrl(url)
               docUrl = url.replace('/index.meta', '')
               if metaDom is None:
                   raise IOError("Unable to find index.meta for mode=texttool!")
   
           elif mode=="imagepath":
               # url points to folder with images, index.meta optional
               # asssume index.meta in parent dir
               docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           elif mode=="filepath":
               # url points to image file, index.meta optional
               # asssume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           else:
               logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                   
         #sammle s          docinfo['documentUrl'] = docUrl
         while next and (next!=endNode.parentNode):          # process index.meta contents
             list.append(next)              if metaDom is not None:
             next=next.nextSibling                  # document directory name and path
         list.append(endNode.parentNode)              resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool
               docinfo['imagePath'] = url
   
           # number of images from digilib
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
           return docinfo
   
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                   
         if p==e:# beide im selben paragraphen              if docName and (not docPath.endswith(docName)):
                   docPath += "/" + docName
                   
     else:      else:
             next=p              # use docUrl as docPath
             while next!=e:              docUrl = docinfo['documentURL']
                 print next,e              if not docUrl.startswith('http:'):
                 list.append(next)                  docPath = docUrl
                 next=next.nextSibling                  
           docinfo['documentPath'] = docPath
           return docinfo
   
       def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(docPath, imageDir)
               imageDir = imageDir.replace('/mpiwg/online', '', 1)
               docinfo['imagePath'] = imageDir
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl and docPath:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(docPath, textUrl) 
                   # fix URLs starting with /mpiwg/online
                   textUrl = textUrl.replace('/mpiwg/online', '', 1)
               
               docinfo['textURL'] = textUrl
       
           # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
               #TODO: ugly:
               #textUrlkurz = string.split(textUrl, ".")[0]
               #docinfo['textURLPathkurz'] = textUrlkurz
               
           # old presentation stuff
           presentation = texttool.get('presentation', None)
           if presentation and docPath:
               docinfo['presentationPath'] = os.path.join(docPath, presentation)
               
           return docinfo
   
       def getDocinfoFromBib(self, docinfo, bib):
           """reads contents of bib element into docinfo"""
           # put all raw bib fields in dict "bib"
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
           docinfo['bibType'] = bibtype
           # also store DC metadata for convenience
           dc = self.metadataService.getDCMappedData(bib)
           docinfo['creator'] = dc.get('creator',None)
           docinfo['title'] = dc.get('title',None)
           docinfo['date'] = dc.get('date',None)
           return docinfo
               
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           try:
               acctype = accc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                           
         for x in list:                  docinfo['accessType'] = access
             PrettyPrint(x)  
   
         return list          except:
               pass
           
     def image(self,mode,url,pn):          return docinfo
         """give image out"""  
         if mode=="texttool": #index.meta with texttool information  
             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)  
             url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0])  
             ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url  
             return url  
                   
     def thumbs(self,mode,url,start):      def getDocinfoFromDigilib(self, docinfo, path):
         """give thumbs out"""          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
         if mode=="texttool": #index.meta with texttool information          # fetch data
             return self.textToolThumb(url,int(start))          txt = getHttpData(infoUrl)
           if not txt:
               logging.error("Unable to get dir-info from %s"%(infoUrl))
               return docinfo
   
           dom = ET.fromstring(txt)
           size = getText(dom.find("size"))
           logging.debug("getDocinfoFromDigilib: size=%s"%size)
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
                   
     security.declareProtected('View','index_html')          # TODO: produce and keep list of image names and numbers
           return docinfo
           
           
     def index_html(self,mode,url,start=0,pn=0):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         '''          """returns pageinfo with the given parameters"""
         view it          pageinfo = {}
         @param mode: defines which type of document is behind url          current = getInt(current)
         @param url: url which contains display information      
         '''          pageinfo['current'] = current
           rows = int(rows or self.thumbrows)
           pageinfo['rows'] = rows
           cols = int(cols or self.thumbcols)
           pageinfo['cols'] = cols
           grpsize = cols * rows
           pageinfo['groupsize'] = grpsize
           # what does this do?
           start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
           # int(current / grpsize) * grpsize +1))
           pageinfo['start'] = start
           pageinfo['end'] = start + grpsize
           if (docinfo is not None) and ('numPages' in docinfo):
               np = int(docinfo['numPages'])
               pageinfo['end'] = min(pageinfo['end'], np)
               pageinfo['numgroups'] = int(np / grpsize)
               if np % grpsize > 0:
                   pageinfo['numgroups'] += 1
                   
           pageinfo['viewMode'] = viewMode
           pageinfo['tocMode'] = tocMode
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
           #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
           pageinfo['query'] = self.REQUEST.get('query','') 
           pageinfo['queryType'] = self.REQUEST.get('queryType','')
           pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
           pageinfo['textPN'] = self.REQUEST.get('textPN','1')
           pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
           pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
           pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
           pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
           # WTF?:
           toc = int(pageinfo['tocPN'])
           pageinfo['textPages'] =int(toc)
           
           # What does this do?
           if 'tocSize_%s'%tocMode in docinfo:
               tocSize = int(docinfo['tocSize_%s'%tocMode])
               tocPageSize = int(pageinfo['tocPageSize'])
               # cached toc           
               if tocSize%tocPageSize>0:
                   tocPages=tocSize/tocPageSize+1
               else:
                   tocPages=tocSize/tocPageSize
                   
               pageinfo['tocPN'] = min(tocPages,toc)
           
         pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')).__of__(self)          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         return pt(mode=mode,url=url,start=start,pn=pn)          pageinfo['sn'] =self.REQUEST.get('sn','')
           return pageinfo
                   
                   
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
                   
 #    security.declareProtected('View management screens','renameImageForm')      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
           """init document viewer"""
           self.title=title
           self.digilibBaseUrl = digilibBaseUrl
           self.thumbrows = thumbrows
           self.thumbcols = thumbcols
           self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
           if RESPONSE is not None:
               RESPONSE.redirect('manage_main')
   
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addDocumentViewer.zpt')).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
     return pt()      return pt()
       
 def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):  def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
     """add the viewer"""      """add the viewer"""
     newObj=documentViewer(id,imageViewerUrl,title)      newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
     self._setObject(id,newObj)      self._setObject(id,newObj)
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
           
   
   ## DocumentViewerTemplate class
   class DocumentViewerTemplate(ZopePageTemplate):
       """Template for document viewer"""
       meta_type="DocumentViewer Template"
   
   
   def manage_addDocumentViewerTemplateForm(self):
       """Form for adding"""
       pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
       return pt()
   
   def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                              REQUEST=None, submit=None):
       "Add a Page Template with optional file content."
   
       self._setObject(id, DocumentViewerTemplate(id))
       ob = getattr(self, id)
       txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
       logging.info("txt %s:"%txt)
       ob.pt_edit(txt,"text/html")
       if title:
           ob.pt_setTitle(title)
       try:
           u = self.DestinationURL()
       except AttributeError:
           u = REQUEST['URL1']
           
       u = "%s/%s" % (u, urllib.quote(id))
       REQUEST.RESPONSE.redirect(u+'/manage_main')
       return ''
   
   
       

Removed from v.1.1  
changed lines
  Added in v.1.175.2.11


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>