Diff for /documentViewer/documentViewer.py between versions 1.44 and 1.175.2.7

version 1.44, 2010/04/08 11:04:51 version 1.175.2.7, 2011/07/20 19:36:57
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
Line 6  from AccessControl import ClassSecurityI Line 5  from AccessControl import ClassSecurityI
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   
 from Ft.Xml.Domlette import NonvalidatingReader  #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 from Ft.Xml.Domlette import PrettyPrint, Print  #import Ft.Xml.Domlette
 from Ft.Xml import EMPTY_NAMESPACE, Parse  
   
   import xml.etree.ElementTree as ET
   
 import Ft.Xml.XPath  
 import cStringIO  
 import xmlrpclib  
 import os.path  import os.path
 import sys  import sys
 import cgi  
 import urllib  import urllib
 import logging  import logging
 import math  import math
   
 import urlparse   import urlparse 
 from types import *  import re
   import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
 def getInt(number, default=0):  def serializeNode(node, encoding="utf-8"):
     """returns always an int (0 in case of problems)"""  
     try:  
         return int(number)  
     except:  
         return int(default)  
   
 def getTextFromNode(nodename):  
     """get the cdata content of a node"""  
     if nodename is None:  
         return ""  
     nodelist=nodename.childNodes  
     rc = ""  
     for node in nodelist:  
         if node.nodeType == node.TEXT_NODE:  
            rc = rc + node.data  
     return rc  
   
 def serializeNode(node, encoding='utf-8'):  
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     buf = cStringIO.StringIO()      s = ET.tostring(node)
     Print(node, stream=buf, encoding=encoding)      
     s = buf.getvalue()      # 4Suite:
     buf.close()      #    stream = cStringIO.StringIO()
       #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
       #    s = stream.getvalue()
       #    stream.close()
     return s      return s
   
   def browserCheck(self):
       """check the browsers request to find out the browser type"""
       bt = {}
       ua = self.REQUEST.get_header("HTTP_USER_AGENT")
       bt['ua'] = ua
       bt['isIE'] = False
       bt['isN4'] = False
       bt['versFirefox']=""
       bt['versIE']=""
       bt['versSafariChrome']=""
       bt['versOpera']=""
       
       if string.find(ua, 'MSIE') > -1:
           bt['isIE'] = True
       else:
           bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
       # Safari oder Chrome identification    
       try:
           nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
           nav2=nav1[string.find(nav1,'('):]
           nav3=nav2[string.find(nav2,')'):]
           ie = string.split(nav, "; ")[1]
           ie1 =string.split(nav1, " ")[2]
           ie2 =string.split(nav3, " ")[1]
           ie3 =string.split(nav3, " ")[2]
           if string.find(ie3, "Safari") >-1:
               bt['versSafariChrome']=string.split(ie2, "/")[1]
       except: pass
       # IE identification
       try:
           nav = ua[string.find(ua, '('):]
           ie = string.split(nav, "; ")[1]
           if string.find(ie, "MSIE") > -1:
               bt['versIE'] = string.split(ie, " ")[1]
       except:pass
       # Firefox identification
       try:
           nav = ua[string.find(ua, '('):]
           nav1=ua[string.find(ua,')'):]
           if string.find(ie1, "Firefox") >-1:
               nav5= string.split(ie1, "/")[1]
               logging.debug("FIREFOX: %s"%(nav5))
               bt['versFirefox']=nav5[0:3]                   
       except:pass
       #Opera identification
       try:
           if string.find(ua,"Opera") >-1:
               nav = ua[string.find(ua, '('):]
               nav1=nav[string.find(nav,')'):]
               bt['versOpera']=string.split(nav1,"/")[2]
       except:pass
       
       bt['isMac'] = string.find(ua, 'Macintosh') > -1
       bt['isWin'] = string.find(ua, 'Windows') > -1
       bt['isIEWin'] = bt['isIE'] and bt['isWin']
       bt['isIEMac'] = bt['isIE'] and bt['isMac']
       bt['staticHTML'] = False
   
       return bt
                   
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   def normalizeBibtype(bt):
       """returns normalised bib type for looking up mappings"""
       bt = bt.strip().replace(' ', '-').lower()
       return bt
   
 import socket  def getBibdataFromDom(dom):
       """returns dict with all elements from bib-tag"""
 def urlopen(url,timeout=2):      bibinfo = {}
         """urlopen mit timeout"""      bib = dom.find(".//meta/bib")
         socket.setdefaulttimeout(timeout)      if bib is not None:
         ret=urllib.urlopen(url)          # put type in @type
         socket.setdefaulttimeout(5)          type = bib.get('type')
         return ret          bibinfo['@type'] = normalizeBibtype(type)
           # put all subelements in dict
           for e in bib:
               bibinfo[e.tag] = getText(e)
   
       return bibinfo
   
 ##  ##
 ## documentViewer class  ## documentViewer class
 ##  ##
 class documentViewer(Folder):  class documentViewer(Folder):
     """document viewer"""      """document viewer"""
     #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"  
       
     meta_type="Document viewer"      meta_type="Document viewer"
           
     security=ClassSecurityInfo()      security=ClassSecurityInfo()
Line 91  class documentViewer(Folder): Line 139  class documentViewer(Folder):
     toc_text = PageTemplateFile('zpt/toc_text', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
     toc_figures = PageTemplateFile('zpt/toc_figures', globals())      toc_figures = PageTemplateFile('zpt/toc_figures', globals())
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())      page_main_images = PageTemplateFile('zpt/page_main_images', globals())
       page_main_double = PageTemplateFile('zpt/page_main_double', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())      page_main_text = PageTemplateFile('zpt/page_main_text', globals())
     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())      page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
       page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
       page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
     info_xml = PageTemplateFile('zpt/info_xml', globals())      info_xml = PageTemplateFile('zpt/info_xml', globals())
   
       
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')          security.declareProtected('View management screens','changeDocumentViewerForm')    
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
         """init document viewer"""          """init document viewer"""
         self.id=id          self.id=id
         self.title=title          self.title=title
Line 116  class documentViewer(Folder): Line 169  class documentViewer(Folder):
         #self['template'] = templateFolder # Zope-2.12 style          #self['template'] = templateFolder # Zope-2.12 style
         self._setObject('template',templateFolder) # old style          self._setObject('template',templateFolder) # old style
         try:          try:
             from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy              import MpdlXmlTextServer
             xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)              textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
             #templateFolder['fulltextclient'] = xmlRpcClient              #templateFolder['fulltextclient'] = xmlRpcClient
             templateFolder._setObject('fulltextclient',xmlRpcClient)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 131  class documentViewer(Folder): Line 184  class documentViewer(Folder):
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
   
       # proxy text server methods to fulltextclient
       def getTextPage(self, **args):
           """get page"""
           return self.template.fulltextclient.getTextPage(**args)
   
       def getOrigPages(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getOrigPagesNorm(self, **args):
           """get page"""
           return self.template.fulltextclient.getOrigPagesNorm(**args)
   
       def getQuery(self, **args):
           """get query in search"""
           return self.template.fulltextclient.getQuery(**args)
        
       def getSearch(self, **args):
           """get search"""
           return self.template.fulltextclient.getSearch(**args)
       
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
          
       def getTranslate(self, **args):
           """get translate"""
           return self.template.fulltextclient.getTranslate(**args)
   
       def getLemma(self, **args):
           """get lemma"""
           return self.template.fulltextclient.getLemma(**args)
   
       def getLemmaQuery(self, **args):
           """get query"""
           return self.template.fulltextclient.getLemmaQuery(**args)
   
       def getLex(self, **args):
           """get lex"""
           return self.template.fulltextclient.getLex(**args)
   
       def getToc(self, **args):
           """get toc"""
           return self.template.fulltextclient.getToc(**args)
   
       def getTocPage(self, **args):
           """get tocpage"""
           return self.template.fulltextclient.getTocPage(**args)
   
       
     security.declareProtected('View','thumbs_rss')      security.declareProtected('View','thumbs_rss')
     def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):      def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
         '''          '''
Line 141  class documentViewer(Folder): Line 248  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 151  class documentViewer(Folder): Line 258  class documentViewer(Folder):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
           #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           ''' ZDES '''
         pt = getattr(self.template, 'thumbs_main_rss')          pt = getattr(self.template, 'thumbs_main_rss')
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 169  class documentViewer(Folder): Line 278  class documentViewer(Folder):
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           @param characterNormalization type of text display (reg, norm, none)
           @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''          '''
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
Line 180  class documentViewer(Folder): Line 291  class documentViewer(Folder):
             return "ERROR: template folder missing!"              return "ERROR: template folder missing!"
                           
         if not getattr(self, 'digilibBaseUrl', None):          if not getattr(self, 'digilibBaseUrl', None):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)          
         if tocMode != "thumbs":          if tocMode != "thumbs":
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
         if viewMode=="auto": # automodus gewaehlt          # auto viewMode: text_dict if text else images
             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert          if viewMode=="auto": 
                 viewMode="text"              if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   #texturl gesetzt und textViewer konfiguriert
                   viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
           
           if viewMode != 'images' and docinfo.get('textURLPath', None):
               # get full text page
               page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
               pageinfo['textPage'] = page
               
           # get template /template/viewer_main
         pt = getattr(self.template, 'viewer_main')                         pt = getattr(self.template, 'viewer_main')               
           # and execute with parameters
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
         ret=""          ret=""
         if mk is None:          if mk is None:
             return ""              return ""
         if type(mk) is not ListType:          if not isinstance(mk, list):
             mk=[mk]              mk=[mk]
         for m in mk:          for m in mk:
             ret+="mk=%s"%m              ret+="mk=%s"%m
         return ret          return ret
   
   
       def getBrowser(self):
           """getBrowser the version of browser """
           bt = browserCheck(self)
           logging.debug("BROWSER VERSION: %s"%(bt))
           return bt
           
     def findDigilibUrl(self):      def findDigilibUrl(self):
         """try to get the digilib URL from zogilib"""          """try to get the digilib URL from zogilib"""
         url = self.template.zogilib.getDLBaseUrl()          url = self.template.zogilib.getDLBaseUrl()
         return url          return url
           
       def getDocumentViewerURL(self):
           """returns the URL of this instance"""
           return self.absolute_url()
       
     def getStyle(self, idx, selected, style=""):      def getStyle(self, idx, selected, style=""):
         """returns a string with the given style and append 'sel' if path == selected."""          """returns a string with the given style and append 'sel' if path == selected."""
         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
Line 221  class documentViewer(Folder): Line 353  class documentViewer(Folder):
         else:          else:
             return style              return style
           
     def getLink(self,param=None,val=None):      def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
         """link to documentviewer with parameter param set to val"""          """returns URL to documentviewer with parameter param set to val or from dict params"""
         params=self.REQUEST.form.copy()          # copy existing request params
           urlParams=self.REQUEST.form.copy()
           # change single param
         if param is not None:          if param is not None:
             if val is None:              if val is None:
                 if params.has_key(param):                  if urlParams.has_key(param):
                     del params[param]                      del urlParams[param]
             else:              else:
                 params[param] = str(val)                  urlParams[param] = str(val)
                   
           # change more params
           if params is not None:
               for k in params.keys():
                   v = params[k]
                   if v is None:
                       # val=None removes param
                       if urlParams.has_key(k):
                           del urlParams[k]
                           
                   else:
                       urlParams[k] = v
   
           # FIXME: does this belong here?
           if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                   urlParams["mode"] = "imagepath"
                   urlParams["url"] = getParentDir(urlParams["url"])
                   
           # quote values and assemble into query string (not escaping '/')
           ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
           #ps = urllib.urlencode(urlParams)
           if baseUrl is None:
               baseUrl = self.REQUEST['URL1']
                                   
         if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath          url = "%s?%s"%(baseUrl, ps)
                 params["mode"] = "imagepath"  
                 params["url"] = getParentDir(params["url"])  
                   
         # quote values and assemble into query string  
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])  
         url=self.REQUEST['URL1']+"?"+ps  
         return url          return url
   
     def getLinkAmp(self,param=None,val=None):  
         """link to documentviewer with parameter param set to val"""  
         params=self.REQUEST.form.copy()  
         if param is not None:  
             if val is None:  
                 if params.has_key(param):  
                     del params[param]  
             else:  
                 params[param] = str(val)  
                                   
         # quote values and assemble into query string      def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
         logging.info("XYXXXXX: %s"%repr(params.items()))          """link to documentviewer with parameter param set to val"""
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          return self.getLink(param, val, params, baseUrl, '&')
         url=self.REQUEST['URL1']+"?"+ps  
         return url  
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
Line 266  class documentViewer(Folder): Line 406  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
       def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
           """returns new option state"""
           if not self.REQUEST.SESSION.has_key(optionName):
               # not in session -- initial
               opt = {'lastState': newState, 'state': initialState}
           else:
               opt = self.REQUEST.SESSION.get(optionName)
               if opt['lastState'] != newState:
                   # state in session has changed -- toggle
                   opt['state'] = not opt['state']
                   opt['lastState'] = newState
           
           self.REQUEST.SESSION[optionName] = opt
           return opt['state']
           
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access is not None and access == 'free':
             logger("documentViewer (accessOK)", logging.INFO, "access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:              if user is not None:
                 #print "user: ", user                  #print "user: ", user
                 return (user.getUserName() != "Anonymous User")                  return (user.getUserName() != "Anonymous User")
             else:              else:
                 return False                  return False
                   
         logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):      def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
         """gibt param von dlInfo aus"""          """gibt param von dlInfo aus"""
         num_retries = 3  
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
                   
         for x in range(cut):          for x in range(cut):
                  
                 path=getParentDir(path)                  path=getParentDir(path)
                 
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))          logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
                   
         for cnt in range(num_retries):          txt = getHttpData(infoUrl)
             try:          if txt is None:
                 # dom = NonvalidatingReader.parseUri(imageUrl)  
                 txt=urllib.urlopen(infoUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))  
         else:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
                   
         sizes=dom.xpath("//dir/size")          dom = ET.fromstring(txt)
         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)          #dom = Parse(txt)
           size=getText(dom.find("size"))
           #sizes=dom.xpath("//dir/size")
           logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
                   
         if sizes:          if size:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))              docinfo['numPages'] = int(size)
         else:          else:
             docinfo['numPages'] = 0              docinfo['numPages'] = 0
                           
Line 324  class documentViewer(Folder): Line 473  class documentViewer(Folder):
                                                   
         return docinfo          return docinfo
           
       def getIndexMetaPath(self,url):
           """gib nur den Pfad zurueck"""
           regexp = re.compile(r".*(experimental|permanent)/(.*)")
           regpath = regexp.match(url)
           if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
        
       
       
       def getIndexMetaUrl(self,url):
           """returns utr  of index.meta document at url"""
                           
     def getIndexMeta(self, url):  
         """returns dom of index.meta document at url"""  
         num_retries = 3  
         dom = None  
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
             # real URL              # real URL
Line 339  class documentViewer(Folder): Line 497  class documentViewer(Folder):
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
         logging.debug("METAURL: %s"%metaUrl)  
         for cnt in range(num_retries):  
             try:  
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                                   
         if dom is None:          return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
           txt=getHttpData(metaUrl)
           if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                                     
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
         return dom          return dom
           
     def getPresentationInfoXML(self, url):      def getPresentationInfoXML(self, url):
         """returns dom of info.xml document at url"""          """returns dom of info.xml document at url"""
         num_retries = 3  
         dom = None          dom = None
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
Line 368  class documentViewer(Folder): Line 526  class documentViewer(Folder):
             server=self.digilibBaseUrl+"/servlet/Texter?fn="              server=self.digilibBaseUrl+"/servlet/Texter?fn="
             metaUrl=server+url.replace("/mpiwg/online","")              metaUrl=server+url.replace("/mpiwg/online","")
                   
         for cnt in range(num_retries):          txt=getHttpData(metaUrl)
             try:          if txt is None:
                 # patch dirk encoding fehler treten dann nicht mehr auf  
                 # dom = NonvalidatingReader.parseUri(metaUrl)  
                 txt=urllib.urlopen(metaUrl).read()  
                 dom = Parse(txt)  
                 break  
             except:  
                 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])  
                   
         if dom is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))              raise IOError("Unable to read infoXMLfrom %s"%(url))
                                     
           dom = ET.fromstring(txt)
           #dom = Parse(txt)
         return dom          return dom
                                                   
                   
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""          """gets authorization info from the index.meta file at path or given by dom"""
         logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
                   
         access = None          access = None
                   
Line 396  class documentViewer(Folder): Line 547  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acc = dom.find(".//access-conditions/access")
         if acctype and (len(acctype)>0):          if acc is not None:
             access=acctype[0].value              acctype = acc.get('type')
               #acctype = dom.xpath("//access-conditions/access/@type")
               if acctype:
                   access=acctype
             if access in ['group', 'institution']:              if access in ['group', 'institution']:
                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()                      access = dom.find(".//access-conditions/access/name").text.lower()
                           
         docinfo['accessType'] = access          docinfo['accessType'] = access
         return docinfo          return docinfo
Line 418  class documentViewer(Folder): Line 572  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         # put in all raw bib fields as dict "bib"  
         bib = dom.xpath("//bib/*")  
         if bib and len(bib)>0:  
             bibinfo = {}  
             for e in bib:  
                 bibinfo[e.localName] = getTextFromNode(e)  
             docinfo['bib'] = bibinfo  
                   
         # extract some fields (author, title, year) according to their mapping          docinfo['indexMetaPath']=self.getIndexMetaPath(path);
         metaData=self.metadata.main.meta.bib  
         bibtype=dom.xpath("//bib/@type")  
         if bibtype and (len(bibtype)>0):  
             bibtype=bibtype[0].value  
         else:  
             bibtype="generic"  
                           
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
           # put all raw bib fields in dict "bib"
           bib = getBibdataFromDom(dom)
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          if bibtype:
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)              # also store standard mapped metadata for convenience
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:  
             try:  
                 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])  
             except: pass  
             try:  
                 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])  
             except: pass  
             try:  
                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])  
             except: pass  
             logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             try:              try:
                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])                  stdbib = self.metadata.getStdMappedHash(bib)
                   docinfo['std_bib'] = stdbib
                   docinfo['author'] = stdbib['author']
                   docinfo['title'] = stdbib['title']
                   docinfo['year'] = stdbib['year']
             except:              except:
                 docinfo['lang']=''                  pass
   
         return docinfo          return docinfo
           
           
       # TODO: is this needed?
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getText(dom.find("name"))
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:          if docinfo is None:
            docinfo = {}             docinfo = {}
         if docinfo.get('lang', None) is None:          if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default keine Sprache gesetzt              docinfo['lang'] = '' # default keine Sprache gesetzt
         if dom is None:          if dom is None:
             dom = self.getIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                   
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
           
         archiveNames = dom.xpath("//resource/name")          archiveName = getText(dom.find("name"))
         if archiveNames and (len(archiveNames) > 0):          if not archiveName:
             archiveName = getTextFromNode(archiveNames[0])              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
         else:          
             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))          archivePath = getText(dom.find("archive-path"))
                   if archivePath:
         archivePaths = dom.xpath("//resource/archive-path")  
         if archivePaths and (len(archivePaths) > 0):  
             archivePath = getTextFromNode(archivePaths[0])  
             # clean up archive path              # clean up archive path
             if archivePath[0] != '/':              if archivePath[0] != '/':
                 archivePath = '/' + archivePath                  archivePath = '/' + archivePath
Line 489  class documentViewer(Folder): Line 637  class documentViewer(Folder):
                 archivePath += "/" + archiveName                  archivePath += "/" + archiveName
         else:          else:
             # try to get archive-path from url              # try to get archive-path from url
             logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):              if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')                  archivePath = url.replace('index.meta', '')
                                   
Line 497  class documentViewer(Folder): Line 645  class documentViewer(Folder):
             # we balk without archive-path              # we balk without archive-path
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))              raise IOError("Missing archive-path (for text-tool) in %s" % (url))
                   
         imageDirs = dom.xpath("//texttool/image")          imageDir = getText(dom.find(".//texttool/image"))
         if imageDirs and (len(imageDirs) > 0):  
             imageDir = getTextFromNode(imageDirs[0])  
                           
         else:          if not imageDir:
             # we balk with no image tag / not necessary anymore because textmode is now standard              # we balk with no image tag / not necessary anymore because textmode is now standard
             #raise IOError("No text-tool info in %s"%(url))              #raise IOError("No text-tool info in %s"%(url))
             imageDir = ""              imageDir = ""
Line 518  class documentViewer(Folder): Line 664  class documentViewer(Folder):
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir              docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
                           
         viewerUrls = dom.xpath("//texttool/digiliburlprefix")          viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
         if viewerUrls and (len(viewerUrls) > 0):          if viewerUrl:
             viewerUrl = getTextFromNode(viewerUrls[0])  
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                                         
         textUrls = dom.xpath("//texttool/text")          # old style text URL
         if textUrls and (len(textUrls) > 0):          textUrl = getText(dom.find(".//texttool/text"))
             textUrl = getTextFromNode(textUrls[0])          if textUrl:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(archivePath, textUrl) 
             # fix URLs starting with /mpiwg/online              # fix URLs starting with /mpiwg/online
Line 534  class documentViewer(Folder): Line 679  class documentViewer(Folder):
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
         textUrls = dom.xpath("//texttool/text-url-path")          # new style text-url-path
         if textUrls and (len(textUrls) > 0):          textUrl = getText(dom.find(".//texttool/text-url-path"))
             textUrl = getTextFromNode(textUrls[0])          if textUrl:
             docinfo['textURLPath'] = textUrl                 docinfo['textURLPath'] = textUrl   
               textUrlkurz = string.split(textUrl, ".")[0]
               docinfo['textURLPathkurz'] = textUrlkurz
               #if not docinfo['imagePath']:
                   # text-only, no page images
                   #docinfo = self.getNumTextPages(docinfo)
                     
         presentationUrls = dom.xpath("//texttool/presentation")           
           presentationUrl = getText(dom.find(".//texttool/presentation"))
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           # TODO: is this needed here?
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
           
                   
         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen           if presentationUrl: # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
              # durch den relativen Pfad auf die presentation infos               # durch den relativen Pfad auf die presentation infos
             presentationPath = getTextFromNode(presentationUrls[0])              presentationPath = presentationUrl
             if url.endswith("index.meta"):               if url.endswith("index.meta"): 
                 presentationUrl = url.replace('index.meta', presentationPath)                  presentationUrl = url.replace('index.meta', presentationPath)
             else:              else:
                 presentationUrl = url + "/" + presentationPath                  presentationUrl = url + "/" + presentationPath
             docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht                      
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
           
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
Line 562  class documentViewer(Folder): Line 716  class documentViewer(Folder):
         """gets the bibliographical information from the preseantion entry in texttools          """gets the bibliographical information from the preseantion entry in texttools
         """          """
         dom=self.getPresentationInfoXML(url)          dom=self.getPresentationInfoXML(url)
         try:          docinfo['author']=getText(dom.find(".//author"))
             docinfo['author']=getTextFromNode(dom.xpath("//author")[0])          docinfo['title']=getText(dom.find(".//title"))
         except:          docinfo['year']=getText(dom.find(".//date"))
             pass  
         try:  
             docinfo['title']=getTextFromNode(dom.xpath("//title")[0])  
         except:  
             pass  
         try:  
             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])  
         except:  
             pass  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          """path ist the path to the images it assumes that the index.meta file is one level higher."""
         logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))          logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
         path=path.replace("/mpiwg/online","")          path=path.replace("/mpiwg/online","")
Line 588  class documentViewer(Folder): Line 733  class documentViewer(Folder):
         pathorig=path          pathorig=path
         for x in range(cut):                 for x in range(cut):       
                 path=getParentDir(path)                  path=getParentDir(path)
         logging.error("PATH:"+path)          logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
           #TODO: use getDocinfoFromIndexMeta
         #path ist the path to the images it assumes that the index.meta file is one level higher.          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
Line 600  class documentViewer(Folder): Line 746  class documentViewer(Folder):
           
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):          if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         if mode=="texttool": #index.meta with texttool information          # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           if mode=="texttool": 
               # index.meta with texttool information
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
               # folder with images, index.meta optional
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
         elif mode=="filepath":          elif mode=="filepath":
               # filename
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                   
           
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
       
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
Line 636  class documentViewer(Folder): Line 790  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # what does this do?
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 649  class documentViewer(Folder): Line 804  class documentViewer(Folder):
                                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
           #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
           pageinfo['query'] = self.REQUEST.get('query','') 
           pageinfo['queryType'] = self.REQUEST.get('queryType','')
           pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
           pageinfo['textPN'] = self.REQUEST.get('textPN','1')
           pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
           pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
           pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
           # WTF?:
         return pageinfo          toc = int(pageinfo['tocPN'])
                           pageinfo['textPages'] =int(toc)
           
           # What does this do?
     def getNumPages(self,docinfo=None):          if 'tocSize_%s'%tocMode in docinfo:
         """get list of pages from fulltext and put in docinfo"""              tocSize = int(docinfo['tocSize_%s'%tocMode])
         xquery = '//pb'              tocPageSize = int(pageinfo['tocPageSize'])
         text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))  
         # TODO: better processing of the page list. do we need the info somewhere else also?  
         docinfo['numPages'] = text.count("<pb ")  
         return docinfo  
          
     def getTextPage(self, mode="text", pn=1, docinfo=None):  
         """returns single page from fulltext"""  
         docpath = docinfo['textURLPath']  
         if mode == "text_dict":  
             textmode = "textPollux"  
         else:  
             textmode = mode  
               
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)  
         # post-processing downloaded xml  
         pagedom = Parse(pagexml)  
         # plain text mode  
         if mode == "text":  
             # first div contains text  
             pagedivs = pagedom.xpath("/div")  
             if len(pagedivs) > 0:  
                 pagenode = pagedivs[0]  
                 return serializeNode(pagenode)  
   
         # text-with-links mode  
         if mode == "text_dict":  
             # first div contains text  
             pagedivs = pagedom.xpath("/div")  
             if len(pagedivs) > 0:  
                 pagenode = pagedivs[0]  
                 # check all a-tags  
                 links = pagenode.xpath("//a")  
                 for l in links:  
                     hrefNode = l.getAttributeNodeNS(None, u"href")  
                     if hrefNode:  
                         # is link with href  
                         href = hrefNode.nodeValue  
                         if href.startswith('lt/lex.xql'):  
                             # is pollux link  
                             selfurl = self.absolute_url()  
                             # change href  
                             hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)  
                             # add target  
                             l.setAttributeNS(None, 'target', '_blank')  
                 return serializeNode(pagenode)  
           
         return "no text here"  
   
     def getToc(self, mode="text", docinfo=None):  
         """loads table of contents and stores in docinfo"""  
         logging.debug("documentViewer (gettoc) mode: %s"%(mode))  
         if 'tocSize_%s'%mode in docinfo:  
             # cached toc              # cached toc
             return docinfo              if tocSize%tocPageSize>0:
                           tocPages=tocSize/tocPageSize+1
         docpath = docinfo['textURLPath']              else:
         # we need to set a result set size                  tocPages=tocSize/tocPageSize
         pagesize = 1000  
         pn = 1  
         if mode == "text":  
             queryType = "toc"  
         else:  
             queryType = mode  
         # number of entries in toc  
         tocSize = 0  
         tocDiv = None  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)  
         # post-processing downloaded xml  
         pagedom = Parse(pagexml)  
         # get number of entries  
         numdivs = pagedom.xpath("//div[@class='queryResultHits']")  
         if len(numdivs) > 0:  
             tocSize = int(getTextFromNode(numdivs[0]))  
             # div contains text  
             #pagedivs = pagedom.xpath("//div[@class='queryResultPage']")  
             #if len(pagedivs) > 0:  
             #    tocDiv = pagedivs[0]  
   
         docinfo['tocSize_%s'%mode] = tocSize              pageinfo['tocPN'] = min(tocPages,toc)
         #docinfo['tocDiv_%s'%mode] = tocDiv  
         return docinfo  
           
     def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None):          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         """returns single page from the table of contents"""          pageinfo['sn'] =self.REQUEST.get('sn','')
         # TODO: this should use the cached TOC          return pageinfo
         if mode == "text":  
             queryType = "toc"  
         else:  
             queryType = mode  
         docpath = docinfo['textURLPath']  
         pagesize = pageinfo['tocPageSize']  
         pn = pageinfo['tocPN']  
         pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)  
         # post-processing downloaded xml  
         pagedom = Parse(pagexml)  
         # div contains text  
         pagedivs = pagedom.xpath("//div[@class='queryResultPage']")  
         if len(pagedivs) > 0:  
             pagenode = pagedivs[0]  
             return serializeNode(pagenode)  
         else:  
             return "No TOC!"  
   
           
     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
         self.digilibBaseUrl = digilibBaseUrl          self.digilibBaseUrl = digilibBaseUrl
Line 773  class documentViewer(Folder): Line 845  class documentViewer(Folder):
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
           
       
           
 def manage_AddDocumentViewerForm(self):  def manage_AddDocumentViewerForm(self):
     """add the viewer form"""      """add the viewer form"""
     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)      pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
Line 788  def manage_AddDocumentViewer(self,id,ima Line 858  def manage_AddDocumentViewer(self,id,ima
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
   
 ##  
 ## DocumentViewerTemplate class  ## DocumentViewerTemplate class
 ##  
 class DocumentViewerTemplate(ZopePageTemplate):  class DocumentViewerTemplate(ZopePageTemplate):
     """Template for document viewer"""      """Template for document viewer"""
     meta_type="DocumentViewer Template"      meta_type="DocumentViewer Template"

Removed from v.1.44  
changed lines
  Added in v.1.175.2.7


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>