Diff for /documentViewer/documentViewer.py between versions 1.175.2.11 and 1.175.2.29

version 1.175.2.11, 2011/07/29 16:27:24 version 1.175.2.29, 2011/08/22 15:00:28
Line 1 Line 1
 from OFS.Folder import Folder  from OFS.Folder import Folder
   from OFS.Image import File
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
   from Products.ZSimpleFile import ZSimpleFile
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
Line 122  class documentViewer(Folder): Line 124  class documentViewer(Folder):
     """MetaDataFolder instance"""      """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
       viewer_text = PageTemplateFile('zpt/viewer_text', globals())
       viewer_images = PageTemplateFile('zpt/viewer_images', globals())
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
     toc_text = PageTemplateFile('zpt/toc_text', globals())      toc_text = PageTemplateFile('zpt/toc_text', globals())
Line 134  class documentViewer(Folder): Line 138  class documentViewer(Folder):
     page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())      page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())      page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())  
     info_xml = PageTemplateFile('zpt/info_xml', globals())      info_xml = PageTemplateFile('zpt/info_xml', globals())
       # TODO: can this be nicer?
       docuviewer_css = ZSimpleFile(content_type='text/css',filename='css/docuviewer.css', id='docuviewer_css',_prefix=globals())
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
Line 176  class documentViewer(Folder): Line 181  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))              logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
                           
           if digilibBaseUrl is not None:
               self.digilibBaseUrl = digilibBaseUrl
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 206  class documentViewer(Folder): Line 214  class documentViewer(Folder):
         """get all gis places """          """get all gis places """
         return self.template.fulltextclient.getAllGisPlaces(**args)          return self.template.fulltextclient.getAllGisPlaces(**args)
                 
     def getTranslate(self, **args):      def getWordInfo(self, **args):
         """get translate"""          """get translate"""
         return self.template.fulltextclient.getTranslate(**args)          return self.template.fulltextclient.getWordInfo(**args)
   
     def getLemma(self, **args):      def getLemma(self, **args):
         """get lemma"""          """get lemma"""
Line 264  class documentViewer(Folder): Line 272  class documentViewer(Folder):
                                 
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
         """          """
         view it          view page
         @param mode: defines how to access the document behind url   
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param mode: defines how to access the document behind url 
           @param viewMode: 'images': display images, 'text': display text, default is 'auto'
           @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         """          """
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # this won't work              # this won't work
Line 290  class documentViewer(Folder): Line 300  class documentViewer(Folder):
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
         # auto viewMode: text_dict if text else images          # auto viewMode: text if there is a text else images
         if viewMode=="auto":           if viewMode=="auto": 
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                 #texturl gesetzt und textViewer konfiguriert                  viewMode = "text"
                 viewMode="text_dict"                  viewType = "dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)          elif viewMode == "text_dict":
               # legacy fix
               viewMode = "text"
               viewType = "dict"
                   
         if viewMode != 'images' and docinfo.get('textURLPath', None):          # stringify viewType
             # get full text page          if isinstance(viewType, list):
             page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)              logging.debug("index_html: viewType is list:%s"%viewType)
             pageinfo['textPage'] = page              viewType = ','.join([t for t in viewType if t])
                           
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)
                       
           # get template /template/viewer_$viewMode
           pt = getattr(self.template, 'viewer_%s'%viewMode, None)
           if pt is None:
               logging.error("No template for viewMode=%s!"%viewMode)
               # TODO: error page?
               return "No template for viewMode=%s!"%viewMode
                           
         # get template /template/viewer_main  
         pt = getattr(self.template, 'viewer_main')  
         # and execute with parameters          # and execute with parameters
         return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))          return pt(docinfo=docinfo, pageinfo=pageinfo)
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
         ret=""          ret=""
Line 332  class documentViewer(Folder): Line 352  class documentViewer(Folder):
         url = self.template.zogilib.getDLBaseUrl()          url = self.template.zogilib.getDLBaseUrl()
         return url          return url
   
       def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
           """returns URL to digilib Scaler with params"""
           url = None
           if docinfo is not None:
               url = docinfo.get('imageURL', None)
               
           if url is None:
               url = "%s/servlet/Scaler?"%self.digilibBaseUrl
               if fn is None and docinfo is not None:
                   fn = docinfo.get('imagePath','')
               
               url += "fn=%s"%fn
               
           if pn:
               url += "&pn=%s"%pn
               
           url += "&dw=%s&dh=%s"%(dw,dh)
           return url
   
     def getDocumentViewerURL(self):      def getDocumentViewerURL(self):
         """returns the URL of this instance"""          """returns the URL of this instance"""
         return self.absolute_url()          return self.absolute_url()
           
     def getStyle(self, idx, selected, style=""):      def getStyle(self, idx, selected, style=""):
         """returns a string with the given style and append 'sel' if path == selected."""          """returns a string with the given style and append 'sel' if idx == selected."""
         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))          #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
         if idx == selected:          if idx == selected:
             return style + 'sel'              return style + 'sel'
         else:          else:
             return style              return style
           
     def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):      def getParams(self, param=None, val=None, params=None, duplicates=None):
         """returns URL to documentviewer with parameter param set to val or from dict params"""          """returns dict with URL parameters.
           
           Takes URL parameters and additionally param=val or dict params.
           Deletes key if value is None."""
         # copy existing request params          # copy existing request params
         urlParams=self.REQUEST.form.copy()          newParams=self.REQUEST.form.copy()
         # change single param          # change single param
         if param is not None:          if param is not None:
             if val is None:              if val is None:
                 if urlParams.has_key(param):                  if newParams.has_key(param):
                     del urlParams[param]                      del newParams[param]
             else:              else:
                 urlParams[param] = str(val)                  newParams[param] = str(val)
                                   
         # change more params          # change more params
         if params is not None:          if params is not None:
             for k in params.keys():              for (k, v) in params.items():
                 v = params[k]  
                 if v is None:                  if v is None:
                     # val=None removes param                      # val=None removes param
                     if urlParams.has_key(k):                      if newParams.has_key(k):
                         del urlParams[k]                          del newParams[k]
                                                   
                 else:                  else:
                     urlParams[k] = v                      newParams[k] = v
   
         # FIXME: does this belong here?          if duplicates:
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath              # eliminate lists (coming from duplicate keys)
                 urlParams["mode"] = "imagepath"              for (k,v) in newParams.items():
                 urlParams["url"] = getParentPath(urlParams["url"])                  if isinstance(v, list):
                       if duplicates == 'comma':
                           # make comma-separated list of non-empty entries
                           newParams[k] = ','.join([t for t in v if t])
                       elif duplicates == 'first':
                           # take first non-empty entry
                           newParams[k] = [t for t in v if t][0]
                                   
           return newParams
       
       def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
         #ps = urllib.urlencode(urlParams)  
         if baseUrl is None:          if baseUrl is None:
             baseUrl = self.REQUEST['URL1']              baseUrl = self.getDocumentViewerURL()
                           
         url = "%s?%s"%(baseUrl, ps)          url = "%s?%s"%(baseUrl, ps)
         return url          return url
   
       def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
     def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):  
         """link to documentviewer with parameter param set to val"""          """link to documentviewer with parameter param set to val"""
         return self.getLink(param, val, params, baseUrl, '&')          return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
       
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                   
Line 397  class documentViewer(Folder): Line 447  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):  
         """returns new option state"""  
         if not self.REQUEST.SESSION.has_key(optionName):  
             # not in session -- initial  
             opt = {'lastState': newState, 'state': initialState}  
         else:  
             opt = self.REQUEST.SESSION.get(optionName)  
             if opt['lastState'] != newState:  
                 # state in session has changed -- toggle  
                 opt['state'] = not opt['state']  
                 opt['lastState'] = newState  
           
         self.REQUEST.SESSION[optionName] = opt  
         return opt['state']  
       
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logging.debug("documentViewer (accessOK) access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access == 'free':
             logging.debug("documentViewer (accessOK) access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
           
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
Line 433  class documentViewer(Folder): Line 469  class documentViewer(Folder):
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
           
         for x in range(cut):  
             path=getParentPath(path)  
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path  
       
         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))  
           
         txt = getHttpData(infoUrl)  
         if txt is None:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))  
   
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         size=getText(dom.find("size"))  
         #sizes=dom.xpath("//dir/size")  
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)  
           
         if size:  
             docinfo['numPages'] = int(size)  
         else:  
             docinfo['numPages'] = 0  
               
         # TODO: produce and keep list of image names and numbers  
                           
         return docinfo  
       
     def getIndexMetaPath(self,url):  
         """gib nur den Pfad zurueck"""  
         regexp = re.compile(r".*(experimental|permanent)/(.*)")  
         regpath = regexp.match(url)  
         if (regpath==None):  
             return ""  
         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))              
         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))  
        
       
       
     def getIndexMetaUrl(self,url):  
         """returns utr  of index.meta document at url"""  
         
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
             if not metaUrl.endswith("index.meta"):  
                 metaUrl += "/index.meta"  
           
         return metaUrl  
       
     def getDomFromIndexMeta(self, url):  
         """get dom from index meta"""  
         dom = None  
         metaUrl = self.getIndexMetaUrl(url)  
                   
         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)  
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read index meta from %s"%(url))  
           
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
       
     def getPresentationInfoXML(self, url):  
         """returns dom of info.xml document at url"""  
         dom = None  
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
           
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))  
               
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
                           
           
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets authorization info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))  
           
         access = None  
           
         if docinfo is None:  
             docinfo = {}  
               
         if dom is None:  
             for x in range(cut):  
                 path=getParentPath(path)  
             dom = self.getDomFromIndexMeta(path)  
          
         acc = dom.find(".//access-conditions/access")  
         if acc is not None:  
             acctype = acc.get('type')  
             #acctype = dom.xpath("//access-conditions/access/@type")  
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:  
                     access = dom.find(".//access-conditions/access/name").text.lower()  
               
         docinfo['accessType'] = access  
         return docinfo  
       
           
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets bibliographical info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))  
           
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
           
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);  
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         if self.metadataService is not None:  
             # put all raw bib fields in dict "bib"  
             bib = self.metadataService.getBibData(dom=dom)  
             docinfo['bib'] = bib  
             bibtype = bib.get('@type', None)  
             docinfo['bib_type'] = bibtype  
             # also store DC metadata for convenience  
             dc = self.metadataService.getDCMappedData(bib)  
             docinfo['creator'] = dc.get('creator',None)  
             docinfo['title'] = dc.get('title',None)  
             docinfo['date'] = dc.get('date',None)  
         else:  
             logging.error("MetadataService not found!")  
         return docinfo  
       
       
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets name info from the index.meta file at path or given by dom"""  
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentPath(path)  
             dom = self.getDomFromIndexMeta(path)  
   
         docinfo['name']=getText(dom.find("name"))  
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])  
         return docinfo  
   
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):  
         """parse texttool tag in index meta"""  
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))  
         if docinfo is None:  
            docinfo = {}  
         if docinfo.get('lang', None) is None:  
             docinfo['lang'] = '' # default keine Sprache gesetzt  
         if dom is None:  
             dom = self.getDomFromIndexMeta(url)  
               
         texttool = self.metadata.getTexttoolData(dom=dom)  
           
         archivePath = None  
         archiveName = None  
       
         archiveName = getText(dom.find("name"))  
         if not archiveName:  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))  
           
         archivePath = getText(dom.find("archive-path"))  
         if archivePath:  
             # clean up archive path  
             if archivePath[0] != '/':  
                 archivePath = '/' + archivePath  
             if archiveName and (not archivePath.endswith(archiveName)):  
                 archivePath += "/" + archiveName  
         else:  
             # try to get archive-path from url  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))  
             if (not url.startswith('http')):  
                 archivePath = url.replace('index.meta', '')  
                   
         if archivePath is None:  
             # we balk without archive-path  
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))  
           
         imageDir = texttool.get('image', None)  
               
         if not imageDir:  
             # we balk with no image tag / not necessary anymore because textmode is now standard  
             #raise IOError("No text-tool info in %s"%(url))  
             imageDir = ""  
             #xquery="//pb"    
             docinfo['imagePath'] = "" # keine Bilder  
             docinfo['imageURL'] = ""  
               
         if imageDir and archivePath:  
             #print "image: ", imageDir, " archivepath: ", archivePath  
             imageDir = os.path.join(archivePath, imageDir)  
             imageDir = imageDir.replace("/mpiwg/online", '')  
             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)  
             docinfo['imagePath'] = imageDir  
               
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir  
               
         viewerUrl = texttool.get('digiliburlprefix', None)  
         if viewerUrl:  
             docinfo['viewerURL'] = viewerUrl  
           
         # old style text URL  
         textUrl = texttool.get('text', None)  
         if textUrl:  
             if urlparse.urlparse(textUrl)[0] == "": #keine url  
                 textUrl = os.path.join(archivePath, textUrl)   
             # fix URLs starting with /mpiwg/online  
             if textUrl.startswith("/mpiwg/online"):  
                 textUrl = textUrl.replace("/mpiwg/online", '', 1)  
               
             docinfo['textURL'] = textUrl  
       
         # new style text-url-path  
         textUrl = texttool.get('text-url-path', None)  
         if textUrl:  
             docinfo['textURLPath'] = textUrl  
             textUrlkurz = string.split(textUrl, ".")[0]  
             docinfo['textURLPathkurz'] = textUrlkurz  
             #if not docinfo['imagePath']:  
                 # text-only, no page images  
                 #docinfo = self.getNumTextPages(docinfo)  
                     
         # get bib info  
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag  
         # TODO: is this needed here?  
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)  
           
         # TODO: what to do with presentation?  
         presentationUrl = texttool.get('presentation', None)  
         if presentationUrl: # ueberschreibe diese durch presentation informationen   
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten  
              # durch den relativen Pfad auf die presentation infos  
             presentationPath = presentationUrl  
             if url.endswith("index.meta"):   
                 presentationUrl = url.replace('index.meta', presentationPath)  
             else:  
                 presentationUrl = url + "/" + presentationPath  
                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)  
       
         # get authorization  
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info  
           
         return docinfo  
      
      
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):  
         """gets the bibliographical information from the preseantion entry in texttools  
         """  
         dom=self.getPresentationInfoXML(url)  
         docinfo['author']=getText(dom.find(".//author"))  
         docinfo['title']=getText(dom.find(".//title"))  
         docinfo['year']=getText(dom.find(".//date"))  
         return docinfo  
       
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):  
         """path ist the path to the images it assumes that the index.meta file is one level higher."""  
         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))  
         if docinfo is None:  
             docinfo = {}  
         path=path.replace("/mpiwg/online","")  
         docinfo['imagePath'] = path  
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)  
           
         pathorig=path  
         for x in range(cut):         
                 path=getParentPath(path)  
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
           
         #TODO: use getDocinfoFromIndexMeta  
         #path ist the path to the images it assumes that the index.meta file is one level higher.  
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         return docinfo  
       
       
     def OLDgetDocinfo(self, mode, url):  
         """returns docinfo depending on mode"""  
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))  
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo  
               
         # new docinfo  
         docinfo = {'mode': mode, 'url': url}  
         # add self url  
         docinfo['viewerUrl'] = self.getDocumentViewerURL()  
         if mode=="texttool":   
             # index.meta with texttool information  
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)  
         elif mode=="imagepath":  
             # folder with images, index.meta optional  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)  
         elif mode=="filepath":  
             # filename  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)  
         else:  
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)  
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         # store in session  
         self.REQUEST.SESSION['docinfo'] = docinfo  
         return docinfo  
   
   
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
Line 786  class documentViewer(Folder): Line 485  class documentViewer(Folder):
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         # add self url          # add self url
         docinfo['viewerUrl'] = self.getDocumentViewerURL()          docinfo['viewerUrl'] = self.getDocumentViewerURL()
           docinfo['digilibBaseUrl'] = self.digilibBaseUrl
         # get index.meta DOM          # get index.meta DOM
         docUrl = None          docUrl = None
         metaDom = None          metaDom = None
Line 814  class documentViewer(Folder): Line 514  class documentViewer(Folder):
                   
         docinfo['documentUrl'] = docUrl          docinfo['documentUrl'] = docUrl
         # process index.meta contents          # process index.meta contents
         if metaDom is not None:          if metaDom is not None and metaDom.tag == 'resource':
             # document directory name and path              # document directory name and path
             resource = self.metadataService.getResourceData(dom=metaDom)              resource = self.metadataService.getResourceData(dom=metaDom)
             if resource:              if resource:
Line 829  class documentViewer(Folder): Line 529  class documentViewer(Folder):
             bib = self.metadataService.getBibData(dom=metaDom)              bib = self.metadataService.getBibData(dom=metaDom)
             if bib:              if bib:
                 docinfo = self.getDocinfoFromBib(docinfo, bib)                  docinfo = self.getDocinfoFromBib(docinfo, bib)
               else:
                   # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                                   
             # auth info              # auth info
             access = self.metadataService.getAccessData(dom=metaDom)              access = self.metadataService.getAccessData(dom=metaDom)
             if access:              if access:
                 docinfo = self.getDocinfoFromAccess(docinfo, access)                  docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
         # image path          # image path
         if mode != 'texttool':          if mode != 'texttool':
             # override image path from texttool              # override image path from texttool with url
             docinfo['imagePath'] = url              docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
         # number of images from digilib          # number of images from digilib
         if docinfo.get('imagePath', None):          if docinfo.get('imagePath', None):
Line 869  class documentViewer(Folder): Line 586  class documentViewer(Folder):
             docUrl = docinfo['documentURL']              docUrl = docinfo['documentURL']
             if not docUrl.startswith('http:'):              if not docUrl.startswith('http:'):
                 docPath = docUrl                  docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                                   
         docinfo['documentPath'] = docPath          docinfo['documentPath'] = docPath
         return docinfo          return docinfo
Line 889  class documentViewer(Folder): Line 609  class documentViewer(Folder):
         if textUrl and docPath:          if textUrl and docPath:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(docPath, textUrl)                   textUrl = os.path.join(docPath, textUrl) 
                 # fix URLs starting with /mpiwg/online  
                 textUrl = textUrl.replace('/mpiwg/online', '', 1)  
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
Line 898  class documentViewer(Folder): Line 616  class documentViewer(Folder):
         textUrl = texttool.get('text-url-path', None)          textUrl = texttool.get('text-url-path', None)
         if textUrl:          if textUrl:
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             #TODO: ugly:              
             #textUrlkurz = string.split(textUrl, ".")[0]          # page flow
             #docinfo['textURLPathkurz'] = textUrlkurz          docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
               
           # odd pages are left
           docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
               
           # number of title page (0: not defined)
           docinfo['titlePage'] = texttool.get('title-scan-no', 0)
                           
         # old presentation stuff          # old presentation stuff
         presentation = texttool.get('presentation', None)          presentation = texttool.get('presentation', None)
         if presentation and docPath:          if presentation and docPath:
             docinfo['presentationPath'] = os.path.join(docPath, presentation)              if presentation.startswith('http:'):
                   docinfo['presentationUrl'] = presentation
               else:
                   docinfo['presentationUrl'] = os.path.join(docPath, presentation)
               
                           
         return docinfo          return docinfo
   
     def getDocinfoFromBib(self, docinfo, bib):      def getDocinfoFromBib(self, docinfo, bib):
         """reads contents of bib element into docinfo"""          """reads contents of bib element into docinfo"""
           logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
         # put all raw bib fields in dict "bib"          # put all raw bib fields in dict "bib"
         docinfo['bib'] = bib          docinfo['bib'] = bib
         bibtype = bib.get('@type', None)          bibtype = bib.get('@type', None)
Line 925  class documentViewer(Folder): Line 654  class documentViewer(Folder):
     def getDocinfoFromAccess(self, docinfo, acc):      def getDocinfoFromAccess(self, docinfo, acc):
         """reads contents of access element into docinfo"""          """reads contents of access element into docinfo"""
         #TODO: also read resource type          #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
         try:          try:
             acctype = accc['@attr']['type']              acctype = acc['@attr']['type']
             if acctype:              if acctype:
                 access=acctype                  access=acctype
                 if access in ['group', 'institution']:                  if access in ['group', 'institution']:
Line 959  class documentViewer(Folder): Line 689  class documentViewer(Folder):
         return docinfo          return docinfo
                           
                           
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getDocinfoFromPresentationInfoXml(self,docinfo):
           """gets DC-like bibliographical information from the presentation entry in texttools"""
           url = docinfo.get('presentationUrl', None)
           if not url:
               logging.error("getDocinfoFromPresentation: no URL!")
               return docinfo
           
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url
           
           txt=getHttpData(metaUrl)
           if txt is None:
               logging.error("Unable to read info.xml from %s"%(url))
               return docinfo
               
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
           return docinfo
       
   
       def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
           logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          pageinfo['viewMode'] = viewMode
           pageinfo['viewType'] = viewType
           pageinfo['tocMode'] = tocMode
           
           current = getInt(current)
         pageinfo['current'] = current          pageinfo['current'] = current
           pageinfo['pn'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
         cols = int(cols or self.thumbcols)          cols = int(cols or self.thumbcols)
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
         # what does this do?          # is start is empty use one around current
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
         pageinfo['end'] = start + grpsize          
         if (docinfo is not None) and ('numPages' in docinfo):          np = int(docinfo.get('numPages', 0))
             np = int(docinfo['numPages'])          if np == 0:
             pageinfo['end'] = min(pageinfo['end'], np)              # numPages unknown - maybe we can get it from text page
               if docinfo.get('textURLPath', None):
                   # cache text page as well
                   pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
                   np = int(docinfo.get('numPages', 0))
                   
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
                                   
         pageinfo['viewMode'] = viewMode          pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
         pageinfo['tocMode'] = tocMode          oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
           # add zeroth page for two columns
           pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
           pageinfo['pageZero'] = pageZero
           pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
                   
           # TODO: do we need this here?
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
         #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')  
         pageinfo['query'] = self.REQUEST.get('query','')           pageinfo['query'] = self.REQUEST.get('query','') 
         pageinfo['queryType'] = self.REQUEST.get('queryType','')          pageinfo['queryType'] = self.REQUEST.get('queryType','')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
         pageinfo['textPN'] = self.REQUEST.get('textPN','1')  
         pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')          pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')          pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
         # WTF?:          pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))
         toc = int(pageinfo['tocPN'])  
         pageinfo['textPages'] =int(toc)  
                   
         # What does this do?          # limit tocPN
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = docinfo['tocSize_%s'%tocMode]
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = pageinfo['tocPageSize']
             # cached toc                         # cached toc           
             if tocSize%tocPageSize>0:              if tocSize%tocPageSize>0:
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                                   
             pageinfo['tocPN'] = min(tocPages,toc)              pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
                           
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')  
         pageinfo['sn'] =self.REQUEST.get('sn','')  
         return pageinfo          return pageinfo
   
   
       def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
           """returns dict with array of page informations for one screenfull of thumbnails"""
           batch = {}
           grpsize = rows * cols
           if maxIdx == 0:
               maxIdx = start + grpsize
   
           nb = int(math.ceil(maxIdx / float(grpsize)))
           # list of all batch start and end points
           batches = []
           if pageZero:
               ofs = 0
           else:
               ofs = 1
               
           for i in range(nb):
               s = i * grpsize + ofs
               e = min((i + 1) * grpsize + ofs - 1, maxIdx)
               batches.append({'start':s, 'end':e})
               
           batch['batches'] = batches
   
           pages = []
           if pageZero and start == 1:
               # correct beginning
               idx = 0
           else:
               idx = start
               
           for r in range(rows):
               row = []
               for c in range(cols):
                   if idx < minIdx or idx > maxIdx:
                       page = {'idx':None}
                   else:
                       page = {'idx':idx}
                       
                   idx += 1
                   if pageFlowLtr:
                       row.append(page)
                   else:
                       row.insert(0, page) 
                   
               pages.append(row)
               
           if start > 1:
               batch['prevStart'] = max(start - grpsize, 1)
           else:
               batch['prevStart'] = None
               
           if start + grpsize < maxIdx:
               batch['nextStart'] = start + grpsize
           else:
               batch['nextStart'] = None
   
           batch['pages'] = pages
           return batch
           
       def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
           """returns dict with information for one screenfull of data."""
           batch = {}
           if end == 0:
               end = start + size                    
               
           nb = int(math.ceil(end / float(size)))
           # list of all batch start and end points
           batches = []
           for i in range(nb):
               s = i * size + 1
               e = min((i + 1) * size, end)
               batches.append({'start':s, 'end':e})
               
           batch['batches'] = batches
           # list of elements in this batch
           this = []
           j = 0
           for i in range(start, min(start+size, end)):
               if data:
                   if fullData:
                       d = data[i]
                   else:
                       d = data[j]
                       j += 1
               
               else:
                   d = i+1
                   
               this.append(d)
               
           batch['this'] = this
           if start > 1:
               batch['prevStart'] = max(start - size, 1)
           else:
               batch['prevStart'] = None
               
           if start + size < end:
               batch['nextStart'] = start + size
           else:
               batch['nextStart'] = None
           
           return batch
           
   
     security.declareProtected('View management screens','changeDocumentViewerForm')          security.declareProtected('View management screens','changeDocumentViewerForm')    
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())      changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
           
Line 1047  def manage_AddDocumentViewer(self,id,ima Line 920  def manage_AddDocumentViewer(self,id,ima
           
     if RESPONSE is not None:      if RESPONSE is not None:
         RESPONSE.redirect('manage_main')          RESPONSE.redirect('manage_main')
   
 ## DocumentViewerTemplate class  
 class DocumentViewerTemplate(ZopePageTemplate):  
     """Template for document viewer"""  
     meta_type="DocumentViewer Template"  
   
   
 def manage_addDocumentViewerTemplateForm(self):  
     """Form for adding"""  
     pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)  
     return pt()  
   
 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,  
                            REQUEST=None, submit=None):  
     "Add a Page Template with optional file content."  
   
     self._setObject(id, DocumentViewerTemplate(id))  
     ob = getattr(self, id)  
     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()  
     logging.info("txt %s:"%txt)  
     ob.pt_edit(txt,"text/html")  
     if title:  
         ob.pt_setTitle(title)  
     try:  
         u = self.DestinationURL()  
     except AttributeError:  
         u = REQUEST['URL1']  
           
     u = "%s/%s" % (u, urllib.quote(id))  
     REQUEST.RESPONSE.redirect(u+'/manage_main')  
     return ''  
   
   
       

Removed from v.1.175.2.11  
changed lines
  Added in v.1.175.2.29


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>