Diff for /documentViewer/documentViewer.py between versions 1.175 and 1.175.2.11

version 1.175, 2011/06/14 09:57:11 version 1.175.2.11, 2011/07/29 16:27:24
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
 from Products.zogiLib.zogiLib import browserCheck  
   
 from Ft.Xml import EMPTY_NAMESPACE, Parse   #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 import Ft.Xml.Domlette  #import Ft.Xml.Domlette
   
   import xml.etree.ElementTree as ET
   
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
 import urllib2  
 import logging  import logging
 import math  import math
 import urlparse   import urlparse 
 import cStringIO  
 import re  import re
 import string  import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
 def getInt(number, default=0):  
     """returns always an int (0 in case of problems)"""  
     try:  
         return int(number)  
     except:  
         return int(default)  
   
 def getTextFromNode(nodename):  
     """get the cdata content of a node"""  
     if nodename is None:  
         return ""  
     nodelist=nodename.childNodes  
     rc = ""  
     for node in nodelist:  
         if node.nodeType == node.TEXT_NODE:  
            rc = rc + node.data  
     return rc  
   
 def serializeNode(node, encoding="utf-8"):  def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     stream = cStringIO.StringIO()      s = ET.tostring(node)
     #logging.debug("BUF: %s"%(stream))      
     Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)      # 4Suite:
     s = stream.getvalue()      #    stream = cStringIO.StringIO()
     #logging.debug("BUF: %s"%(s))      #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
     stream.close()      #    s = stream.getvalue()
       #    stream.close()
     return s      return s
   
 def browserCheck(self):  def browserCheck(self):
Line 114  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
   def getParentPath(path, cnt=1):
       """returns pathname shortened by cnt"""
       # make sure path doesn't end with /
       path = path.rstrip('/')
       # split by /, shorten, and reassemble
       return '/'.join(path.split('/')[0:-cnt])
                 
 def getParentDir(path):  
     """returns pathname shortened by one"""  
     return '/'.join(path.split('/')[0:-1])  
           
   
 def getHttpData(url, data=None, num_tries=3, timeout=10):  
     """returns result from url+data HTTP request"""  
     # we do GET (by appending data to url)  
     if isinstance(data, str) or isinstance(data, unicode):  
         # if data is string then append  
         url = "%s?%s"%(url,data)  
     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):  
         # urlencode  
         url = "%s?%s"%(url,urllib.urlencode(data))  
       
     response = None  
     errmsg = None  
     for cnt in range(num_tries):  
         try:  
             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))  
             if sys.version_info < (2, 6):  
                 # set timeout on socket -- ugly :-(  
                 import socket  
                 socket.setdefaulttimeout(float(timeout))  
                 response = urllib2.urlopen(url)  
             else:  
                 response = urllib2.urlopen(url,timeout=float(timeout))  
             # check result?  
             break  
         except urllib2.HTTPError, e:  
             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))  
             errmsg = str(e)  
             # stop trying  
             break  
         except urllib2.URLError, e:  
             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))  
             errmsg = str(e)  
             # stop trying  
             #break  
   
     if response is not None:  
         data = response.read()  
         response.close()  
         return data  
       
     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))  
     #return None  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 175  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 193  class documentViewer(Folder): Line 139  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 217  class documentViewer(Folder): Line 161  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 225  class documentViewer(Folder): Line 170  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 290  class documentViewer(Folder): Line 241  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 315  class documentViewer(Folder): Line 266  class documentViewer(Folder):
       
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         '''          """
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         @param characterNormalization type of text display (reg, norm, none)          """
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)  
         '''  
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
Line 341  class documentViewer(Folder): Line 290  class documentViewer(Folder):
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
                           
         if viewMode=="auto": # automodus gewaehlt          # auto viewMode: text_dict if text else images
             if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert          if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   #texturl gesetzt und textViewer konfiguriert
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)          pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
                   
         if (docinfo.get('textURLPath',None)):          if viewMode != 'images' and docinfo.get('textURLPath', None):
             page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)              # get full text page
               page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
             pageinfo['textPage'] = page              pageinfo['textPage'] = page
         tt = getattr(self, 'template')                 
         pt = getattr(tt, 'viewer_main')                         # get template /template/viewer_main
           pt = getattr(self.template, 'viewer_main')
           # and execute with parameters
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
Line 417  class documentViewer(Folder): Line 371  class documentViewer(Folder):
         # FIXME: does this belong here?          # FIXME: does this belong here?
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath          if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                 urlParams["mode"] = "imagepath"                  urlParams["mode"] = "imagepath"
                 urlParams["url"] = getParentDir(urlParams["url"])                  urlParams["url"] = getParentPath(urlParams["url"])
                                   
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
Line 485  class documentViewer(Folder): Line 439  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                   
         for x in range(cut):          for x in range(cut):
                              path=getParentPath(path)
                 path=getParentDir(path)  
                 
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
Line 496  class documentViewer(Folder): Line 449  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
   
         dom = Parse(txt)          dom = ET.fromstring(txt)
         sizes=dom.xpath("//dir/size")          #dom = Parse(txt)
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)          size=getText(dom.find("size"))
           #sizes=dom.xpath("//dir/size")
           logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
                   
         if sizes:          if size:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))              docinfo['numPages'] = int(size)
         else:          else:
             docinfo['numPages'] = 0              docinfo['numPages'] = 0
                           
Line 546  class documentViewer(Folder): Line 501  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                   
         dom = Parse(txt)          dom = ET.fromstring(txt)
           #dom = Parse(txt)
         return dom          return dom
           
     def getPresentationInfoXML(self, url):      def getPresentationInfoXML(self, url):
Line 565  class documentViewer(Folder): Line 521  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to read infoXMLfrom %s"%(url))              raise IOError("Unable to read infoXMLfrom %s"%(url))
                           
         dom = Parse(txt)          dom = ET.fromstring(txt)
           #dom = Parse(txt)
         return dom          return dom
                                                   
                   
Line 580  class documentViewer(Folder): Line 537  class documentViewer(Folder):
                           
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentPath(path)
             dom = self.getDomFromIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acc = dom.find(".//access-conditions/access")
         if acctype and (len(acctype)>0):          if acc is not None:
             access=acctype[0].value              acctype = acc.get('type')
               #acctype = dom.xpath("//access-conditions/access/@type")
               if acctype:
                   access=acctype
             if access in ['group', 'institution']:              if access in ['group', 'institution']:
                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()                      access = dom.find(".//access-conditions/access/name").text.lower()
                           
         docinfo['accessType'] = access          docinfo['accessType'] = access
         return docinfo          return docinfo
Line 608  class documentViewer(Folder): Line 568  class documentViewer(Folder):
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);          docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          if self.metadataService is not None:
         bib = dom.xpath("//bib/*")              # put all raw bib fields in dict "bib"
         if bib and len(bib)>0:              bib = self.metadataService.getBibData(dom=dom)
             bibinfo = {}              docinfo['bib'] = bib
             for e in bib:              bibtype = bib.get('@type', None)
                 bibinfo[e.localName] = getTextFromNode(e)  
             docinfo['bib'] = bibinfo  
           
         # extract some fields (author, title, year) according to their mapping  
         metaData=self.metadata.main.meta.bib  
         bibtype=dom.xpath("//bib/@type")  
         if bibtype and (len(bibtype)>0):  
             bibtype=bibtype[0].value  
         else:  
             bibtype="generic"  
               
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)  
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)              # also store DC metadata for convenience
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))              dc = self.metadataService.getDCMappedData(bib)
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))              docinfo['creator'] = dc.get('creator',None)
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)              docinfo['title'] = dc.get('title',None)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:              docinfo['date'] = dc.get('date',None)
             try:          else:
                 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])              logging.error("MetadataService not found!")
             except: pass  
             try:  
                 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])  
             except: pass  
             try:  
                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])  
             except: pass  
             logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             try:  
                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])  
             except:  
                 docinfo['lang']=''  
             try:  
                 docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])  
             except:  
                 docinfo['city']=''  
             try:  
                 docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])  
             except:  
                 docinfo['number_of_pages']=''  
             try:  
                 docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])  
             except:  
                 docinfo['series_volume']=''  
             try:  
                 docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])  
             except:  
                 docinfo['number_of_volumes']=''  
             try:  
                 docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])  
             except:  
                 docinfo['translator']=''  
             try:  
                 docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])  
             except:  
                 docinfo['edition']=''  
             try:  
                 docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])  
             except:  
                 docinfo['series_author']=''  
             try:  
                 docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])  
             except:  
                 docinfo['publisher']=''  
             try:  
                 docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])  
             except:  
                 docinfo['series_title']=''  
             try:  
                 docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])  
             except:  
                 docinfo['isbn_issn']=''             
         return docinfo          return docinfo
           
             
       # TODO: is this needed?
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets name info from the index.meta file at path or given by dom"""          """gets name info from the index.meta file at path or given by dom"""
         if docinfo is None:          if docinfo is None:
Line 695  class documentViewer(Folder): Line 592  class documentViewer(Folder):
                   
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentPath(path)
             dom = self.getDomFromIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
   
         docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])          docinfo['name']=getText(dom.find("name"))
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])          logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
         return docinfo          return docinfo
           
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
Line 712  class documentViewer(Folder): Line 610  class documentViewer(Folder):
         if dom is None:          if dom is None:
             dom = self.getDomFromIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                   
           texttool = self.metadata.getTexttoolData(dom=dom)
           
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
           
         archiveNames = dom.xpath("//resource/name")          archiveName = getText(dom.find("name"))
         if archiveNames and (len(archiveNames) > 0):          if not archiveName:
             archiveName = getTextFromNode(archiveNames[0])  
         else:  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
                   
         archivePaths = dom.xpath("//resource/archive-path")          archivePath = getText(dom.find("archive-path"))
         if archivePaths and (len(archivePaths) > 0):          if archivePath:
             archivePath = getTextFromNode(archivePaths[0])  
             # clean up archive path              # clean up archive path
             if archivePath[0] != '/':              if archivePath[0] != '/':
                 archivePath = '/' + archivePath                  archivePath = '/' + archivePath
Line 739  class documentViewer(Folder): Line 636  class documentViewer(Folder):
             # we balk without archive-path              # we balk without archive-path
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))              raise IOError("Missing archive-path (for text-tool) in %s" % (url))
                   
         imageDirs = dom.xpath("//texttool/image")          imageDir = texttool.get('image', None)
         if imageDirs and (len(imageDirs) > 0):  
             imageDir = getTextFromNode(imageDirs[0])  
                           
         else:          if not imageDir:
             # we balk with no image tag / not necessary anymore because textmode is now standard              # we balk with no image tag / not necessary anymore because textmode is now standard
             #raise IOError("No text-tool info in %s"%(url))              #raise IOError("No text-tool info in %s"%(url))
             imageDir = ""              imageDir = ""
Line 760  class documentViewer(Folder): Line 655  class documentViewer(Folder):
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir              docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
                           
         viewerUrls = dom.xpath("//texttool/digiliburlprefix")          viewerUrl = texttool.get('digiliburlprefix', None)
         if viewerUrls and (len(viewerUrls) > 0):          if viewerUrl:
             viewerUrl = getTextFromNode(viewerUrls[0])  
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                   
         # old style text URL          # old style text URL
         textUrls = dom.xpath("//texttool/text")          textUrl = texttool.get('text', None)
         if textUrls and (len(textUrls) > 0):          if textUrl:
             textUrl = getTextFromNode(textUrls[0])  
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(archivePath, textUrl) 
             # fix URLs starting with /mpiwg/online              # fix URLs starting with /mpiwg/online
Line 778  class documentViewer(Folder): Line 671  class documentViewer(Folder):
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
         # new style text-url-path          # new style text-url-path
         textUrls = dom.xpath("//texttool/text-url-path")          textUrl = texttool.get('text-url-path', None)
         if textUrls and (len(textUrls) > 0):          if textUrl:
             textUrl = getTextFromNode(textUrls[0])  
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             textUrlkurz = string.split(textUrl, ".")[0]              textUrlkurz = string.split(textUrl, ".")[0]
             docinfo['textURLPathkurz'] = textUrlkurz              docinfo['textURLPathkurz'] = textUrlkurz
Line 788  class documentViewer(Folder): Line 680  class documentViewer(Folder):
                 # text-only, no page images                  # text-only, no page images
                 #docinfo = self.getNumTextPages(docinfo)                  #docinfo = self.getNumTextPages(docinfo)
                                       
                    # get bib info
         presentationUrls = dom.xpath("//texttool/presentation")  
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           # TODO: is this needed here?
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)          docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
                   
                   # TODO: what to do with presentation?
         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen           presentationUrl = texttool.get('presentation', None)
           if presentationUrl: # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
              # durch den relativen Pfad auf die presentation infos               # durch den relativen Pfad auf die presentation infos
             presentationPath = getTextFromNode(presentationUrls[0])              presentationPath = presentationUrl
             if url.endswith("index.meta"):               if url.endswith("index.meta"): 
                 presentationUrl = url.replace('index.meta', presentationPath)                  presentationUrl = url.replace('index.meta', presentationPath)
             else:              else:
Line 805  class documentViewer(Folder): Line 698  class documentViewer(Folder):
                                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
           
           # get authorization
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
                   
         return docinfo          return docinfo
Line 814  class documentViewer(Folder): Line 708  class documentViewer(Folder):
         """gets the bibliographical information from the preseantion entry in texttools          """gets the bibliographical information from the preseantion entry in texttools
         """          """
         dom=self.getPresentationInfoXML(url)          dom=self.getPresentationInfoXML(url)
         try:          docinfo['author']=getText(dom.find(".//author"))
             docinfo['author']=getTextFromNode(dom.xpath("//author")[0])          docinfo['title']=getText(dom.find(".//title"))
         except:          docinfo['year']=getText(dom.find(".//date"))
             pass  
         try:  
             docinfo['title']=getTextFromNode(dom.xpath("//title")[0])  
         except:  
             pass  
         try:  
             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])  
         except:  
             pass  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
Line 839  class documentViewer(Folder): Line 724  class documentViewer(Folder):
                   
         pathorig=path          pathorig=path
         for x in range(cut):                 for x in range(cut):       
                 path=getParentDir(path)                  path=getParentPath(path)
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)          logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
           #TODO: use getDocinfoFromIndexMeta
         #path ist the path to the images it assumes that the index.meta file is one level higher.          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         return docinfo          return docinfo
           
           
     def getDocinfo(self, mode, url):      def OLDgetDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
Line 858  class documentViewer(Folder): Line 744  class documentViewer(Folder):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         if mode=="texttool": #index.meta with texttool information          # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           if mode=="texttool": 
               # index.meta with texttool information
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
               # folder with images, index.meta optional
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
         elif mode=="filepath":          elif mode=="filepath":
               # filename
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                   
         # FIXME: fake texturlpath           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
         if not docinfo.has_key('textURLPath'):          #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
             docinfo['textURLPath'] = None          # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
           return docinfo
   
   
       def getDocinfo(self, mode, url):
           """Return the docinfo dict for the given mode and url.

           A docinfo stored in the session is reused when its 'mode' and 'url'
           match the arguments. Otherwise a new docinfo is built from the
           index.meta DOM (obtained through self.metadataService) and from
           digilib, stored in the session and returned.

           mode -- one of 'texttool', 'imagepath' or 'filepath'
           url  -- for 'texttool' the document dir or index.meta, for
                   'imagepath' the image folder, for 'filepath' an image file

           Raises IOError if mode is 'texttool' and no index.meta DOM is found.
           Raises ValueError for any other mode value.
           """
           logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if it is still current (same mode and url as requested)
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                   return docinfo
               
           # new docinfo
           docinfo = {'mode': mode, 'url': url}
           # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           # get index.meta DOM
           docUrl = None
           metaDom = None
           if mode=="texttool": 
               # url points to document dir or index.meta
               metaDom = self.metadataService.getDomFromPathOrUrl(url)
               docUrl = url.replace('/index.meta', '')
               # index.meta is mandatory in this mode
               if metaDom is None:
                   raise IOError("Unable to find index.meta for mode=texttool!")
   
           elif mode=="imagepath":
               # url points to folder with images, index.meta optional
               # assume index.meta in parent dir
               docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           elif mode=="filepath":
               # url points to image file, index.meta optional
               # assume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
           else:
               logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                   
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          docinfo['documentUrl'] = docUrl
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)          # process index.meta contents
           if metaDom is not None:
               # each section is only merged into docinfo when the metadata
               # service returns something for it
               # document directory name and path
               resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool: in the image modes the
               # given url itself is used as the image path
               docinfo['imagePath'] = url
   
           # number of images from digilib (only when an image path is known)
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                   
               if docName and (not docPath.endswith(docName)):
                   docPath += "/" + docName
               
           else:
               # use docUrl as docPath
               docUrl = docinfo['documentURL']
               if not docUrl.startswith('http:'):
                   docPath = docUrl
                   
           docinfo['documentPath'] = docPath
           return docinfo
   
       def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(docPath, imageDir)
               imageDir = imageDir.replace('/mpiwg/online', '', 1)
               docinfo['imagePath'] = imageDir
           
           # old style text URL
           textUrl = texttool.get('text', None)
           if textUrl and docPath:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(docPath, textUrl) 
                   # fix URLs starting with /mpiwg/online
                   textUrl = textUrl.replace('/mpiwg/online', '', 1)
               
               docinfo['textURL'] = textUrl
       
           # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
               #TODO: ugly:
               #textUrlkurz = string.split(textUrl, ".")[0]
               #docinfo['textURLPathkurz'] = textUrlkurz
               
           # old presentation stuff
           presentation = texttool.get('presentation', None)
           if presentation and docPath:
               docinfo['presentationPath'] = os.path.join(docPath, presentation)
               
           return docinfo
   
       def getDocinfoFromBib(self, docinfo, bib):
           """reads contents of bib element into docinfo"""
           # put all raw bib fields in dict "bib"
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
           docinfo['bibType'] = bibtype
           # also store DC metadata for convenience
           dc = self.metadataService.getDCMappedData(bib)
           docinfo['creator'] = dc.get('creator',None)
           docinfo['title'] = dc.get('title',None)
           docinfo['date'] = dc.get('date',None)
           return docinfo
               
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           try:
               acctype = accc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                   
                   docinfo['accessType'] = access
   
           except:
               pass
           
           return docinfo
   
       def getDocinfoFromDigilib(self, docinfo, path):
           infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           # fetch data
           txt = getHttpData(infoUrl)
           if not txt:
               logging.error("Unable to get dir-info from %s"%(infoUrl))
               return docinfo
   
           dom = ET.fromstring(txt)
           size = getText(dom.find("size"))
           logging.debug("getDocinfoFromDigilib: size=%s"%size)
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
               
           # TODO: produce and keep list of image names and numbers
           return docinfo
               
               
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
Line 893  class documentViewer(Folder): Line 971  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # what does this do?
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 903  class documentViewer(Folder): Line 982  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
Line 915  class documentViewer(Folder): Line 995  class documentViewer(Folder):
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')               pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')     
           # WTF?:
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           # What does this do?
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])
Line 926  class documentViewer(Folder): Line 1008  class documentViewer(Folder):
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                   
             pageinfo['tocPN'] = min (tocPages,toc)                                  pageinfo['tocPN'] = min (tocPages,toc)                    
               
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         pageinfo['sn'] =self.REQUEST.get('sn','')          pageinfo['sn'] =self.REQUEST.get('sn','')
         return pageinfo          return pageinfo
           
   
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 938  def changeDocumentViewer(self,title="",d Line 1026  def changeDocumentViewer(self,title="",d
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175  
changed lines
  Added in v.1.175.2.11


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>