Diff for /documentViewer/documentViewer.py between versions 1.175.2.11 and 1.178

version 1.175.2.11, 2011/07/29 16:27:24 version 1.178, 2012/01/04 07:38:17
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   from Products.zogiLib.zogiLib import browserCheck
   
 #from Ft.Xml import EMPTY_NAMESPACE, Parse   from Ft.Xml import EMPTY_NAMESPACE, Parse 
 #import Ft.Xml.Domlette  import Ft.Xml.Domlette
   
 import xml.etree.ElementTree as ET  
   
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
   import urllib2
 import logging  import logging
 import math  import math
 import urlparse   import urlparse 
   import cStringIO
 import re  import re
 import string  import string
   
 from SrvTxtUtils import getInt, getText, getHttpData  
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
   def getInt(number, default=0):
       """returns always an int (0 in case of problems)"""
       try:
           return int(number)
       except:
           return int(default)
   
   def getTextFromNode(nodename):
       """get the cdata content of a node"""
       if nodename is None:
           return ""
       nodelist=nodename.childNodes
       rc = ""
       for node in nodelist:
           if node.nodeType == node.TEXT_NODE:
              rc = rc + node.data
       return rc
   
 def serializeNode(node, encoding="utf-8"):  def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     s = ET.tostring(node)      stream = cStringIO.StringIO()
           #logging.debug("BUF: %s"%(stream))
     # 4Suite:      Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
     #    stream = cStringIO.StringIO()      s = stream.getvalue()
     #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)      #logging.debug("BUF: %s"%(s))
     #    s = stream.getvalue()      stream.close()
     #    stream.close()  
     return s      return s
   
 def browserCheck(self):  def browserCheck(self):
Line 98  def browserCheck(self): Line 114  def browserCheck(self):
   
     return bt      return bt
   
 def getParentPath(path, cnt=1):  
     """returns pathname shortened by cnt"""  
     # make sure path doesn't end with /  
     path = path.rstrip('/')  
     # split by /, shorten, and reassemble  
     return '/'.join(path.split('/')[0:-cnt])  
   
   def getParentDir(path):
       """returns pathname shortened by one"""
       return '/'.join(path.split('/')[0:-1])
           
   
   def getHttpData(url, data=None, num_tries=3, timeout=10):
       """returns result from url+data HTTP request"""
       # we do GET (by appending data to url)
       if isinstance(data, str) or isinstance(data, unicode):
           # if data is string then append
           url = "%s?%s"%(url,data)
       elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
           # urlencode
           url = "%s?%s"%(url,urllib.urlencode(data))
       
       response = None
       errmsg = None
       for cnt in range(num_tries):
           try:
               logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
               if sys.version_info < (2, 6):
                   # set timeout on socket -- ugly :-(
                   import socket
                   socket.setdefaulttimeout(float(timeout))
                   response = urllib2.urlopen(url)
               else:
                   response = urllib2.urlopen(url,timeout=float(timeout))
               # check result?
               break
           except urllib2.HTTPError, e:
               logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
               errmsg = str(e)
               # stop trying
               break
           except urllib2.URLError, e:
               logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
               errmsg = str(e)
               # stop trying
               #break
   
       if response is not None:
           data = response.read()
           response.close()
           return data
       
       raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
       #return None
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 118  class documentViewer(Folder): Line 175  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
           
     metadataService = None  
     """MetaDataFolder instance"""  
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 139  class documentViewer(Folder): Line 193  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 161  class documentViewer(Folder): Line 217  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 170  class documentViewer(Folder): Line 225  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                           
         try:  
             # assume MetaDataFolder instance is called metadata   
             self.metadataService = getattr(self, 'metadata')  
         except Exception, e:  
             logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))  
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 241  class documentViewer(Folder): Line 290  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 266  class documentViewer(Folder): Line 315  class documentViewer(Folder):
       
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         """          '''
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         """          @param characterNormalization type of text display (reg, norm, none)
           @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
           '''
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
Line 290  class documentViewer(Folder): Line 341  class documentViewer(Folder):
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
         # auto viewMode: text_dict if text else images          if viewMode=="auto": # automodus gewaehlt
         if viewMode=="auto":               if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):   
                 #texturl gesetzt und textViewer konfiguriert  
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)          pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
                   
         if viewMode != 'images' and docinfo.get('textURLPath', None):          if (docinfo.get('textURLPath',None)):
             # get full text page              page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)
             page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)  
             pageinfo['textPage'] = page              pageinfo['textPage'] = page
                       tt = getattr(self, 'template')   
         # get template /template/viewer_main          pt = getattr(tt, 'viewer_main')               
         pt = getattr(self.template, 'viewer_main')  
         # and execute with parameters  
         return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))          return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
Line 371  class documentViewer(Folder): Line 417  class documentViewer(Folder):
         # FIXME: does this belong here?          # FIXME: does this belong here?
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath          if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                 urlParams["mode"] = "imagepath"                  urlParams["mode"] = "imagepath"
                 urlParams["url"] = getParentPath(urlParams["url"])                  urlParams["url"] = getParentDir(urlParams["url"])
                                   
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
Line 439  class documentViewer(Folder): Line 485  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                   
         for x in range(cut):          for x in range(cut):
             path=getParentPath(path)                 
                   path=getParentDir(path)
                 
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           
Line 449  class documentViewer(Folder): Line 496  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              raise IOError("Unable to get dir-info from %s"%(infoUrl))
   
         dom = ET.fromstring(txt)          dom = Parse(txt)
         #dom = Parse(txt)          sizes=dom.xpath("//dir/size")
         size=getText(dom.find("size"))          logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
         #sizes=dom.xpath("//dir/size")  
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)  
                   
         if size:          if sizes:
             docinfo['numPages'] = int(size)              docinfo['numPages'] = int(getTextFromNode(sizes[0]))
         else:          else:
             docinfo['numPages'] = 0              docinfo['numPages'] = 0
                           
Line 501  class documentViewer(Folder): Line 546  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
                   
         dom = ET.fromstring(txt)          dom = Parse(txt)
         #dom = Parse(txt)  
         return dom          return dom
           
     def getPresentationInfoXML(self, url):      def getPresentationInfoXML(self, url):
Line 521  class documentViewer(Folder): Line 565  class documentViewer(Folder):
         if txt is None:          if txt is None:
             raise IOError("Unable to read infoXMLfrom %s"%(url))              raise IOError("Unable to read infoXMLfrom %s"%(url))
                           
         dom = ET.fromstring(txt)          dom = Parse(txt)
         #dom = Parse(txt)  
         return dom          return dom
                                                   
                   
Line 537  class documentViewer(Folder): Line 580  class documentViewer(Folder):
                           
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentPath(path)                  path=getParentDir(path)
             dom = self.getDomFromIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acc = dom.find(".//access-conditions/access")          acctype = dom.xpath("//access-conditions/access/@type")
         if acc is not None:          if acctype and (len(acctype)>0):
             acctype = acc.get('type')              access=acctype[0].value
             #acctype = dom.xpath("//access-conditions/access/@type")  
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:                  if access in ['group', 'institution']:
                     access = dom.find(".//access-conditions/access/name").text.lower()                  access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
                           
         docinfo['accessType'] = access          docinfo['accessType'] = access
         return docinfo          return docinfo
Line 555  class documentViewer(Folder): Line 595  class documentViewer(Folder):
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets bibliographical info from the index.meta file at path or given by dom"""          """gets bibliographical info from the index.meta file at path or given by dom"""
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))          #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
                   
         if docinfo is None:          if docinfo is None:
             docinfo = {}              docinfo = {}
Line 567  class documentViewer(Folder): Line 607  class documentViewer(Folder):
                   
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);          docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         if self.metadataService is not None:          # put in all raw bib fields as dict "bib"
             # put all raw bib fields in dict "bib"          bib = dom.xpath("//bib/*")
             bib = self.metadataService.getBibData(dom=dom)          if bib and len(bib)>0:
             docinfo['bib'] = bib              bibinfo = {}
             bibtype = bib.get('@type', None)              for e in bib:
             docinfo['bib_type'] = bibtype                  bibinfo[e.localName] = getTextFromNode(e)
             # also store DC metadata for convenience              docinfo['bib'] = bibinfo
             dc = self.metadataService.getDCMappedData(bib)          
             docinfo['creator'] = dc.get('creator',None)          # extract some fields (author, title, year) according to their mapping
             docinfo['title'] = dc.get('title',None)          metaData=self.metadata.main.meta.bib
             docinfo['date'] = dc.get('date',None)          bibtype=dom.xpath("//bib/@type")
           if bibtype and (len(bibtype)>0):
               bibtype=bibtype[0].value
         else:          else:
             logging.error("MetadataService not found!")              bibtype="generic"
               
           bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
           docinfo['bib_type'] = bibtype
           bibmap=metaData.generateMappingForType(bibtype)
           #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
           #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
           # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
           if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
               try:
                   docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
               except: pass
               try:
                   docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
               except: pass
               try:
                   docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
               except: pass
               #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
               try:
                   docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
               except:
                   docinfo['lang']=''
               try:
                   docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])
               except:
                   docinfo['city']=''
               try:
                   docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])
               except:
                   docinfo['number_of_pages']=''
               try:
                   docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])
               except:
                   docinfo['series_volume']=''
               try:
                   docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])
               except:
                   docinfo['number_of_volumes']=''
               try:
                   docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])
               except:
                   docinfo['translator']=''
               try:
                   docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])
               except:
                   docinfo['edition']=''
               try:
                   docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])
               except:
                   docinfo['series_author']=''
               try:
                   docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])
               except:
                   docinfo['publisher']=''
               try:
                   docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])
               except:
                   docinfo['series_title']=''
               try:
                   docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])
               except:
                   docinfo['isbn_issn']=''           
           #logging.debug("I NEED BIBTEX %s"%docinfo)
         return docinfo          return docinfo
           
           
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets name info from the index.meta file at path or given by dom"""          """gets name info from the index.meta file at path or given by dom"""
         if docinfo is None:          if docinfo is None:
Line 592  class documentViewer(Folder): Line 696  class documentViewer(Folder):
                   
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentPath(path)                  path=getParentDir(path)
             dom = self.getDomFromIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
   
         docinfo['name']=getText(dom.find("name"))          docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])          logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
         return docinfo          return docinfo
   
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
Line 610  class documentViewer(Folder): Line 713  class documentViewer(Folder):
         if dom is None:          if dom is None:
             dom = self.getDomFromIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                           
         texttool = self.metadata.getTexttoolData(dom=dom)  
           
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
           
         archiveName = getText(dom.find("name"))          archiveNames = dom.xpath("//resource/name")
         if not archiveName:          if archiveNames and (len(archiveNames) > 0):
               archiveName = getTextFromNode(archiveNames[0])
           else:
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
                   
         archivePath = getText(dom.find("archive-path"))          archivePaths = dom.xpath("//resource/archive-path")
         if archivePath:          if archivePaths and (len(archivePaths) > 0):
               archivePath = getTextFromNode(archivePaths[0])
             # clean up archive path              # clean up archive path
             if archivePath[0] != '/':              if archivePath[0] != '/':
                 archivePath = '/' + archivePath                  archivePath = '/' + archivePath
Line 636  class documentViewer(Folder): Line 740  class documentViewer(Folder):
             # we balk without archive-path              # we balk without archive-path
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))              raise IOError("Missing archive-path (for text-tool) in %s" % (url))
                   
         imageDir = texttool.get('image', None)          imageDirs = dom.xpath("//texttool/image")
           if imageDirs and (len(imageDirs) > 0):
               imageDir = getTextFromNode(imageDirs[0])
                           
         if not imageDir:          else:
             # we balk with no image tag / not necessary anymore because textmode is now standard              # we balk with no image tag / not necessary anymore because textmode is now standard
             #raise IOError("No text-tool info in %s"%(url))              #raise IOError("No text-tool info in %s"%(url))
             imageDir = ""              imageDir = ""
Line 655  class documentViewer(Folder): Line 761  class documentViewer(Folder):
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir              docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
                           
         viewerUrl = texttool.get('digiliburlprefix', None)          viewerUrls = dom.xpath("//texttool/digiliburlprefix")
         if viewerUrl:          if viewerUrls and (len(viewerUrls) > 0):
               viewerUrl = getTextFromNode(viewerUrls[0])
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                   
         # old style text URL          # old style text URL
         textUrl = texttool.get('text', None)          textUrls = dom.xpath("//texttool/text")
         if textUrl:          if textUrls and (len(textUrls) > 0):
               textUrl = getTextFromNode(textUrls[0])
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(archivePath, textUrl) 
             # fix URLs starting with /mpiwg/online              # fix URLs starting with /mpiwg/online
Line 670  class documentViewer(Folder): Line 778  class documentViewer(Folder):
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
   
               #TODO: hack-DW for annalen
               if (textUrl is not None) and (textUrl.startswith("/permanent/einstein/annalen")):
                   textUrl=textUrl.replace("/permanent/einstein/annalen/","/diverse/de/") 
                   splitted=textUrl.split("/fulltext")
                   textUrl=splitted[0]+".xml"
                   textUrlkurz = string.split(textUrl, ".")[0]
                   docinfo['textURLPathkurz'] = textUrlkurz
                   docinfo['textURLPath'] = textUrl
                   logging.debug("hack")
                   logging.debug(textUrl)
               
               
         # new style text-url-path          # new style text-url-path
         textUrl = texttool.get('text-url-path', None)          textUrls = dom.xpath("//texttool/text-url-path")
         if textUrl:          if textUrls and (len(textUrls) > 0):
               textUrl = getTextFromNode(textUrls[0])
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             textUrlkurz = string.split(textUrl, ".")[0]              textUrlkurz = string.split(textUrl, ".")[0]
             docinfo['textURLPathkurz'] = textUrlkurz              docinfo['textURLPathkurz'] = textUrlkurz
Line 680  class documentViewer(Folder): Line 802  class documentViewer(Folder):
                 # text-only, no page images                  # text-only, no page images
                 #docinfo = self.getNumTextPages(docinfo)                  #docinfo = self.getNumTextPages(docinfo)
                                       
         # get bib info           
           presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
         # TODO: is this needed here?          #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom)
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)          docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
                   
         # TODO: what to do with presentation?          
         presentationUrl = texttool.get('presentation', None)          if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
         if presentationUrl: # ueberschreibe diese durch presentation informationen   
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
              # durch den relativen Pfad auf die presentation infos               # durch den relativen Pfad auf die presentation infos
             presentationPath = presentationUrl              presentationPath = getTextFromNode(presentationUrls[0])
             if url.endswith("index.meta"):               if url.endswith("index.meta"): 
                 presentationUrl = url.replace('index.meta', presentationPath)                  presentationUrl = url.replace('index.meta', presentationPath)
             else:              else:
Line 698  class documentViewer(Folder): Line 820  class documentViewer(Folder):
                                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
           
         # get authorization  
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
                   
         return docinfo          return docinfo
Line 708  class documentViewer(Folder): Line 829  class documentViewer(Folder):
         """gets the bibliographical information from the preseantion entry in texttools          """gets the bibliographical information from the preseantion entry in texttools
         """          """
         dom=self.getPresentationInfoXML(url)          dom=self.getPresentationInfoXML(url)
         docinfo['author']=getText(dom.find(".//author"))          try:
         docinfo['title']=getText(dom.find(".//title"))              docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
         docinfo['year']=getText(dom.find(".//date"))          except:
               pass
           try:
               docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
           except:
               pass
           try:
               docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
           except:
               pass
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
Line 724  class documentViewer(Folder): Line 854  class documentViewer(Folder):
                   
         pathorig=path          pathorig=path
         for x in range(cut):                 for x in range(cut):       
                 path=getParentPath(path)                  path=getParentDir(path)
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)          logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
         #TODO: use getDocinfoFromIndexMeta  
         #path ist the path to the images it assumes that the index.meta file is one level higher.          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
           #docinfo = self.getDownloadfromDocinfoToBibtex(pathorig,docinfo=docinfo,cut=cut+1)
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         return docinfo          return docinfo
           
           
     def OLDgetDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))          logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session          # look for cached docinfo in session
Line 744  class documentViewer(Folder): Line 874  class documentViewer(Folder):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())                  logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         # add self url          if mode=="texttool": #index.meta with texttool information
         docinfo['viewerUrl'] = self.getDocumentViewerURL()  
         if mode=="texttool":   
             # index.meta with texttool information  
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
             # folder with images, index.meta optional  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
         elif mode=="filepath":          elif mode=="filepath":
             # filename  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())          # FIXME: fake texturlpath 
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          if not docinfo.has_key('textURLPath'):
         # store in session              docinfo['textURLPath'] = None
         self.REQUEST.SESSION['docinfo'] = docinfo  
         return docinfo  
   
           logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
     def getDocinfo(self, mode, url):          #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)
         """returns docinfo depending on mode"""  
         logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))  
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:  
                 logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo  
               
         # new docinfo  
         docinfo = {'mode': mode, 'url': url}  
         # add self url  
         docinfo['viewerUrl'] = self.getDocumentViewerURL()  
         # get index.meta DOM  
         docUrl = None  
         metaDom = None  
         if mode=="texttool":   
             # url points to document dir or index.meta  
             metaDom = self.metadataService.getDomFromPathOrUrl(url)  
             docUrl = url.replace('/index.meta', '')  
             if metaDom is None:  
                 raise IOError("Unable to find index.meta for mode=texttool!")  
   
         elif mode=="imagepath":  
             # url points to folder with images, index.meta optional  
             # asssume index.meta in parent dir  
             docUrl = getParentPath(url)  
             metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)  
   
         elif mode=="filepath":  
             # url points to image file, index.meta optional  
             # asssume index.meta is two path segments up  
             docUrl = getParentPath(url, 2)  
             metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)  
   
         else:  
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)  
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
           
         docinfo['documentUrl'] = docUrl  
         # process index.meta contents  
         if metaDom is not None:  
             # document directory name and path  
             resource = self.metadataService.getResourceData(dom=metaDom)  
             if resource:  
                 docinfo = self.getDocinfoFromResource(docinfo, resource)  
   
             # texttool info  
             texttool = self.metadataService.getTexttoolData(dom=metaDom)  
             if texttool:  
                 docinfo = self.getDocinfoFromTexttool(docinfo, texttool)  
               
             # bib info  
             bib = self.metadataService.getBibData(dom=metaDom)  
             if bib:  
                 docinfo = self.getDocinfoFromBib(docinfo, bib)  
                   
             # auth info  
             access = self.metadataService.getAccessData(dom=metaDom)  
             if access:  
                 docinfo = self.getDocinfoFromAccess(docinfo, access)  
   
         # image path  
         if mode != 'texttool':  
             # override image path from texttool  
             docinfo['imagePath'] = url  
   
         # number of images from digilib  
         if docinfo.get('imagePath', None):  
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']  
             docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])  
   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         # store in session  
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
   
     def getDocinfoFromResource(self, docinfo, resource):  
         """reads contents of resource element into docinfo"""  
         docName = resource.get('name', None)  
         docinfo['documentName'] = docName  
         docPath = resource.get('archive-path', None)  
         if docPath:  
             # clean up document path  
             if docPath[0] != '/':  
                 docPath = '/' + docPath  
                   
             if docName and (not docPath.endswith(docName)):  
                 docPath += "/" + docName  
               
         else:  
             # use docUrl as docPath  
             docUrl = docinfo['documentURL']  
             if not docUrl.startswith('http:'):  
                 docPath = docUrl  
                   
         docinfo['documentPath'] = docPath  
         return docinfo  
   
     def getDocinfoFromTexttool(self, docinfo, texttool):  
         """reads contents of texttool element into docinfo"""  
         # image dir  
         imageDir = texttool.get('image', None)  
         docPath = docinfo.get('documentPath', None)  
         if imageDir and docPath:  
             #print "image: ", imageDir, " archivepath: ", archivePath  
             imageDir = os.path.join(docPath, imageDir)  
             imageDir = imageDir.replace('/mpiwg/online', '', 1)  
             docinfo['imagePath'] = imageDir  
           
         # old style text URL  
         textUrl = texttool.get('text', None)  
         if textUrl and docPath:  
             if urlparse.urlparse(textUrl)[0] == "": #keine url  
                 textUrl = os.path.join(docPath, textUrl)   
                 # fix URLs starting with /mpiwg/online  
                 textUrl = textUrl.replace('/mpiwg/online', '', 1)  
               
             docinfo['textURL'] = textUrl  
       
         # new style text-url-path  
         textUrl = texttool.get('text-url-path', None)  
         if textUrl:  
             docinfo['textURLPath'] = textUrl  
             #TODO: ugly:  
             #textUrlkurz = string.split(textUrl, ".")[0]  
             #docinfo['textURLPathkurz'] = textUrlkurz  
               
         # old presentation stuff  
         presentation = texttool.get('presentation', None)  
         if presentation and docPath:  
             docinfo['presentationPath'] = os.path.join(docPath, presentation)  
               
         return docinfo  
   
     def getDocinfoFromBib(self, docinfo, bib):  
         """reads contents of bib element into docinfo"""  
         # put all raw bib fields in dict "bib"  
         docinfo['bib'] = bib  
         bibtype = bib.get('@type', None)  
         docinfo['bibType'] = bibtype  
         # also store DC metadata for convenience  
         dc = self.metadataService.getDCMappedData(bib)  
         docinfo['creator'] = dc.get('creator',None)  
         docinfo['title'] = dc.get('title',None)  
         docinfo['date'] = dc.get('date',None)  
         return docinfo  
               
     def getDocinfoFromAccess(self, docinfo, acc):  
         """reads contents of access element into docinfo"""  
         #TODO: also read resource type  
         try:  
             acctype = accc['@attr']['type']  
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:  
                     access = acc['name'].lower()  
                   
                 docinfo['accessType'] = access  
   
         except:  
             pass  
           
         return docinfo  
   
     def getDocinfoFromDigilib(self, docinfo, path):  
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path  
         # fetch data  
         txt = getHttpData(infoUrl)  
         if not txt:  
             logging.error("Unable to get dir-info from %s"%(infoUrl))  
             return docinfo  
   
         dom = ET.fromstring(txt)  
         size = getText(dom.find("size"))  
         logging.debug("getDocinfoFromDigilib: size=%s"%size)  
         if size:  
             docinfo['numPages'] = int(size)  
         else:  
             docinfo['numPages'] = 0  
               
         # TODO: produce and keep list of image names and numbers  
         return docinfo  
               
               
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
Line 971  class documentViewer(Folder): Line 909  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
         # what does this do?  
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 982  class documentViewer(Folder): Line 919  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                  pageinfo['numgroups'] += 1
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
Line 992  class documentViewer(Folder): Line 928  class documentViewer(Folder):
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
         pageinfo['textPN'] = self.REQUEST.get('textPN','1')          pageinfo['textPN'] = self.REQUEST.get('textPN','1')
         pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')          pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
           
           pageinfo ['highlightElementPos'] = self.REQUEST.get('highlightElementPos','')
           pageinfo ['highlightElement'] = self.REQUEST.get('highlightElement','')
           
           pageinfo ['xpointer'] = self.REQUEST.get('xpointer','')
           
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')          pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
         # WTF?:  
         toc = int(pageinfo['tocPN'])          toc = int(pageinfo['tocPN'])
         pageinfo['textPages'] =int(toc)          pageinfo['textPages'] =int(toc)
                   
         # What does this do?  
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])
Line 1008  class documentViewer(Folder): Line 948  class documentViewer(Folder):
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                   
             pageinfo['tocPN'] = min(tocPages,toc)              pageinfo['tocPN'] = min(tocPages,toc)
               
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         pageinfo['sn'] =self.REQUEST.get('sn','')          #pageinfo['sn'] =self.REQUEST.get('sn','')
           pageinfo['s'] =self.REQUEST.get('s','')
         return pageinfo          return pageinfo
   
   
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
       
     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 1026  class documentViewer(Folder): Line 961  class documentViewer(Folder):
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
         try:  
             # assume MetaDataFolder instance is called metadata   
             self.metadataService = getattr(self, 'metadata')  
         except Exception, e:  
             logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))  
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175.2.11  
changed lines
  Added in v.1.178


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>