Diff for /documentViewer/documentViewer.py between versions 1.69.2.5 and 1.134

version 1.69.2.5, 2010/06/16 18:35:59 | version 1.134, 2010/10/18 10:14:08
Line 5  from Products.PageTemplates.PageTemplate Line 5  from Products.PageTemplates.PageTemplate
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   from Products.zogiLib.zogiLib import browserCheck
   
 from Ft.Xml import EMPTY_NAMESPACE, Parse  from Ft.Xml import EMPTY_NAMESPACE, Parse
   import Ft.Xml.Domlette
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
 import urllib2  import urllib2
 import logging  import logging
 import math  import math
   
 import urlparse   import urlparse 
 from types import *  import cStringIO
   import re
   
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
Line 43  def getTextFromNode(nodename): Line 46  def getTextFromNode(nodename):
 def serializeNode(node, encoding='utf-8'):  def serializeNode(node, encoding='utf-8'):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     buf = cStringIO.StringIO()      buf = cStringIO.StringIO()
     Print(node, stream=buf, encoding=encoding)      Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
     s = buf.getvalue()      s = buf.getvalue()
     buf.close()      buf.close()
     return s      return s
   
   def getBrowserType(self):
           """get browser type object"""
           if self.REQUEST.SESSION.has_key('browserType'):
               return self.REQUEST.SESSION['browserType']
           else:
               bt = browserCheck(self)
               self.REQUEST.SESSION.set('browserType', bt)    
               logging.debug("documentViewer (BROWSER TYPE) bt %s"%bt)                    
               return bt
   
                   
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
Line 72  def getHttpData(url, data=None, num_trie Line 85  def getHttpData(url, data=None, num_trie
             if sys.version_info < (2, 6):              if sys.version_info < (2, 6):
                 # set timeout on socket -- ugly :-(                  # set timeout on socket -- ugly :-(
                 import socket                  import socket
                 socket.setdefaulttimeout(timeout)                  socket.setdefaulttimeout(float(timeout))
                 response = urllib2.urlopen(url)                  response = urllib2.urlopen(url)
             else:              else:
                 response = urllib2.urlopen(url,timeout=float(timeout))                  response = urllib2.urlopen(url,timeout=float(timeout))
Line 119  class documentViewer(Folder): Line 132  class documentViewer(Folder):
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())      page_main_images = PageTemplateFile('zpt/page_main_images', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())      page_main_text = PageTemplateFile('zpt/page_main_text', globals())
     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())      page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
     page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())      page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
Line 145  class documentViewer(Folder): Line 159  class documentViewer(Folder):
         self._setObject('template',templateFolder) # old style          self._setObject('template',templateFolder) # old style
         try:          try:
             import MpdlXmlTextServer              import MpdlXmlTextServer
             textServer = MpdlXmlTextServer(id='fulltextclient')              textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
             #templateFolder['fulltextclient'] = xmlRpcClient              #templateFolder['fulltextclient'] = xmlRpcClient
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
Line 172  class documentViewer(Folder): Line 186  class documentViewer(Folder):
         """get search"""          """get search"""
         return self.template.fulltextclient.getSearch(**args)          return self.template.fulltextclient.getSearch(**args)
   
     def getNumPages(self, **args):      def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
       
       def getOrigPages(self, **args):
           """get original page number """
           return self.template.fulltextclient.getOrigPages(**args)
       
       def getNumPages(self, docinfo):
         """get numpages"""          """get numpages"""
         return self.template.fulltextclient.getNumPages(**args)          return self.template.fulltextclient.getNumPages(docinfo)
      
       def getNumTextPages(self, docinfo):
           """get numpages text"""
           return self.template.fulltextclient.getNumTextPages(docinfo)
   
     def getTranslate(self, **args):      def getTranslate(self, **args):
         """get translate"""          """get translate"""
Line 213  class documentViewer(Folder): Line 243  class documentViewer(Folder):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           pageinfo = self.getPageinfo(start=start,current=pn, originalPage=originalPage, docinfo=docinfo)
           ''' ZDES '''
         pt = getattr(self.template, 'thumbs_main_rss')          pt = getattr(self.template, 'thumbs_main_rss')
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text"                  viewMode="text"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 225  class documentViewer(Folder): Line 257  class documentViewer(Folder):
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization="",originalPage=None):
         '''          '''
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           @param characterNormalization type of text display (reg, norm, none)
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)          @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''          '''
                   
Line 243  class documentViewer(Folder): Line 276  class documentViewer(Folder):
             return "ERROR: template folder missing!"              return "ERROR: template folder missing!"
                           
         if not getattr(self, 'digilibBaseUrl', None):          if not getattr(self, 'digilibBaseUrl', None):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
                   
           
         if tocMode != "thumbs":          if tocMode != "thumbs":
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
                           
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert              if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)          pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode, originalPage=originalPage)
                   
         pt = getattr(self.template, 'viewer_main')                         pt = getattr(self.template, 'viewer_main')               
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
Line 267  class documentViewer(Folder): Line 299  class documentViewer(Folder):
         ret=""          ret=""
         if mk is None:          if mk is None:
             return ""              return ""
         if type(mk) is not ListType:          if not isinstance(mk, list):
             mk=[mk]              mk=[mk]
         for m in mk:          for m in mk:
             ret+="mk=%s"%m              ret+="mk=%s"%m
Line 306  class documentViewer(Folder): Line 338  class documentViewer(Folder):
                 params["url"] = getParentDir(params["url"])                  params["url"] = getParentDir(params["url"])
                                   
         # quote values and assemble into query string          # quote values and assemble into query string
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
           ps = urllib.urlencode(params)
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
   
Line 347  class documentViewer(Folder): Line 380  class documentViewer(Folder):
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:              if user is not None:
                 #print "user: ", user                  #print "user: ", user
                 return (user.getUserName() != "Anonymous User")                  return (user.getUserName() != "Anonymous User")
             else:              else:
                 return False                  return False
                   
         logging.debug("documentViewer (accessOK) unknown access type %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
           
                                   
Line 387  class documentViewer(Folder): Line 421  class documentViewer(Folder):
                                                   
         return docinfo          return docinfo
           
       def getIndexMetaPath(self,url):
           """gib nur den Pfad zurueck"""
           regexp = re.compile(r".*(experimental|permanent)/(.*)")
           regpath = regexp.match(url)
           if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
        
       
       
       def getIndexMetaUrl(self,url):
           """returns utr  of index.meta document at url"""
                           
     def getIndexMeta(self, url):  
         """returns dom of index.meta document at url"""  
         dom = None  
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
             # real URL              # real URL
Line 402  class documentViewer(Folder): Line 446  class documentViewer(Folder):
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
                                   
         logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)          return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
         txt=getHttpData(metaUrl)          txt=getHttpData(metaUrl)
         if txt is None:          if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
Line 442  class documentViewer(Folder): Line 493  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acctype = dom.xpath("//access-conditions/access/@type")
         if acctype and (len(acctype)>0):          if acctype and (len(acctype)>0):
Line 464  class documentViewer(Folder): Line 515  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
           
           docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          # put in all raw bib fields as dict "bib"
Line 486  class documentViewer(Folder): Line 539  class documentViewer(Folder):
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          bibmap=metaData.generateMappingForType(bibtype)
           logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
           logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:          if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
             try:              try:
Line 506  class documentViewer(Folder): Line 561  class documentViewer(Folder):
         return docinfo          return docinfo
           
           
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
Line 514  class documentViewer(Folder): Line 583  class documentViewer(Folder):
         if docinfo.get('lang', None) is None:          if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default keine Sprache gesetzt              docinfo['lang'] = '' # default keine Sprache gesetzt
         if dom is None:          if dom is None:
             dom = self.getIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                   
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
Line 569  class documentViewer(Folder): Line 638  class documentViewer(Folder):
             viewerUrl = getTextFromNode(viewerUrls[0])              viewerUrl = getTextFromNode(viewerUrls[0])
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                                         
           # old style text URL
         textUrls = dom.xpath("//texttool/text")          textUrls = dom.xpath("//texttool/text")
         if textUrls and (len(textUrls) > 0):          if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])              textUrl = getTextFromNode(textUrls[0])
Line 580  class documentViewer(Folder): Line 650  class documentViewer(Folder):
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
           # new style text-url-path
         textUrls = dom.xpath("//texttool/text-url-path")          textUrls = dom.xpath("//texttool/text-url-path")
         if textUrls and (len(textUrls) > 0):          if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])              textUrl = getTextFromNode(textUrls[0])
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             if not docinfo['imagePath']:              if not docinfo['imagePath']:
                 # text-only, no page images                  # text-only, no page images
                 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht                      docinfo = self.getNumTextPages(docinfo)
                     
         presentationUrls = dom.xpath("//texttool/presentation")          presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
           #docinfo =self.getOrigPages(docinfo=docinfo)
                   
         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen           if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
Line 670  class documentViewer(Folder): Line 743  class documentViewer(Folder):
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           logging.debug("originalPage: %s"%originalPage)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization="", originalPage=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
       
           pageinfo ['originalPage'] = originalPage
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
Line 696  class documentViewer(Folder): Line 772  class documentViewer(Folder):
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
           #pageinfo['characterNormalization'] =characterNormalization
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
         pageinfo['query'] = self.REQUEST.get('query',' ')          pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')          pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
Line 707  class documentViewer(Folder): Line 785  class documentViewer(Folder):
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           
           
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])

Legend: removed from v.1.69.2.5 | changed lines | added in v.1.134


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>