Diff for /documentViewer/documentViewer.py between versions 1.73 and 1.134

version 1.73, 2010/06/18 14:08:46 version 1.134, 2010/10/18 10:14:08
Line 5  from Products.PageTemplates.PageTemplate Line 5  from Products.PageTemplates.PageTemplate
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
   from Products.zogiLib.zogiLib import browserCheck
   
 from Ft.Xml import EMPTY_NAMESPACE, Parse  from Ft.Xml import EMPTY_NAMESPACE, Parse
 import Ft.Xml.Domlette  import Ft.Xml.Domlette
Line 16  import logging Line 17  import logging
 import math  import math
 import urlparse   import urlparse 
 import cStringIO  import cStringIO
   import re
   
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
Line 43  def getTextFromNode(nodename): Line 46  def getTextFromNode(nodename):
 def serializeNode(node, encoding='utf-8'):  def serializeNode(node, encoding='utf-8'):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     buf = cStringIO.StringIO()      buf = cStringIO.StringIO()
     Print(node, stream=buf, encoding=encoding)      Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
     s = buf.getvalue()      s = buf.getvalue()
     buf.close()      buf.close()
     return s      return s
   
   def getBrowserType(self):
           """get browser type object"""
           if self.REQUEST.SESSION.has_key('browserType'):
               return self.REQUEST.SESSION['browserType']
           else:
               bt = browserCheck(self)
               self.REQUEST.SESSION.set('browserType', bt)    
               logging.debug("documentViewer (BROWSER TYPE) bt %s"%bt)                    
               return bt
   
                   
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
Line 119  class documentViewer(Folder): Line 132  class documentViewer(Folder):
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())      page_main_images = PageTemplateFile('zpt/page_main_images', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())      page_main_text = PageTemplateFile('zpt/page_main_text', globals())
     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())      page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
       page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
     page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())      page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())      head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())      docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
Line 172  class documentViewer(Folder): Line 186  class documentViewer(Folder):
         """get search"""          """get search"""
         return self.template.fulltextclient.getSearch(**args)          return self.template.fulltextclient.getSearch(**args)
   
       def getGisPlaces(self, **args):
           """get gis places"""
           return self.template.fulltextclient.getGisPlaces(**args)
    
       def getAllGisPlaces(self, **args):
           """get all gis places """
           return self.template.fulltextclient.getAllGisPlaces(**args)
       
       def getOrigPages(self, **args):
           """get original page number """
           return self.template.fulltextclient.getOrigPages(**args)
       
     def getNumPages(self, docinfo):      def getNumPages(self, docinfo):
         """get numpages"""          """get numpages"""
         return self.template.fulltextclient.getNumPages(docinfo)          return self.template.fulltextclient.getNumPages(docinfo)
   
       def getNumTextPages(self, docinfo):
           """get numpages text"""
           return self.template.fulltextclient.getNumTextPages(docinfo)
      
     def getTranslate(self, **args):      def getTranslate(self, **args):
         """get translate"""          """get translate"""
         return self.template.fulltextclient.getTranslate(**args)          return self.template.fulltextclient.getTranslate(**args)
Line 213  class documentViewer(Folder): Line 243  class documentViewer(Folder):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                           
         docinfo = self.getDocinfo(mode=mode,url=url)          docinfo = self.getDocinfo(mode=mode,url=url)
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)          #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
           pageinfo = self.getPageinfo(start=start,current=pn, originalPage=originalPage, docinfo=docinfo)
           ''' ZDES '''
         pt = getattr(self.template, 'thumbs_main_rss')          pt = getattr(self.template, 'thumbs_main_rss')
                   
         if viewMode=="auto": # automodus gewaehlt          if viewMode=="auto": # automodus gewaehlt
Line 225  class documentViewer(Folder): Line 257  class documentViewer(Folder):
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization="",originalPage=None):
         '''          '''
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
           @param characterNormalization type of text display (reg, norm, none)
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)          @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''          '''
                   
Line 257  class documentViewer(Folder): Line 290  class documentViewer(Folder):
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)          pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode, originalPage=originalPage)
                   
         pt = getattr(self.template, 'viewer_main')                         pt = getattr(self.template, 'viewer_main')               
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
Line 305  class documentViewer(Folder): Line 338  class documentViewer(Folder):
                 params["url"] = getParentDir(params["url"])                  params["url"] = getParentDir(params["url"])
                                   
         # quote values and assemble into query string          # quote values and assemble into query string
         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])          #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
           ps = urllib.urlencode(params)
         url=self.REQUEST['URL1']+"?"+ps          url=self.REQUEST['URL1']+"?"+ps
         return url          return url
   
Line 346  class documentViewer(Folder): Line 380  class documentViewer(Folder):
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
               logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:              if user is not None:
                 #print "user: ", user                  #print "user: ", user
                 return (user.getUserName() != "Anonymous User")                  return (user.getUserName() != "Anonymous User")
             else:              else:
                 return False                  return False
                   
         logging.debug("documentViewer (accessOK) unknown access type %s"%access)          logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False          return False
           
                                   
Line 386  class documentViewer(Folder): Line 421  class documentViewer(Folder):
                                                   
         return docinfo          return docinfo
           
       def getIndexMetaPath(self,url):
           """gib nur den Pfad zurueck"""
           regexp = re.compile(r".*(experimental|permanent)/(.*)")
           regpath = regexp.match(url)
           if (regpath==None):
               return ""
           logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))            
           return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
        
       
       
       def getIndexMetaUrl(self,url):
           """returns utr  of index.meta document at url"""
                           
     def getIndexMeta(self, url):  
         """returns dom of index.meta document at url"""  
         dom = None  
         metaUrl = None          metaUrl = None
         if url.startswith("http://"):          if url.startswith("http://"):
             # real URL              # real URL
Line 401  class documentViewer(Folder): Line 446  class documentViewer(Folder):
             if not metaUrl.endswith("index.meta"):              if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"                  metaUrl += "/index.meta"
                                   
         logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)          return metaUrl
       
       def getDomFromIndexMeta(self, url):
           """get dom from index meta"""
           dom = None
           metaUrl = self.getIndexMetaUrl(url)
                   
           logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
         txt=getHttpData(metaUrl)          txt=getHttpData(metaUrl)
         if txt is None:          if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))              raise IOError("Unable to read index meta from %s"%(url))
Line 441  class documentViewer(Folder): Line 493  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
                 
         acctype = dom.xpath("//access-conditions/access/@type")          acctype = dom.xpath("//access-conditions/access/@type")
         if acctype and (len(acctype)>0):          if acctype and (len(acctype)>0):
Line 463  class documentViewer(Folder): Line 515  class documentViewer(Folder):
         if dom is None:          if dom is None:
             for x in range(cut):              for x in range(cut):
                 path=getParentDir(path)                  path=getParentDir(path)
             dom = self.getIndexMeta(path)              dom = self.getDomFromIndexMeta(path)
           
           docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          # put in all raw bib fields as dict "bib"
Line 485  class documentViewer(Folder): Line 539  class documentViewer(Folder):
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          bibmap=metaData.generateMappingForType(bibtype)
           logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
           logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:          if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
             try:              try:
Line 505  class documentViewer(Folder): Line 561  class documentViewer(Folder):
         return docinfo          return docinfo
           
           
       def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
           """gets name info from the index.meta file at path or given by dom"""
           if docinfo is None:
               docinfo = {}
           
           if dom is None:
               for x in range(cut):
                   path=getParentDir(path)
               dom = self.getDomFromIndexMeta(path)
   
           docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
           logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
           return docinfo
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
Line 513  class documentViewer(Folder): Line 583  class documentViewer(Folder):
         if docinfo.get('lang', None) is None:          if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default keine Sprache gesetzt              docinfo['lang'] = '' # default keine Sprache gesetzt
         if dom is None:          if dom is None:
             dom = self.getIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                   
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
Line 587  class documentViewer(Folder): Line 657  class documentViewer(Folder):
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             if not docinfo['imagePath']:              if not docinfo['imagePath']:
                 # text-only, no page images                  # text-only, no page images
                 docinfo = self.getNumPages(docinfo)                  docinfo = self.getNumTextPages(docinfo)
                     
         presentationUrls = dom.xpath("//texttool/presentation")          presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
           #docinfo =self.getOrigPages(docinfo=docinfo)
                   
         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen           if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
Line 671  class documentViewer(Folder): Line 743  class documentViewer(Folder):
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                                   
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           logging.debug("originalPage: %s"%originalPage)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization="", originalPage=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          current = getInt(current)
       
           pageinfo ['originalPage'] = originalPage
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
Line 697  class documentViewer(Folder): Line 772  class documentViewer(Folder):
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
           #pageinfo['characterNormalization'] =characterNormalization
           pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
         pageinfo['query'] = self.REQUEST.get('query',' ')          pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')          pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
Line 708  class documentViewer(Folder): Line 785  class documentViewer(Folder):
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           
           
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])

Removed from v.1.73  
changed lines
  Added in v.1.134


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>