Diff for /documentViewer/documentViewer.py between versions 1.175.2.4 and 1.175.2.10

version 1.175.2.4, 2011/07/19 09:54:06 version 1.175.2.10, 2011/07/28 13:00:07
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
 from Products.zogiLib.zogiLib import browserCheck  
   
 #from Ft.Xml import EMPTY_NAMESPACE, Parse   #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 #import Ft.Xml.Domlette  #import Ft.Xml.Domlette
Line 15  import xml.etree.ElementTree as ET Line 13  import xml.etree.ElementTree as ET
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
 import urllib2  
 import logging  import logging
 import math  import math
 import urlparse   import urlparse 
 import re  import re
 import string  import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
 def getInt(number, default=0):  
     """returns always an int (0 in case of problems)"""  
     try:  
         return int(number)  
     except:  
         return int(default)  
   
 def getText(node):  
     """get the cdata content of a node"""  
     if node is None:  
         return ""  
     # ET:  
     text = node.text or ""  
     for e in node:  
         text += gettext(e)  
         if e.tail:  
             text += e.tail  
   
     # 4Suite:  
     #nodelist=node.childNodes  
     #text = ""  
     #for n in nodelist:  
     #    if n.nodeType == node.TEXT_NODE:  
     #       text = text + n.data  
       
     return text  
   
 getTextFromNode = getText  
   
 def serializeNode(node, encoding="utf-8"):  def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     s = ET.tostring(node)      s = ET.tostring(node)
Line 128  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
          
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   
 def getHttpData(url, data=None, num_tries=3, timeout=10):  
     """returns result from url+data HTTP request"""  
     # we do GET (by appending data to url)  
     if isinstance(data, str) or isinstance(data, unicode):  
         # if data is string then append  
         url = "%s?%s"%(url,data)  
     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):  
         # urlencode  
         url = "%s?%s"%(url,urllib.urlencode(data))  
       
     response = None  
     errmsg = None  
     for cnt in range(num_tries):  
         try:  
             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))  
             if sys.version_info < (2, 6):  
                 # set timeout on socket -- ugly :-(  
                 import socket  
                 socket.setdefaulttimeout(float(timeout))  
                 response = urllib2.urlopen(url)  
             else:  
                 response = urllib2.urlopen(url,timeout=float(timeout))  
             # check result?  
             break  
         except urllib2.HTTPError, e:  
             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))  
             errmsg = str(e)  
             # stop trying  
             break  
         except urllib2.URLError, e:  
             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))  
             errmsg = str(e)  
             # stop trying  
             #break  
   
     if response is not None:  
         data = response.read()  
         response.close()  
         return data  
       
     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))  
     #return None  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
 ##  ##
Line 189  class documentViewer(Folder): Line 115  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 207  class documentViewer(Folder): Line 136  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 231  class documentViewer(Folder): Line 158  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 239  class documentViewer(Folder): Line 167  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 304  class documentViewer(Folder): Line 238  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 633  class documentViewer(Folder): Line 567  class documentViewer(Folder):
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);          docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          if self.metadataService is not None:
         bib = dom.find(".//bib")              # put all raw bib fields in dict "bib"
         #bib = dom.xpath("//bib/*")              bib = self.metadataService.getBibData(dom=dom)
         if bib is not None:              docinfo['bib'] = bib
             bibinfo = {}              bibtype = bib.get('@type', None)
             for e in bib:  
                 bibinfo[e.tag] = getText(e)  
                   
             docinfo['bib'] = bibinfo  
           
         # extract some fields (author, title, year) according to their mapping  
         metaData=self.metadata.main.meta.bib  
         bibtype=bib.get("type")  
         #bibtype=dom.xpath("//bib/@type")  
         if not bibtype:  
             bibtype="generic"  
               
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)  
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)              # also store DC metadata for convenience
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))              dc = self.metadataService.getDCMappedData(bib)
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))              docinfo['creator'] = dc.get('creator',None)
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)              docinfo['title'] = dc.get('title',None)
         logging.debug("bibmap: %s"%repr(bibmap))              docinfo['date'] = dc.get('date',None)
         if len(bibmap) > 0 and bibmap.get('author',None) or bibmap.get('title',None):          else:
             try:              logging.error("MetadataService not found!")
                 docinfo['author']=getText(bib.find(bibmap['author'][0]))  
             except: pass  
             try:  
                 docinfo['title']=getText(bib.find(bibmap['title'][0]))  
             except: pass  
             try:  
                 docinfo['year']=getText(bib.find(bibmap['year'][0]))  
             except: pass  
               
             # ROC: why is this here?  
             #            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             #            try:  
             #                docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])  
             #            except:  
             #                docinfo['lang']=''  
             #            try:  
             #                docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])  
             #            except:  
             #                docinfo['city']=''  
             #            try:  
             #                docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])  
             #            except:  
             #                docinfo['number_of_pages']=''  
             #            try:  
             #                docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])  
             #            except:  
             #                docinfo['series_volume']=''  
             #            try:  
             #                docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])  
             #            except:  
             #                docinfo['number_of_volumes']=''  
             #            try:  
             #                docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])  
             #            except:  
             #                docinfo['translator']=''  
             #            try:  
             #                docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])  
             #            except:  
             #                docinfo['edition']=''  
             #            try:  
             #                docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])  
             #            except:  
             #                docinfo['series_author']=''  
             #            try:  
             #                docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])  
             #            except:  
             #                docinfo['publisher']=''  
             #            try:  
             #                docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])  
             #            except:  
             #                docinfo['series_title']=''  
             #            try:  
             #                docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])  
             #            except:  
             #                docinfo['isbn_issn']=''             
         return docinfo          return docinfo
           
           
Line 732  class documentViewer(Folder): Line 598  class documentViewer(Folder):
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])          logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
         return docinfo          return docinfo
           
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):      def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""          """parse texttool tag in index meta"""
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))          logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
Line 742  class documentViewer(Folder): Line 609  class documentViewer(Folder):
         if dom is None:          if dom is None:
             dom = self.getDomFromIndexMeta(url)              dom = self.getDomFromIndexMeta(url)
                   
           texttool = self.metadata.getTexttoolData(dom=dom)
           
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
           
Line 766  class documentViewer(Folder): Line 635  class documentViewer(Folder):
             # we balk without archive-path              # we balk without archive-path
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))              raise IOError("Missing archive-path (for text-tool) in %s" % (url))
                   
         imageDir = getText(dom.find(".//texttool/image"))          imageDir = texttool.get('image', None)
                           
         if not imageDir:          if not imageDir:
             # we balk with no image tag / not necessary anymore because textmode is now standard              # we balk with no image tag / not necessary anymore because textmode is now standard
Line 785  class documentViewer(Folder): Line 654  class documentViewer(Folder):
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir              docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
                           
         viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))          viewerUrl = texttool.get('digiliburlprefix', None)
         if viewerUrl:          if viewerUrl:
             docinfo['viewerURL'] = viewerUrl              docinfo['viewerURL'] = viewerUrl
                   
         # old style text URL          # old style text URL
         textUrl = getText(dom.find(".//texttool/text"))          textUrl = texttool.get('text', None)
         if textUrl:          if textUrl:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(archivePath, textUrl) 
Line 801  class documentViewer(Folder): Line 670  class documentViewer(Folder):
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
         # new style text-url-path          # new style text-url-path
         textUrl = getText(dom.find(".//texttool/text-url-path"))          textUrl = texttool.get('text-url-path', None)
         if textUrl:          if textUrl:
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             textUrlkurz = string.split(textUrl, ".")[0]              textUrlkurz = string.split(textUrl, ".")[0]
Line 810  class documentViewer(Folder): Line 679  class documentViewer(Folder):
                 # text-only, no page images                  # text-only, no page images
                 #docinfo = self.getNumTextPages(docinfo)                  #docinfo = self.getNumTextPages(docinfo)
                                       
                    # get bib info
         presentationUrl = getText(dom.find(".//texttool/presentation"))  
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
         # TODO: is this needed here?          # TODO: is this needed here?
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)          docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
                   
                   # TODO: what to do with presentation?
           presentationUrl = texttool.get('presentation', None)
         if presentationUrl: # ueberschreibe diese durch presentation informationen           if presentationUrl: # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten               # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
              # durch den relativen Pfad auf die presentation infos               # durch den relativen Pfad auf die presentation infos
Line 828  class documentViewer(Folder): Line 697  class documentViewer(Folder):
                                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)              docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
           
           # get authorization
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
                   
         return docinfo          return docinfo
Line 858  class documentViewer(Folder): Line 728  class documentViewer(Folder):
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
           #TODO: use getDocinfoFromIndexMeta
         #path ist the path to the images it assumes that the index.meta file is one level higher.          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
Line 892  class documentViewer(Folder): Line 763  class documentViewer(Folder):
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                   
         # FIXME: fake texturlpath           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
         if not docinfo.has_key('textURLPath'):          #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
             docinfo['textURLPath'] = None          # store in session
           
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)  
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 
Line 913  class documentViewer(Folder): Line 781  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # what does this do?
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 923  class documentViewer(Folder): Line 792  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
Line 935  class documentViewer(Folder): Line 805  class documentViewer(Folder):
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')               pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')     
           # WTF?:
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           # What does this do?
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])
Line 946  class documentViewer(Folder): Line 818  class documentViewer(Folder):
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                   
             pageinfo['tocPN'] = min (tocPages,toc)                                  pageinfo['tocPN'] = min (tocPages,toc)                    
               
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         pageinfo['sn'] =self.REQUEST.get('sn','')          pageinfo['sn'] =self.REQUEST.get('sn','')
         return pageinfo          return pageinfo
           
   
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 958  def changeDocumentViewer(self,title="",d Line 836  def changeDocumentViewer(self,title="",d
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175.2.4  
changed lines
  Added in v.1.175.2.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>