Diff for /documentViewer/documentViewer.py between versions 1.175.2.4 and 1.175.2.9

version 1.175.2.4, 2011/07/19 09:54:06 | version 1.175.2.9, 2011/07/27 19:09:44
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
 from Products.zogiLib.zogiLib import browserCheck  
   
 #from Ft.Xml import EMPTY_NAMESPACE, Parse   #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 #import Ft.Xml.Domlette  #import Ft.Xml.Domlette
Line 15  import xml.etree.ElementTree as ET Line 13  import xml.etree.ElementTree as ET
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
 import urllib2  
 import logging  import logging
 import math  import math
 import urlparse   import urlparse 
 import re  import re
 import string  import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
 def getInt(number, default=0):  
     """returns always an int (0 in case of problems)"""  
     try:  
         return int(number)  
     except:  
         return int(default)  
   
 def getText(node):  
     """get the cdata content of a node"""  
     if node is None:  
         return ""  
     # ET:  
     text = node.text or ""  
     for e in node:  
         text += gettext(e)  
         if e.tail:  
             text += e.tail  
   
     # 4Suite:  
     #nodelist=node.childNodes  
     #text = ""  
     #for n in nodelist:  
     #    if n.nodeType == node.TEXT_NODE:  
     #       text = text + n.data  
       
     return text  
   
 getTextFromNode = getText  
   
 def serializeNode(node, encoding="utf-8"):  def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     s = ET.tostring(node)      s = ET.tostring(node)
Line 128  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
          
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   def normalizeBibField(bt, underscore=True):
       """returns normalised bib type for looking up mappings"""
       bt = bt.strip().replace(' ', '-').lower()
       if underscore:
           bt = bt.replace('_', '-')
   
 def getHttpData(url, data=None, num_tries=3, timeout=10):      return bt
     """returns result from url+data HTTP request"""  
     # we do GET (by appending data to url)  def getBibdataFromDom(dom):
     if isinstance(data, str) or isinstance(data, unicode):      """returns dict with all elements from bib-tag"""
         # if data is string then append      bibinfo = {}
         url = "%s?%s"%(url,data)      bib = dom.find(".//meta/bib")
     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):      if bib is not None:
         # urlencode          # put type in @type
         url = "%s?%s"%(url,urllib.urlencode(data))          type = bib.get('type')
               bibinfo['@type'] = normalizeBibField(type)
     response = None          # put all subelements in dict
     errmsg = None          for e in bib:
     for cnt in range(num_tries):              bibinfo[normalizeBibField(e.tag)] = getText(e)
         try:              
             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))      return bibinfo
             if sys.version_info < (2, 6):  
                 # set timeout on socket -- ugly :-(  
                 import socket  
                 socket.setdefaulttimeout(float(timeout))  
                 response = urllib2.urlopen(url)  
             else:  
                 response = urllib2.urlopen(url,timeout=float(timeout))  
             # check result?  
             break  
         except urllib2.HTTPError, e:  
             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))  
             errmsg = str(e)  
             # stop trying  
             break  
         except urllib2.URLError, e:  
             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))  
             errmsg = str(e)  
             # stop trying  
             #break  
   
     if response is not None:  
         data = response.read()  
         response.close()  
         return data  
           
     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))  
     #return None  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 304  class documentViewer(Folder): Line 252  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 633  class documentViewer(Folder): Line 581  class documentViewer(Folder):
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);          docinfo['indexMetaPath']=self.getIndexMetaPath(path);
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          # try to get MetaDataFolder
         bib = dom.find(".//bib")          metadata = getattr(self, 'metadata')
         #bib = dom.xpath("//bib/*")          if metadata is not None:
         if bib is not None:              # put all raw bib fields in dict "bib"
             bibinfo = {}              bib = metadata.getBibdataFromDom(dom)
             for e in bib:              docinfo['bib'] = bib
                 bibinfo[e.tag] = getText(e)              bibtype = bib.get('@type', None)
                   
             docinfo['bib'] = bibinfo  
           
         # extract some fields (author, title, year) according to their mapping  
         metaData=self.metadata.main.meta.bib  
         bibtype=bib.get("type")  
         #bibtype=dom.xpath("//bib/@type")  
         if not bibtype:  
             bibtype="generic"  
               
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)  
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)              # also store DC metadata for convenience
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))              dc = metadata.getDCMappedData(bib)
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))              docinfo['creator'] = dc.get('creator',None)
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)              docinfo['title'] = dc.get('title',None)
         logging.debug("bibmap: %s"%repr(bibmap))              docinfo['date'] = dc.get('date',None)
         if len(bibmap) > 0 and bibmap.get('author',None) or bibmap.get('title',None):          else:
             try:              logging.error("MetaDataFolder 'metadata' not found!")
                 docinfo['author']=getText(bib.find(bibmap['author'][0]))              #TODO: remove
             except: pass              bib = getBibdataFromDom(dom)
             try:  
                 docinfo['title']=getText(bib.find(bibmap['title'][0]))  
             except: pass  
             try:  
                 docinfo['year']=getText(bib.find(bibmap['year'][0]))  
             except: pass  
               
             # ROC: why is this here?  
             #            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             #            try:  
             #                docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])  
             #            except:  
             #                docinfo['lang']=''  
             #            try:  
             #                docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])  
             #            except:  
             #                docinfo['city']=''  
             #            try:  
             #                docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])  
             #            except:  
             #                docinfo['number_of_pages']=''  
             #            try:  
             #                docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])  
             #            except:  
             #                docinfo['series_volume']=''  
             #            try:  
             #                docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])  
             #            except:  
             #                docinfo['number_of_volumes']=''  
             #            try:  
             #                docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])  
             #            except:  
             #                docinfo['translator']=''  
             #            try:  
             #                docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])  
             #            except:  
             #                docinfo['edition']=''  
             #            try:  
             #                docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])  
             #            except:  
             #                docinfo['series_author']=''  
             #            try:  
             #                docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])  
             #            except:  
             #                docinfo['publisher']=''  
             #            try:  
             #                docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])  
             #            except:  
             #                docinfo['series_title']=''  
             #            try:  
             #                docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])  
             #            except:  
             #                docinfo['isbn_issn']=''             
         return docinfo          return docinfo
           
           
Line 858  class documentViewer(Folder): Line 742  class documentViewer(Folder):
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path          imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl          docinfo['imageURL'] = imageUrl
                   
           #TODO: use getDocinfoFromIndexMeta
         #path ist the path to the images it assumes that the index.meta file is one level higher.          #path ist the path to the images it assumes that the index.meta file is one level higher.
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)          docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
Line 892  class documentViewer(Folder): Line 777  class documentViewer(Folder):
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                                   
         # FIXME: fake texturlpath           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
         if not docinfo.has_key('textURLPath'):          #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
             docinfo['textURLPath'] = None          # store in session
           
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)  
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 
Line 913  class documentViewer(Folder): Line 795  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # what does this do?
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 923  class documentViewer(Folder): Line 806  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
Line 935  class documentViewer(Folder): Line 819  class documentViewer(Folder):
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')               pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')     
           # WTF?:
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           # What does this do?
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])
Line 946  class documentViewer(Folder): Line 832  class documentViewer(Folder):
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                   
             pageinfo['tocPN'] = min (tocPages,toc)                                  pageinfo['tocPN'] = min (tocPages,toc)                    
               
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         pageinfo['sn'] =self.REQUEST.get('sn','')          pageinfo['sn'] =self.REQUEST.get('sn','')
         return pageinfo          return pageinfo
           
       
 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title

Legend (colour coding lost in text extraction):
  Removed from v.1.175.2.4
  Changed lines
  Added in v.1.175.2.9


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>