Diff for /documentViewer/documentViewer.py between versions 1.175.2.1 and 1.175.2.5

version 1.175.2.1, 2011/07/14 17:43:56 | version 1.175.2.5, 2011/07/19 18:46:35
Line 1 Line 1
   
 from OFS.Folder import Folder  from OFS.Folder import Folder
 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate  from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile   from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
 from AccessControl import ClassSecurityInfo  from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager  from AccessControl import getSecurityManager
 from Globals import package_home  from Globals import package_home
 from Products.zogiLib.zogiLib import browserCheck  
   
 #from Ft.Xml import EMPTY_NAMESPACE, Parse   #from Ft.Xml import EMPTY_NAMESPACE, Parse 
 #import Ft.Xml.Domlette  #import Ft.Xml.Domlette
Line 15  import xml.etree.ElementTree as ET Line 13  import xml.etree.ElementTree as ET
 import os.path  import os.path
 import sys  import sys
 import urllib  import urllib
 import urllib2  
 import logging  import logging
 import math  import math
 import urlparse   import urlparse 
 import re  import re
 import string  import string
   
   from SrvTxtUtils import getInt, getText, getHttpData
   
 def logger(txt,method,txt2):  def logger(txt,method,txt2):
     """logging"""      """logging"""
     logging.info(txt+ txt2)      logging.info(txt+ txt2)
           
           
 def getInt(number, default=0):  
     """returns always an int (0 in case of problems)"""  
     try:  
         return int(number)  
     except:  
         return int(default)  
   
 def getText(node):  
     """get the cdata content of a node"""  
     if node is None:  
         return ""  
     # ET:  
     text = node.text or ""  
     for e in node:  
         text += gettext(e)  
         if e.tail:  
             text += e.tail  
   
     # 4Suite:  
     #nodelist=node.childNodes  
     #text = ""  
     #for n in nodelist:  
     #    if n.nodeType == node.TEXT_NODE:  
     #       text = text + n.data  
       
     return text  
   
 getTextFromNode = getText  
   
 def serializeNode(node, encoding="utf-8"):  def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""      """returns a string containing node as XML"""
     s = ET.tostring(node)      s = ET.tostring(node)
Line 128  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
          
 def getParentDir(path):  def getParentDir(path):
     """returns pathname shortened by one"""      """returns pathname shortened by one"""
     return '/'.join(path.split('/')[0:-1])      return '/'.join(path.split('/')[0:-1])
                   
   def getBibdataFromDom(dom):
       """returns dict with all elements from bib-tag"""
       bibinfo = {}
       bib = dom.find(".//meta/bib")
       if bib is not None:
           # put type in @type
           type = bib.get('type')
           bibinfo['@type'] = type
           # put all subelements in dict
           for e in bib:
               bibinfo[e.tag] = getText(e)
   
 def getHttpData(url, data=None, num_tries=3, timeout=10):      return bibinfo
     """returns result from url+data HTTP request"""  
     # we do GET (by appending data to url)  
     if isinstance(data, str) or isinstance(data, unicode):  
         # if data is string then append  
         url = "%s?%s"%(url,data)  
     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):  
         # urlencode  
         url = "%s?%s"%(url,urllib.urlencode(data))  
       
     response = None  
     errmsg = None  
     for cnt in range(num_tries):  
         try:  
             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))  
             if sys.version_info < (2, 6):  
                 # set timeout on socket -- ugly :-(  
                 import socket  
                 socket.setdefaulttimeout(float(timeout))  
                 response = urllib2.urlopen(url)  
             else:  
                 response = urllib2.urlopen(url,timeout=float(timeout))  
             # check result?  
             break  
         except urllib2.HTTPError, e:  
             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))  
             errmsg = str(e)  
             # stop trying  
             break  
         except urllib2.URLError, e:  
             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))  
             errmsg = str(e)  
             # stop trying  
             #break  
   
     if response is not None:  
         data = response.read()  
         response.close()  
         return data  
       
     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))  
     #return None  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 304  class documentViewer(Folder): Line 243  class documentViewer(Folder):
                   
         '''          '''
         logging.debug("HHHHHHHHHHHHHH:load the rss")          logging.debug("HHHHHHHHHHHHHH:load the rss")
         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # create template folder if it doesn't exist              # create template folder if it doesn't exist
Line 355  class documentViewer(Folder): Line 294  class documentViewer(Folder):
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
                           
         if viewMode=="auto": # automodus gewaehlt          # auto viewMode: text_dict if text else images
             if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert          if viewMode=="auto": 
               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                   #texturl gesetzt und textViewer konfiguriert
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)          pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
                   
         if (docinfo.get('textURLPath',None)):          if viewMode != 'images' and docinfo.get('textURLPath', None):
             page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)              # get full text page
               page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
             pageinfo['textPage'] = page              pageinfo['textPage'] = page
         tt = getattr(self, 'template')                 
         pt = getattr(tt, 'viewer_main')                         # get template /template/viewer_main
           pt = getattr(self.template, 'viewer_main')
           # and execute with parameters
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
Line 499  class documentViewer(Folder): Line 443  class documentViewer(Folder):
             docinfo = {}              docinfo = {}
                   
         for x in range(cut):          for x in range(cut):
                  
                 path=getParentDir(path)                  path=getParentDir(path)
                 
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
Line 630  class documentViewer(Folder): Line 573  class documentViewer(Folder):
                   
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))          logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"          # put in all raw bib fields as dict "bib"
         bib = dom.find(".//bib/*")          bib = getBibdataFromDom(dom)
         #bib = dom.xpath("//bib/*")          docinfo['bib'] = bib
         if bib and len(bib)>0:  
             bibinfo = {}  
             for e in bib:  
                 bibinfo[e.localName] = getTextFromNode(e)  
             docinfo['bib'] = bibinfo  
                   
         # extract some fields (author, title, year) according to their mapping          # extract some fields (author, title, year) according to their mapping
         metaData=self.metadata.main.meta.bib          metaData=self.metadata.main.meta.bib
         bib = dom.find(".//bib")          bibtype=bib.get("@type")
         bibtype=bib.get("type")  
         #bibtype=dom.xpath("//bib/@type")          #bibtype=dom.xpath("//bib/@type")
         if not bibtype:          if not bibtype:
             bibtype="generic"              bibtype="generic"
                           
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)          bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
         docinfo['bib_type'] = bibtype          docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)          bibmap=metaData.generateMappingForType(bibtype)
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))          logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))          logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)          # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0:          if len(bibmap) > 0 and bibmap.get('author',None) or bibmap.get('title',None):
             try:              try:
                 docinfo['author']=getText(bib.find(bibmap['author'][0]))                  docinfo['author']=bib.get(bibmap['author'][0])
             except: pass              except: pass
             try:              try:
                 docinfo['title']=getText(bib.find(bibmap['title'][0]))                  docinfo['title']=bib.get(bibmap['title'][0])
             except: pass              except: pass
             try:              try:
                 docinfo['year']=getText(bib.find(bibmap['year'][0]))                  docinfo['year']=bib.get(bibmap['year'][0])
             except: pass              except: pass
                           
             # ROC: why is this here?              # ROC: why is this here?
Line 712  class documentViewer(Folder): Line 649  class documentViewer(Folder):
         return docinfo          return docinfo
           
             
       # TODO: is this needed?
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):      def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets name info from the index.meta file at path or given by dom"""          """gets name info from the index.meta file at path or given by dom"""
         if docinfo is None:          if docinfo is None:
Line 739  class documentViewer(Folder): Line 677  class documentViewer(Folder):
         archivePath = None          archivePath = None
         archiveName = None          archiveName = None
           
         archiveName = getTextFromNode(dom.find("name"))          archiveName = getText(dom.find("name"))
         if not archiveName:          if not archiveName:
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))              logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
                   
         archivePath = getTextFromNode(dom.find("archive-path"))          archivePath = getText(dom.find("archive-path"))
         if archivePath:          if archivePath:
             # clean up archive path              # clean up archive path
             if archivePath[0] != '/':              if archivePath[0] != '/':
Line 807  class documentViewer(Folder): Line 745  class documentViewer(Folder):
                     
         presentationUrl = getText(dom.find(".//texttool/presentation"))          presentationUrl = getText(dom.find(".//texttool/presentation"))
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
           # TODO: is this needed here?
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)          docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
                   
                   
Line 830  class documentViewer(Folder): Line 769  class documentViewer(Folder):
         """gets the bibliographical information from the preseantion entry in texttools          """gets the bibliographical information from the preseantion entry in texttools
         """          """
         dom=self.getPresentationInfoXML(url)          dom=self.getPresentationInfoXML(url)
         try:  
             docinfo['author']=getText(dom.find(".//author"))              docinfo['author']=getText(dom.find(".//author"))
         except:  
             pass  
         try:  
             docinfo['title']=getText(dom.find(".//title"))              docinfo['title']=getText(dom.find(".//title"))
         except:  
             pass  
         try:  
             docinfo['year']=getText(dom.find(".//date"))              docinfo['year']=getText(dom.find(".//date"))
         except:  
             pass  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
Line 874  class documentViewer(Folder): Line 804  class documentViewer(Folder):
             docinfo = self.REQUEST.SESSION['docinfo']              docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current              # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:              if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)                  logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                 return docinfo                  return docinfo
               
         # new docinfo          # new docinfo
         docinfo = {'mode': mode, 'url': url}          docinfo = {'mode': mode, 'url': url}
         if mode=="texttool": #index.meta with texttool information          # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           if mode=="texttool": 
               # index.meta with texttool information
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
         elif mode=="imagepath":          elif mode=="imagepath":
               # folder with images, index.meta optional
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
         elif mode=="filepath":          elif mode=="filepath":
               # filename
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:          else:
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
Line 892  class documentViewer(Folder): Line 828  class documentViewer(Folder):
         if not docinfo.has_key('textURLPath'):          if not docinfo.has_key('textURLPath'):
             docinfo['textURLPath'] = None              docinfo['textURLPath'] = None
                   
         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)          #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo          self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
                                 

Legend: Removed from v.1.175.2.1 | changed lines | Added in v.1.175.2.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>