File:  [Repository] / documentViewer / documentViewer.py
Revision 1.25: download - view: text, annotated - select for diffs - revision graph
Thu May 3 17:35:10 2007 UTC (17 years, 1 month ago) by casties
Branches: MAIN
CVS tags: HEAD
fixed bug in fix for missing getauthinfo in getdocinfofromtexttool



from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

import Ft.Xml.XPath

import os.path
import sys
import cgi
import urllib
import logging

import urlparse 

def logger(txt,method,txt2):
    """Write a message to the root logger.

    @param txt: message prefix (the call sites pass the name of the calling component)
    @param method: logging level, e.g. logging.INFO or logging.ERROR; any
        value that is not an integer falls back to logging.INFO
    @param txt2: message text; it is appended to txt without a separator
    """
    level = method
    if not isinstance(level, int):
        # tolerate callers that pass something other than a level constant
        level = logging.INFO
    # honour the requested level instead of logging everything as INFO
    logging.log(level, txt + txt2)
    
    
def getInt(number, default=0):
    """Convert number to an int, returning default when that is not possible.

    @param number: value to convert (string, number, None, ...)
    @param default: value returned when the conversion fails (0 if omitted)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures are expected here; anything else should surface
        return default

def getTextFromNode(nodename):
    """Return the concatenated text of the direct text-node children of a node.

    @param nodename: DOM node, or None
    @return: the joined text; "" when nodename is None or has no text children
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)

        
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
        

import socket

def urlopen(url,timeout=2):
    """Open url with urllib using a temporary default socket timeout.

    @param url: URL to open
    @param timeout: socket timeout in seconds that is active while opening
    @return: the object returned by urllib.urlopen

    The process-wide default socket timeout is set back to 5 seconds
    afterwards, also when opening the URL raises.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # reset even on failure so a failed request does not leave the
        # short timeout active for the rest of the process
        socket.setdefaulttimeout(5)


##
## documentViewer class
##
class documentViewer(Folder):
    """Folder-based Zope object that renders a document viewer.

    The methods of this class collect document information ("docinfo") from
    index.meta files and from a digilib server and pass it, together with
    paging information, to a page template.
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
    
    meta_type="Document viewer"
    
    security=ClassSecurityInfo()
    # the regular Folder tabs plus one extra tab that opens the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms, loaded from the zpt/ and css/ directories
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')    
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    
    def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
        """Store the viewer configuration and create the 'template' subfolder.

        @param id: object id
        @param imageViewerUrl: URL of the image viewer
        @param textViewerUrl: URL of the text viewer (optional)
        @param title: object title
        @param digilibBaseUrl: base URL of digilib; looked up with
            findDigilibUrl() when empty
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma-separated names of the authorized groups
        """
        self.id = id
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl

        # findDigilibUrl() reads imageViewerUrl, so it has to be set by now
        self.digilibBaseUrl = digilibBaseUrl or self.findDigilibUrl()

        self.thumbcols = thumbcols
        self.thumbrows = thumbrows

        # keep the groups as a list of trimmed, lower-cased names
        groups = []
        for name in authgroups.split(','):
            groups.append(name.strip().lower())
        self.authgroups = groups

        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,viewMode="auto",start=None,pn=1):
        """
        Render the viewer page for one document.

        @param mode: kind of document behind url; handed to getDocinfo()
        @param url: url (or path) which contains the display information
        @param viewMode: "text", "images" or "auto" (default); "auto" becomes
            "text" when the document has a textURL and a text viewer is
            configured, otherwise "images"
        @param start: first page of the thumbnail group (optional)
        @param pn: current page number
        """
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        # create template folder if it doesn't exist
        if not hasattr(self, 'template'):
            self.manage_addFolder('template')

        # look up the digilib URL if none is stored, with a fixed fallback
        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = self.template.viewer_main

        if viewMode == "auto":
            # start from images and switch to text only if both a text url
            # and a text viewer are available
            viewMode = "images"
            if docinfo.get("textURL",'') and self.textViewerUrl:
                viewMode = "text"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
  
  
    def getLink(self,param=None,val=None):
        """Return a link to this viewer that repeats the current request parameters.

        @param param: name of a parameter to change (optional)
        @param val: new value for param; None removes param from the link
        @return: REQUEST['URL1'] followed by the query string

        Values are URL-quoted. List or tuple values produce one key=value
        pair per item, unicode values are encoded as UTF-8 and other
        non-string values are converted with str(), because urllib.quote
        only works on byte strings.
        """
        params = self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        pairs = []
        for (key, value) in params.items():
            if isinstance(value, (list, tuple)):
                items = value
            else:
                items = [value]
            for item in items:
                if isinstance(item, unicode):
                    item = item.encode('utf-8')
                elif not isinstance(item, str):
                    item = str(item)
                pairs.append("%s=%s" % (key, urllib.quote(item)))

        return self.REQUEST['URL1'] + "?" + "&".join(pairs)

    
    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended when idx equals selected."""
        #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if not (idx == selected):
            return style
        return style + 'sel'
        
        
    def isAccessible(self, docinfo):
        """Tell whether the current user may access the resource.

        @param docinfo: dict; its 'accessType' entry decides:
            'free' grants access to everybody, a missing type or one of
            self.authgroups grants access to every user except
            "Anonymous User", and any other type denies access
        @return: True or False
        """
        access = docinfo.get('accessType', None)
        logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)

        if access == 'free':
            logger("documentViewer (accessOK)", logging.INFO, "access is free")
            return True

        if access is not None and access not in self.authgroups:
            logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
            return False

        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")
    
                
    def getDirinfoFromDigilib(self,path,docinfo=None):
        """Ask digilib for information about an image directory.

        @param path: image directory, appended to the dirInfo-xml.jsp URL
        @param docinfo: dict to update (a new one is created when None)
        @return: docinfo with 'numPages' set to the value of the first
            //dir/size element, or to 0 when there is none
        @raise IOError: when the directory info cannot be read and parsed
            in three attempts
        """
        num_retries = 3
        if docinfo is None:
            docinfo = {}

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

        for cnt in range(num_retries):
            try:
                # read the text first and parse it afterwards
                # dom = NonvalidatingReader.parseUri(imageUrl)
                conn = urllib.urlopen(infoUrl)
                try:
                    txt = conn.read()
                finally:
                    # do not leave the connection open between retries
                    conn.close()
                dom = Parse(txt)
                break
            except Exception:
                logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
        else:
            # the loop ended without a successful parse
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        sizes=dom.xpath("//dir/size")

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # the format string used to lack a placeholder, so the value never showed up
        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(docinfo['numPages']))

        return docinfo
    
            
    def getIndexMeta(self, url):
        """Return the parsed DOM of the index.meta document at url.

        @param url: either a full "http://" URL, which is used as it is, or
            an online path; a path is requested through the Texter servlet
            of digilib after removing "/mpiwg/online" and appending
            "/index.meta" if it does not already end with "index.meta"
        @raise IOError: when the document cannot be read and parsed in
            three attempts
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        # goes to the log instead of being printed to stdout
        logger("documentViewer (getIndexMeta)", logging.INFO, "metaUrl: %s"%(metaUrl))

        for cnt in range(num_retries):
            try:
                # reading the text and parsing it separately avoids the
                # encoding errors seen with parseUri
                # dom = NonvalidatingReader.parseUri(metaUrl)
                conn = urllib.urlopen(metaUrl)
                try:
                    txt = conn.read()
                finally:
                    conn.close()
                dom = Parse(txt)
                break
            except Exception:
                logger("ERROR documentViewer (getIndexMeta)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return dom
    
    def getPresentationInfoXML(self, url):
        """Return the parsed DOM of the presentation info.xml document at url.

        @param url: either a full "http://" URL, which is used as it is, or
            an online path, which is requested through the Texter servlet
            of digilib after removing "/mpiwg/online"
        @raise IOError: when the document cannot be read and parsed in
            three attempts
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        for cnt in range(num_retries):
            try:
                # reading the text and parsing it separately avoids the
                # encoding errors seen with parseUri
                # dom = NonvalidatingReader.parseUri(metaUrl)
                conn = urllib.urlopen(metaUrl)
                try:
                    txt = conn.read()
                finally:
                    conn.close()
                dom = Parse(txt)
                break
            except Exception:
                logger("ERROR documentViewer (getPresentationInfoXML)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read infoXML from %s"%(url))

        return dom
                        
        
    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """Read the access type from an index.meta document.

        @param path: path of the resource; when dom is None the index.meta
            is loaded from the parent directory of path
        @param docinfo: dict to update (a new one is created when None)
        @param dom: already parsed index.meta (optional)
        @return: docinfo with 'accessType' set to the type attribute of
            access-conditions/access, to the lower-cased access name when
            the type is 'group' or 'institution' and a name element exists,
            or to None when there is no access element
        """
        logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    # keep the bare type rather than failing with an IndexError
                    logger("documentViewer (getauthinfofromindexmeta)", logging.WARNING,"access name missing for type %s in: %s"%(access,path))

        docinfo['accessType'] = access
        return docinfo
    
        
    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """Read author, title, year and language from the bib tag of an index.meta.

        @param path: path of the resource; when dom is None the index.meta
            is loaded from the parent directory of path
        @param docinfo: dict to update (a new one is created when None)
        @param dom: already parsed index.meta (optional)
        @return: docinfo; 'author', 'title', 'year' and 'lang' are only set
            when the metadata mapping for the bib type has an author field.
            A field whose element is missing is set to ''.
        """
        logger("documentViewer (getbibinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        def firstText(tag):
            # text of the first //bib/<tag> element, '' if tag is empty or
            # no such element exists
            if not tag:
                return ''
            nodes = dom.xpath("//bib/%s"%tag)
            if not nodes:
                return ''
            return getTextFromNode(nodes[0])

        metaData=self.metadata.main.meta.bib
        bibtypes=dom.xpath("//bib/@type")
        if bibtypes:
            bibtype=bibtypes[0].value
        else:
            bibtype="generic"
        # the type is looked up with " " in place of "-"
        bibtype=bibtype.replace("-"," ")
        bibmap=metaData.generateMappingForType(bibtype)
        #print "bibmap: ", bibmap, " for: ", bibtype
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                docinfo[field]=firstText(bibmap[field][0])
            docinfo['lang']=firstText("lang")
        return docinfo

        
    def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """Build docinfo from the resource and texttool tags of an index.meta.

        @param url: url or path of the index.meta document
        @param dom: already parsed index.meta (loaded from url when None)
        @param docinfo: dict to update (a new one is created when None)
        @return: docinfo with 'lang', 'numPages', 'imagePath' and 'imageURL'
            always set; 'viewerURL', 'textURL', bibliographic and access
            information are added when the document provides them
        @raise IOError: when no archive path can be determined
        """
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        if docinfo.get('lang',None) is None:
            docinfo['lang']='' # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames=dom.xpath("//resource/name")
        if archiveNames:
            archiveName=getTextFromNode(archiveNames[0])
        else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url))

        archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath=getTextFromNode(archivePaths[0])

        if archivePath:
            # clean up archive path
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # an empty archive-path element is treated like a missing one:
            # try to get archive-path from url
            archivePath = None
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s"%(url))

        imageDir=""
        imageDirs=dom.xpath("//texttool/image")
        if imageDirs:
            imageDir=getTextFromNode(imageDirs[0])

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir=os.path.join(archivePath,imageDir)
            imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir
        else:
            # no usable image directory: this is not an error because text
            # mode is the standard now. Set the page count to one for the
            # moment; navigation via the thumbs does not work in this case.
            docinfo['numPages']=1
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            viewerUrl=getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl

        textUrls=dom.xpath("//texttool/text")
        if textUrls:
            textUrl=getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0]=="": # no scheme, so not a url
                textUrl=os.path.join(archivePath,textUrl)

            docinfo['textURL'] = textUrl

        presentationUrls=dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get info from bib tag

        if presentationUrls:
            # overwrite the bib info with the presentation information.
            # The presentation url is the metadata url with index.meta
            # replaced by the relative path of the presentation info.
            presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)   # get access info
        return docinfo
   
   
    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """Take author, title and year from the presentation info of a texttool.

        @param url: url of the presentation info document
        @param docinfo: dict to update (a new one is created when None)
        @param dom: not used; the presentation document is always loaded
            from url
        @return: docinfo with 'author', 'title' and 'year' replaced by the
            text of the first author, title and date element. A field whose
            element is missing keeps its previous value.
        """
        if docinfo is None:
            docinfo = {}

        infoDom=self.getPresentationInfoXML(url)
        for (key, query) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            nodes = infoDom.xpath(query)
            if nodes:
                docinfo[key]=getTextFromNode(nodes[0])
        return docinfo
    
    def getDocinfoFromImagePath(self,path,docinfo=None):
        """Build docinfo for a directory of images.

        @param path: path to the images; the index.meta file is expected one
            level higher
        @param docinfo: dict to update (a new one is created when None)
        @return: docinfo with image path, directory info from digilib,
            image URL, bibliographic and access information
        """
        logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}

        imagePath = path.replace("/mpiwg/online","")
        docinfo['imagePath'] = imagePath
        docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imagePath

        # bibliographic info first, access info second
        docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo)
        return self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo)
    
    
    def getDocinfo(self, mode, url):
        """Return the docinfo for url, using the copy in the session if it matches.

        @param mode: "texttool" (url is an index.meta with texttool
            information) or "imagepath" (url is a path to images)
        @param url: url or path of the document
        @raise ValueError: for any other mode
        """
        logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))

        # look for cached docinfo in session
        session = self.REQUEST.SESSION
        if session.has_key('docinfo'):
            cached = session['docinfo']
            # it only counts if it was built for the same mode and url
            isCurrent = (cached is not None
                         and cached.get('mode') == mode
                         and cached.get('url') == url)
            if isCurrent:
                logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
                return cached

        if mode not in ("texttool", "imagepath"):
            logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
            raise ValueError("Unknown mode %s"%(mode))

        # new docinfo
        fresh = {'mode': mode, 'url': url}
        if mode == "texttool":
            fresh = self.getDocinfoFromTextTool(url, docinfo=fresh)
        else:
            fresh = self.getDocinfoFromImagePath(url, docinfo=fresh)

        logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%fresh)
        self.REQUEST.SESSION['docinfo'] = fresh
        return fresh
        
        
    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """Return the paging information for the thumbnail overview.

        @param current: current page number (pages are counted from 1)
        @param start: first page of the displayed group; defaults to the
            first page of the group that contains current
        @param rows: thumbnail rows, defaults to self.thumbrows
        @param cols: thumbnail columns, defaults to self.thumbcols
        @param docinfo: dict with 'numPages' (optional)
        @return: dict with 'current', 'rows', 'cols', 'groupsize', 'start',
            'end' and, when docinfo is given, 'numgroups'
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        # an empty grid would lead to a division by zero below
        grpsize = max(cols * rows, 1)
        pageinfo['groupsize'] = grpsize
        # groups are 1..grpsize, grpsize+1..2*grpsize, ...; subtracting one
        # before dividing keeps the last page of a group inside its group
        groupStart = ((max(current, 1) - 1) // grpsize) * grpsize + 1
        start = getInt(start, default=groupStart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = getInt(docinfo.get('numPages', 0))
            pageinfo['end'] = min(pageinfo['end'], np)
            # number of groups, rounded up
            pageinfo['numgroups'] = (np + grpsize - 1) // grpsize

        return pageinfo
                
    def text(self,mode,url,pn):
        """Collect the nodes of page pn of a text document (apparently unfinished).

        The method parses the text document, takes the pn-th "pb" element
        and the one after it, and collects the parent of the first one and
        its following siblings up to the parent of the second one in a list.
        There is no return statement after that, so the list is discarded
        and the result is None.

        NOTE(review): parseUrlTextTool is not defined in the visible part of
        this file -- confirm that it exists before relying on this method.
        """
        if mode=="texttool": #index.meta with texttool information
            (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
        
        #print textpath
        try:
            # for any other mode textpath is not assigned; the resulting
            # error is caught here as well and leads to the None result
            dom = NonvalidatingReader.parseUri(textpath)
        except:
            return None
    
        list=[]
        nodes=dom.xpath("//pb")

        # "pb" element that starts page pn (pn is counted from 1)
        node=nodes[int(pn)-1]
        
        p=node
        
        # walk up to the enclosing "p" element
        while p.tagName!="p":
            p=p.parentNode
        
        
        # "pb" element of the following page; raises IndexError on the last page
        endNode=nodes[int(pn)]
        
        
        e=endNode
        
        # walk up to the enclosing "p" element of the end marker
        while e.tagName!="p":
            e=e.parentNode
        
        
        next=node.parentNode
        
        # collect the siblings (presumably "s" elements -- TODO confirm)
        while next and (next!=endNode.parentNode):
            list.append(next)    
            next=next.nextSibling    
        list.append(endNode.parentNode)
        
        if p==e:# both in the same paragraph
            pass
#    else:
#            next=p
#            while next!=e:
#                print next,e
#                list.append(next)
#                next=next.nextSibling
#            
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#

    def findDigilibUrl(self):
        """Try to get the digilib base URL from zogilib.

        Requests "getScalerUrl" below the image viewer URL and removes
        "/servlet/Scaler?" from the answer.

        @return: the base URL, or None when it cannot be determined; the
            reason is written to the log
        """
        conn = None
        try:
            # the last character of imageViewerUrl is cut off before the
            # method name is appended (presumably a trailing "/" -- TODO confirm)
            url = self.imageViewerUrl[:-1] + "/getScalerUrl"
            #print urlparse.urlparse(url)[0]
            #print urlparse.urljoin(self.absolute_url(),url)
            logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
            logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))

            if urlparse.urlparse(url)[0]=='': #relative path
                url=urlparse.urljoin(self.absolute_url()+"/",url)

            conn = urlopen(url)
            scaler = conn.read()
            # surrounding whitespace is never part of a URL
            return scaler.replace("/servlet/Scaler?", "").strip()
        except Exception:
            # best effort: callers treat None as "unknown", but say why
            logger("documentViewer (finddigiliburl)", logging.WARNING, "%s (%s)"%sys.exc_info()[0:2])
            return None
        finally:
            if conn is not None:
                conn.close()
    
    def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
        """Replace the configuration of this viewer.

        @param imageViewerUrl: URL of the image viewer
        @param textViewerUrl: URL of the text viewer
        @param title: object title
        @param digilibBaseUrl: base URL of digilib, stored as given
        @param thumbrows: number of thumbnail rows
        @param thumbcols: number of thumbnail columns
        @param authgroups: comma-separated names of the authorized groups
        @param RESPONSE: when given, the browser is redirected to manage_main
        """
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols

        # keep the groups as a list of trimmed, lower-cased names
        groups = []
        for name in authgroups.split(','):
            groups.append(name.strip().lower())
        self.authgroups = groups

        if RESPONSE is None:
            return
        RESPONSE.redirect('manage_main')
    
    
        
        
#    security.declareProtected('View management screens','renameImageForm')

def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
  
def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """Create a document viewer and add it to this container.

    @param id: id of the new object
    @param imageViewerUrl: URL of the image viewer
    @param textViewerUrl: URL of the text viewer
    @param title: title of the new object
    @param RESPONSE: when given, the browser is redirected to manage_main
    """
    viewer = documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')


##
## DocumentViewerTemplate class
##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer.

    A ZopePageTemplate subclass that only sets its own meta_type.
    """
    meta_type="DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()

def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main template.

    @param id: id of the new template
    @param title: title of the new template (optional)
    @param text: not used; the content always comes from zpt/viewer_main.zpt
    @param REQUEST: when given, the browser is redirected to the manage_main
        page of the new template
    @param submit: not used
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file open
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nobody to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''


    

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>