File:  [Repository] / documentViewer / documentViewer.py
Revision 1.175.2.1: download - view: text, annotated - select for diffs - revision graph
Thu Jul 14 17:43:56 2011 UTC (12 years, 11 months ago) by casties
Branches: elementtree
Diff to: branchpoint 1.175: preferred, colored
first version using elementtree instead of 4suite xml


from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home
from Products.zogiLib.zogiLib import browserCheck

#from Ft.Xml import EMPTY_NAMESPACE, Parse 
#import Ft.Xml.Domlette

import xml.etree.ElementTree as ET

import os.path
import sys
import urllib
import urllib2
import logging
import math
import urlparse 
import re
import string

def logger(txt,method,txt2):
    """Log txt immediately followed by txt2 at INFO level on the root logger.

    The method argument is accepted so existing call sites keep working,
    but it is not used.
    """
    message = txt + txt2
    logging.info(message)
    
    
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only ordinary exceptions are treated as a failed conversion, so
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        return int(number)
    except Exception:
        return int(default)

def getText(node):
    """Return the concatenated text content of an ElementTree node.

    Collects the node's own text, then for every child element the child's
    text content (recursively) followed by the child's tail text.
    Returns "" if node is None.
    """
    if node is None:
        return ""

    parts = [node.text or ""]
    for child in node:
        # recurse into the child (this call used to be misspelled)
        parts.append(getText(child))
        if child.tail:
            parts.append(child.tail)

    return "".join(parts)

getTextFromNode = getText

def serializeNode(node, encoding="utf-8"):
    """Return node serialized as XML by ElementTree.

    The encoding argument is not passed on to ElementTree; the
    serialization always uses the ET.tostring default.
    """
    return ET.tostring(node)

def browserCheck(self):
    """Inspect the User-Agent header of self.REQUEST and describe the browser.

    Returns a dict with the keys 'ua' (raw header value), 'isIE', 'isN4',
    'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' (always False) and
    the version strings 'versFirefox', 'versIE', 'versSafariChrome' and
    'versOpera' ("" when not detected).

    A missing header (None) is parsed like an empty string instead of
    raising.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # get_header may return None; work on a string so the parsing cannot crash
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if agent.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (agent.find('Mozilla/4.') > -1)

    # slices of the user agent starting at the first '(' and the first ')'
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]

    # Safari or Chrome identification
    ie1 = ""
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        # kept from the original: a user agent without "; " in its first
        # parenthesis aborts this block, which also disables the Firefox
        # detection below because ie1 stays empty
        ie = nav.split("; ")[1]
        ie1 = nav1.split(" ")[2]
        ie2 = nav3.split(" ")[1]
        ie3 = nav3.split(" ")[2]
        if ie3.find("Safari") > -1:
            bt['versSafariChrome'] = ie2.split("/")[1]
    except IndexError:
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification
    try:
        if ie1.find("Firefox") > -1:
            nav5 = ie1.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            bt['versFirefox'] = nav5[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if agent.find("Opera") > -1:
            operaRest = nav[nav.find(')'):]
            bt['versOpera'] = operaRest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = agent.find('Macintosh') > -1
    bt['isWin'] = agent.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt

       
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    lastSlash = path.rfind('/')
    if lastSlash < 0:
        return ''
    return path[:lastSlash]
        

def getHttpData(url, data=None, num_tries=3, timeout=10):
    """returns result from url+data HTTP request"""
    # we do GET (by appending data to url)
    if isinstance(data, str) or isinstance(data, unicode):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))
    
    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # set timeout on socket -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError, e:
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # stop trying
            break
        except urllib2.URLError, e:
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # stop trying
            #break

    if response is not None:
        data = response.read()
        response.close()
        return data
    
    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
    #return None

##
## documentViewer class
##
class documentViewer(Folder):
    """Zope folder object that displays documents (document viewer).

    The class attributes below bind the page templates and the management
    form of the viewer.
    """
    meta_type="Document viewer"
    
    security=ClassSecurityInfo()
    # extend the Folder management tabs with the viewer configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms (loaded from the zpt/ and css/ directories)
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    
    
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')    
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    
    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer"""
        self.id=id
        self.title=title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something
        
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            #templateFolder['fulltextclient'] = xmlRpcClient
            templateFolder._setObject('fulltextclient',textServer)
        except Exception, e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            #templateFolder['zogilib'] = zogilib
            templateFolder._setObject('zogilib',zogilib)
        except Exception, e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))
        
        
    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """Forward the call to getTextPage of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getTextPage(**args)

    def getOrigPages(self, **args):
        """Forward the call to getOrigPages of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getOrigPages(**args)
    
    def getOrigPagesNorm(self, **args):
        """Forward the call to getOrigPagesNorm of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getOrigPagesNorm(**args)

    def getQuery(self, **args):
        """Forward the call to getQuery of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getQuery(**args)
     
    def getSearch(self, **args):
        """Forward the call to getSearch of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getSearch(**args)
    
    def getGisPlaces(self, **args):
        """Forward the call to getGisPlaces of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getGisPlaces(**args)
 
    def getAllGisPlaces(self, **args):
        """Forward the call to getAllGisPlaces of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getAllGisPlaces(**args)
       
    def getTranslate(self, **args):
        """Forward the call to getTranslate of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getTranslate(**args)

    def getLemma(self, **args):
        """Forward the call to getLemma of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getLemma(**args)

    def getLemmaQuery(self, **args):
        """Forward the call to getLemmaQuery of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getLemmaQuery(**args)

    def getLex(self, **args):
        """Forward the call to getLex of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getLex(**args)

    def getToc(self, **args):
        """Forward the call to getToc of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getToc(**args)

    def getTocPage(self, **args):
        """Forward the call to getTocPage of template.fulltextclient."""
        textServer = self.template.fulltextclient
        return textServer.getTocPage(**args)

    
    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        render the document with the thumbs_main_rss template
        @param mode: defines how to access the document behind url 
        @param url: url which contains display information
        @param viewMode: text, images or auto; auto selects text if the docinfo has a text URL, otherwise images
        @param start: passed on to getPageinfo
        @param pn: current page, passed on to getPageinfo
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
        
        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')
            
        # the attribute may never have been set on this instance, so it is
        # read with getattr (same guard as in index_html)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
            
        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')
        
        if viewMode=="auto":
            # auto mode: show text if a text URL is set, otherwise images
            if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
                viewMode="text"
            else:
                viewMode="images"
               
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
  
    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
        '''
        render the document with the viewer_main template
        @param url: url which contains display information
        @param mode: defines how to access the document behind url 
        @param viewMode: text, images or auto; auto selects text_dict if the docinfo has a text URL, otherwise images
        @param tocMode: type of 'table of contents' for navigation; anything but thumbs triggers getToc
        @param start: passed on to getPageinfo
        @param pn: current page, passed on to getPageinfo
        @param mk: mark or list of marks, passed to the template via generateMarks
        '''
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
        
        # without the template folder nothing can be rendered
        if not hasattr(self, 'template'):
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"
            
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
            
        docinfo = self.getDocinfo(mode=mode,url=url)
        
        # the thumbs table of contents needs no extra information
        if tocMode != "thumbs":
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
            
        if viewMode=="auto":
            # auto mode: text view if a text URL is set, otherwise images
            hasText = ('textURL' in docinfo) or docinfo.get('textURLPath',None)
            if hasText:
                viewMode="text_dict"
            else:
                viewMode="images"
                
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
        
        if docinfo.get('textURLPath',None):
            pageinfo['textPage'] = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)

        templateFolder = getattr(self, 'template')
        pt = getattr(templateFolder, 'viewer_main')
        marks = self.generateMarks(mk)
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
  
    def generateMarks(self,mk):
        """Return the marks as one string of "mk=<mark>" items.

        mk may be None (result ""), a single value or a list of values.
        The items are concatenated without any separator.
        """
        if mk is None:
            return ""
        if isinstance(mk, list):
            marks = mk
        else:
            marks = [mk]
        return "".join(["mk=%s"%m for m in marks])
    
    
    def getBrowser(self):
        """Return the browser description dict produced by browserCheck for this request."""
        browserInfo = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(browserInfo))
        return browserInfo
        
    def findDigilibUrl(self):
        """Return the digilib base URL reported by the zogilib object in the template folder."""
        return self.template.zogilib.getDLBaseUrl()

    def getDocumentViewerURL(self):
        """Return the absolute URL of this document viewer instance."""
        viewerUrl = self.absolute_url()
        return viewerUrl
    
    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended if idx equals selected."""
        if not idx == selected:
            return style
        return style + 'sel'
    
    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
        """Return a URL to the document viewer with changed request parameters.

        Starts from a copy of the current request form. A single parameter
        is set with param/val, several with the dict params; a value of
        None removes the parameter. Parameters are joined with paramSep
        and appended to baseUrl (default: REQUEST['URL1']).

        Values that are not strings are converted with str(); list or tuple
        values are written as repeated parameters, so values of any of these
        kinds no longer break the quoting.
        """
        # copy existing request params
        urlParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                urlParams.pop(param, None)
            else:
                urlParams[param] = str(val)
                
        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    urlParams.pop(k, None)
                else:
                    urlParams[k] = v

        # FIXME: does this belong here?
        # if the first request used mode=filepath, switch to imagepath now
        if urlParams.get("mode", None) == "filepath":
            urlParams["mode"] = "imagepath"
            urlParams["url"] = getParentDir(urlParams["url"])
                
        # quote values and assemble into query string (not escaping '/')
        pairs = []
        for (k, v) in urlParams.items():
            if isinstance(v, (list, tuple)):
                values = v
            else:
                values = [v]
            for item in values:
                if not isinstance(item, basestring):
                    item = str(item)
                pairs.append("%s=%s"%(k,urllib.quote_plus(item,'/')))
        ps = paramSep.join(pairs)

        if baseUrl is None:
            baseUrl = self.REQUEST['URL1']
            
        url = "%s?%s"%(baseUrl, ps)
        return url


    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
        """Same as getLink, but with the parameters separated by '&amp;'."""
        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;')
    
    def getInfo_xml(self,url,mode):
        """Return info about the document as XML (rendered by the info_xml template)."""
        # the attribute may never have been set on this instance, so it is
        # read with getattr (same guard as in index_html)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
        
        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
        """Return the state of the option optionName kept in the session.

        On first use the option starts with initialState. Afterwards the
        state is flipped whenever newState differs from the newState seen
        on the previous call.
        """
        session = self.REQUEST.SESSION
        if session.has_key(optionName):
            opt = session.get(optionName)
            if opt['lastState'] != newState:
                # a different newState than last time means: toggle
                opt['state'] = not opt['state']
                opt['lastState'] = newState
        else:
            # not in session yet -- initial state
            opt = {'lastState': newState, 'state': initialState}
        
        session[optionName] = opt
        return opt['state']
    
    def isAccessible(self, docinfo):
        """Return True if access to the resource described by docinfo is granted.

        Access type 'free' is always granted. No access type, or one listed
        in self.authgroups, is granted to any user other than
        "Anonymous User". Every other access type is denied.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is not None and access not in self.authgroups:
            logging.error("documentViewer (accessOK) unknown access type %s"%access)
            return False

        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")
    
                
    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """Ask digilib for the directory info of path and store the page count.

        path is shortened by cut trailing components first. The text of the
        "size" element of the dirInfo-xml.jsp response is stored as int in
        docinfo['numPages'] (0 if it is empty). Returns docinfo.
        """
        if docinfo is None:
            docinfo = {}
        
        for x in range(cut):
            path=getParentDir(path)
       
        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
        
        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = ET.fromstring(txt)
        size=getText(dom.find("size"))
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
        
        numPages = 0
        if size:
            numPages = int(size)
        docinfo['numPages'] = numPages
            
        # TODO: produce and keep list of image names and numbers
        return docinfo
    
    def getIndexMetaPath(self,url):
        """Return the /mpiwg/online/... path contained in url.

        The path is built from the last "experimental/" or "permanent/"
        component of url and everything after it. Returns "" if url
        contains neither.
        """
        match = re.match(r".*(experimental|permanent)/(.*)", url)
        if match is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%match.group(2))            
        return "/mpiwg/online/"+"/".join(match.groups())
     
    
    
    def getIndexMetaUrl(self,url):
        """Return the URL of the index.meta document at url.

        A real URL (http:// or https://) is returned unchanged. Anything
        else is treated as an online path: "/mpiwg/online" is removed, the
        path is wrapped in a digilib Texter request and "/index.meta" is
        appended if the result does not already end with "index.meta".
        """
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            return url

        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
        
        return metaUrl
    
    def getDomFromIndexMeta(self, url):
        """Fetch the index.meta document for url and return its parsed root element.

        Raises IOError if the document cannot be read.
        """
        metaUrl = self.getIndexMetaUrl(url)
        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))
        
        return ET.fromstring(txt)
    
    def getPresentationInfoXML(self, url):
        """Fetch the presentation info.xml document at url and return its parsed root element.

        A real URL (http:// or https://) is fetched as is. Anything else is
        treated as an online path: "/mpiwg/online" is removed and the path
        is fetched through the digilib Texter servlet.

        Raises IOError if the document cannot be read.
        """
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
        
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))
            
        return ET.fromstring(txt)
                        
        
    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """Store the access type from index.meta in docinfo['accessType'].

        The index.meta is taken from dom or, if dom is None, loaded from
        path shortened by cut trailing components.

        The access type is the type attribute of access-conditions/access,
        or None if there is none. For the types 'group' and 'institution'
        the lower-cased text of the access/name element is used instead;
        if that element is missing or empty the type itself is kept.
        Returns docinfo.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
        
        access = None
        
        if docinfo is None:
            docinfo = {}
            
        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)
       
        acc = dom.find(".//access-conditions/access")
        if acc is not None:
            acctype = acc.get('type')
            if acctype:
                access=acctype
                if access in ['group', 'institution']:
                    # the group name replaces the type, but only if present
                    nameNode = dom.find(".//access-conditions/access/name")
                    if nameNode is not None and nameNode.text:
                        access = nameNode.text.lower()
            
        docinfo['accessType'] = access
        return docinfo
    
        
    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """Store bibliographical info from index.meta in docinfo.

        The index.meta is taken from dom or, if dom is None, loaded from
        path shortened by cut trailing components.

        Sets docinfo['indexMetaPath'], docinfo['bib'] (dict of all child
        elements of bib, keyed by tag name without namespace) if there are
        any, docinfo['bib_type'] and, as far as the metadata mapping for
        the bib type provides field names, docinfo['author'],
        docinfo['title'] and docinfo['year']. Returns docinfo.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
        
        if docinfo is None:
            docinfo = {}
        
        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)
        
        docinfo['indexMetaPath']=self.getIndexMetaPath(path)
        
        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        # (findall returns every matching element; find would only return the first)
        bibFields = dom.findall(".//bib/*")
        if bibFields:
            bibinfo = {}
            for e in bibFields:
                # ElementTree writes namespaced tags as "{uri}name"
                bibinfo[e.tag.split('}')[-1]] = getTextFromNode(e)
            docinfo['bib'] = bibinfo
        
        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bib = dom.find(".//bib")
        bibtype = None
        if bib is not None:
            bibtype=bib.get("type")
        if not bibtype:
            bibtype="generic"
            
        bibtype=bibtype.replace("-"," ") # wrong types in index meta: "-" instead of " " (not wrong! ROC)
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if bib is not None and len(bibmap) > 0 and (len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0):
            for field in ('author', 'title', 'year'):
                # best effort: a field without usable mapping is skipped
                try:
                    docinfo[field]=getText(bib.find(bibmap[field][0]))
                except (KeyError, IndexError, TypeError, SyntaxError):
                    logging.debug("documentViewer (getbibinfofromindexmeta) no usable mapping for %s"%field)

        return docinfo
    
     
    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """Store the text of the "name" element of index.meta in docinfo['name'].

        The index.meta is taken from dom or, if dom is None, loaded from
        path shortened by cut trailing components. Returns docinfo.
        """
        if docinfo is None:
            docinfo = {}
        
        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        nameText = getText(dom.find("name"))
        docinfo['name'] = nameText
        logging.debug("documentViewer docinfo[name] %s"%nameText)
        return docinfo
    
    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """Fill docinfo from the texttool section of an index.meta document.

        The index.meta is taken from dom or, if dom is None, loaded from url.

        Depending on what the document contains this sets the docinfo keys
        'lang', 'imagePath', 'imageURL', 'viewerURL', 'textURL',
        'textURLPath' and 'textURLPathkurz', and then adds the results of
        getDirinfoFromDigilib, getBibinfoFromIndexMeta, getNameFromIndexMeta,
        getBibinfoFromTextToolPresentation (only if a presentation is given)
        and getAuthinfoFromIndexMeta. Returns docinfo.

        Raises IOError if archivePath is None (see the note at that check).
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
           docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)
        
        archivePath = None
        archiveName = None
    
        archiveName = getTextFromNode(dom.find("name"))
        if not archiveName:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
        
        archivePath = getTextFromNode(dom.find("archive-path"))
        if archivePath:
            # clean up archive path: leading slash, archive name as last component
            if archivePath[0] != '/':
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')
                
        # NOTE(review): archivePath was assigned from getTextFromNode above,
        # which returns "" for a missing node, so this check looks unreachable
        # -- confirm whether "not archivePath" was intended
        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))
        
        imageDir = getText(dom.find(".//texttool/image"))
            
        if not imageDir:
            # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"  
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""
            
        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            # image directory relative to the archive path, without the /mpiwg/online prefix
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
            
        viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
        if viewerUrl:
            docinfo['viewerURL'] = viewerUrl
        
        # old style text URL
        textUrl = getText(dom.find(".//texttool/text"))
        if textUrl:
            if urlparse.urlparse(textUrl)[0] == "": # no URL scheme, i.e. not a URL
                textUrl = os.path.join(archivePath, textUrl) 
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)
            
            docinfo['textURL'] = textUrl
    
        # new style text-url-path
        textUrl = getText(dom.find(".//texttool/text-url-path"))
        if textUrl:
            docinfo['textURLPath'] = textUrl
            # short form: everything before the first "."
            textUrlkurz = string.split(textUrl, ".")[0]
            docinfo['textURLPathkurz'] = textUrlkurz
            #if not docinfo['imagePath']:
                # text-only, no page images
                #docinfo = self.getNumTextPages(docinfo)
                  
         
        presentationUrl = getText(dom.find(".//texttool/presentation"))
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info from the bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
        
        
        if presentationUrl: # overwrite the bib info with the presentation information
            # the presentation URL is built by replacing index.meta in the
            # metadata URL with the relative path of the presentation info
            presentationPath = presentationUrl
            if url.endswith("index.meta"): 
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath
                
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
    
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
        
        return docinfo
   
   
    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """Overlay author, title and year from the texttool presentation file.

        url     -- location of the presentation info XML; it is handed to
                   self.getPresentationInfoXML().
        docinfo -- dict to update; a new dict is started when it is None.
        dom     -- accepted for signature compatibility but ignored: the
                   presentation document is always loaded from url.

        Every field is read on a best-effort basis: when reading a field
        fails, the corresponding docinfo entry is left as it was.
        Returns docinfo.
        """
        if docinfo is None:
            docinfo = {}
        dom = self.getPresentationInfoXML(url)
        # docinfo key -> element path inside the presentation document
        fields = (('author', ".//author"),
                  ('title', ".//title"),
                  ('year', ".//date"))
        for key, xpath in fields:
            try:
                docinfo[key] = getText(dom.find(xpath))
            except Exception:
                # best effort: keep whatever docinfo already holds for this key
                logging.debug("documentViewer (getbibinfofromtexttoolpresentation) could not read %s from %s"%(key, url))
        return docinfo
    
    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """Collect docinfo for a document that is addressed by its image directory.

        "/mpiwg/online" is removed from path and the result is stored as
        docinfo['imagePath'].  For docinfo['imageURL'] that path is first
        shortened by cut parent levels.  The index.meta lookups are called
        with cut+1 because the index.meta file is assumed to sit one level
        above the images.  A new dict is started when docinfo is None.
        Returns docinfo.
        """
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        info = docinfo
        if info is None:
            info = {}
        imgPath = path.replace("/mpiwg/online","")
        info['imagePath'] = imgPath
        info = self.getDirinfoFromDigilib(imgPath,docinfo=info,cut=cut)

        # climb cut directory levels to get the path used in the scaler URL
        scalerPath = imgPath
        for _level in range(cut):
            scalerPath = getParentDir(scalerPath)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
        info['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

        # bibliographic and access information come from index.meta
        metaCut = cut+1
        info = self.getBibinfoFromIndexMeta(imgPath,docinfo=info,cut=metaCut)
        info = self.getAuthinfoFromIndexMeta(imgPath,docinfo=info,cut=metaCut)
        return info
    
    
    def getDocinfo(self, mode, url):
        """Return the docinfo dict for url, built according to mode.

        mode -- 'texttool' (url is an index.meta with texttool information),
                'imagepath', or 'filepath' (both go through
                getDocinfoFromImagePath; 'filepath' passes cut=1).
        url  -- document location, interpreted according to mode.

        A docinfo kept in the session under 'docinfo' is reused when its
        'mode' and 'url' entries match the arguments; otherwise a new one is
        built and stored there.  The result always has a 'textURLPath' key
        (None when the mode handler did not set it).

        Raises ValueError for any other mode.
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        session = self.REQUEST.SESSION
        # look for cached docinfo in session; get() replaces the
        # Python-2-only has_key() and yields None for a missing entry
        docinfo = session.get('docinfo')
        # check if it is still current
        if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
            return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        # FIXME: fake texturlpath
        if 'textURLPath' not in docinfo:
            docinfo['textURLPath'] = None

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        session['docinfo'] = docinfo
        return docinfo
               
    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
        """Return the pageinfo dict describing the current page and paging state.

        current  -- current page number; converted with getInt().
        start    -- first page of the thumbnail group; when it cannot be
                    converted to an int, the first page of the group that
                    contains current is used.
        rows, cols -- thumbnail grid size; fall back to self.thumbrows and
                    self.thumbcols when empty.
        docinfo  -- optional docinfo dict; 'numPages' limits 'end' and yields
                    'numgroups', 'tocSize_<tocMode>' caps 'tocPN'.
        viewMode, tocMode -- stored unchanged in the result.

        The remaining entries (query, textPN, tocPageSize, ...) are read from
        self.REQUEST with the defaults given below.  'tocPN' stays the value
        read from the request unless it is capped, in which case it is an int.
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start: first page of the thumbnail group containing current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            # number of thumbnail groups, rounded up; integer arithmetic only
            numgroups, rest = divmod(np, grpsize)
            if rest > 0:
                numgroups += 1
            pageinfo['numgroups'] = numgroups
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        request = self.REQUEST
        pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
        pageinfo['query'] = request.get('query','')
        pageinfo['queryType'] = request.get('queryType','')
        pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
        pageinfo['textPN'] = request.get('textPN','1')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
        pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
        pageinfo['tocPN'] = request.get('tocPN', '1')
        toc = int(pageinfo['tocPN'])
        pageinfo['textPages'] = int(toc)

        # cached toc size: do not let tocPN point beyond the last toc page.
        # docinfo defaults to None, so it has to be checked before the lookup.
        tocSizeKey = 'tocSize_%s'%tocMode
        if (docinfo is not None) and (tocSizeKey in docinfo):
            tocSize = int(docinfo[tocSizeKey])
            tocPageSize = int(pageinfo['tocPageSize'])
            # number of toc pages, rounded up; // keeps the result an int
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min(tocPages, toc)
        pageinfo['searchPN'] = request.get('searchPN','1')
        pageinfo['sn'] = request.get('sn','')
        return pageinfo
    
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """Store the viewer configuration on self.

        title, digilibBaseUrl, thumbrows and thumbcols are stored unchanged.
        authgroups is a comma separated string; it is stored as a list of
        stripped, lower-cased names.  When RESPONSE is given it is redirected
        to 'manage_main'.
        """
        # NOTE(review): this def starts at column 0 while its body and the
        # neighbouring methods are indented -- confirm whether it is meant to
        # be a method of the viewer class.
        self.title = title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        groups = []
        for name in authgroups.split(','):
            groups.append(name.strip().lower())
        self.authgroups = groups
        if RESPONSE is None:
            return
        RESPONSE.redirect('manage_main')
        
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' page template in the context of self."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    return template.__of__(self)()
  
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer object and register it in self under id.

    imageScalerUrl, textServerName and title are passed on to the
    documentViewer constructor.  When RESPONSE is given it is redirected to
    'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')

## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass used as the page template of a document viewer."""

    # type name under which the template is registered
    meta_type = "DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' page template in the context of self."""
    template = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return template.__of__(self)()

def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    id      -- id of the new template object inside self.
    title   -- optional title; set on the template when non-empty.
    text, submit -- accepted but not used by this function.
    REQUEST -- when given, its response is redirected to the manage_main
               page of the new template; when None no redirect is done.

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # read the template file from the package directory and make sure the
    # file handle is closed again
    zptPath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    zptFile = open(zptPath,'r')
    try:
        txt = zptFile.read()
    finally:
        zptFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None, so the redirect is only done when one is given
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''


    

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>