view documentViewer.py @ 487:8fd6f26fe29e elementtree

more new templates
author casties
date Tue, 23 Aug 2011 15:05:11 +0200
parents f2c5417b7ff4
children ec3d848fe9e8
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from App.ImageFile import ImageFile
#from Products.ZSimpleFile.ZSimpleFile import ZSimpleFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

#from Ft.Xml import EMPTY_NAMESPACE, Parse 
#import Ft.Xml.Domlette

import xml.etree.ElementTree as ET

import os.path
import sys
import urllib
import logging
import math
import urlparse 
import re
import string

from SrvTxtUtils import getInt, getText, getHttpData

def logger(txt,method,txt2):
    """Write txt immediately followed by txt2 to the root logger at INFO level.

    The method argument is accepted so that existing call sites keep
    working, but it is not used.
    """
    message = txt + txt2
    logging.info(message)
    
    
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree element (including its children) to XML.

    Only node is used. The encoding argument is a left-over of the former
    4Suite based implementation and has no effect: the element is written
    with ElementTree's default settings.
    """
    return ET.tostring(node)

def browserCheck(self):
    """Sniff the browser type from the User-Agent header of the current request.

    Reads HTTP_USER_AGENT from self.REQUEST and returns a dict with the keys

      ua                -- the raw header value (None if the header is missing)
      isIE              -- True if the header contains 'MSIE'
      isN4              -- True if it contains 'Mozilla/4.' and is not IE
      versIE, versFirefox, versSafariChrome, versOpera
                        -- version strings, '' if not detected
      isMac, isWin      -- header contains 'Macintosh' / 'Windows'
      isIEWin, isIEMac  -- combinations of the flags above
      staticHTML        -- always False

    The version detection is purely positional (it splits the header at
    parentheses, blanks and slashes) and therefore best effort: a header
    without the expected layout simply leaves the version at ''. Every
    detection step is independent of the others.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the page:
    # sniff an empty string, but report the raw value
    text = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in text:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in text

    # find() returns -1 for a missing character, so these slices shrink to
    # the last character of the header instead of raising
    fromOpen = text[text.find('('):]
    fromClose = text[text.find(')'):]

    # Safari or Chrome: "... (KHTML, like Gecko) <Name>/<version> Safari/..."
    try:
        second = fromClose[fromClose.find('('):]
        tokens = second[second.find(')'):].split(" ")
        product = tokens[1]
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = product.split("/")[1]
    except IndexError:
        pass

    # IE: "(compatible; MSIE <version>; ...)"
    try:
        candidate = fromOpen.split("; ")[1]
        if "MSIE" in candidate:
            bt['versIE'] = candidate.split(" ")[1]
    except IndexError:
        pass

    # Firefox: "...) Gecko/<date> Firefox/<version>"
    try:
        candidate = fromClose.split(" ")[2]
        if "Firefox" in candidate:
            version = candidate.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters, e.g. "3.6" of "3.6.3"
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: "Opera/9.80 (...) Presto/<x> Version/<version>"
    try:
        if "Opera" in text:
            tail = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = tail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in text
    bt['isWin'] = 'Windows' in text
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt

def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed first, then the path is split at '/',
    the last cnt segments are dropped and the rest is joined again.
    cnt=0 returns the path (without trailing slashes) unchanged.
    """
    # make sure path doesn't end with /
    segments = path.rstrip('/').split('/')
    if cnt == 0:
        # segments[0:-0] would be the empty list, not the full list
        return '/'.join(segments)

    return '/'.join(segments[0:-cnt])


##
## documentViewer class
##
class documentViewer(Folder):
    """Zope folder that displays a document as page images and/or text.

    The viewer collects information about a document in a 'docinfo' dict
    (see getDocinfo) and about the requested page in a 'pageinfo' dict
    (see getPageinfo) and passes both to the page templates below.
    """
    meta_type="Document viewer"
    
    security=ClassSecurityInfo()
    # adds a 'main config' tab (changeDocumentViewerForm) to the standard folder tabs
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )
    
    # class-level default; __init__ and changeDocumentViewer try to replace
    # it with the object found under the name 'metadata'
    metadataService = None
    """MetaDataFolder instance"""

    # templates and forms (page template files in the zpt directory)
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    # the style sheet is served as a static file
    # TODO: can this be nicer?
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    
    
    # template used by thumbs_rss()
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    
    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer"""
        self.id=id
        self.title=title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something
        
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            #templateFolder['fulltextclient'] = xmlRpcClient
            templateFolder._setObject('fulltextclient',textServer)
        except Exception, e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
            
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            #templateFolder['zogilib'] = zogilib
            templateFolder._setObject('zogilib',zogilib)
        except Exception, e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))
            
        try:
            # assume MetaDataFolder instance is called metadata 
            self.metadataService = getattr(self, 'metadata')
        except Exception, e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
            
        if digilibBaseUrl is not None:
            self.digilibBaseUrl = digilibBaseUrl
            
        
    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """Return the result of getTextPage() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getTextPage(**args)

    def getOrigPages(self, **args):
        """Return the result of getOrigPages() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getOrigPages(**args)
    
    def getOrigPagesNorm(self, **args):
        """Return the result of getOrigPagesNorm() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getOrigPagesNorm(**args)

    def getQuery(self, **args):
        """Return the result of getQuery() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getQuery(**args)
     
    def getSearch(self, **args):
        """Return the result of getSearch() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getSearch(**args)
    
    def getGisPlaces(self, **args):
        """Return the result of getGisPlaces() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getGisPlaces(**args)
 
    def getAllGisPlaces(self, **args):
        """Return the result of getAllGisPlaces() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getAllGisPlaces(**args)
       
    def getWordInfo(self, **args):
        """Return the result of getWordInfo() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getWordInfo(**args)

    def getLemma(self, **args):
        """Return the result of getLemma() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getLemma(**args)

    def getLemmaQuery(self, **args):
        """Return the result of getLemmaQuery() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getLemmaQuery(**args)

    def getLex(self, **args):
        """Return the result of getLex() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getLex(**args)

    def getToc(self, **args):
        """Return the result of getToc() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getToc(**args)

    def getTocPage(self, **args):
        """Return the result of getTocPage() of template.fulltextclient, called with the given keyword arguments."""
        textServer = self.template.fulltextclient
        return textServer.getTocPage(**args)

    
    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        Render the document with the template 'thumbs_main_rss' of the template folder.

        @param mode: defines how to access the document behind url 
        @param url: url which contains display information
        @param viewMode: 'images', 'text' or 'auto' (default); 'auto' becomes
            'text' if docinfo has a 'textURL' key or a non-empty 'textURLPath',
            otherwise 'images'
        @param start: first page of the thumbnail batch (passed to getPageinfo)
        @param pn: current page number (passed to getPageinfo)
        @return: the output of the template called with docinfo, pageinfo and viewMode
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
        
        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')
            
        # digilibBaseUrl is only stored when it was given to the constructor
        # or the config form, so look it up defensively (like index_html)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
            
        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')
        
        if viewMode=="auto":
            # auto mode: show text if a text URL is configured
            if "textURL" in docinfo or docinfo.get('textURLPath',None):
                viewMode="text"
            else:
                viewMode="images"
               
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

  
    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
        """
        Render one page of a document with the template 'viewer_<viewMode>'.

        @param url: url which contains display information
        @param mode: defines how to access the document behind url 
        @param viewMode: 'images': display images, 'text': display text;
            'auto' (default) selects 'text' (with viewType 'dict') if docinfo
            has a text URL and 'images' otherwise; 'text_dict' is a legacy
            alias for viewMode 'text' with viewType 'dict'
        @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text';
            a list is joined with ',' (empty entries dropped)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param start: first page of the thumbnail batch
        @param pn: current page number
        @return: output of the template, or an error string if the template
            folder or the template for viewMode is missing
        """
        
        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))
        
        if not hasattr(self, 'template'):
            # without the template folder nothing can be rendered
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"
            
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
            
        docinfo = self.getDocinfo(mode=mode,url=url)
        if tocMode != "thumbs":
            # every other tocMode needs the table of contents from the text server
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode == "text_dict":
            # legacy name
            viewMode, viewType = "text", "dict"
        elif viewMode == "auto":
            # text if there is a text, else images
            hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
            if hasText:
                viewMode, viewType = "text", "dict"
            else:
                viewMode = "images"
                
        if isinstance(viewType, list):
            # duplicate request parameters arrive as a list
            logging.debug("index_html: viewType is list:%s"%viewType)
            nonEmpty = [t for t in viewType if t]
            viewType = ','.join(nonEmpty)
                        
        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)
                    
        # template is /template/viewer_$viewMode
        pt = getattr(self.template, 'viewer_%s'%viewMode, None)
        if pt is not None:
            return pt(docinfo=docinfo, pageinfo=pageinfo)

        # TODO: error page?
        logging.error("No template for viewMode=%s!"%viewMode)
        return "No template for viewMode=%s!"%viewMode
  
    def generateMarks(self,mk):
        # Returns the concatenation of "mk=<mark>" for every mark in mk
        # (without any separator). mk may be None (gives ""), a single
        # value or a list of values.
        # NOTE(review): this method has no docstring; as Zope 2 only
        # publishes methods with a docstring, none is added here.
        if mk is None:
            return ""
        marks = mk if isinstance(mk, list) else [mk]
        return "".join(["mk=%s"%m for m in marks])
    
    
    def getBrowser(self):
        """Return the browser information dict that browserCheck() derives from the current request."""
        browserInfo = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(browserInfo))
        return browserInfo
        
    def findDigilibUrl(self):
        """Return the digilib base URL as reported by getDLBaseUrl() of template.zogilib."""
        return self.template.zogilib.getDLBaseUrl()
    
    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """Return the URL of the digilib Scaler servlet for an image.

        @param fn: image path; only used if docinfo has no 'imageURL'.
            If fn is None, 'imagePath' of docinfo is used instead.
        @param pn: page number; appended as pn parameter if not empty
        @param dw: requested width (dw parameter)
        @param dh: requested height (dh parameter)
        @param docinfo: optional docinfo dict; its 'imageURL' (if present)
            is taken as the start of the URL
        """
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            # no ready-made URL: build one from the digilib base URL
            if fn is None and docinfo is not None:
                fn = docinfo.get('imagePath','')

            url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

        pageParam = ""
        if pn:
            pageParam = "&pn=%s"%pn

        return url + pageParam + "&dw=%s&dh=%s"%(dw,dh)

    def getDocumentViewerURL(self):
        """Return the absolute URL of this viewer object (self.absolute_url())."""
        viewerUrl = self.absolute_url()
        return viewerUrl
    
    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended if idx equals selected."""
        suffix = 'sel' if idx == selected else ''
        return style + suffix
    
    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """Return a dict with the URL parameters of the request plus changes.

        Starts with a copy of self.REQUEST.form (the form itself is not
        modified) and then

        - sets param to str(val), or removes param if val is None,
        - sets every key of the dict params to its value, or removes the
          key if the value is None,
        - if duplicates is set, replaces list values (coming from
          duplicate keys): 'comma' joins the non-empty entries with ',',
          'first' takes the first non-empty entry ('' if all are empty).
        """
        # copy existing request params
        newParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                newParams.pop(param, None)
            else:
                newParams[param] = str(val)
                
        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    newParams.pop(k, None)
                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys)
            for (k,v) in list(newParams.items()):
                if not isinstance(v, list):
                    continue
                
                nonEmpty = [t for t in v if t]
                if duplicates == 'comma':
                    # make comma-separated list of non-empty entries
                    newParams[k] = ','.join(nonEmpty)
                elif duplicates == 'first':
                    # take first non-empty entry; a list of only empty
                    # entries gives '' like the 'comma' variant
                    if nonEmpty:
                        newParams[k] = nonEmpty[0]
                    else:
                        newParams[k] = ''
        
        return newParams
    
    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
        """Return a URL to the viewer with changed request parameters.

        The parameters are those returned by getParams(param, val, params,
        duplicates). Values are URL-quoted with quote_plus (leaving '/'
        unescaped) and joined with paramSep.

        @param baseUrl: URL in front of the '?'; defaults to getDocumentViewerURL()
        @param paramSep: separator between the parameters
        """
        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)

        def quoteValue(v):
            # quote_plus() raises KeyError for non-ASCII unicode objects and
            # unicode() can not decode non-ASCII byte strings, so byte
            # strings are quoted as they are and everything else is
            # converted to UTF-8 encoded bytes first
            if not isinstance(v, str):
                v = unicode(v).encode('utf-8')
            return urllib.quote_plus(v, '/')

        # quote values and assemble into query string (not escaping '/')
        ps = paramSep.join(["%s=%s"%(k, quoteValue(v)) for (k, v) in urlParams.items()])
        if baseUrl is None:
            baseUrl = self.getDocumentViewerURL()
            
        url = "%s?%s"%(baseUrl, ps)
        return url

    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
        """Return the same link as getLink(), with the parameters separated by '&amp;'.

        The HTML-escaped separator makes the URL usable in places where the
        result is inserted into markup without further escaping.
        """
        # with a plain '&' this method would be an exact duplicate of getLink()
        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
    
    
    def getInfo_xml(self,url,mode):
        """Return information about the document, rendered by the template 'info_xml'.

        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        """
        # digilibBaseUrl is only stored when it was given to the constructor
        # or the config form, so look it up defensively (like index_html)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
        
        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def isAccessible(self, docinfo):
        """Return True if the current user may see the document of docinfo.

        Decided by docinfo['accessType']:
        - 'free': always accessible
        - missing, or one of self.authgroups: accessible for every user
          whose name is not "Anonymous User"
        - anything else: not accessible (logged as an error)
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is not None and access not in self.authgroups:
            logging.error("documentViewer (accessOK) unknown access type %s"%access)
            return False

        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False

        return (user.getUserName() != "Anonymous User")
    


    def getDocinfo(self, mode, url):
        """Return the docinfo dict for the document behind url.

        A docinfo stored in the session is reused if its 'mode' and 'url'
        match. Otherwise a new dict is built from the index.meta data
        delivered by self.metadataService and from digilib, stored in the
        session and returned.

        @param mode: 'texttool' (url points to the document directory or
            its index.meta), 'imagepath' (url points to a folder with
            images) or 'filepath' (url points to an image file)
        @param url: location of the document, interpreted according to mode
        @raise IOError: if no index.meta is found for mode 'texttool'
        @raise ValueError: if mode is none of the three known modes
        """
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if it's still current, i.e. made for the same mode and url
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo
            
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode=="texttool": 
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode=="imagepath":
            # url points to folder with images, index.meta optional
            # assume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        elif mode=="filepath":
            # url points to image file, index.meta optional
            # assume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
        
        docinfo['documentUrl'] = docUrl
        # process index.meta contents (only if the root element is 'resource')
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info (uses the document path set by the resource step)
            texttool = self.metadataService.getTexttoolData(dom=metaDom)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            
            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                docinfo = self.getDocinfoFromBib(docinfo, bib)
            else:
                # no bib - try info.xml (uses the presentation URL set by the texttool step)
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                
            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution
                #docinfo = self.getDocinfoFromAccess(docinfo, access)

            # copyright info
            copyright = self.metadataService.getCopyrightData(dom=metaDom)
            if copyright:
                logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                docinfo['copyright'] = copyright
                #docinfo = self.getDocinfoFromAccess(docinfo, access)

        # image path
        if mode != 'texttool':
            # override image path from texttool with url (without the first '/mpiwg/online/')
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        # store in session so that the next request for the same document can reuse it
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getDocinfoFromResource(self, docinfo, resource):
        """Copy name and path of the document from the resource data into docinfo.

        Sets docinfo['documentName'] to the 'name' entry of resource and
        docinfo['documentPath'] to its 'archive-path' entry (with a leading
        '/' and ending with the document name). Without archive-path the
        document URL of docinfo is used as path, unless it starts with
        'http:'. The first '/mpiwg/online' is removed from the path.

        @return: the modified docinfo
        """
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath
                
            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName
            
        else:
            # use docUrl as docPath; getDocinfo() stores it as 'documentUrl'
            # (the old spelling 'documentURL' is still accepted)
            docUrl = docinfo.get('documentUrl', docinfo.get('documentURL', None))
            if docUrl and not docUrl.startswith('http:'):
                docPath = docUrl
                
        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo

    def getDocinfoFromTexttool(self, docinfo, texttool):
        """Copy the texttool data into docinfo and return docinfo.

        Relative locations are resolved against docinfo['documentPath'].
        Keys that may be set: 'imagePath', 'textURL' (old style text URL),
        'textURLPath' (new style), 'presentationUrl'. Keys that are always
        set: 'pageFlow' (default 'ltr'), 'oddPage' (default 'left') and
        'titlePage' (default 0, meaning not defined).
        """
        docPath = docinfo.get('documentPath', None)

        # image dir, relative to the document
        imageDir = texttool.get('image', None)
        if imageDir and docPath:
            fullImageDir = os.path.join(docPath, imageDir)
            docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)
        
        # old style text URL
        oldTextUrl = texttool.get('text', None)
        if oldTextUrl and docPath:
            hasScheme = urlparse.urlparse(oldTextUrl)[0] != ""
            if not hasScheme:
                # not a URL but a path relative to the document
                oldTextUrl = os.path.join(docPath, oldTextUrl) 
            
            docinfo['textURL'] = oldTextUrl
    
        # new style text-url-path
        newTextUrl = texttool.get('text-url-path', None)
        if newTextUrl:
            docinfo['textURLPath'] = newTextUrl
            
        # page flow, position of odd pages, number of title page (0: not defined)
        layoutFields = (
            ('pageFlow', 'page-flow', 'ltr'),
            ('oddPage', 'odd-scan-position', 'left'),
            ('titlePage', 'title-scan-no', 0),
        )
        for (key, field, default) in layoutFields:
            docinfo[key] = texttool.get(field, default)
            
        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if presentation and docPath:
            if not presentation.startswith('http:'):
                presentation = os.path.join(docPath, presentation)
                
            docinfo['presentationUrl'] = presentation
        
        return docinfo

    def getDocinfoFromBib(self, docinfo, bib):
        """Copy the bibliographical data into docinfo and return docinfo.

        Stores the raw bib data as 'bib', its '@type' entry as 'bibType'
        and, for convenience, 'creator', 'title' and 'date' as mapped by
        getDCMappedData() of the metadata service (None if missing).
        """
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # all raw bib fields
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # DC metadata
        dc = self.metadataService.getDCMappedData(bib)
        for field in ('creator', 'title', 'date'):
            docinfo[field] = dc.get(field,None)

        return docinfo
            
    def getDocinfoFromAccess(self, docinfo, acc):
        """Store the access type of the access data acc in docinfo['accessType'].

        The access type is acc['@attr']['type']; for the types 'group' and
        'institution' the lower-cased acc['name'] is stored instead.
        Reading is best effort: if the data is incomplete docinfo is
        returned unchanged.
        """
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            acctype = acc['@attr']['type']
            if acctype:
                access=acctype
                if access in ['group', 'institution']:
                    access = acc['name'].lower()
                
                docinfo['accessType'] = access

        except Exception:
            # incomplete or unexpected access data; 'except Exception' keeps
            # the best-effort behaviour without swallowing KeyboardInterrupt
            # or SystemExit
            logging.debug("getDocinfoFromAccess: unable to read access type from %s"%repr(acc))
        
        return docinfo

    def getDocinfoFromDigilib(self, docinfo, path):
        # Asks digilib (dirInfo-xml.jsp) for information about the image
        # directory path and stores the content of the 'size' element as
        # docinfo['numPages'] (0 if the element is empty). If nothing can be
        # fetched, docinfo is returned unchanged.
        # NOTE(review): this method has no docstring; as Zope 2 only
        # publishes methods with a docstring, none is added here.
        infoUrl = ''.join((self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path))
        # fetch data
        response = getHttpData(infoUrl)
        if not response:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        size = getText(ET.fromstring(response).find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        docinfo['numPages'] = int(size) if size else 0
            
        # TODO: produce and keep list of image names and numbers
        return docinfo
            
            
    def getDocinfoFromPresentationInfoXml(self,docinfo):
        """Read DC-like bibliographical information from the presentation info.xml.

        The file is located by docinfo['presentationUrl']: a value starting
        with 'http://' is fetched directly, everything else is fetched as
        online path through the Texter servlet of digilib. The text of the
        first 'author', 'title' and 'date' elements is stored as
        docinfo['creator'], docinfo['title'] and docinfo['date'].

        @return: docinfo (unchanged if there is no URL or nothing could be read)
        """
        url = docinfo.get('presentationUrl', None)
        if not url:
            logging.error("getDocinfoFromPresentation: no URL!")
            return docinfo
        
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url
        
        txt=getHttpData(metaUrl)
        if not txt:
            # also covers an empty response, which can not be parsed
            logging.error("Unable to read info.xml from %s"%(metaUrl))
            return docinfo
            
        dom = ET.fromstring(txt)
        docinfo['creator']=getText(dom.find(".//author"))
        docinfo['title']=getText(dom.find(".//title"))
        docinfo['date']=getText(dom.find(".//date"))
        return docinfo
    

    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
        """Return the pageinfo dict describing the requested page.

        @param current: current page number (stored as 'current' and 'pn')
        @param start: first page of the thumbnail batch; if empty, the
            start of the group of rows*cols pages around current is used
        @param rows: thumbnail rows (default self.thumbrows)
        @param cols: thumbnail columns (default self.thumbcols)
        @param docinfo: docinfo dict of the document
        @param viewMode: stored as 'viewMode'
        @param viewType: stored as 'viewType'
        @param tocMode: stored as 'tocMode'

        Besides the paging information ('groupsize', 'start', 'numgroups',
        'pageZero', 'pageBatch') the dict contains the search and table of
        contents parameters read from the request.
        """
        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
        pageinfo = {'viewMode': viewMode, 'viewType': viewType, 'tocMode': tocMode}

        current = getInt(current)
        pageinfo['current'] = current
        pageinfo['pn'] = current
        rows = int(rows or self.thumbrows)
        cols = int(cols or self.thumbcols)
        grpsize = cols * rows
        pageinfo['rows'] = rows
        pageinfo['cols'] = cols
        pageinfo['groupsize'] = grpsize
        # if start is empty use the first page of the group around current
        groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
        start = getInt(start, default=groupStart)
        pageinfo['start'] = start
        
        np = int(docinfo.get('numPages', 0))
        if np == 0 and docinfo.get('textURLPath', None):
            # numPages unknown - maybe we can get it from text page
            # (the text page is kept in pageinfo as well)
            pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
            np = int(docinfo.get('numPages', 0))
                
        # number of groups, counting an incomplete last group
        numgroups = int(np / grpsize)
        if np % grpsize > 0:
            numgroups += 1
        pageinfo['numgroups'] = numgroups

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
                
        # TODO: do we need this here?
        request = self.REQUEST
        textParams = (
            ('characterNormalization', 'reg'),
            ('query', ''),
            ('queryType', ''),
            ('querySearch', 'fulltext'),
            ('highlightQuery', ''),
        )
        for (name, default) in textParams:
            pageinfo[name] = request.get(name, default)

        pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
        pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
        pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
        pageinfo['searchPN'] = getInt(request.get('searchPN','1'))
        
        # limit tocPN to the number of pages of the cached toc
        tocSizeKey = 'tocSize_%s'%tocMode
        if tocSizeKey in docinfo:
            tocSize = docinfo[tocSizeKey]
            tocPageSize = pageinfo['tocPageSize']
            tocPages = tocSize/tocPageSize
            if tocSize%tocPageSize>0:
                # incomplete last page
                tocPages = tocPages+1
                
            pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
            
        return pageinfo


    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """Return a dict with the page information for one screenful of thumbnails.

        @param start: index of the first page on the screen
        @param rows: number of rows
        @param cols: number of columns
        @param pageFlowLtr: if False the pages of a row are in reverse order
        @param pageZero: if True the batches start at index 0 and a screen
            with start=1 begins with index 0
        @param minIdx: lowest valid page index
        @param maxIdx: highest valid page index (0: unknown)

        Keys of the result:
          'batches'   -- list of {'start', 'end'} for all screens
          'pages'     -- list of rows, each a list of {'idx': page index},
                         with idx None outside of minIdx..maxIdx
          'prevStart' -- start of the previous screen (None on the first)
          'nextStart' -- start of the next screen (None if there are no
                         more pages or maxIdx is unknown)
        """
        batch = {}
        grpsize = rows * cols
        maxKnown = (maxIdx != 0)
        if not maxKnown:
            maxIdx = start + grpsize

        nb = int(math.ceil(maxIdx / float(grpsize)))
        # list of all batch start and end points
        batches = []
        if pageZero:
            ofs = 0
        else:
            ofs = 1
            
        for i in range(nb):
            s = i * grpsize + ofs
            e = min((i + 1) * grpsize + ofs - 1, maxIdx)
            batches.append({'start':s, 'end':e})
            
        batch['batches'] = batches

        pages = []
        if pageZero and start == 1:
            # correct beginning
            idx = 0
        else:
            idx = start
            
        for r in range(rows):
            row = []
            for c in range(cols):
                if idx < minIdx or idx > maxIdx:
                    page = {'idx':None}
                else:
                    page = {'idx':idx}
                    
                idx += 1
                if pageFlowLtr:
                    row.append(page)
                else:
                    row.insert(0, page) 
                
            pages.append(row)
            
        if start > 1:
            batch['prevStart'] = max(start - grpsize, 1)
        else:
            batch['prevStart'] = None
            
        # maxIdx is a valid page index, so there is a next screen as long
        # as its first page is not beyond maxIdx
        if maxKnown and start + grpsize <= maxIdx:
            batch['nextStart'] = start + grpsize
        else:
            batch['nextStart'] = None

        batch['pages'] = pages
        return batch
        
    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """Return a dict with the information for one screenful of data.

        @param start: index of the first element of this screen
        @param size: number of elements per screen
        @param end: upper limit of the indexes (0: start + size)
        @param data: optional sequence with the elements
        @param fullData: if True data holds all elements and is indexed
            with the absolute index, if False it holds only the elements
            of this screen

        Keys of the result:
          'batches'   -- list of {'start', 'end'} for all screens
          'this'      -- the elements of this screen (index + 1 without data)
          'prevStart' -- start of the previous screen or None
          'nextStart' -- start of the next screen or None
        """
        if end == 0:
            end = start + size

        # list of all batch start and end points
        nb = int(math.ceil(end / float(size)))
        batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)} for n in range(nb)]

        # list of elements in this batch
        stop = min(start+size, end)
        if not data:
            this = [i + 1 for i in range(start, stop)]
        elif fullData:
            this = [data[i] for i in range(start, stop)]
        else:
            this = [data[j] for j in range(stop - start)]

        prevStart = None
        if start > 1:
            prevStart = max(start - size, 1)

        nextStart = None
        if start + size < end:
            nextStart = start + size

        return {'batches': batches, 'this': this, 'prevStart': prevStart, 'nextStart': nextStart}
        

    # configuration form behind the 'main config' entry of manage_options;
    # protected by the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')    
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
    
    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """init document viewer"""
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata 
            self.metadataService = getattr(self, 'metadata')
        except Exception, e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
        
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer) in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
  
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to the container self.

    imageScalerUrl, textServerName and title are passed to the constructor.
    If RESPONSE is given it is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')