# Mercurial > hg > documentViewer

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from App.ImageFile import ImageFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

import xml.etree.ElementTree as ET

import os
import sys
import urllib
import logging
import math
import urlparse
import re
import string
import json

from Products.MetaDataProvider import MetaDataFolder

from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml

def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The encoding argument is accepted for compatibility but is not used:
    the node is serialized with the default settings of ET.tostring().
    """
    # the former 4Suite implementation (Ft.Xml.Domlette.Print into a
    # cStringIO stream) has been replaced by ElementTree
    return ET.tostring(node)

def getMDText(node):
    """Return the '@text' entry of a MetaDataProvider metadata node.

    A dict node yields its '@text' value (None when the key is missing);
    any other value is handed back unchanged.
    """
    if not isinstance(node, dict):
        # plain value (e.g. string or None): nothing to unwrap
        return node

    return node.get('@text', None)

def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    Trailing slashes are removed first. The remaining path is split at
    '/' and the last cnt segments are dropped.

    @param path: slash-separated path name
    @param cnt: number of segments to remove (values <= 0 remove nothing)
    @return: the shortened path (without trailing slash)
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice like [0:-0] would be empty, so handle "remove nothing" explicitly
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])

def getPnForPf(docinfo, pf, default=0):
    """Look up the image number for the image file name pf.

    Uses the mapping docinfo['imgFileNames']. If pf itself is not found
    the lookup is repeated without the file extension. Returns default
    when there is no mapping or no match.
    """
    if 'imgFileNames' not in docinfo:
        return default

    fileNames = docinfo['imgFileNames']
    number = fileNames.get(pf, None)
    if number is not None:
        # exact match
        return number

    # try again with the extension cut off
    dotPos = pf.rfind('.')
    if dotPos <= 0:
        # no extension to cut
        return default

    return fileNames.get(pf[:dotPos], default)


##
## documentViewer class
##
class documentViewer(Folder):
    """Document viewer: a Folder with page templates for showing documents as text, XML, images or index."""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds a 'Configuration' tab (action changeDocumentViewerForm) to the Folder tabs
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # set in __init__ from the attribute 'metadata' when that is available
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (None = no layers)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_images_annotator = PageTemplateFile('zpt/layer_images_annotator', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())

    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """Set up a new document viewer.

        Stores id, title and the thumbnail grid size, turns authgroups
        (comma-separated) into a list of lower-case group names and creates
        the folder 'template'. A MpdlXmlTextServer ('fulltextclient') and a
        zogiLib ('zogilib') are put into that folder when they can be
        created; failures are only logged. The digilib URLs are only set
        when digilibBaseUrl is given.
        """
        self.id = id
        self.title = title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        groupNames = []
        for groupName in authgroups.split(','):
            groupNames.append(groupName.strip().lower())

        self.authgroups = groupNames

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        self['template'] = templateFolder # Zope-2.12 style

        # full text server client
        try:
            import MpdlXmlTextServer
            templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        except Exception as e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

        # zogilib for the image scaler
        try:
            from Products.zogiLib.zogiLib import zogiLib
            templateFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        except Exception as e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))

        # metadata service
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if digilibBaseUrl is None:
            # digilib URLs stay unset
            return

        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """Return the full text content of a page.

        Proxy: all keyword arguments are passed to getTextPage of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getTextPage(**args)

    def getSearchResults(self, **args):
        """Load the list of search results (stored as XML in docinfo).

        Proxy: all keyword arguments are passed to getSearchResults of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getSearchResults(**args)

    def getResultsPage(self, **args):
        """Return one page of the search results.

        Proxy: all keyword arguments are passed to getResultsPage of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getResultsPage(**args)

    def getTextInfo(self, **args):
        """Return document info from the text server.

        Proxy: all keyword arguments are passed to getTextInfo of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getTextInfo(**args)

    def getToc(self, **args):
        """Load the table of contents (stored as XML in docinfo).

        Proxy: all keyword arguments are passed to getToc of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getToc(**args)

    def getTocPage(self, **args):
        """Return one page of the table of contents.

        Proxy: all keyword arguments are passed to getTocPage of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getTocPage(**args)

    def getRepositoryType(self, **args):
        """Return the repository type.

        Proxy: all keyword arguments are passed to getRepositoryType of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getRepositoryType(**args)

    def getTextDownloadUrl(self, **args):
        """Return the result of getTextDownloadUrl of the text server.

        Proxy: all keyword arguments are passed to getTextDownloadUrl of the
        fulltextclient in the template folder (presumably a download URL
        for the text; check the text server implementation).
        """
        textServer = self.template.fulltextclient
        return textServer.getTextDownloadUrl(**args)

    def getPlacesOnPage(self, **args):
        """Return the list of GIS places on one page.

        Proxy: all keyword arguments are passed to getPlacesOnPage of the
        fulltextclient in the template folder.
        """
        textServer = self.template.fulltextclient
        return textServer.getPlacesOnPage(**args)

    # Thumb list for CoolIris Plugin
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        """Render the thumbnail list with the template 'thumbs_main_rss'.

        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: 'images', 'text' or 'auto'. 'auto' becomes 'text' if
            docinfo has a text URL and 'images' otherwise
        @param start: passed on to getPageinfo
        @param pn: page number, passed on to getPageinfo
        @return: result of the template called with docinfo, pageinfo and viewMode
        """
        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        # digilibBaseUrl is only set by __init__ when a URL was given, so
        # read it with a default (same as index_html) instead of failing
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto":
            # auto mode selected: use text if a text URL is set
            if "textURL" in docinfo or docinfo.get('textURLPath',None):
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    security.declareProtected('View','index_html')
    def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
        """Show a page of the document with the template viewer_$viewMode.

        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
        @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param start: passed on to getPageinfo
        @param pn: page number, passed on to getPageinfo
        @param pf: page image file name, passed on to getPageinfo
        @return: the rendered template or an error message string
        """
        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

        # nothing works without the template folder
        if not hasattr(self, 'template'):
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        # default tocMode: mode=filepath should not have toc-thumbs
        if tocMode is None:
            tocMode = "none" if mode == "filepath" else "thumbs"

        # docinfo: information about document (cached)
        docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
        # userinfo: user settings (cached)
        userinfo = self.getUserinfo()

        if viewMode == "text_dict":
            # legacy name for text with layer dict
            viewMode = "text"
            viewLayer = "dict"

        elif viewMode=="auto":
            # auto viewMode: text if there is a text else images
            # (docinfo 'textURL' is not considered here)
            if docinfo.get('textURLPath', None):
                viewMode = "text"
                # use layer dict as default
                if viewLayer is None and 'viewLayer' not in userinfo:
                    viewLayer = "dict"
            else:
                viewMode = "images"

        # save viewLayer in userinfo
        userinfo['viewLayer'] = viewLayer

        # pageinfo: information about page (not cached)
        pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

        # get template /template/viewer_$viewMode
        templateName = 'viewer_%s'%viewMode
        pt = getattr(self.template, templateName, None)
        if pt is not None:
            # execute with parameters
            return pt(docinfo=docinfo, pageinfo=pageinfo)

        # TODO: error page?
        logging.error("No template for viewMode=%s!"%viewMode)
        return "No template for viewMode=%s!"%viewMode

    def getAvailableLayers(self):
        """Return the dict that maps each viewMode to its list of available layers."""
        layers = self.availableLayers
        return layers

    def findDigilibUrl(self):
        """Return the digilib base URL reported by the zogilib in the template folder."""
        return self.template.zogilib.getDLBaseUrl()

    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """Return a URL to the digilib Scaler with parameters.

        Uses docinfo['imageURL'] as base when available. Otherwise the URL
        is built from self.digilibScalerUrl and fn (or docinfo['imagePath']
        when fn is not given).

        @param fn: image file or directory name
        @param pn: page number (omitted from the URL when empty)
        @param dw: destination width
        @param dh: destination height
        @param docinfo: optional docinfo dict
        @return: URL string
        """
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            if fn is None and docinfo is not None:
                fn = docinfo.get('imagePath','')

            # '?' starts the query string, as in the imageURL built by getDocinfo
            url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

        if pn:
            url += "&pn=%s"%pn

        url += "&dw=%s&dh=%s"%(dw,dh)
        return url

    def getDocumentViewerURL(self):
        """Return the absolute URL of this viewer instance."""
        viewerUrl = self.absolute_url()
        return viewerUrl

    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended when idx equals selected."""
        return (style + 'sel') if idx == selected else style

    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """Return a dict with URL parameters.

        Starts with a copy of the parameters of the current request and
        applies param=val and/or the dict params. A value of None deletes
        the key.

        @param param: name of a single parameter to change
        @param val: new value for param (stored as str), None removes param
        @param params: dict of further parameters to change, None values remove keys
        @param duplicates: how to treat list values (coming from duplicate keys):
            'comma' joins the non-empty entries with ',', 'first' takes the
            first non-empty entry (empty string if there is none), any other
            value leaves lists untouched
        @return: new dict of parameters
        """
        # copy existing request params
        newParams = self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                newParams.pop(param, None)
            else:
                newParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    newParams.pop(k, None)
                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys)
            for (k, v) in list(newParams.items()):
                if not isinstance(v, list):
                    continue

                nonEmpty = [t for t in v if t]
                if duplicates == 'comma':
                    # make comma-separated list of non-empty entries
                    newParams[k] = ','.join(nonEmpty)
                elif duplicates == 'first':
                    # take first non-empty entry; a list with only empty
                    # entries gives '' instead of raising IndexError
                    if nonEmpty:
                        newParams[k] = nonEmpty[0]
                    else:
                        newParams[k] = ''

        return newParams

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
        """Return a URL to the documentviewer with changed parameters.

        The parameters come from getParams (param=val and dict params
        applied to the request parameters). Values are quoted with
        quote_plus (not escaping '/') and joined with paramSep. baseUrl
        defaults to the URL of this viewer.
        """
        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
        # quote values and assemble into query string
        pairs = []
        for (name, value) in urlParams.items():
            quoted = urllib.quote_plus(utf8ify(value), '/')
            pairs.append("%s=%s"%(name, quoted))

        query = paramSep.join(pairs)
        if baseUrl is None:
            baseUrl = self.getDocumentViewerURL()

        return "%s?%s"%(baseUrl, query)

    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
        """Return the same link as getLink but with '&amp;' as parameter separator."""
        linkArgs = {
            'param': param,
            'val': val,
            'params': params,
            'baseUrl': baseUrl,
            'paramSep': '&amp;',
            'duplicates': duplicates,
        }
        return self.getLink(**linkArgs)


    def setAvailableLayers(self, newLayerString=None):
        """Set availableLayers from newLayerString or autodetect the layers.

        newLayerString is parsed as JSON and used if the result contains
        the keys 'text' and 'images'. Otherwise (or if newLayerString is
        None) the layers are the builtin layers plus all layers that have a
        template in the template folder. Layer templates are assumed to
        have ids of the form layer_{m}_{l} for layer l in mode m.

        @param newLayerString: JSON string with a dict mode -> list of layers, or None
        """
        if newLayerString is not None:
            try:
                layers = json.loads(newLayerString)
                if 'text' in layers and 'images' in layers:
                    self.availableLayers = layers
                    return
            except (ValueError, TypeError):
                # not JSON or not a container: fall through to autodetection
                pass

            logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

        # start with builtin layers; copy the lists so that adding layers
        # never modifies the class-level builtinLayers
        layers = {}
        for (m, ls) in self.builtinLayers.items():
            if ls is None:
                layers[m] = None
            else:
                layers[m] = list(ls)

        # add layers from templates
        for t in self.template:
            if not t.startswith('layer_'):
                continue

            # split into 'layer', mode and layer name (name may contain '_')
            parts = t.split('_', 2)
            if len(parts) != 3:
                # no layer name
                continue

            (x, m, l) = parts
            if not m or not l:
                continue

            if layers.get(m) is None:
                # mode m doesn't exist or has no layers yet -> new list
                layers[m] = [l]
            elif l not in layers[m]:
                # m exists -> append
                layers[m].append(l)

        self.availableLayers = layers

    def getAvailableLayersJson(self):
        """Return availableLayers serialized as a JSON string."""
        layers = self.availableLayers
        return json.dumps(layers)


    def getInfo_xml(self,url,mode):
        """Return info about the document as XML (template 'info_xml').

        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        @return: result of the template called with docinfo
        """
        # digilibBaseUrl is only set by __init__ when a URL was given, so
        # read it with a default (same as index_html) instead of failing
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getAuthenticatedUser(self, anon=None):
        """Return the authenticated user object.

        Zope's anonymous user (user name "Anonymous User") does not count
        as authenticated; in that case, or when there is no user, anon is
        returned.
        """
        currentUser = getSecurityManager().getUser()
        if currentUser is None or currentUser.getUserName() == "Anonymous User":
            return anon

        return currentUser

    def isAccessible(self, docinfo):
        """Return True if access to the resource is granted.

        Access type 'free' is always granted. A missing access type or one
        of the configured authgroups requires an authenticated user. Any
        other access type is refused.
        """
        accessType = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%accessType)
        if accessType == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if accessType is not None and accessType not in self.authgroups:
            # neither free nor one of our groups
            logging.error("documentViewer (accessOK) unknown access type %s"%accessType)
            return False

        # only local access -- only logged in users
        user = self.getAuthenticatedUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        return (user is not None)

    def getUserinfo(self):
        """Return the userinfo dict of the current session.

        A new empty dict is stored in the session under 'userinfo' if the
        session has none yet.
        """
        logging.debug("getUserinfo")
        session = self.REQUEST.SESSION
        if session.has_key('userinfo'):
            # cached userinfo (not checked if it is still current)
            return session['userinfo']

        # store new userinfo in session
        userinfo = {}
        session['userinfo'] = userinfo
        return userinfo

    def getDocinfo(self, mode, url, tocMode=None):
        """Return the docinfo dict for the document, depending on mode.

        A docinfo cached in the session is reused if its mode and url
        match. Otherwise a new docinfo is assembled from the index.meta
        (resource, texttool, bib, access, attribution, copyright, DRI and
        context data), the text server and digilib, and stored in the
        session.

        @param mode: 'texttool' (url points to document dir or index.meta),
            'imagepath' (url points to folder with images) or
            'filepath' (url points to image file)
        @param url: url or path of the document
        @param tocMode: not used in this method
        @return: docinfo dict
        @raise IOError: no index.meta found for mode 'texttool'
        @raise ValueError: unknown mode
        """
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        docinfo['digilibScalerUrl'] = self.digilibScalerUrl
        docinfo['digilibViewerUrl'] = self.digilibViewerUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode=="texttool":
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

            docUrl = url.replace('/index.meta', '')
            # strip the online prefix from the already shortened docUrl
            # (starting again from url would bring '/index.meta' back)
            if docUrl.startswith('/mpiwg/online/'):
                docUrl = docUrl.replace('/mpiwg/online/', '', 1)

        elif mode=="imagepath":
            # url points to folder with images, index.meta optional
            # assume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        elif mode=="filepath":
            # url points to image file, index.meta optional
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
            docinfo['numPages'] = 1
            # assume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl
        # process index.meta contents
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info
            texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
                # document info from full text server
                if docinfo.get('textURLPath', None):
                    docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                    # include list of pages TODO: do we need this always?
                    docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                # save extended version as 'bibx' TODO: ugly
                bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
                if len(bibx) == 1:
                    # unwrap list if possible
                    bibx = bibx[0]

                docinfo['bibx'] = bibx
                docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
            else:
                # no bib - try info.xml
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution

            # copyright info
            copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
            if copyrightData:
                logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
                docinfo['copyright'] = copyrightData

            # DRI (permanent ID)
            dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
            if dri:
                docinfo['DRI'] = dri

            # (presentation) context
            ctx = self.metadataService.getContextData(dom=metaDom, all=True)
            if ctx:
                logging.debug("getcontext: ctx=%s"%repr(ctx))
                docinfo['presentationContext'] = ctx

        # image path
        if mode != 'texttool':
            # override image path from texttool with url parameter TODO: how about mode=auto?
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # check numPages
        if docinfo.get('numPages', 0) == 0:
            # number of images from digilib
            if docinfo.get('imagePath', None):
                imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
                logging.debug("imgpath=%s"%imgpath)
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
                docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
            else:
                # imagePath still missing? try "./pageimg"
                imgPath = os.path.join(docUrl, 'pageimg')
                docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
                if docinfo.get('numPages', 0) > 0:
                    # there are pages
                    docinfo['imagePath'] = imgPath
                    docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

        # check numPages again
        if docinfo.get('numPages', 0) == 0:
            if docinfo.get('numTextPages', 0) > 0:
                # replace with numTextPages (text-only?)
                docinfo['numPages'] = docinfo['numTextPages']

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getDocinfoFromResource(self, docinfo, resource):
        """Read the contents of the resource element into docinfo.

        Sets docinfo['documentName'] from the 'name' entry and
        docinfo['documentPath'] from 'archive-path' (made absolute and
        extended by the name if necessary). Without archive-path the
        document URL from docinfo is used, unless it starts with 'http:'.
        A leading '/mpiwg/online' is removed from the path.

        @param docinfo: docinfo dict (modified in place)
        @param resource: dict-like resource data
        @return: docinfo
        """
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
            # (the old spelling 'documentURL' is still accepted)
            docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
            if docUrl and not docUrl.startswith('http:'):
                docPath = docUrl

        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo

    def getDocinfoFromTexttool(self, docinfo, texttool):
        """Read the contents of the texttool element into docinfo.

        Sets (when the data is present) 'imagePath', 'textURL',
        'textURLPath', 'textURLRepository' and 'presentationUrl', and always
        sets 'pageFlow', 'oddPage' and 'titlePage' (with defaults 'ltr',
        'left' and 1).

        @param docinfo: docinfo dict (modified in place)
        @param texttool: dict-like texttool data or list of them (first is used)
        @return: docinfo
        """
        logging.debug("texttool=%s"%repr(texttool))
        # unpack list if necessary
        if isinstance(texttool, list):
            texttool = texttool[0]

        # image dir
        imageDir = getMDText(texttool.get('image', None))
        docPath = getMDText(docinfo.get('documentPath', None))
        if imageDir and docPath:
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        # old style text URL
        textUrl = getMDText(texttool.get('text', None))
        if textUrl and docPath:
            if urlparse.urlparse(textUrl)[0] == "":
                # no scheme, i.e. not a URL: path relative to the document
                textUrl = os.path.join(docPath, textUrl)

            docinfo['textURL'] = textUrl

        # new style text-url-path (can be more than one with "repository" attribute)
        textUrlNode = texttool.get('text-url-path', None)
        if not isinstance(textUrlNode, list):
            textUrlNode = [textUrlNode]

        for tun in textUrlNode:
            textUrl = getMDText(tun)
            if textUrl:
                # only dict nodes can carry attributes; getMDText also
                # accepts plain values which have no get()
                textUrlAtts = None
                if isinstance(tun, dict):
                    textUrlAtts = tun.get('@attr')

                if (textUrlAtts and 'repository' in textUrlAtts):
                    textRepo = textUrlAtts['repository']
                    # use matching repository
                    if self.getRepositoryType() == textRepo:
                        docinfo['textURLPath'] = textUrl
                        docinfo['textURLRepository'] = textRepo
                        break

                else:
                    # no repo attribute - use always
                    docinfo['textURLPath'] = textUrl

        # page flow
        docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

        # odd pages are left
        docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

        # number of title page (default 1)
        docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

        # old presentation stuff
        presentation = getMDText(texttool.get('presentation', None))
        if presentation and docPath:
            if presentation.startswith('http:'):
                docinfo['presentationUrl'] = presentation
            else:
                docinfo['presentationUrl'] = os.path.join(docPath, presentation)

        return docinfo

    def getDocinfoFromBib(self, docinfo, bib, bibx=None):
        """Read the contents of the bib element into docinfo.

        Stores the raw bib data as 'bib', its '@type' as 'bibType' and the
        DC-mapped fields 'creator', 'title' and 'date' (empty string when
        missing). bibx is not used here.
        """
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # all raw bib fields and the bib type
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # also store DC metadata for convenience
        dcData = self.metadataService.getDCMappedData(bib)
        for field in ('creator', 'title', 'date'):
            docinfo[field] = dcData.get(field, '')

        return docinfo

    def getDocinfoFromAccess(self, docinfo, acc):
        """Read the contents of the access element into docinfo.

        Sets docinfo['accessType'] to the 'type' attribute of acc. For the
        types 'group' and 'institution' the lower-cased 'name' entry is
        used instead. docinfo is left unchanged if the type is empty or
        the data does not have the expected structure.

        @param docinfo: docinfo dict (modified in place)
        @param acc: access data with '@attr' and optionally 'name'
        @return: docinfo
        """
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            access = acc['@attr']['type']
            if access:
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (KeyError, TypeError, AttributeError) as e:
            # missing or unexpected access data: best effort, keep docinfo as it is
            logging.debug("getDocinfoFromAccess: no usable access type (%s)"%repr(e))

        return docinfo

    def getDocinfoFromDigilib(self, docinfo, path):
        """Read the number of pages and the image file names from digilib.

        Fetches dirInfo-xml.jsp for path from the digilib server. Sets
        docinfo['numPages'] from the 'size' element (0 if empty or missing)
        and docinfo['imgFileNames'] to a dict mapping each file name to its
        index. docinfo is returned unchanged if no data could be loaded.

        @param docinfo: docinfo dict (modified in place)
        @param path: image directory path
        @return: docinfo
        """
        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
        # fetch data
        txt = getHttpData(infoUrl)
        if not txt:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        dirInfo = ET.fromstring(txt)
        # save size
        size = dirInfo.findtext('size')
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        if not size:
            docinfo['numPages'] = 0
            return docinfo

        docinfo['numPages'] = int(size)

        # save list of image names and numbers
        imgNames = {}
        for f in dirInfo:
            fn = f.findtext('name')
            if fn is None:
                # child without a name (e.g. the size element) is no image entry
                continue

            imgNames[fn] = getInt(f.findtext('index'))

        docinfo['imgFileNames'] = imgNames
        return docinfo


    def getDocinfoFromPresentationInfoXml(self,docinfo):
        """Get DC-like bibliographical information from the presentation entry in texttools.

        Loads the info.xml given in docinfo['presentationUrl'] (directly if
        it is a http URL, otherwise through the digilib Texter servlet) and
        sets 'creator', 'title' and 'date' from its author, title and date
        elements. docinfo is returned unchanged if there is no URL or no
        data could be loaded.

        @param docinfo: docinfo dict (modified in place)
        @return: docinfo
        """
        url = docinfo.get('presentationUrl', None)
        if not url:
            logging.error("getDocinfoFromPresentation: no URL!")
            return docinfo

        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

        txt = getHttpData(metaUrl)
        if not txt:
            # nothing to parse (None or empty response)
            logging.error("Unable to read info.xml from %s"%(url))
            return docinfo

        dom = ET.fromstring(txt)
        docinfo['creator']=getText(dom.find(".//author"))
        docinfo['title']=getText(dom.find(".//title"))
        docinfo['date']=getText(dom.find(".//date"))
        return docinfo


    def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
        """returns pageinfo with the given parameters

        Builds the dict describing the current view: the page number (pn, or
        looked up from the image file name pf, which takes precedence), the
        thumbnail grid (rows, cols, start, groupsize, numgroups, pageBatch),
        view mode and layers, toc and search paging and, if docinfo has
        pageNumbers, the original page numbers.

        Side effects: reads self.REQUEST; when pf is given 'pf' is replaced
        by 'pn' in the request form; docinfo['numPages'] is filled from
        numTextPages when it is 0; a query in the request triggers
        self.getSearchResults. docinfo may be omitted. userinfo is not used
        in this method."""
        logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
        if docinfo is None:
            # docinfo is optional in the signature but read throughout
            docinfo = {}

        pageinfo = {}
        pageinfo['viewMode'] = viewMode
        # split viewLayer if necessary
        if isinstance(viewLayer,basestring):
            viewLayer = viewLayer.split(',')

        if isinstance(viewLayer, list):
            logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
            # save (unique) list in viewLayers
            # (seen.add() returns None, so "not seen.add(l)" records l and is always true)
            seen = set()
            viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
            pageinfo['viewLayers'] = viewLayers
            # stringify viewLayer
            viewLayer = ','.join(viewLayers)
        else:
            #create list
            pageinfo['viewLayers'] = [viewLayer]

        pageinfo['viewLayer'] = viewLayer
        pageinfo['tocMode'] = tocMode

        # pf takes precedence over pn
        if pf:
            pageinfo['pf'] = pf
            pn = getPnForPf(docinfo, pf)
            # replace pf in request params (used for creating new URLs)
            self.REQUEST.form.pop('pf', None)
            self.REQUEST.form['pn'] = pn
        else:
            pn = getInt(pn, 1)

        pageinfo['pn'] = pn
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # if start is empty use the first page of the group around pn
        start = getInt(start, default=(math.ceil(float(pn)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        # get number of pages
        np = int(docinfo.get('numPages', 0))
        if np == 0:
            # try numTextPages (normalized like numPages for the arithmetic below)
            np = int(docinfo.get('numTextPages', 0))
            if np != 0:
                docinfo['numPages'] = np

        # table of contents paging
        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
        # number of thumbnail groups, rounded up
        pageinfo['numgroups'] = int(np / grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
        # more page parameters
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        if docinfo.get('pageNumbers'):
            # get original page numbers
            pageNumber = docinfo['pageNumbers'].get(pn, None)
            if pageNumber is not None:
                pageinfo['pageNumberOrig'] = pageNumber['no']
                pageinfo['pageNumberOrigNorm'] = pageNumber['non']

        # search result paging
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        query = self.REQUEST.get('query',None)
        pageinfo['query'] = query
        if query:
            queryType = self.REQUEST.get('queryType', 'fulltextMorph')
            pageinfo['queryType'] = queryType
            pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
            self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

            # highlighting
            highlightQuery = self.REQUEST.get('highlightQuery', None)
            if highlightQuery:
                pageinfo['highlightQuery'] = highlightQuery
                pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
                pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

        return pageinfo


    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """returns dict with array of page informations for one screenfull of thumbnails

        Keys of the result: 'batches' (start and end of every screenfull),
        'pages' (rows of {'idx': n} cells, idx is None outside
        minIdx..maxIdx), 'prevStart' and 'nextStart' (None if there is no
        such screenfull), 'first' (minIdx) and 'last' (maxIdx). A maxIdx of
        0 is replaced by start + rows*cols."""
        perScreen = rows * cols
        if maxIdx == 0:
            maxIdx = start + perScreen

        # start and end points of all screenfulls; counting begins at 0
        # when there is a zeroth page
        firstIdx = 0 if pageZero else 1
        numScreens = int(math.ceil(maxIdx / float(perScreen)))
        batches = [{'start': n * perScreen + firstIdx,
                    'end': min((n + 1) * perScreen + firstIdx - 1, maxIdx)}
                   for n in range(numScreens)]

        # the very first screenfull begins with the zeroth page if there is one
        rowBase = 0 if (pageZero and start == 1) else start
        pages = []
        for _ in range(rows):
            cells = [{'idx': n if minIdx <= n <= maxIdx else None}
                     for n in range(rowBase, rowBase + cols)]
            if not pageFlowLtr:
                # right-to-left page flow: mirror the row
                cells.reverse()

            pages.append(cells)
            rowBase += cols

        hasPrev = start > 1
        hasNext = start + perScreen <= maxIdx
        return {
            'batches': batches,
            'pages': pages,
            'prevStart': max(start - perScreen, 1) if hasPrev else None,
            'nextStart': start + perScreen if hasNext else None,
            'first': minIdx,
            'last': maxIdx,
        }

    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """returns dict with information for one screenfull of data.

        start is the index of the first element of this batch, size the
        number of elements per batch and end the (inclusive) index of the
        last element overall; an end of 0 is replaced by start + size.

        data is read with data.get(): with fullData the keys are the
        absolute indexes start, start+1, ...; otherwise the keys are the
        offsets 0, 1, ... inside this batch.

        Keys of the result: 'batches' (start and end of every batch), 'this'
        (elements of this batch), 'prevStart' and 'nextStart' (None if there
        is no such batch), 'first' (start) and 'last' (end)."""
        batch = {}
        if end == 0:
            end = start + size

        nb = int(math.ceil(end / float(size)))
        # list of all batch start and end points
        batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                            for i in range(nb)]
        # list of elements in this batch
        this = []
        j = 0
        for i in range(start, min(start+size, end+1)):
            if data:
                if fullData:
                    d = data.get(i, None)
                else:
                    d = data.get(j, None)
                    j += 1

            else:
                # NOTE(review): without data the element is i+1, not i -- confirm this is intended
                d = i+1

            this.append(d)

        batch['this'] = this
        if start > 1:
            batch['prevStart'] = max(start - size, 1)
        else:
            batch['prevStart'] = None

        # end is inclusive: an element at start+size means there is a next
        # batch (same test as in getPageBatch)
        if start + size <= end:
            batch['nextStart'] = start + size
        else:
            batch['nextStart'] = None

        batch['first'] = start
        batch['last'] = end
        return batch


    def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
        """returns list of groups {name:*, id:*, uri:*} on the annotation server for the user

        Requests <annotationServerUrl>/annotator/groups?user=<user> and
        reads the 'rows' of the JSON answer. Returns an empty list if the
        server gives no data, the answer is not a JSON object or it has no
        rows."""
        groups = []
        groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
        data = getHttpData(url=groupsUrl, noExceptions=True)
        if not data:
            return groups

        # the request itself is made without exceptions, so an unreadable
        # answer should not raise either
        try:
            res = json.loads(data)
        except ValueError as e:
            logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
            return groups

        if not isinstance(res, dict):
            logging.error("getAnnotatorGroupsForUser: unexpected answer from %s"%(groupsUrl))
            return groups

        rows = res.get('rows', None)
        if rows is None:
            return groups

        for r in rows:
            groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

        return groups


    # declare changeDocumentViewerForm protected by the 'View management screens' permission
    # (security is presumably the class's ClassSecurityInfo -- defined outside this section)
    security.declareProtected('View management screens','changeDocumentViewerForm')
    # management form, page template loaded from zpt/changeDocumentViewer
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
        """init document viewer

        Stores title, thumbnail grid size (thumbrows, thumbcols), the
        lower-cased, comma-separated authgroups and the available layers.
        If digilibBaseUrl is given the digilib base, Scaler and viewer URLs
        are set from it; if it is None they are left as they are. Also
        looks up the attribute 'metadata' as metadataService. Redirects to
        manage_main if RESPONSE is given."""
        self.title=title
        if digilibBaseUrl is not None:
            # the default None can't be concatenated below
            self.digilibBaseUrl = digilibBaseUrl
            self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
            self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        self.setAvailableLayers(availableLayers)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

def manage_AddDocumentViewerForm(self):
    """renders the form for adding a viewer (page template zpt/addDocumentViewer)"""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template around self before calling it
    boundTemplate = template.__of__(self)
    return boundTemplate()

def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """creates a documentViewer with the given id and adds it to self

    Redirects to manage_main if RESPONSE is given."""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
author	casties
date	Thu, 15 Nov 2012 17:58:14 +0100
parents	6000c7e24d8a
children	ed4485d2748e