Changeset 513:67095296c95a in documentViewer for documentViewer.py


Ignore:
Timestamp:
Feb 28, 2012, 6:10:08 PM (12 years ago)
Author:
casties
Branch:
default
Parents:
497:73fb73577961 (diff), 512:92a6443a6f16 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge from elementtree branch
92a6443a6f16ff25674d43814ec0d6c0a43a5e1a

File:
1 edited

Legend:

Unmodified
Added
Removed
  • documentViewer.py

    r452 r513  
    1 
    21from OFS.Folder import Folder
    32from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
    4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
     3from Products.PageTemplates.PageTemplateFile import PageTemplateFile
     4from App.ImageFile import ImageFile
    55from AccessControl import ClassSecurityInfo
    66from AccessControl import getSecurityManager
    77from Globals import package_home
    8 from Products.zogiLib.zogiLib import browserCheck
    9 
    10 from Ft.Xml import EMPTY_NAMESPACE, Parse
    11 import Ft.Xml.Domlette
    12 import os.path
     8
     9import xml.etree.ElementTree as ET
     10
     11import os
    1312import sys
    1413import urllib
    15 import urllib2
    1614import logging
    1715import math
    1816import urlparse
    19 import cStringIO
    2017import re
    2118import string
    2219
    23 def logger(txt,method,txt2):
    24     """logging"""
    25     logging.info(txt+ txt2)
    26    
    27    
    28 def getInt(number, default=0):
    29     """returns always an int (0 in case of problems)"""
    30     try:
    31         return int(number)
    32     except:
    33         return int(default)
    34 
    35 def getTextFromNode(nodename):
    36     """get the cdata content of a node"""
    37     if nodename is None:
    38         return ""
    39     nodelist=nodename.childNodes
    40     rc = ""
    41     for node in nodelist:
    42         if node.nodeType == node.TEXT_NODE:
    43            rc = rc + node.data
    44     return rc
    45 
     20from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
     21   
    4622def serializeNode(node, encoding="utf-8"):
    4723    """returns a string containing node as XML"""
    48     stream = cStringIO.StringIO()
    49     #logging.debug("BUF: %s"%(stream))
    50     Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    51     s = stream.getvalue()
    52     #logging.debug("BUF: %s"%(s))
    53     stream.close()
     24    s = ET.tostring(node)
     25   
     26    # 4Suite:
     27    #    stream = cStringIO.StringIO()
     28    #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
     29    #    s = stream.getvalue()
     30    #    stream.close()
    5431    return s
    5532
     
    11592    return bt
    11693
    117        
    118 def getParentDir(path):
    119     """returns pathname shortened by one"""
    120     return '/'.join(path.split('/')[0:-1])
    121        
    122 
    123 def getHttpData(url, data=None, num_tries=3, timeout=10):
    124     """returns result from url+data HTTP request"""
    125     # we do GET (by appending data to url)
    126     if isinstance(data, str) or isinstance(data, unicode):
    127         # if data is string then append
    128         url = "%s?%s"%(url,data)
    129     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
    130         # urlencode
    131         url = "%s?%s"%(url,urllib.urlencode(data))
    132    
    133     response = None
    134     errmsg = None
    135     for cnt in range(num_tries):
    136         try:
    137             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
    138             if sys.version_info < (2, 6):
    139                 # set timeout on socket -- ugly :-(
    140                 import socket
    141                 socket.setdefaulttimeout(float(timeout))
    142                 response = urllib2.urlopen(url)
    143             else:
    144                 response = urllib2.urlopen(url,timeout=float(timeout))
    145             # check result?
    146             break
    147         except urllib2.HTTPError, e:
    148             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
    149             errmsg = str(e)
    150             # stop trying
    151             break
    152         except urllib2.URLError, e:
    153             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
    154             errmsg = str(e)
    155             # stop trying
    156             #break
    157 
    158     if response is not None:
    159         data = response.read()
    160         response.close()
    161         return data
    162    
    163     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
    164     #return None
     94def getParentPath(path, cnt=1):
     95    """returns pathname shortened by cnt"""
     96    # make sure path doesn't end with /
     97    path = path.rstrip('/')
     98    # split by /, shorten, and reassemble
     99    return '/'.join(path.split('/')[0:-cnt])
    165100
    166101##
     
    173108    security=ClassSecurityInfo()
    174109    manage_options=Folder.manage_options+(
    175         {'label':'main config','action':'changeDocumentViewerForm'},
     110        {'label':'Configuration','action':'changeDocumentViewerForm'},
    176111        )
     112   
     113    metadataService = None
     114    """MetaDataFolder instance"""
    177115
    178116    # templates and forms
    179     viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     117    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
     118    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
     119    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
     120    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    180121    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    181122    toc_text = PageTemplateFile('zpt/toc_text', globals())
    182123    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    183     page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    184     page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    185     page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    186     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    187     page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    188     page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    189     page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    190     head_main = PageTemplateFile('zpt/head_main', globals())
    191     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
     124    toc_none = PageTemplateFile('zpt/toc_none', globals())
     125    common_template = PageTemplateFile('zpt/common_template', globals())
     126    search_template = PageTemplateFile('zpt/search_template', globals())
    192127    info_xml = PageTemplateFile('zpt/info_xml', globals())
    193    
    194    
    195     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    196     security.declareProtected('View management screens','changeDocumentViewerForm')   
    197     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
    198 
     128    docuviewer_css = ImageFile('css/docuviewer.css',globals())
     129    # make ImageFile better for development
     130    docuviewer_css.index_html = refreshingImageFileIndexHtml
     131    jquery_js = ImageFile('js/jquery.js',globals())
     132   
    199133   
    200134    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
     
    209143       
    210144        templateFolder = Folder('template')
    211         #self['template'] = templateFolder # Zope-2.12 style
    212         self._setObject('template',templateFolder) # old style
     145        self['template'] = templateFolder # Zope-2.12 style
     146        #self._setObject('template',templateFolder) # old style
    213147        try:
    214148            import MpdlXmlTextServer
    215149            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    216             #templateFolder['fulltextclient'] = xmlRpcClient
    217             templateFolder._setObject('fulltextclient',textServer)
     150            templateFolder['fulltextclient'] = textServer
     151            #templateFolder._setObject('fulltextclient',textServer)
    218152        except Exception, e:
    219153            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
     154           
    220155        try:
    221156            from Products.zogiLib.zogiLib import zogiLib
    222157            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    223             #templateFolder['zogilib'] = zogilib
    224             templateFolder._setObject('zogilib',zogilib)
     158            templateFolder['zogilib'] = zogilib
     159            #templateFolder._setObject('zogilib',zogilib)
    225160        except Exception, e:
    226161            logging.error("Unable to create zogiLib for zogilib: "+str(e))
    227        
     162           
     163        try:
     164            # assume MetaDataFolder instance is called metadata
     165            self.metadataService = getattr(self, 'metadata')
     166        except Exception, e:
     167            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
     168           
     169        if digilibBaseUrl is not None:
     170            self.digilibBaseUrl = digilibBaseUrl
     171           
    228172       
    229173    # proxy text server methods to fulltextclient
    230174    def getTextPage(self, **args):
    231         """get page"""
     175        """returns full text content of page"""
    232176        return self.template.fulltextclient.getTextPage(**args)
    233177
    234     def getOrigPages(self, **args):
    235         """get page"""
    236         return self.template.fulltextclient.getOrigPages(**args)
    237    
    238     def getOrigPagesNorm(self, **args):
    239         """get page"""
    240         return self.template.fulltextclient.getOrigPagesNorm(**args)
    241 
    242     def getQuery(self, **args):
    243         """get query in search"""
    244         return self.template.fulltextclient.getQuery(**args)
    245      
    246     def getSearch(self, **args):
    247         """get search"""
    248         return self.template.fulltextclient.getSearch(**args)
    249    
    250     def getGisPlaces(self, **args):
    251         """get gis places"""
    252         return self.template.fulltextclient.getGisPlaces(**args)
     178    def getSearchResults(self, **args):
     179        """proxy for template.fulltextclient.getSearchResults: passes all keyword args through and returns its result (per the original note it loads the list of search results and stores XML in docinfo -- implemented in the text server, not here)"""
     180        return self.template.fulltextclient.getSearchResults(**args)
     181
     182    def getResultsPage(self, **args):
     183        """proxy for template.fulltextclient.getResultsPage: passes all keyword args through and returns its result (per the original note: one page of the search results)"""
     184        return self.template.fulltextclient.getResultsPage(**args)
     185
     186    def getToc(self, **args):
     187        """proxy for template.fulltextclient.getToc: passes all keyword args through and returns its result (per the original note it loads the table of contents and stores XML in docinfo -- implemented in the text server, not here)"""
     188        return self.template.fulltextclient.getToc(**args)
     189
     190    def getTocPage(self, **args):
     191        """proxy for template.fulltextclient.getTocPage: passes all keyword args through and returns its result (per the original note: one page of the table of contents)"""
     192        return self.template.fulltextclient.getTocPage(**args)
     193
     194    def getPlacesOnPage(self, **args):
     195        """proxy for template.fulltextclient.getPlacesOnPage: passes all keyword args through and returns its result (per the original note: list of GIS places on one page)"""
     196        return self.template.fulltextclient.getPlacesOnPage(**args)
    253197 
    254     def getAllGisPlaces(self, **args):
    255         """get all gis places """
    256         return self.template.fulltextclient.getAllGisPlaces(**args)
    257        
    258     def getTranslate(self, **args):
    259         """get translate"""
    260         return self.template.fulltextclient.getTranslate(**args)
    261 
    262     def getLemma(self, **args):
    263         """get lemma"""
    264         return self.template.fulltextclient.getLemma(**args)
    265 
    266     def getLemmaQuery(self, **args):
    267         """get query"""
    268         return self.template.fulltextclient.getLemmaQuery(**args)
    269 
    270     def getLex(self, **args):
    271         """get lex"""
    272         return self.template.fulltextclient.getLex(**args)
    273 
    274     def getToc(self, **args):
    275         """get toc"""
    276         return self.template.fulltextclient.getToc(**args)
    277 
    278     def getTocPage(self, **args):
    279         """get tocpage"""
    280         return self.template.fulltextclient.getTocPage(**args)
    281 
    282    
     198    #WTF?
     199    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    283200    security.declareProtected('View','thumbs_rss')
    284201    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
     
    291208        '''
    292209        logging.debug("HHHHHHHHHHHHHH:load the rss")
    293         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
     210        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
    294211       
    295212        if not hasattr(self, 'template'):
     
    313230               
    314231        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
     232
    315233 
    316234    security.declareProtected('View','index_html')
    317     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    318         '''
    319         view it
     235    def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
     236        """
     237        view page
     238        @param url: url which contains display information
    320239        @param mode: defines how to access the document behind url
    321         @param url: url which contains display information
    322         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
     240        @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
     241        @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    323242        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    324         @param characterNormalization type of text display (reg, norm, none)
    325         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    326         '''
    327        
    328         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
     243        """
     244       
     245        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))
    329246       
    330247        if not hasattr(self, 'template'):
     
    340257        if tocMode != "thumbs":
    341258            # get table of contents
    342             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
    343            
    344         if viewMode=="auto": # automodus gewaehlt
    345             if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
    346                 viewMode="text_dict"
     259            self.getToc(mode=tocMode, docinfo=docinfo)
     260
     261        # auto viewMode: text if there is a text else images
     262        if viewMode=="auto":
     263            if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
     264                viewMode = "text"
     265                if viewLayer is None:
     266                    viewLayer = "dict"
    347267            else:
    348                 viewMode="images"
     268                viewMode = "images"
    349269               
    350         pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
    351        
    352         if (docinfo.get('textURLPath',None)):
    353             page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)
    354             pageinfo['textPage'] = page
    355         tt = getattr(self, 'template')   
    356         pt = getattr(tt, 'viewer_main')               
    357         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
     270        elif viewMode == "text_dict":
     271            # legacy fix
     272            viewMode = "text"
     273            viewLayer = "dict"
     274           
     275        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)
     276                   
     277        # get template /template/viewer_$viewMode
     278        pt = getattr(self.template, 'viewer_%s'%viewMode, None)
     279        if pt is None:
     280            logging.error("No template for viewMode=%s!"%viewMode)
     281            # TODO: error page?
     282            return "No template for viewMode=%s!"%viewMode
     283       
     284        # and execute with parameters
     285        return pt(docinfo=docinfo, pageinfo=pageinfo)
    358286 
     287    #WTF?
    359288    def generateMarks(self,mk):
    360289        ret=""
     
    378307        url = self.template.zogilib.getDLBaseUrl()
    379308        return url
     309   
    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """returns URL to digilib Scaler with params.

        If docinfo contains 'imageURL' that URL is used as the base and fn is
        ignored. Otherwise the URL is built from self.digilibBaseUrl and fn,
        where fn defaults to docinfo['imagePath'] or, if that is unavailable,
        to the empty string.

        @param fn: image path for the Scaler (optional)
        @param pn: page number; only appended if it is set (non-zero/non-empty)
        @param dw: destination width parameter
        @param dh: destination height parameter
        @param docinfo: docinfo dict (optional)
        @return: Scaler URL as string
        """
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            if fn is None:
                # fall back to the image path from docinfo, never emit "fn=None"
                fn = ''
                if docinfo is not None:
                    fn = docinfo.get('imagePath', None) or ''

            url = "%s/servlet/Scaler?fn=%s"%(self.digilibBaseUrl, fn)

        if pn:
            url += "&pn=%s"%pn

        url += "&dw=%s&dh=%s"%(dw,dh)
        return url
    380328
    381329    def getDocumentViewerURL(self):
     
    384332   
    385333    def getStyle(self, idx, selected, style=""):
    386         """returns a string with the given style and append 'sel' if path == selected."""
     334        """returns a string with the given style and append 'sel' if idx == selected."""
    387335        #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
    388336        if idx == selected:
     
    391339            return style
    392340   
    393     def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    394         """returns URL to documentviewer with parameter param set to val or from dict params"""
    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """returns dict with URL parameters.

        Takes URL parameters and additionally param=val or dict params.
        Deletes key if value is None.

        @param param: name of a single parameter to set or delete
        @param val: new value for param (stored as str); None deletes param
        @param params: dict of further parameters; None values delete the key
        @param duplicates: how to collapse list values (from duplicate keys):
            'comma' joins the non-empty entries with ',',
            'first' takes the first non-empty entry ('' if all are empty),
            any other value leaves lists untouched
        @return: new dict; the request form itself is not modified
        """
        # copy existing request params
        newParams = self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                if param in newParams:
                    del newParams[param]
            else:
                newParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    if k in newParams:
                        del newParams[k]
                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys);
            # iterate over a snapshot because values are reassigned in the loop
            for (k, v) in list(newParams.items()):
                if isinstance(v, list):
                    nonEmpty = [t for t in v if t]
                    if duplicates == 'comma':
                        # make comma-separated list of non-empty entries
                        newParams[k] = ','.join(nonEmpty)
                    elif duplicates == 'first':
                        # take first non-empty entry; an all-empty list
                        # must not raise IndexError
                        if nonEmpty:
                            newParams[k] = nonEmpty[0]
                        else:
                            newParams[k] = ''

        return newParams
     379   
     380    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
     381        """returns URL to documentviewer with parameter param set to val or from dict params"""
     382        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    422383        # quote values and assemble into query string (not escaping '/')
    423         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
    424         #ps = urllib.urlencode(urlParams)
     384        ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
    425385        if baseUrl is None:
    426             baseUrl = self.REQUEST['URL1']
     386            baseUrl = self.getDocumentViewerURL()
    427387           
    428388        url = "%s?%s"%(baseUrl, ps)
    429389        return url
    430390
    431 
    432     def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
     391    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    433392        """link to documentviewer with parameter param set to val"""
    434         return self.getLink(param, val, params, baseUrl, '&amp;')
     393        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
     394   
    435395   
    436396    def getInfo_xml(self,url,mode):
    437397        """returns info about the document as XML"""
    438 
    439398        if not self.digilibBaseUrl:
    440399            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
     
    444403        return pt(docinfo=docinfo)
    445404
    446     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    447         """returns new option state"""
    448         if not self.REQUEST.SESSION.has_key(optionName):
    449             # not in session -- initial
    450             opt = {'lastState': newState, 'state': initialState}
    451         else:
    452             opt = self.REQUEST.SESSION.get(optionName)
    453             if opt['lastState'] != newState:
    454                 # state in session has changed -- toggle
    455                 opt['state'] = not opt['state']
    456                 opt['lastState'] = newState
    457        
    458         self.REQUEST.SESSION[optionName] = opt
    459         return opt['state']
    460    
    461405    def isAccessible(self, docinfo):
    462406        """returns if access to the resource is granted"""
    463407        access = docinfo.get('accessType', None)
    464408        logging.debug("documentViewer (accessOK) access type %s"%access)
    465         if access is not None and access == 'free':
     409        if access == 'free':
    466410            logging.debug("documentViewer (accessOK) access is free")
    467411            return True
     412       
    468413        elif access is None or access in self.authgroups:
    469414            # only local access -- only logged in users
     
    479424        return False
    480425   
     426
     427
     428    def getDocinfo(self, mode, url):
     429        """returns docinfo depending on mode"""
     430        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
     431        # look for cached docinfo in session
     432        if self.REQUEST.SESSION.has_key('docinfo'):
     433            docinfo = self.REQUEST.SESSION['docinfo']
     434            # check if its still current
     435            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
     436                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
     437                return docinfo
     438           
     439        # new docinfo
     440        docinfo = {'mode': mode, 'url': url}
     441        # add self url
     442        docinfo['viewerUrl'] = self.getDocumentViewerURL()
     443        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
     444        # get index.meta DOM
     445        docUrl = None
     446        metaDom = None
     447        if mode=="texttool":
     448            # url points to document dir or index.meta
     449            metaDom = self.metadataService.getDomFromPathOrUrl(url)
     450            docUrl = url.replace('/index.meta', '')
     451            if metaDom is None:
     452                raise IOError("Unable to find index.meta for mode=texttool!")
     453
     454        elif mode=="imagepath":
     455            # url points to folder with images, index.meta optional
     456            # asssume index.meta in parent dir
     457            docUrl = getParentPath(url)
     458            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
     459
     460        elif mode=="filepath":
     461            # url points to image file, index.meta optional
     462            # asssume index.meta is two path segments up
     463            docUrl = getParentPath(url, 2)
     464            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
     465
     466        else:
     467            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
     468            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
     469       
     470        docinfo['documentUrl'] = docUrl
     471        # process index.meta contents
     472        if metaDom is not None and metaDom.tag == 'resource':
     473            # document directory name and path
     474            resource = self.metadataService.getResourceData(dom=metaDom)
     475            if resource:
     476                docinfo = self.getDocinfoFromResource(docinfo, resource)
     477
     478            # texttool info
     479            texttool = self.metadataService.getTexttoolData(dom=metaDom)
     480            if texttool:
     481                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
     482           
     483            # bib info
     484            bib = self.metadataService.getBibData(dom=metaDom)
     485            if bib:
     486                docinfo = self.getDocinfoFromBib(docinfo, bib)
     487            else:
     488                # no bib - try info.xml
     489                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
    481490               
    482     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    483         """gibt param von dlInfo aus"""
    484         if docinfo is None:
    485             docinfo = {}
    486        
    487         for x in range(cut):
    488                
    489                 path=getParentDir(path)
    490        
     491            # auth info
     492            access = self.metadataService.getAccessData(dom=metaDom)
     493            if access:
     494                docinfo = self.getDocinfoFromAccess(docinfo, access)
     495
     496            # attribution info
     497            attribution = self.metadataService.getAttributionData(dom=metaDom)
     498            if attribution:
     499                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
     500                docinfo['attribution'] = attribution
     501                #docinfo = self.getDocinfoFromAccess(docinfo, access)
     502
     503            # copyright info
     504            copyright = self.metadataService.getCopyrightData(dom=metaDom)
     505            if copyright:
     506                logging.debug("getDocinfo: copyright=%s"%repr(copyright))
     507                docinfo['copyright'] = copyright
     508                #docinfo = self.getDocinfoFromAccess(docinfo, access)
     509
     510        # image path
     511        if mode != 'texttool':
     512            # override image path from texttool with url
     513            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
     514
     515        # number of images from digilib
     516        if docinfo.get('imagePath', None):
     517            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
     518            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
     519
     520        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
     521        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
     522        # store in session
     523        self.REQUEST.SESSION['docinfo'] = docinfo
     524        return docinfo
     525
    def getDocinfoFromResource(self, docinfo, resource):
        """reads contents of resource element into docinfo.

        Sets docinfo['documentName'] from resource 'name' and
        docinfo['documentPath'] from resource 'archive-path'. Without an
        archive-path the document URL from docinfo is used as path, unless it
        is a http: URL. A leading '/mpiwg/online' is removed from the path.

        @param docinfo: docinfo dict (is modified)
        @param resource: dict-like with resource data ('name', 'archive-path')
        @return: the modified docinfo
        """
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath.
            # getDocinfo stores it under 'documentUrl'; the former lookup of
            # 'documentURL' raised KeyError. The old spelling is still accepted.
            docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
            if docUrl and not docUrl.startswith('http:'):
                docPath = docUrl

        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo
     550
    def getDocinfoFromTexttool(self, docinfo, texttool):
        """Copy the settings of a texttool mapping into docinfo.

        docinfo  -- dict that is updated in place and returned; its
                    'documentPath' entry (if any) is the base for relative paths.
        texttool -- mapping read for the keys 'image', 'text', 'text-url-path',
                    'page-flow', 'odd-scan-position', 'title-scan-no' and
                    'presentation'.
        """
        imgDir = texttool.get('image', None)
        basePath = docinfo.get('documentPath', None)

        # image directory, relative to the document path
        if basePath and imgDir:
            joined = os.path.join(basePath, imgDir)
            docinfo['imagePath'] = joined.replace('/mpiwg/online', '', 1)

        # old style text URL
        oldTextUrl = texttool.get('text', None)
        if basePath and oldTextUrl:
            scheme = urlparse.urlparse(oldTextUrl)[0]
            if scheme == "":
                # no URL scheme: treat the value as a path below the document
                oldTextUrl = os.path.join(basePath, oldTextUrl)

            docinfo['textURL'] = oldTextUrl

        # new style text-url-path
        newTextUrl = texttool.get('text-url-path', None)
        if newTextUrl:
            docinfo['textURLPath'] = newTextUrl

        # page flow, side of the odd pages, number of the title page (0: not defined)
        docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
        docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
        docinfo['titlePage'] = texttool.get('title-scan-no', 0)

        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if basePath and presentation:
            if not presentation.startswith('http:'):
                presentation = os.path.join(basePath, presentation)

            docinfo['presentationUrl'] = presentation

        return docinfo
     594
    def getDocinfoFromBib(self, docinfo, bib):
        """Store the bib mapping and fields derived from it in docinfo.

        docinfo -- dict that is updated in place and returned.
        bib     -- mapping with the raw bib fields; its '@type' entry is stored
                   as docinfo['bibType'].

        The values 'creator', 'title' and 'date' are taken from the result of
        self.metadataService.getDCMappedData(bib) (None when missing).
        """
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # keep all raw bib fields under "bib"
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # also store the DC-mapped metadata for convenience
        mapped = self.metadataService.getDCMappedData(bib)
        for field in ('creator', 'title', 'date'):
            docinfo[field] = mapped.get(field, None)

        return docinfo
     608           
    def getDocinfoFromAccess(self, docinfo, acc):
        """Read the access type of an access mapping into docinfo.

        docinfo -- dict that is updated in place and returned.
        acc     -- mapping read as acc['@attr']['type']; for the types 'group'
                   and 'institution' the lower-cased acc['name'] is used instead.

        Sets docinfo['accessType'] only when a non-empty type was found. This is
        a best-effort read: missing or malformed access information leaves
        docinfo unchanged.
        """
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            acctype = acc['@attr']['type']
            if acctype:
                access = acctype
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (KeyError, TypeError, AttributeError):
            # Only lookup failures (missing key, acc is None, name is not a
            # string) are expected here; anything else, e.g. KeyboardInterrupt,
            # must propagate instead of being silently swallowed.
            logging.debug("getDocinfoFromAccess: no usable access type in acc")

        return docinfo
     626
     627    def getDocinfoFromDigilib(self, docinfo, path):
    491628        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    492    
    493         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
    494        
     629        # fetch data
    495630        txt = getHttpData(infoUrl)
    496         if txt is None:
    497             raise IOError("Unable to get dir-info from %s"%(infoUrl))
    498 
    499         dom = Parse(txt)
    500         sizes=dom.xpath("//dir/size")
    501         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
    502        
    503         if sizes:
    504             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
     631        if not txt:
     632            logging.error("Unable to get dir-info from %s"%(infoUrl))
     633            return docinfo
     634
     635        dom = ET.fromstring(txt)
     636        size = getText(dom.find("size"))
     637        logging.debug("getDocinfoFromDigilib: size=%s"%size)
     638        if size:
     639            docinfo['numPages'] = int(size)
    505640        else:
    506641            docinfo['numPages'] = 0
    507642           
    508643        # TODO: produce and keep list of image names and numbers
    509                        
    510644        return docinfo
    511    
    512     def getIndexMetaPath(self,url):
    513         """gib nur den Pfad zurueck"""
    514         regexp = re.compile(r".*(experimental|permanent)/(.*)")
    515         regpath = regexp.match(url)
    516         if (regpath==None):
    517             return ""
    518         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))           
    519         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
    520      
    521    
    522    
    523     def getIndexMetaUrl(self,url):
    524         """returns utr  of index.meta document at url"""
    525      
    526         metaUrl = None
    527         if url.startswith("http://"):
    528             # real URL
    529             metaUrl = url
    530         else:
    531             # online path
    532             server=self.digilibBaseUrl+"/servlet/Texter?fn="
    533             metaUrl=server+url.replace("/mpiwg/online","")
    534             if not metaUrl.endswith("index.meta"):
    535                 metaUrl += "/index.meta"
    536        
    537         return metaUrl
    538    
    539     def getDomFromIndexMeta(self, url):
    540         """get dom from index meta"""
    541         dom = None
    542         metaUrl = self.getIndexMetaUrl(url)
    543                
    544         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
    545         txt=getHttpData(metaUrl)
    546         if txt is None:
    547             raise IOError("Unable to read index meta from %s"%(url))
    548        
    549         dom = Parse(txt)
    550         return dom
    551    
    552     def getPresentationInfoXML(self, url):
    553         """returns dom of info.xml document at url"""
     645           
     646           
     647    def getDocinfoFromPresentationInfoXml(self,docinfo):
     648        """gets DC-like bibliographical information from the presentation entry in texttools"""
     649        url = docinfo.get('presentationUrl', None)
     650        if not url:
     651            logging.error("getDocinfoFromPresentation: no URL!")
     652            return docinfo
     653       
    554654        dom = None
    555655        metaUrl = None
     
    559659        else:
    560660            # online path
     661           
    561662            server=self.digilibBaseUrl+"/servlet/Texter?fn="
    562             metaUrl=server+url.replace("/mpiwg/online","")
     663            metaUrl=server+url
    563664       
    564665        txt=getHttpData(metaUrl)
    565666        if txt is None:
    566             raise IOError("Unable to read infoXMLfrom %s"%(url))
    567            
    568         dom = Parse(txt)
    569         return dom
     667            logging.error("Unable to read info.xml from %s"%(url))
     668            return docinfo
     669           
     670        dom = ET.fromstring(txt)
     671        docinfo['creator']=getText(dom.find(".//author"))
     672        docinfo['title']=getText(dom.find(".//title"))
     673        docinfo['date']=getText(dom.find(".//date"))
     674        return docinfo
     675   
     676
     677    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
     678        """returns pageinfo with the given parameters"""
     679        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
     680        pageinfo = {}
     681        pageinfo['viewMode'] = viewMode
     682        # split viewLayer if necessary
     683        if isinstance(viewLayer,basestring):
     684            viewLayer = viewLayer.split(',')
     685           
     686        if isinstance(viewLayer, list):
     687            logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
     688            # save (unique) list in viewLayers
     689            seen = set()
     690            viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
     691            pageinfo['viewLayers'] = viewLayers
     692            # stringify viewLayer
     693            viewLayer = ','.join(viewLayers)
     694        else:
     695            #create list
     696            pageinfo['viewLayers'] = [viewLayer]
    570697                       
    571        
    572     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    573         """gets authorization info from the index.meta file at path or given by dom"""
    574         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
    575        
    576         access = None
    577        
    578         if docinfo is None:
    579             docinfo = {}
    580            
    581         if dom is None:
    582             for x in range(cut):
    583                 path=getParentDir(path)
    584             dom = self.getDomFromIndexMeta(path)
    585        
    586         acctype = dom.xpath("//access-conditions/access/@type")
    587         if acctype and (len(acctype)>0):
    588             access=acctype[0].value
    589             if access in ['group', 'institution']:
    590                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
    591            
    592         docinfo['accessType'] = access
    593         return docinfo
    594    
    595        
    596     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    597         """gets bibliographical info from the index.meta file at path or given by dom"""
    598         #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
    599        
    600         if docinfo is None:
    601             docinfo = {}
    602        
    603         if dom is None:
    604             for x in range(cut):
    605                 path=getParentDir(path)
    606             dom = self.getDomFromIndexMeta(path)
    607        
    608         docinfo['indexMetaPath']=self.getIndexMetaPath(path);
    609        
    610         #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    611         # put in all raw bib fields as dict "bib"
    612         bib = dom.xpath("//bib/*")
    613         if bib and len(bib)>0:
    614             bibinfo = {}
    615             for e in bib:
    616                 bibinfo[e.localName] = getTextFromNode(e)
    617             docinfo['bib'] = bibinfo
    618        
    619         # extract some fields (author, title, year) according to their mapping
    620         metaData=self.metadata.main.meta.bib
    621         bibtype=dom.xpath("//bib/@type")
    622         if bibtype and (len(bibtype)>0):
    623             bibtype=bibtype[0].value
    624         else:
    625             bibtype="generic"
    626            
    627         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
    628         docinfo['bib_type'] = bibtype
    629         bibmap=metaData.generateMappingForType(bibtype)
    630         #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    631         #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    632         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    633         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
    634             try:
    635                 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
    636             except: pass
    637             try:
    638                 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
    639             except: pass
    640             try:
    641                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
    642             except: pass
    643             #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
    644             try:
    645                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
    646             except:
    647                 docinfo['lang']=''
    648             try:
    649                 docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])
    650             except:
    651                 docinfo['city']=''
    652             try:
    653                 docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])
    654             except:
    655                 docinfo['number_of_pages']=''
    656             try:
    657                 docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])
    658             except:
    659                 docinfo['series_volume']=''
    660             try:
    661                 docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])
    662             except:
    663                 docinfo['number_of_volumes']=''
    664             try:
    665                 docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])
    666             except:
    667                 docinfo['translator']=''
    668             try:
    669                 docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])
    670             except:
    671                 docinfo['edition']=''
    672             try:
    673                 docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])
    674             except:
    675                 docinfo['series_author']=''
    676             try:
    677                 docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])
    678             except:
    679                 docinfo['publisher']=''
    680             try:
    681                 docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])
    682             except:
    683                 docinfo['series_title']=''
    684             try:
    685                 docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])
    686             except:
    687                 docinfo['isbn_issn']=''           
    688         #logging.debug("I NEED BIBTEX %s"%docinfo)
    689         return docinfo
    690    
    691    
    692     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    693         """gets name info from the index.meta file at path or given by dom"""
    694         if docinfo is None:
    695             docinfo = {}
    696        
    697         if dom is None:
    698             for x in range(cut):
    699                 path=getParentDir(path)
    700             dom = self.getDomFromIndexMeta(path)
    701 
    702         docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
    703         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    704         return docinfo
    705    
    706     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    707         """parse texttool tag in index meta"""
    708         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    709         if docinfo is None:
    710            docinfo = {}
    711         if docinfo.get('lang', None) is None:
    712             docinfo['lang'] = '' # default keine Sprache gesetzt
    713         if dom is None:
    714             dom = self.getDomFromIndexMeta(url)
    715        
    716         archivePath = None
    717         archiveName = None
    718    
    719         archiveNames = dom.xpath("//resource/name")
    720         if archiveNames and (len(archiveNames) > 0):
    721             archiveName = getTextFromNode(archiveNames[0])
    722         else:
    723             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
    724        
    725         archivePaths = dom.xpath("//resource/archive-path")
    726         if archivePaths and (len(archivePaths) > 0):
    727             archivePath = getTextFromNode(archivePaths[0])
    728             # clean up archive path
    729             if archivePath[0] != '/':
    730                 archivePath = '/' + archivePath
    731             if archiveName and (not archivePath.endswith(archiveName)):
    732                 archivePath += "/" + archiveName
    733         else:
    734             # try to get archive-path from url
    735             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
    736             if (not url.startswith('http')):
    737                 archivePath = url.replace('index.meta', '')
    738                
    739         if archivePath is None:
    740             # we balk without archive-path
    741             raise IOError("Missing archive-path (for text-tool) in %s" % (url))
    742        
    743         imageDirs = dom.xpath("//texttool/image")
    744         if imageDirs and (len(imageDirs) > 0):
    745             imageDir = getTextFromNode(imageDirs[0])
    746            
    747         else:
    748             # we balk with no image tag / not necessary anymore because textmode is now standard
    749             #raise IOError("No text-tool info in %s"%(url))
    750             imageDir = ""
    751             #xquery="//pb" 
    752             docinfo['imagePath'] = "" # keine Bilder
    753             docinfo['imageURL'] = ""
    754            
    755         if imageDir and archivePath:
    756             #print "image: ", imageDir, " archivepath: ", archivePath
    757             imageDir = os.path.join(archivePath, imageDir)
    758             imageDir = imageDir.replace("/mpiwg/online", '')
    759             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
    760             docinfo['imagePath'] = imageDir
    761            
    762             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
    763            
    764         viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    765         if viewerUrls and (len(viewerUrls) > 0):
    766             viewerUrl = getTextFromNode(viewerUrls[0])
    767             docinfo['viewerURL'] = viewerUrl
    768        
    769         # old style text URL
    770         textUrls = dom.xpath("//texttool/text")
    771         if textUrls and (len(textUrls) > 0):
    772             textUrl = getTextFromNode(textUrls[0])
    773             if urlparse.urlparse(textUrl)[0] == "": #keine url
    774                 textUrl = os.path.join(archivePath, textUrl)
    775             # fix URLs starting with /mpiwg/online
    776             if textUrl.startswith("/mpiwg/online"):
    777                 textUrl = textUrl.replace("/mpiwg/online", '', 1)
    778            
    779             docinfo['textURL'] = textUrl
    780        
    781 
    782             #TODO: hack-DW for annalen
    783             if (textUrl is not None) and (textUrl.startswith("/permanent/einstein/annalen")):
    784                 textUrl=textUrl.replace("/permanent/einstein/annalen/","/diverse/de/")
    785                 splitted=textUrl.split("/fulltext")
    786                 textUrl=splitted[0]+".xml"
    787                 textUrlkurz = string.split(textUrl, ".")[0]
    788                 docinfo['textURLPathkurz'] = textUrlkurz
    789                 docinfo['textURLPath'] = textUrl
    790                 logging.debug("hack")
    791                 logging.debug(textUrl)
    792            
    793            
    794         # new style text-url-path
    795         textUrls = dom.xpath("//texttool/text-url-path")
    796         if textUrls and (len(textUrls) > 0):
    797             textUrl = getTextFromNode(textUrls[0])
    798             docinfo['textURLPath'] = textUrl
    799             textUrlkurz = string.split(textUrl, ".")[0]
    800             docinfo['textURLPathkurz'] = textUrlkurz
    801             #if not docinfo['imagePath']:
    802                 # text-only, no page images
    803                 #docinfo = self.getNumTextPages(docinfo)
    804                  
    805          
    806         presentationUrls = dom.xpath("//texttool/presentation")
    807         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
    808         #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom)
    809         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
    810        
    811        
    812         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen
    813              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
    814              # durch den relativen Pfad auf die presentation infos
    815             presentationPath = getTextFromNode(presentationUrls[0])
    816             if url.endswith("index.meta"):
    817                 presentationUrl = url.replace('index.meta', presentationPath)
    818             else:
    819                 presentationUrl = url + "/" + presentationPath
    820                
    821             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
    822    
    823         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
    824        
    825         return docinfo
    826    
    827    
    828     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    829         """gets the bibliographical information from the preseantion entry in texttools
    830         """
    831         dom=self.getPresentationInfoXML(url)
    832         try:
    833             docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
    834         except:
    835             pass
    836         try:
    837             docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
    838         except:
    839             pass
    840         try:
    841             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
    842         except:
    843             pass
    844         return docinfo
    845    
    846     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    847         """path ist the path to the images it assumes that the index.meta file is one level higher."""
    848         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    849         if docinfo is None:
    850             docinfo = {}
    851         path=path.replace("/mpiwg/online","")
    852         docinfo['imagePath'] = path
    853         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
    854        
    855         pathorig=path
    856         for x in range(cut):       
    857                 path=getParentDir(path)
    858         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    859         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    860         docinfo['imageURL'] = imageUrl
    861        
    862         #path ist the path to the images it assumes that the index.meta file is one level higher.
    863         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    864         #docinfo = self.getDownloadfromDocinfoToBibtex(pathorig,docinfo=docinfo,cut=cut+1)
    865         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    866         return docinfo
    867    
    868    
    869     def getDocinfo(self, mode, url):
    870         """returns docinfo depending on mode"""
    871         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    872         # look for cached docinfo in session
    873         if self.REQUEST.SESSION.has_key('docinfo'):
    874             docinfo = self.REQUEST.SESSION['docinfo']
    875             # check if its still current
    876             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
    877                 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
    878                 return docinfo
    879         # new docinfo
    880         docinfo = {'mode': mode, 'url': url}
    881         if mode=="texttool": #index.meta with texttool information
    882             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    883         elif mode=="imagepath":
    884             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    885         elif mode=="filepath":
    886             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    887         else:
    888             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
    889             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
    890                
    891         # FIXME: fake texturlpath
    892         if not docinfo.has_key('textURLPath'):
    893             docinfo['textURLPath'] = None
    894        
    895         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    896         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%)
    897         self.REQUEST.SESSION['docinfo'] = docinfo
    898         return docinfo
    899                
    900     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    901         """returns pageinfo with the given parameters"""
    902         pageinfo = {}
     698        pageinfo['viewLayer'] = viewLayer
     699        pageinfo['tocMode'] = tocMode
     700
    903701        current = getInt(current)
    904    
    905702        pageinfo['current'] = current
     703        pageinfo['pn'] = current
    906704        rows = int(rows or self.thumbrows)
    907705        pageinfo['rows'] = rows
     
    910708        grpsize = cols * rows
    911709        pageinfo['groupsize'] = grpsize
     710        # is start is empty use one around current
    912711        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    913712        # int(current / grpsize) * grpsize +1))
    914713        pageinfo['start'] = start
    915         pageinfo['end'] = start + grpsize
    916         if (docinfo is not None) and ('numPages' in docinfo):
    917             np = int(docinfo['numPages'])
    918             pageinfo['end'] = min(pageinfo['end'], np)
    919             pageinfo['numgroups'] = int(np / grpsize)
    920             if np % grpsize > 0:
    921                 pageinfo['numgroups'] += 1       
    922         pageinfo['viewMode'] = viewMode
    923         pageinfo['tocMode'] = tocMode
     714        # get number of pages
     715        np = int(docinfo.get('numPages', 0))
     716        if np == 0:
     717            # numPages unknown - maybe we can get it from text page
     718            if docinfo.get('textURLPath', None):
     719                # cache text page as well
     720                pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
     721                np = int(docinfo.get('numPages', 0))
     722               
     723        # cache table of contents
     724        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
     725        pageinfo['numgroups'] = int(np / grpsize)
     726        if np % grpsize > 0:
     727            pageinfo['numgroups'] += 1
     728
     729        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
     730        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
     731        # add zeroth page for two columns
     732        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
     733        pageinfo['pageZero'] = pageZero
     734        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
     735               
    924736        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    925         #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
    926         pageinfo['query'] = self.REQUEST.get('query','')
    927         pageinfo['queryType'] = self.REQUEST.get('queryType','')
    928         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    929         pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    930         pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    931        
    932         pageinfo ['highlightElementPos'] = self.REQUEST.get('highlightElementPos','')
    933         pageinfo ['highlightElement'] = self.REQUEST.get('highlightElement','')
    934        
    935         pageinfo ['xpointer'] = self.REQUEST.get('xpointer','')
    936        
    937         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    938         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    939         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')     
    940         toc = int (pageinfo['tocPN'])
    941         pageinfo['textPages'] =int (toc)
    942        
    943         if 'tocSize_%s'%tocMode in docinfo:
    944             tocSize = int(docinfo['tocSize_%s'%tocMode])
    945             tocPageSize = int(pageinfo['tocPageSize'])
    946             # cached toc           
    947             if tocSize%tocPageSize>0:
    948                 tocPages=tocSize/tocPageSize+1
     737       
     738        # cache search results
     739        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
     740        query = self.REQUEST.get('query',None)
     741        pageinfo['query'] = query
     742        if query:
     743            queryType = self.REQUEST.get('queryType', 'fulltextMorph')
     744            pageinfo['queryType'] = queryType
     745            pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
     746            self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
     747           
     748            # highlighting
     749            highlightQuery = self.REQUEST.get('highlightQuery', None)
     750            if highlightQuery:
     751                pageinfo['highlightQuery'] = highlightQuery
     752                pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
     753                pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
     754           
     755        return pageinfo
     756
     757
    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """Return a dict describing one screenful of thumbnails.

        start       -- index of the first page on this screen.
        rows, cols  -- size of the thumbnail grid; one screen holds rows*cols pages.
        pageFlowLtr -- if false, the pages of each row are stored right-to-left.
        pageZero    -- if true, numbering of the batches starts at 0 instead of 1
                       and a screen starting at 1 begins with index 0.
        minIdx      -- smallest valid page index.
        maxIdx      -- largest valid page index; 0 means unknown, in which case
                       start + rows*cols is used.

        Keys of the returned dict:
        'batches'   -- list of {'start', 'end'} dicts for all screens,
        'pages'     -- list of rows, each a list of {'idx': n} dicts where n is
                       None for positions outside minIdx..maxIdx,
        'prevStart' -- start of the previous screen or None,
        'nextStart' -- start of the next screen or None.
        """
        batch = {}
        grpsize = rows * cols
        if maxIdx == 0:
            maxIdx = start + grpsize

        if pageZero:
            ofs = 0
        else:
            ofs = 1

        # The indexes ofs..maxIdx have to be covered, i.e. one page more when
        # numbering starts at 0; otherwise the last page would be missing from
        # the list whenever maxIdx is a multiple of grpsize.
        nb = int(math.ceil((maxIdx - ofs + 1) / float(grpsize)))
        # list of all batch start and end points
        batches = []
        for i in range(nb):
            s = i * grpsize + ofs
            e = min(s + grpsize - 1, maxIdx)
            batches.append({'start': s, 'end': e})

        batch['batches'] = batches

        pages = []
        if pageZero and start == 1:
            # correct beginning
            idx = 0
        else:
            idx = start

        for r in range(rows):
            row = []
            for c in range(cols):
                if idx < minIdx or idx > maxIdx:
                    page = {'idx': None}
                else:
                    page = {'idx': idx}

                idx += 1
                if pageFlowLtr:
                    row.append(page)
                else:
                    row.insert(0, page)

            pages.append(row)

        if start > 1:
            batch['prevStart'] = max(start - grpsize, 1)
        else:
            batch['prevStart'] = None

        # "<=": page maxIdx itself is valid, so a next screen exists even when
        # it would contain only that single page.
        if start + grpsize <= maxIdx:
            batch['nextStart'] = start + grpsize
        else:
            batch['nextStart'] = None

        batch['pages'] = pages
        return batch
     815       
    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """Return a dict with information for one screenful of data.

        start -- index of the first element of this batch (batches start at 1)
        size -- number of elements per batch
        end -- index of the last element (inclusive); 0 means unknown, then
               start + size is assumed
        data -- optional container with the elements
        fullData -- if true, data is indexed by absolute position (data[start], ...);
                    otherwise data only holds this batch and is indexed from 0

        The returned dict has the keys:
        'batches' -- list of {'start', 'end'} dicts, one per batch (end is inclusive)
        'this' -- elements of this batch; entries missing from data are None;
                  without data the value i + 1 is used for position i
        'prevStart' -- start index of the previous batch or None
        'nextStart' -- start index of the next batch or None
        """
        batch = {}
        if end == 0:
            end = start + size

        def element(key):
            # tolerate incomplete data instead of failing the whole batch
            try:
                return data[key]
            except (KeyError, IndexError):
                return None

        # list of all batch start and end points
        nb = int(math.ceil(end / float(size)))
        batches = []
        for i in range(nb):
            s = i * size + 1
            e = min((i + 1) * size, end)
            batches.append({'start': s, 'end': e})

        batch['batches'] = batches

        # list of elements in this batch; end is inclusive, so stop at end + 1
        this = []
        j = 0
        for i in range(start, min(start + size, end + 1)):
            if data:
                if fullData:
                    d = element(i)
                else:
                    d = element(j)
                    j += 1

            else:
                d = i + 1

            this.append(d)

        batch['this'] = this

        if start > 1:
            batch['prevStart'] = max(start - size, 1)
        else:
            batch['prevStart'] = None

        # a next batch exists as long as its first index is not beyond end
        if start + size <= end:
            batch['nextStart'] = start + size
        else:
            batch['nextStart'] = None

        return batch
     859       
     860
     861    security.declareProtected('View management screens','changeDocumentViewerForm')   
     862    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
     863   
     864    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    958865        """init document viewer"""
    959866        self.title=title
     
    962869        self.thumbcols = thumbcols
    963870        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
     871        try:
     872            # assume MetaDataFolder instance is called metadata
     873            self.metadataService = getattr(self, 'metadata')
     874        except Exception, e:
     875            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
     876
    964877        if RESPONSE is not None:
    965878            RESPONSE.redirect('manage_main')
     
    977890    if RESPONSE is not None:
    978891        RESPONSE.redirect('manage_main')
    979 
    980 ## DocumentViewerTemplate class
    981 class DocumentViewerTemplate(ZopePageTemplate):
    982     """Template for document viewer"""
    983     meta_type="DocumentViewer Template"
    984 
    985 
    986 def manage_addDocumentViewerTemplateForm(self):
    987     """Form for adding"""
    988     pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    989     return pt()
    990 
    991 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
    992                            REQUEST=None, submit=None):
    993     "Add a Page Template with optional file content."
    994 
    995     self._setObject(id, DocumentViewerTemplate(id))
    996     ob = getattr(self, id)
    997     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
    998     logging.info("txt %s:"%txt)
    999     ob.pt_edit(txt,"text/html")
    1000     if title:
    1001         ob.pt_setTitle(title)
    1002     try:
    1003         u = self.DestinationURL()
    1004     except AttributeError:
    1005         u = REQUEST['URL1']
    1006        
    1007     u = "%s/%s" % (u, urllib.quote(id))
    1008     REQUEST.RESPONSE.redirect(u+'/manage_main')
    1009     return ''
    1010 
    1011 
    1012    
Note: See TracChangeset for help on using the changeset viewer.