Changeset 464:19bd41d95f62 in documentViewer for documentViewer.py


Ignore:
Timestamp:
Jul 29, 2011, 4:27:24 PM (13 years ago)
Author:
casties
Branch:
elementtree
Message:

first version with new getdocinfo

File:
1 edited

Legend:

Unmodified
Added
Removed
  • documentViewer.py

    r463 r464  
    9999    return bt
    100100
    101 def getParentDir(path):
    102     """returns pathname shortened by one"""
    103     return '/'.join(path.split('/')[0:-1])
    104        
     101def getParentPath(path, cnt=1):
     102    """returns pathname shortened by cnt"""
     103    # make sure path doesn't end with /
     104    path = path.rstrip('/')
     105    # split by /, shorten, and reassemble
     106    return '/'.join(path.split('/')[0:-cnt])
     107
    105108
    106109##
     
    264267    security.declareProtected('View','index_html')
    265268    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    266         '''
     269        """
    267270        view it
    268271        @param mode: defines how to access the document behind url
     
    270273        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    271274        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    272         @param characterNormalization type of text display (reg, norm, none)
    273         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    274         '''
     275        """
    275276       
    276277        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
     
    371372        if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
    372373                urlParams["mode"] = "imagepath"
    373                 urlParams["url"] = getParentDir(urlParams["url"])
     374                urlParams["url"] = getParentPath(urlParams["url"])
    374375               
    375376        # quote values and assemble into query string (not escaping '/')
     
    439440       
    440441        for x in range(cut):
    441             path=getParentDir(path)
     442            path=getParentPath(path)
    442443       
    443444        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
     
    537538        if dom is None:
    538539            for x in range(cut):
    539                 path=getParentDir(path)
     540                path=getParentPath(path)
    540541            dom = self.getDomFromIndexMeta(path)
    541542       
     
    592593        if dom is None:
    593594            for x in range(cut):
    594                 path=getParentDir(path)
     595                path=getParentPath(path)
    595596            dom = self.getDomFromIndexMeta(path)
    596597
     
    724725        pathorig=path
    725726        for x in range(cut):       
    726                 path=getParentDir(path)
     727                path=getParentPath(path)
    727728        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    728729        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
     
    736737   
    737738   
    738     def getDocinfo(self, mode, url):
     739    def OLDgetDocinfo(self, mode, url):
    739740        """returns docinfo depending on mode"""
    740741        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
     
    769770        self.REQUEST.SESSION['docinfo'] = docinfo
    770771        return docinfo
    771                
     772
     773
    def getDocinfo(self, mode, url):
        """Return the docinfo dict for the document behind url.

        A docinfo stored in the session is returned as is when its 'mode'
        and 'url' match the arguments. Otherwise a new docinfo is assembled
        from the index.meta contents and the digilib directory info, stored
        in the session and returned.

        @param mode: how to access the document ('texttool', 'imagepath' or 'filepath')
        @param url: location of the document; its meaning depends on mode
        @raise IOError: mode is 'texttool' and no index.meta DOM was found
        @raise ValueError: mode is none of the known modes
        """
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        session = self.REQUEST.SESSION
        # reuse the docinfo cached in the session if it is still current,
        # i.e. it was built for the same mode and url
        if session.has_key('docinfo'):
            cached = session['docinfo']
            if cached is not None:
                if cached.get('mode', None) == mode and cached.get('url', None) == url:
                    logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
                    return cached

        # start a new docinfo, including the viewer's own url
        docinfo = {'mode': mode, 'url': url}
        docinfo['viewerUrl'] = self.getDocumentViewerURL()

        # locate the document and read its index.meta DOM
        if mode == "texttool":
            # url points to document dir or index.meta; index.meta is required
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode in ("imagepath", "filepath"):
            # index.meta is optional in these modes
            if mode == "imagepath":
                # url points to folder with images: assume index.meta in parent dir
                docUrl = getParentPath(url)
            else:
                # url points to image file: assume index.meta is two path segments up
                docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl

        # merge the parts of index.meta that are present into docinfo
        if metaDom is not None:
            # document directory name and path
            resourceData = self.metadataService.getResourceData(dom=metaDom)
            if resourceData:
                docinfo = self.getDocinfoFromResource(docinfo, resourceData)

            # texttool info
            texttoolData = self.metadataService.getTexttoolData(dom=metaDom)
            if texttoolData:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)

            # bib info
            bibData = self.metadataService.getBibData(dom=metaDom)
            if bibData:
                docinfo = self.getDocinfoFromBib(docinfo, bibData)

            # auth info
            accessData = self.metadataService.getAccessData(dom=metaDom)
            if accessData:
                docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # outside texttool mode the url itself is the image path and
        # overrides any image path read from the texttool element
        if mode != 'texttool':
            docinfo['imagePath'] = url

        # image URL and number of images from digilib
        imagePath = docinfo.get('imagePath', None)
        if imagePath:
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
            docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        # store in session
        session['docinfo'] = docinfo
        return docinfo
     853
    def getDocinfoFromResource(self, docinfo, resource):
        """Read the contents of the resource element into docinfo.

        Sets docinfo['documentName'] to the resource's 'name' and
        docinfo['documentPath'] to its 'archive-path', made absolute and
        extended by the name if it does not already end with it. Without an
        archive-path the 'documentUrl' already present in docinfo is used as
        the path, unless it is an http(s) URL.

        @param docinfo: docinfo dict to update
        @param resource: mapping with the fields of the resource element
        @return: the updated docinfo
        """
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if not docPath.startswith('/'):
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath. The key is 'documentUrl' (as written by
            # getDocinfo); the former 'documentURL' lookup raised KeyError.
            docUrl = docinfo.get('documentUrl', None)
            # a remote URL is not a usable document path
            if docUrl and not docUrl.startswith(('http:', 'https:')):
                docPath = docUrl

        docinfo['documentPath'] = docPath
        return docinfo
     875
    def getDocinfoFromTexttool(self, docinfo, texttool):
        """Copy the settings of the texttool element into docinfo.

        Relative entries are resolved against docinfo['documentPath']; the
        'image', 'text' and 'presentation' entries are ignored when no
        document path is known.

        @param docinfo: docinfo dict to update
        @param texttool: mapping with the fields of the texttool element
        @return: the updated docinfo
        """
        docPath = docinfo.get('documentPath', None)

        # image dir, with the first '/mpiwg/online' removed
        imageDir = texttool.get('image', None)
        if imageDir and docPath:
            imagePath = os.path.join(docPath, imageDir)
            docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

        # old style text URL
        oldTextUrl = texttool.get('text', None)
        if oldTextUrl and docPath:
            scheme = urlparse.urlparse(oldTextUrl)[0]
            if scheme == "":
                # not a URL: resolve against the document path and
                # remove the first '/mpiwg/online'
                oldTextUrl = os.path.join(docPath, oldTextUrl).replace('/mpiwg/online', '', 1)

            docinfo['textURL'] = oldTextUrl

        # new style text-url-path is stored unchanged
        newTextUrl = texttool.get('text-url-path', None)
        if newTextUrl:
            docinfo['textURLPath'] = newTextUrl

        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if presentation and docPath:
            docinfo['presentationPath'] = os.path.join(docPath, presentation)

        return docinfo
     911
    def getDocinfoFromBib(self, docinfo, bib):
        """Store the bib element and derived fields in docinfo.

        Keeps the raw bib mapping under 'bib' and its '@type' under
        'bibType', and copies 'creator', 'title' and 'date' from the result
        of self.metadataService.getDCMappedData(bib) (None where missing).

        @param docinfo: docinfo dict to update
        @param bib: mapping with the fields of the bib element
        @return: the updated docinfo
        """
        # raw bib fields and bib type
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # also store DC metadata for convenience
        dcData = self.metadataService.getDCMappedData(bib)
        for field in ('creator', 'title', 'date'):
            docinfo[field] = dcData.get(field, None)
        return docinfo
     924           
    def getDocinfoFromAccess(self, docinfo, acc):
        """Read the contents of the access element into docinfo.

        Sets docinfo['accessType'] to the access type found in
        acc['@attr']['type']. For the types 'group' and 'institution' the
        lower-cased acc['name'] is stored instead. docinfo is left unchanged
        when the type is empty or the expected fields are missing.

        @param docinfo: docinfo dict to update
        @param acc: mapping with the fields of the access element
        @return: the updated docinfo
        """
        #TODO: also read resource type
        try:
            # was misspelled 'accc': the NameError was swallowed by a bare
            # except, so accessType was never set
            acctype = acc['@attr']['type']
            if acctype:
                access = acctype
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (LookupError, TypeError, AttributeError):
            # incomplete or oddly shaped access element: reading it is best
            # effort, so leave docinfo without an accessType
            pass

        return docinfo
     941
    def getDocinfoFromDigilib(self, docinfo, path):
        """Add the size reported by digilib for path to docinfo as 'numPages'.

        Requests dirInfo-xml.jsp below self.digilibBaseUrl for path and
        stores the integer value of the 'size' element in
        docinfo['numPages'] (0 if the size is empty). If the request yields
        no data an error is logged and docinfo is returned unchanged.

        @param docinfo: docinfo dict to update
        @param path: image path to ask digilib about
        @return: the updated docinfo
        """
        infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
        # fetch data
        response = getHttpData(infoUrl)
        if not response:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        dirInfo = ET.fromstring(response)
        size = getText(dirInfo.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        # an empty size counts as no pages
        docinfo['numPages'] = int(size) if size else 0

        # TODO: produce and keep list of image names and numbers
        return docinfo
     960           
     961           
    772962    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    773963        """returns pageinfo with the given parameters"""
Note: See TracChangeset for help on using the changeset viewer.