Changeset 617:7aefbddddaf9 in documentViewer for documentViewer.py


Ignore:
Timestamp:
Jul 23, 2014, 3:36:04 PM (10 years ago)
Author:
dwinter
Branch:
default
Message:

alpha of hocr server support

File:
1 edited

Legend:

Unmodified
Added
Removed
  • documentViewer.py

    r613 r617  
    def getMDText(node):
        """Return the '@text' content of a MetaDataProvider metadata node.

        node may be:
          * a dict  -- its '@text' entry is returned (None if absent);
          * a list of dicts (more than one text entry) -- the '@text' of the
            first entry that carries no '@attr' is returned; None if every
            entry has an '@attr' or the list is empty;
          * anything else -- returned unchanged.
        """
        if isinstance(node, dict):
            return node.get('@text', None)

        if isinstance(node, list):
            # more than one text file: entries with an attribute are not chosen
            for entry in node:
                if entry.get('@attr', None) is None:
                    # read '@text' from the matching entry itself; the list
                    # object has no .get(), so asking the list would raise
                    return entry.get('@text', None)
            return None

        return node
    2840
     
    8395    # viewMode templates
    8496    viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
     97    viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals())
    8598    viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
    8699    viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
     
    def getTextPage(self, **args):
        """Forward all keyword arguments to the template's fulltextclient
        getTextPage() and return whatever it returns (the page's full text)."""
        client = self.template.fulltextclient
        page = client.getTextPage(**args)
        return page
     182   
     183   
     184   
    168185
    169186    def getSearchResults(self, **args):
     
    240257        @param url: url which contains display information
    241258        @param mode: defines how to access the document behind url
    242         @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
     259        @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format
    243260        @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    244261        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
     
    251268            logging.error("template folder missing!")
    252269            return "ERROR: template folder missing!"
    253            
     270       
     271       
     272
    254273        if not getattr(self, 'digilibBaseUrl', None):
    255274            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
     
    288307            viewMode = 'image'
    289308            self.REQUEST['viewMode'] = 'image'
     309           
     310       
     311           
    290312
    291313        # safe viewLayer in userinfo
     
    533555            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
    534556            docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
     557           
     558        elif mode=="hocr":
     559            # url points to folder with images, index.meta optional
     560            # assume index.meta in parent dir
     561            docUrl = getParentPath(url)
     562            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
     563            docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
     564            docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
     565            if docinfo.get("creator", None) is None:
     566                docinfo['creator'] = ""
     567           
     568            if docinfo.get("title", None) is None:
     569                docinfo['title'] = ""
     570
     571            if docinfo.get("documentPath", None) is None:
     572                docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
     573                docinfo['documentPath'] = url.replace('/pages', '', 1)
    535574
    536575        elif mode=="filepath":
     
    726765        # old style text URL
    727766        textUrl = getMDText(texttool.get('text', None))
     767
     768       
     769
     770
    728771        if textUrl and docPath:
    729772            if urlparse.urlparse(textUrl)[0] == "": #keine url
Note: See TracChangeset for help on using the changeset viewer.