Context Navigation

← Previous Change
Next Change →

Changeset 617:7aefbddddaf9 in documentViewer for documentViewer.py

Timestamp:

Jul 23, 2014, 3:36:04 PM (10 years ago)

Author:

dwinter

Branch:

default

Message:

alpha of hocr server support

File:

: 1 edited

documentViewer.py (modified) (8 diffs)

Legend:

: Unmodified
: Added
: Removed

documentViewer.py

-                      r613
+                      r617
 def getMDText(node):
     """Return the @text content from a MetaDataProvider metadata node.

     @param node: metadata node; may be a dict, a list of nodes, or a plain value
     @return: for a dict, its '@text' value (None if absent); for a list, the
              text of the first entry that carries no '@attr' (None if every
              entry has one or the list is empty); any other value unchanged
     """
     if isinstance(node, dict):
         return node.get('@text', None)
     if isinstance(node, list):
         # more than one text entry: don't choose an entry that has an attribute
         for nodeInList in node:
             if not isinstance(nodeInList, dict):
                 # a non-dict entry cannot carry '@attr'; treat it like the
                 # scalar case below and hand it back as the text itself
                 # NOTE(review): assumes such entries are plain text values - confirm
                 return nodeInList
             if nodeInList.get("@attr", None) is None:
                 # read '@text' from the matching entry, not from the list
                 # (the list has no .get and would raise AttributeError)
                 return nodeInList.get('@text', None)
         return None
     return node
 …
     # viewMode templates
     viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
+    viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals())
     viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
     viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
 …
     def getTextPage(self, **args):
         """returns full text content of page"""
         return self.template.fulltextclient.getTextPage(**args)
     def getSearchResults(self, **args):
 …
         @param url: url which contains display information
         @param mode: defines how to access the document behind url
         @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
+        @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format
         @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
 …
             logging.error("template folder missing!")
             return "ERROR: template folder missing!"
         if not getattr(self, 'digilibBaseUrl', None):
             self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
 …
             viewMode = 'image'
             self.REQUEST['viewMode'] = 'image'
         # safe viewLayer in userinfo
 …
             metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
             docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
+        elif mode=="hocr":
+            # url points to folder with images, index.meta optional
+            # assume index.meta in parent dir
+            docUrl = getParentPath(url)
+            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+            docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
+            docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
+            if docinfo.get("creator", None) is None:
+                docinfo['creator'] = ""
+            if docinfo.get("title", None) is None:
+                docinfo['title'] = ""
+            if docinfo.get("documentPath", None) is None:
+                docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
+                docinfo['documentPath'] = url.replace('/pages', '', 1)
         elif mode=="filepath":
 …
         # old style text URL
         textUrl = getMDText(texttool.get('text', None))
         if textUrl and docPath:
             if urlparse.urlparse(textUrl)[0] == "": #keine url

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 617:7aefbddddaf9 in documentViewer for documentViewer.py

Legend:

documentViewer.py

Download in other formats: