Mercurial > hg > documentViewer
diff documentViewer.py @ 617:7aefbddddaf9
alpha of hocr server support
author | dwinter |
---|---|
date | Wed, 23 Jul 2014 17:36:04 +0200 |
parents | c57d80a649ea |
children | 54d3498a6e78 |
line wrap: on
line diff
--- a/documentViewer.py Wed Jul 23 17:20:34 2014 +0200 +++ b/documentViewer.py Wed Jul 23 17:36:04 2014 +0200 @@ -21,9 +21,21 @@ def getMDText(node): """returns the @text content from the MetaDataProvider metadata node""" + + + if isinstance(node, dict): return node.get('@text', None) + if isinstance(node,list): #more than one text file if there is an attribute don't choose it + for nodeInList in node: + attr = nodeInList.get("@attr",None) + if attr is None: + return node.get('@text',None) + return None + + + return node def getParentPath(path, cnt=1): @@ -82,6 +94,7 @@ # # viewMode templates viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals()) + viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals()) viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals()) viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals()) viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals()) @@ -164,7 +177,11 @@ # proxy text server methods to fulltextclient def getTextPage(self, **args): """returns full text content of page""" + return self.template.fulltextclient.getTextPage(**args) + + + def getSearchResults(self, **args): """loads list of search results and stores XML in docinfo""" @@ -239,7 +256,7 @@ show page @param url: url which contains display information @param mode: defines how to access the document behind url - @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto' + @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text' @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) """ @@ -250,7 +267,9 @@ # this won't work logging.error("template folder missing!") return "ERROR: template folder missing!" 
- + + + if not getattr(self, 'digilibBaseUrl', None): self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" @@ -287,6 +306,9 @@ # legacy fix viewMode = 'image' self.REQUEST['viewMode'] = 'image' + + + # safe viewLayer in userinfo userinfo['viewLayer'] = viewLayer @@ -532,6 +554,23 @@ docUrl = getParentPath(url) metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) + + elif mode=="hocr": + # url points to folder with images, index.meta optional + # asssume index.meta in parent dir + docUrl = getParentPath(url) + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) + if docinfo.get("creator", None) is None: + docinfo['creator'] = "" + + if docinfo.get("title", None) is None: + docinfo['title'] = "" + + if docinfo.get("documentPath", None) is None: + docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['documentPath'] = url.replace('/pages', '', 1) elif mode=="filepath": # url points to image file, index.meta optional @@ -725,6 +764,10 @@ # old style text URL textUrl = getMDText(texttool.get('text', None)) + + + + if textUrl and docPath: if urlparse.urlparse(textUrl)[0] == "": #keine url textUrl = os.path.join(docPath, textUrl)