Mercurial > hg > documentViewer
comparison documentViewer.py @ 617:7aefbddddaf9
alpha of hocr server support
author | dwinter |
---|---|
date | Wed, 23 Jul 2014 17:36:04 +0200 |
parents | c57d80a649ea |
children | 54d3498a6e78 |
comparison
equal
deleted
inserted
replaced
616:3f9b42840901 | 617:7aefbddddaf9 |
---|---|
19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml | 19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml |
20 | 20 |
21 | 21 |
22 def getMDText(node): | 22 def getMDText(node): |
23 """returns the @text content from the MetaDataProvider metadata node""" | 23 """returns the @text content from the MetaDataProvider metadata node""" |
24 | |
25 | |
26 | |
24 if isinstance(node, dict): | 27 if isinstance(node, dict): |
25 return node.get('@text', None) | 28 return node.get('@text', None) |
26 | 29 |
30 if isinstance(node,list): #more than one text file if there is an attribute don't choose it | |
31 for nodeInList in node: | |
32 attr = nodeInList.get("@attr",None) | |
33 if attr is None: | |
34 return node.get('@text',None) | |
35 return None | |
36 | |
37 | |
38 | |
27 return node | 39 return node |
28 | 40 |
29 def getParentPath(path, cnt=1): | 41 def getParentPath(path, cnt=1): |
30 """returns pathname shortened by cnt""" | 42 """returns pathname shortened by cnt""" |
31 # make sure path doesn't end with / | 43 # make sure path doesn't end with / |
80 # | 92 # |
81 # templates and forms | 93 # templates and forms |
82 # | 94 # |
83 # viewMode templates | 95 # viewMode templates |
84 viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals()) | 96 viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals()) |
97 viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals()) | |
85 viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals()) | 98 viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals()) |
86 viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals()) | 99 viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals()) |
87 viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals()) | 100 viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals()) |
88 viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals()) | 101 viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals()) |
89 viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals()) | 102 viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals()) |
162 | 175 |
163 | 176 |
164 # proxy text server methods to fulltextclient | 177 # proxy text server methods to fulltextclient |
165 def getTextPage(self, **args): | 178 def getTextPage(self, **args): |
166 """returns full text content of page""" | 179 """returns full text content of page""" |
180 | |
167 return self.template.fulltextclient.getTextPage(**args) | 181 return self.template.fulltextclient.getTextPage(**args) |
182 | |
183 | |
184 | |
168 | 185 |
169 def getSearchResults(self, **args): | 186 def getSearchResults(self, **args): |
170 """loads list of search results and stores XML in docinfo""" | 187 """loads list of search results and stores XML in docinfo""" |
171 return self.template.fulltextclient.getSearchResults(**args) | 188 return self.template.fulltextclient.getSearchResults(**args) |
172 | 189 |
237 def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None): | 254 def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None): |
238 """ | 255 """ |
239 show page | 256 show page |
240 @param url: url which contains display information | 257 @param url: url which contains display information |
241 @param mode: defines how to access the document behind url | 258 @param mode: defines how to access the document behind url |
242 @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto' | 259 @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format |
243 @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text' | 260 @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text' |
244 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) | 261 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
245 """ | 262 """ |
246 | 263 |
247 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf)) | 264 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf)) |
248 | 265 |
249 if not hasattr(self, 'template'): | 266 if not hasattr(self, 'template'): |
250 # this won't work | 267 # this won't work |
251 logging.error("template folder missing!") | 268 logging.error("template folder missing!") |
252 return "ERROR: template folder missing!" | 269 return "ERROR: template folder missing!" |
253 | 270 |
271 | |
272 | |
254 if not getattr(self, 'digilibBaseUrl', None): | 273 if not getattr(self, 'digilibBaseUrl', None): |
255 self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" | 274 self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" |
256 | 275 |
257 # mode=filepath should not have toc-thumbs | 276 # mode=filepath should not have toc-thumbs |
258 if tocMode is None: | 277 if tocMode is None: |
285 | 304 |
286 elif viewMode == 'images': | 305 elif viewMode == 'images': |
287 # legacy fix | 306 # legacy fix |
288 viewMode = 'image' | 307 viewMode = 'image' |
289 self.REQUEST['viewMode'] = 'image' | 308 self.REQUEST['viewMode'] = 'image' |
309 | |
310 | |
311 | |
290 | 312 |
291 # safe viewLayer in userinfo | 313 # safe viewLayer in userinfo |
292 userinfo['viewLayer'] = viewLayer | 314 userinfo['viewLayer'] = viewLayer |
293 | 315 |
294 # pageinfo: information about page (not cached) | 316 # pageinfo: information about page (not cached) |
530 # url points to folder with images, index.meta optional | 552 # url points to folder with images, index.meta optional |
531 # asssume index.meta in parent dir | 553 # asssume index.meta in parent dir |
532 docUrl = getParentPath(url) | 554 docUrl = getParentPath(url) |
533 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | 555 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) |
534 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) | 556 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) |
557 | |
558 elif mode=="hocr": | |
559 # url points to folder with images, index.meta optional | |
560 # asssume index.meta in parent dir | |
561 docUrl = getParentPath(url) | |
562 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | |
563 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) | |
564 docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) | |
565 if docinfo.get("creator", None) is None: | |
566 docinfo['creator'] = "" | |
567 | |
568 if docinfo.get("title", None) is None: | |
569 docinfo['title'] = "" | |
570 | |
571 if docinfo.get("documentPath", None) is None: | |
572 docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) | |
573 docinfo['documentPath'] = url.replace('/pages', '', 1) | |
535 | 574 |
536 elif mode=="filepath": | 575 elif mode=="filepath": |
537 # url points to image file, index.meta optional | 576 # url points to image file, index.meta optional |
538 docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url) | 577 docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url) |
539 docinfo['numPages'] = 1 | 578 docinfo['numPages'] = 1 |
723 if imgEndNo: | 762 if imgEndNo: |
724 docinfo['maxPageNo'] = getInt(imgEndNo) | 763 docinfo['maxPageNo'] = getInt(imgEndNo) |
725 | 764 |
726 # old style text URL | 765 # old style text URL |
727 textUrl = getMDText(texttool.get('text', None)) | 766 textUrl = getMDText(texttool.get('text', None)) |
767 | |
768 | |
769 | |
770 | |
728 if textUrl and docPath: | 771 if textUrl and docPath: |
729 if urlparse.urlparse(textUrl)[0] == "": #keine url | 772 if urlparse.urlparse(textUrl)[0] == "": #keine url |
730 textUrl = os.path.join(docPath, textUrl) | 773 textUrl = os.path.join(docPath, textUrl) |
731 | 774 |
732 docinfo['textURL'] = textUrl | 775 docinfo['textURL'] = textUrl |