comparison documentViewer.py @ 617:7aefbddddaf9

alpha of hocr server support
author dwinter
date Wed, 23 Jul 2014 17:36:04 +0200
parents c57d80a649ea
children 54d3498a6e78
comparison
equal deleted inserted replaced
616:3f9b42840901 617:7aefbddddaf9
19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml 19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
20 20
21 21
22 def getMDText(node): 22 def getMDText(node):
23 """returns the @text content from the MetaDataProvider metadata node""" 23 """returns the @text content from the MetaDataProvider metadata node"""
24
25
26
24 if isinstance(node, dict): 27 if isinstance(node, dict):
25 return node.get('@text', None) 28 return node.get('@text', None)
26 29
30 if isinstance(node,list): #more than one text file if there is an attribute don't choose it
31 for nodeInList in node:
32 attr = nodeInList.get("@attr",None)
33 if attr is None:
34 return node.get('@text',None)
35 return None
36
37
38
27 return node 39 return node
28 40
29 def getParentPath(path, cnt=1): 41 def getParentPath(path, cnt=1):
30 """returns pathname shortened by cnt""" 42 """returns pathname shortened by cnt"""
31 # make sure path doesn't end with / 43 # make sure path doesn't end with /
80 # 92 #
81 # templates and forms 93 # templates and forms
82 # 94 #
83 # viewMode templates 95 # viewMode templates
84 viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals()) 96 viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
97 viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals())
85 viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals()) 98 viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
86 viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals()) 99 viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
87 viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals()) 100 viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
88 viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals()) 101 viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
89 viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals()) 102 viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())
162 175
163 176
164 # proxy text server methods to fulltextclient 177 # proxy text server methods to fulltextclient
165 def getTextPage(self, **args): 178 def getTextPage(self, **args):
166 """returns full text content of page""" 179 """returns full text content of page"""
180
167 return self.template.fulltextclient.getTextPage(**args) 181 return self.template.fulltextclient.getTextPage(**args)
182
183
184
168 185
169 def getSearchResults(self, **args): 186 def getSearchResults(self, **args):
170 """loads list of search results and stores XML in docinfo""" 187 """loads list of search results and stores XML in docinfo"""
171 return self.template.fulltextclient.getSearchResults(**args) 188 return self.template.fulltextclient.getSearchResults(**args)
172 189
237 def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None): 254 def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
238 """ 255 """
239 show page 256 show page
240 @param url: url which contains display information 257 @param url: url which contains display information
241 @param mode: defines how to access the document behind url 258 @param mode: defines how to access the document behind url
242 @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto' 259 @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format
243 @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text' 260 @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
244 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 261 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
245 """ 262 """
246 263
247 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf)) 264 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))
248 265
249 if not hasattr(self, 'template'): 266 if not hasattr(self, 'template'):
250 # this won't work 267 # this won't work
251 logging.error("template folder missing!") 268 logging.error("template folder missing!")
252 return "ERROR: template folder missing!" 269 return "ERROR: template folder missing!"
253 270
271
272
254 if not getattr(self, 'digilibBaseUrl', None): 273 if not getattr(self, 'digilibBaseUrl', None):
255 self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" 274 self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
256 275
257 # mode=filepath should not have toc-thumbs 276 # mode=filepath should not have toc-thumbs
258 if tocMode is None: 277 if tocMode is None:
285 304
286 elif viewMode == 'images': 305 elif viewMode == 'images':
287 # legacy fix 306 # legacy fix
288 viewMode = 'image' 307 viewMode = 'image'
289 self.REQUEST['viewMode'] = 'image' 308 self.REQUEST['viewMode'] = 'image'
309
310
311
290 312
291 # save viewLayer in userinfo 313 # save viewLayer in userinfo
292 userinfo['viewLayer'] = viewLayer 314 userinfo['viewLayer'] = viewLayer
293 315
294 # pageinfo: information about page (not cached) 316 # pageinfo: information about page (not cached)
530 # url points to folder with images, index.meta optional 552 # url points to folder with images, index.meta optional
531 # assume index.meta in parent dir 553 # assume index.meta in parent dir
532 docUrl = getParentPath(url) 554 docUrl = getParentPath(url)
533 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 555 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
534 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) 556 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
557
558 elif mode=="hocr":
559 # url points to folder with images, index.meta optional
560 # assume index.meta in parent dir
561 docUrl = getParentPath(url)
562 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
563 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
564 docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
565 if docinfo.get("creator", None) is None:
566 docinfo['creator'] = ""
567
568 if docinfo.get("title", None) is None:
569 docinfo['title'] = ""
570
571 if docinfo.get("documentPath", None) is None:
572 docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
573 docinfo['documentPath'] = url.replace('/pages', '', 1)
535 574
536 elif mode=="filepath": 575 elif mode=="filepath":
537 # url points to image file, index.meta optional 576 # url points to image file, index.meta optional
538 docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url) 577 docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
539 docinfo['numPages'] = 1 578 docinfo['numPages'] = 1
723 if imgEndNo: 762 if imgEndNo:
724 docinfo['maxPageNo'] = getInt(imgEndNo) 763 docinfo['maxPageNo'] = getInt(imgEndNo)
725 764
726 # old style text URL 765 # old style text URL
727 textUrl = getMDText(texttool.get('text', None)) 766 textUrl = getMDText(texttool.get('text', None))
767
768
769
770
728 if textUrl and docPath: 771 if textUrl and docPath:
729 if urlparse.urlparse(textUrl)[0] == "": #keine url 772 if urlparse.urlparse(textUrl)[0] == "": #keine url
730 textUrl = os.path.join(docPath, textUrl) 773 textUrl = os.path.join(docPath, textUrl)
731 774
732 docinfo['textURL'] = textUrl 775 docinfo['textURL'] = textUrl