Context Navigation

source: documentViewer/documentViewer.py @ 593:eb46138db658

Last change on this file since 593:eb46138db658 was 593:eb46138db658, checked in by casties, 11 years ago
changed layer image_ocrsearch to image_search.
File size: 42.4 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the given ElementTree node serialized as an XML string.

    node -- element to serialize.
    encoding -- accepted for compatibility; it is not passed on to
        ET.tostring, which is called with its default encoding.
    """
    # The earlier 4Suite (Domlette) implementation printed into a string
    # stream using `encoding`; the ElementTree call below replaced it.
    serialized = ET.tostring(node)
    return serialized
35
def getMDText(node):
    """Return the '@text' entry of a metadata node.

    Dict nodes yield their '@text' value (None if absent); any other
    value is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    path -- '/'-separated path; trailing slashes are ignored.
    cnt -- number of segments to remove. Values <= 0 remove nothing
        (a plain [0:-0] slice would wrongly return an empty path).
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        return path

    # split by /, shorten, and reassemble
    segments = path.split('/')
    return '/'.join(segments[:-cnt])
49
def getPnForPf(docinfo, pf, default=0):
    """Return the image number stored for image file name pf.

    Looks pf up in docinfo['imgFileNames']; when there is no entry the
    lookup is repeated with the file extension removed. Returns default
    if docinfo has no 'imgFileNames' or nothing matches.
    """
    if 'imgFileNames' not in docinfo:
        return default

    names = docinfo['imgFileNames']
    number = names.get(pf, None)
    if number is not None:
        return number

    # no direct hit: retry without the extension (if there is one)
    dot = pf.rfind('.')
    if dot <= 0:
        return default

    return names.get(pf[:dot], default)
68
def getPfForPn(docinfo, pn, default=None):
    """Return the image file name stored for image number pn.

    Uses docinfo['imgFileIndexes']; returns default when that mapping is
    missing or has no entry for pn.
    """
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
76
77
78	##
79	## documentViewer class
80	##
81	class documentViewer(Folder):
82	"""document viewer"""
meta_type = "Document viewer"

security = ClassSecurityInfo()
manage_options = Folder.manage_options + (
    {'label':'Configuration','action':'changeDocumentViewerForm'},
)

# MetaDataFolder instance (None until one is assigned)
metadataService = None

#
# templates and forms
#
# viewMode templates
viewer_text = PageTemplateFile('zpt/viewer_text', globals())
viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
viewer_image = PageTemplateFile('zpt/viewer_image', globals())
viewer_index = PageTemplateFile('zpt/viewer_index', globals())
viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())

# available layer types per viewMode (annotator not default)
builtinLayers = {
    'text': ['dict','search','gis'],
    'xml': None,
    'image': None,
    'index': ['extended'],
}
# NOTE: this is the same dict object as builtinLayers, not a copy
availableLayers = builtinLayers

# layer templates
layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
layer_image_annotator = PageTemplateFile('zpt/layer_image_annotator', globals())
layer_image_search = PageTemplateFile('zpt/layer_image_search', globals())
layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())

# table-of-contents templates
toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
toc_text = PageTemplateFile('zpt/toc_text', globals())
toc_figures = PageTemplateFile('zpt/toc_figures', globals())
toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
toc_none = PageTemplateFile('zpt/toc_none', globals())

# other templates and static resources
common_template = PageTemplateFile('zpt/common_template', globals())
info_xml = PageTemplateFile('zpt/info_xml', globals())
docuviewer_css = ImageFile('css/docuviewer.css',globals())
# make docuviewer_css refreshable for development
docuviewer_css.index_html = refreshingImageFileIndexHtml
docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
# docuviewer_ie_css is currently not made refreshable:
#docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
jquery_js = ImageFile('js/jquery.js',globals())
134
135
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up the viewer and its 'template' sub-folder.

    id, title -- stored on the instance.
    imageScalerUrl -- passed to the zogiLib helper as dlServerURL.
    textServerName -- passed to the MpdlXmlTextServer helper as serverName.
    digilibBaseUrl -- if not None, also used to derive the Scaler and
        viewer URLs; if None, none of the three digilib attributes is set.
    thumbcols, thumbrows -- stored as thumbnail grid defaults.
    authgroups -- comma-separated group names; stored stripped and
        lower-cased as a list.

    Failures while creating the helper objects are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib instance
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
175
176
177	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy: return the full text content of a page from fulltextclient."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
181
def getSearchResults(self, **args):
    """Proxy: have fulltextclient load the list of search results (stored as XML in docinfo)."""
    client = self.template.fulltextclient
    return client.getSearchResults(**args)
185
def getResultsPage(self, **args):
    """Proxy: return one page of the search results from fulltextclient."""
    client = self.template.fulltextclient
    return client.getResultsPage(**args)
189
def getTextInfo(self, **args):
    """Proxy: return document info from the text server (fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTextInfo(**args)
193
def getToc(self, **args):
    """Proxy: have fulltextclient load the table of contents (stored as XML in docinfo)."""
    client = self.template.fulltextclient
    return client.getToc(**args)
197
def getTocPage(self, **args):
    """Proxy: return one page of the table of contents from fulltextclient."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
201
def getRepositoryType(self, **args):
    """Proxy: return the repository type reported by fulltextclient."""
    client = self.template.fulltextclient
    return client.getRepositoryType(**args)
205
def getTextDownloadUrl(self, **args):
    """Proxy: return the result of fulltextclient.getTextDownloadUrl."""
    client = self.template.fulltextclient
    return client.getTextDownloadUrl(**args)
209
def getPlacesOnPage(self, **args):
    """Proxy: return the list of GIS places on one page from fulltextclient."""
    client = self.template.fulltextclient
    return client.getPlacesOnPage(**args)
213
214	# Thumb list for CoolIris Plugin
215	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
216	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail RSS list (template 'thumbs_main_rss').

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text,
        default is auto (text if docinfo has a text URL, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo

    Returns the rendered template, or an error string if the
    'template' folder is missing.
    """
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl is only set by __init__ when it was given, so it
    # may not exist at all; guard the lookup like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # text if a text URL is configured, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
247
248
249	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    Show one page of a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start, pn, pf: passed on to getPageinfo

    Returns the rendered template 'viewer_<viewMode>' or an error string.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "image"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    elif viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"

    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
319
def getAvailableLayers(self):
    """Return the dict mapping each viewMode to its list of available layers."""
    layers = self.availableLayers
    return layers
323
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
328
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    fn -- image path; only used when docinfo provides no 'imageURL'.
        Defaults to docinfo['imagePath'] when docinfo is given.
    pn -- page number, appended as 'pn' when truthy.
    dw, dh -- requested width and height.
    docinfo -- optional docinfo dict; its 'imageURL' (which already
        carries the query string) takes precedence over fn.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # digilibScalerUrl has no query part, so the first parameter
        # needs the '?' separator (same form as used for imageURL)
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
347
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
351
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
359
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters based on the current request.

    Starts from a copy of REQUEST.form and applies the changes:
    param, val -- set a single parameter to str(val); val=None removes it.
    params -- dict of further parameters; a None value removes the key.
    duplicates -- how to flatten list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' if there is none). Any other truthy
        value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys); iterate over a
        # snapshot since values are replaced while looping
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join([t for t in v if t])
            elif duplicates == 'first':
                # take first non-empty entry; fall back to '' instead of
                # failing when all entries are empty
                newParams[k] = next((t for t in v if t), '')

    return newParams
398
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with changed parameters.

    param, val, params, duplicates -- passed to getParams to build the
        parameter dict.
    baseUrl -- URL to link to; defaults to getDocumentViewerURL().
    paramSep -- separator placed between the key=value pairs.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        quoted = urllib.quote_plus(utf8ify(v), '/')
        pairs.append("%s=%s"%(k, quoted))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
409
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments with a fixed parameter separator."""
    # NOTE(review): the separator here equals getLink's default; the method
    # name suggests an HTML-escaped ampersand was intended -- confirm.
    separator = '&'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=separator, duplicates=duplicates)
413
414
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the layers.

    newLayerString -- JSON object mapping viewModes to layer lists. It is
        used only if it parses and contains both 'text' and 'image';
        otherwise an error is logged and autodetection runs.

    Autodetection starts from a copy of builtinLayers and adds a layer l
    to mode m for every id in the template folder of the form
    layer_{m}_{l}.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (ValueError, TypeError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'image' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists as well so that appending
    # below does not modify the shared class-level builtinLayers
    detected = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            detected[mode] = None
        else:
            detected[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # layer_{m}_{l}; the layer name itself may contain '_'
        parts = t.split('_', 2)
        if len(parts) != 3:
            continue

        (m, l) = (parts[1], parts[2])
        if detected.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            detected[m] = [l]
        elif l not in detected[m]:
            # m exists -> append
            detected[m].append(l)

    self.availableLayers = detected
448
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
452
453
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    url, mode -- passed on to getDocinfo.
    """
    # digilibBaseUrl may not exist at all when it was not given to
    # __init__; guard the lookup like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
462
def getAuthenticatedUser(self, anon=None):
    """Return the current user object, or anon for Zope's anonymous user.

    anon -- value returned when there is no user or the user name is
        "Anonymous User".
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
470
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    'free' access is always granted. A missing access type, or one listed
    in authgroups, requires an authenticated user. Every other access
    type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
487
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored in the session when none is cached yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # TODO: check if it is still current?
        return session['userinfo']

    # nothing cached: store a fresh dict in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
501
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document, depending on mode.

    mode -- 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to a folder with images) or
        'filepath' (url points to a single image file).
    url -- location of the document, interpreted according to mode.
    tocMode -- accepted but not used in this method.

    A docinfo cached in the session is reused when its mode and url
    match; otherwise a new one is assembled from index.meta, the text
    server and digilib, and stored in the session.

    Raises IOError if mode is 'texttool' and no index.meta is found,
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        docinfo = session['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if docUrl.startswith('/mpiwg/online/'):
            # strip the prefix from docUrl (not from url) so that the
            # removal of '/index.meta' above is kept
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgPath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgPath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgPath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages again
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
646
647
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (with a leading
    '/' and the document name appended if missing). Without an
    archive-path, docinfo['documentUrl'] is used unless it is an http(s)
    URL. A leading '/mpiwg/online' is removed from the result.
    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under the key
        # 'documentUrl' (it may be missing or None)
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
672
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool -- texttool metadata dict, or a list of which only the
        first entry is used.

    Sets (when available) imagePath, textURL, textURLPath,
    textURLRepository and presentationUrl, and always pageFlow, oddPage
    and titlePage. Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme -> not a URL, treat as path below docPath
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # getMDText passes non-dict nodes through, so tun may be a plain
        # string without attributes
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if textUrlAtts and 'repository' in textUrlAtts:
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith(('http:', 'https:')):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
735
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    bib -- raw bib metadata; stored as docinfo['bib'], its '@type' as
        docinfo['bibType'].
    bibx -- accepted but not used here.

    Also stores creator, title and date from the DC mapping of bib
    (empty string when missing). Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,'')

    return docinfo
749
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    acc -- access metadata; its ['@attr']['type'] becomes
        docinfo['accessType']. For the types 'group' and 'institution'
        the lower-cased acc['name'] is used instead.

    Metadata that lacks the expected structure is ignored (best effort).
    Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # incomplete or unexpected access metadata: leave docinfo as is
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
767
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages and the image file list from digilib.

    Fetches dirInfo-xml.jsp for path and stores the 'size' element as
    docinfo['numPages'] (0 if missing). With a size, the child elements
    that carry a name are also stored as docinfo['imgFileNames']
    (name -> index) and docinfo['imgFileIndexes'] (index -> name).
    Returns docinfo; it is returned unchanged if nothing could be fetched.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirElem = ET.fromstring(txt)
    # save size
    size = dirElem.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    imgIndexes = {}
    for child in dirElem:
        fn = child.findtext('name')
        if fn is None:
            # not a file entry (e.g. the size element itself)
            continue

        pn = getInt(child.findtext('index'))
        imgNames[fn] = pn
        imgIndexes[pn] = fn

    docinfo['imgFileNames'] = imgNames
    docinfo['imgFileIndexes'] = imgIndexes
    return docinfo
799
800
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation info.xml.

    Reads docinfo['presentationUrl'] (an http(s) URL, or a path that is
    fetched through the digilib Texter servlet) and stores the text of
    its author, title and date elements as creator, title and date.
    Returns docinfo; it is returned unchanged if there is no URL or
    nothing could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # None or empty response: nothing to parse
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
828
829
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    pn -- page number (default 1); ignored when pf is given.
    pf -- image file name; takes precedence over pn and replaces the
        'pf' request parameter by the resolved 'pn'.
    start -- first page of the thumbnail group (default: start of the
        group containing pn).
    rows, cols -- thumbnail grid size (default thumbrows / thumbcols).
    docinfo -- document info dict; numPages may be filled in from
        numTextPages.
    userinfo -- accepted but not used here.
    viewMode, viewLayer, tocMode -- stored in pageinfo; viewLayer may be
        a comma-separated string or a list.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list of non-empty layers in viewLayers
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # pf takes precedence over pn
    if pf:
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        form = self.REQUEST.form
        form.pop('pf', None)
        form['pn'] = pn
    else:
        pn = getInt(pn, 1)
        pf = getPfForPn(docinfo, pn)
        pageinfo['pf'] = pf

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around pn
    groupStart = math.ceil(float(pn)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # table of contents page size
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups (a partial group counts as one)
    numgroups = int(np / grpsize)
    pageinfo['numgroups'] = numgroups
    if np % grpsize > 0:
        pageinfo['numgroups'] = numgroups + 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # search results
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
924
925
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page information for one screenfull of thumbnails

    start -- index of the first page on this screen
    rows, cols -- dimensions of the thumbnail grid; rows * cols pages form one group
    pageFlowLtr -- if false every row is filled from right to left
    pageZero -- if true batches are counted from 0 and the first screen starts at page 0
    minIdx, maxIdx -- range of valid page indexes; maxIdx == 0 means start + group size

    The result has the keys 'batches' (list of {'start':, 'end':} for all groups),
    'pages' (list of rows of {'idx':}, idx is None outside minIdx..maxIdx),
    'prevStart' and 'nextStart' (None if there is no such screen), 'first' and 'last'.
    """
    groupSize = rows * cols
    if maxIdx == 0:
        maxIdx = start + groupSize

    # batches are counted from 0 when a zeroth page is shown, otherwise from 1
    firstIdx = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(groupSize)))
    batches = []
    for n in range(numBatches):
        batchStart = n * groupSize + firstIdx
        batches.append({'start': batchStart, 'end': min(batchStart + groupSize - 1, maxIdx)})

    # the very first screen begins with the zeroth page if there is one
    cursor = 0 if (pageZero and start == 1) else start
    pages = []
    for _row in range(rows):
        cells = []
        for _col in range(cols):
            inRange = minIdx <= cursor <= maxIdx
            cells.append({'idx': cursor if inRange else None})
            cursor += 1

        if not pageFlowLtr:
            # right-to-left page flow: mirror the row
            cells.reverse()

        pages.append(cells)

    hasPrev = start > 1
    hasNext = start + groupSize <= maxIdx
    return {
        'batches': batches,
        'pages': pages,
        'prevStart': max(start - groupSize, 1) if hasPrev else None,
        'nextStart': start + groupSize if hasNext else None,
        'first': minIdx,
        'last': maxIdx,
    }
985
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start -- index of the first element of this batch
    size -- number of elements per batch
    end -- index of the last element (inclusive); 0 means start + size
    data -- optional dict of elements; if empty or None the batch
            contains the numbers index + 1 instead
    fullData -- if true data is keyed by absolute index, otherwise by
                position inside this batch (starting at 0)

    The result has the keys 'batches' (list of {'start':, 'end':} for all
    batches), 'this' (elements of this batch), 'prevStart' and 'nextStart'
    (None if there is no such batch), 'first' and 'last'.
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)}
               for n in range(numBatches)]

    # list of elements in this batch (end is inclusive)
    this = []
    for pos, idx in enumerate(range(start, min(start + size, end + 1))):
        if data:
            key = idx if fullData else pos
            item = data.get(key, None)
        else:
            item = idx + 1

        this.append(item)

    hasPrev = start > 1
    # end is inclusive, so a batch starting exactly at end still holds one
    # element; using "<" here dropped the link to that last batch
    hasNext = start + size <= end
    return {
        'batches': batches,
        'this': this,
        'prevStart': max(start - size, 1) if hasPrev else None,
        'nextStart': start + size if hasNext else None,
        'first': start,
        'last': end,
    }
1031
1032
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {id:, name:, uri:} on the annotation server for the user

    user -- user whose groups are requested; it is converted to a string
            and URL-encoded for the request
    annotationServerUrl -- base URL of the annotation server

    Returns an empty list if the server sends no data, the reply is not
    valid JSON or the reply has no 'rows' member.
    """
    groups = []
    # convert to string like plain %s formatting would
    userParam = "%s" % (user,)
    if not isinstance(userParam, str):
        # unicode object (Python 2): quote() needs a byte string
        userParam = userParam.encode('utf-8')

    # percent-encode the user so that spaces, '&', '#' etc. can not break the query string
    groupsUrl = "%s/annotator/groups?user=%s" % (annotationServerUrl, urllib.quote(userParam, safe=''))
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the request is best-effort (noExceptions=True), so a broken reply must not raise either
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s" % (groupsUrl, e))
        return groups

    rows = res.get('rows', None) if isinstance(res, dict) else None
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1047
1048
# Protect changeDocumentViewerForm with the 'View management screens' permission
# (security is presumably the ClassSecurityInfo of the enclosing class -- confirm).
security.declareProtected('View management screens','changeDocumentViewerForm')
# Form for changing the viewer settings, a page template read from zpt/changeDocumentViewer.
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1051
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title -- title of the viewer
    digilibBaseUrl -- base URL of digilib; the Scaler and viewer URLs are
                      derived from it (both are None if no base URL is given)
    thumbrows, thumbcols -- default number of rows and columns of thumbnails
    authgroups -- comma separated list of group names; stored stripped and lower-cased
    availableLayers -- passed on to setAvailableLayers()
    RESPONSE -- if given, it is redirected to manage_main
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # no base URL (the default): None + '/servlet/Scaler' would raise TypeError
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is only logged, the other settings are still applied
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1071
def manage_AddDocumentViewerForm(self):
    """add the viewer form: renders the page template zpt/addDocumentViewer"""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this object via __of__ before calling it
    boundTemplate = template.__of__(self)
    return boundTemplate()
1076
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """add the viewer: creates a documentViewer called id and stores it in self"""
    viewer = documentViewer(
        id,
        title=title,
        imageScalerUrl=imageScalerUrl,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    # send the browser to manage_main after adding
    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: