Context Navigation

source: documentViewer/documentViewer.py @ 624:80a0191ae51c

Last change on this file since 624:80a0191ae51c was 624:80a0191ae51c, checked in by casties, 9 years ago
move sslification to only client-visible urls.
File size: 47.5 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7
8	import xml.etree.ElementTree as ET
9
10	import os
11	import urllib
12	import logging
13	import math
14	import urlparse
15	import json
16
17	from Products.MetaDataProvider import MetaDataFolder
18
19	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl
20
21
def getMDText(node):
    """Return the '@text' content of a MetaDataProvider metadata node.

    node may be

    - a dict: its '@text' entry is returned (None if there is none),
    - a list of dicts (more than one node of the same name): the '@text'
      of the first entry without an '@attr' entry is returned, or None if
      every entry carries attributes,
    - anything else (e.g. a plain string or None): returned unchanged.
    """
    if isinstance(node, dict):
        return node.get('@text', None)

    if isinstance(node, list):
        # more than one node: don't choose entries that have attributes
        for nodeInList in node:
            if nodeInList.get('@attr', None) is None:
                # read the text from the list entry; the list itself has no get()
                return nodeInList.get('@text', None)

        return None

    return node
35
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed first, then the path is split at '/',
    the last cnt segments are dropped and the rest is joined again.
    """
    segments = path.rstrip('/').split('/')
    remaining = segments[:-cnt]
    return '/'.join(remaining)
42
def getPnForPf(docinfo, pf, default=0):
    """Return the image number for image file name pf, or default.

    The name is looked up in docinfo['imgFileNames']; if it is not found
    the lookup is repeated with the file extension cut off.
    """
    if 'imgFileNames' not in docinfo:
        return default

    fileNames = docinfo['imgFileNames']
    number = fileNames.get(pf, None)
    if number is not None:
        return number

    # not found: try again without extension
    dotPos = pf.rfind('.')
    if dotPos <= 0:
        # no extension to cut
        return default

    return fileNames.get(pf[:dotPos], default)
61
def getPfForPn(docinfo, pn, default=None):
    """Return the image file name for image number pn, or default.

    The number is looked up in docinfo['imgFileIndexes'].
    """
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
69
70
71	##
72	## documentViewer class
73	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    # collects the security declarations made for the methods of this class
    security=ClassSecurityInfo()
    # Folder's management options plus a 'Configuration' entry
    # (action 'changeDocumentViewerForm')
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
    viewer_hocr = PageTemplateFile('zpt/viewer/viewer_hocr', globals())
    viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
    viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
    viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (None: no layers for that mode)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'image': None, 'index': ['extended']}
    # NOTE: this is the very same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/viewer/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/viewer/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/viewer/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/viewer/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/viewer/layer_text_pundit', globals())
    layer_image_annotator = PageTemplateFile('zpt/viewer/layer_image_annotator', globals())
    layer_image_search = PageTemplateFile('zpt/viewer/layer_image_search', globals())
    layer_index_extended = PageTemplateFile('zpt/viewer/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/viewer/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/viewer/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/viewer/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/viewer/toc_concordance', globals())
    toc_notes = PageTemplateFile('zpt/viewer/toc_notes', globals())
    toc_handwritten = PageTemplateFile('zpt/viewer/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/viewer/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/viewer/common_template', globals())
    info_xml = PageTemplateFile('zpt/viewer/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
129
130
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    Stores id, title and the thumbnail grid size, splits authgroups (a
    comma-separated string) into a list of lower-cased group names and
    creates the 'template' folder with a 'fulltextclient' and a 'zogilib'
    object in it.  Failure to create either of them, or to find the
    'metadata' object, is logged and otherwise ignored.

    The digilib URL attributes are only set if digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for groupName in authgroups.split(','):
        groups.append(groupName.strip().lower())

    self.authgroups = groups

    # create template folder so we can always use template.something
    tmplFolder = Folder('template')
    self['template'] = tmplFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        tmplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # zogilib
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for 'zogilib': "+str(err))

    # metadata service
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
170
171
172	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getSearchResults(**args)

def getResultsPage(self, **args):
    """Return one page of the search results (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getResultsPage(**args)

def getTextInfo(self, **args):
    """Return document info from the text server (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTextInfo(**args)

def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)

def getRepositoryType(self, **args):
    """Return the repository type (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getRepositoryType(**args)

def getTextDownloadUrl(self, **args):
    """Return the URL to download the full text (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTextDownloadUrl(**args)

def getPlacesOnPage(self, **args):
    """Return the list of gis places on one page (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getPlacesOnPage(**args)
208
# Thumb list for CoolIris Plugin
# page template loaded from 'zpt/thumbs_main_rss'
thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
# declare thumbs_rss as protected by the 'View' permission
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list with the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text, default is auto (try text, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @return: the rendered template, or an error string if the template folder is missing
    '''
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl is only set by __init__ if a URL was given, so use
    # getattr with a default here (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode selected: use text if a text URL is set, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
242
243
# declare index_html as protected by the 'View' permission
security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    Show a page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto', 'hocr' : hocr format
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start, pn, pf: passed on to getPageinfo
    @return: the rendered template /template/viewer_$viewMode or an error string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode, url=url, tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "image"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    elif viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"

    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
314
def getAvailableLayers(self):
    """Return the dict of available layers (list of layer names per viewMode)."""
    layers = self.availableLayers
    return layers
318
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template.zogilib."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
323
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return the URL to the digilib Scaler with parameters.

    If docinfo contains an 'imageURL' that URL is used as the base,
    otherwise the base is self.digilibScalerUrl with the parameter fn
    (taken from docinfo['imagePath'] if fn is not given).

    @param fn: image file or directory name
    @param pn: page number (only added if true)
    @param dw, dh: values of the dw and dh parameters
    @param docinfo: optional docinfo dict
    @return: the URL as returned by sslifyUrl
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        url = self.digilibScalerUrl
        # start (or continue) the query string; the scaler URL itself has
        # no '?' (imageURL is built as "%s?fn=%s" in the same way)
        if '?' not in url:
            url += '?'
        elif not (url.endswith('?') or url.endswith('&')):
            url += '&'

        url += "fn=%s"%fn

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return sslifyUrl(url, self, force=True)
342
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
346
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended if idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
354
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts with a copy of the current request parameters
    (self.REQUEST.form) and additionally sets param=val and the entries
    of the dict params.  A value of None deletes the key.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str(val)), None removes it
    @param params: dict of more parameters to change (values stored as is)
    @param duplicates: how to treat list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first'
        takes the first non-empty entry (or '' if all are empty);
        anything false leaves lists alone
    @return: new dict, the request parameters are not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # gives '' (like 'comma' does) instead of an IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
393
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or with the parameters from dict params.

    The parameters come from getParams; the values are quoted (without
    escaping '/') and joined with paramSep.  baseUrl defaults to the URL
    of this instance.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        quoted = urllib.quote_plus(utf8ify(value), '/')
        pairs.append("%s=%s"%(key, quoted))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
404
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val.

    Same as getLink, but the parameters are separated by the HTML entity
    for '&' so that the result can be put into HTML or XML as is.
    """
    # NOTE(review): with a plain '&' this method is identical to getLink;
    # the entity was presumably decoded when this source was extracted.
    # It is built by concatenation so that it survives further unescaping.
    ampEntity = '&' + 'amp;'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=ampEntity, duplicates=duplicates)
408
409
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers to newLayerString or autodetect the available layers.

    newLayerString is parsed as JSON and used if it is an object with the
    keys 'text' and 'image'.  Otherwise (or if newLayerString is None) the
    layers are autodetected: starting from a copy of builtinLayers, every
    entry of self.template named layer_{m}_{l} adds layer l to mode m.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            # not a string or not valid JSON
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'image' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that the class-level
    # builtinLayers are not modified when layers are added
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # 'layer', mode and layer name (the layer name may contain '_')
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = (parts[1], parts[2])
        if layers.get(m, None) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
443
def getAvailableLayersJson(self):
    """Return availableLayers serialised as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
447
448
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    Gets the docinfo for mode and url and renders it with the template
    'info_xml' from the template folder.
    """
    # digilibBaseUrl is only set by __init__ if a URL was given, so use
    # getattr with a default here (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
457
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon if there is none.

    A user with the name "Anonymous User" (Zope's anonymous user) counts
    as not authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
465
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    Access type 'free' is always granted.  No access type, or one listed
    in self.authgroups, is granted to authenticated users only.  Any other
    access type is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    clientAddr = self.REQUEST.getClientAddr()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,clientAddr))
    return user is not None
482
def getUserinfo(self):
    """Return the userinfo object cached in the session.

    If the session has no 'userinfo' yet, a new empty dict is stored in
    the session and returned.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (check if its still current?)
        return session['userinfo']

    # nothing cached: store a new one in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
496
def getDocinfoJSON(self, mode, url, tocMode=None):
    """Return the docinfo for mode and url (see getDocinfo) as a JSON string."""
    docinfo = self.getDocinfo(mode, url, tocMode)
    return json.dumps(docinfo)
504
505
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document at url, depending on mode.

    A docinfo stored in the session is reused if its mode and url match.
    Otherwise a new docinfo is assembled from the index.meta metadata
    (resource, texttool, bib, access, attribution, copyright, DRI and
    context data), the full text server and digilib, and stored in the
    session.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' or 'hocr' (url points to folder with images) or
        'filepath' (url points to image file)
    @param url: path or URL of the document
    @param tocMode: not used here
    @return: docinfo dict
    @raise IOError: if no index.meta is found in mode 'texttool'
    @raise ValueError: if mode is unknown
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if docUrl.startswith('/mpiwg/online/'):
            # strip the prefix from docUrl (not from url again) so that
            # the removal of '/index.meta' is kept
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    elif mode=="hocr":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
        docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
        if docinfo.get("creator", None) is None:
            docinfo['creator'] = ""

        if docinfo.get("title", None) is None:
            docinfo['title'] = ""

        if docinfo.get("documentPath", None) is None:
            # remove both the '/mpiwg/online' prefix and '/pages'
            # (two separate assignments would only keep the second one)
            docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1).replace('/pages', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','hocr','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']
        else:
            # no page count could be determined: make sure the key exists
            # (it is read below and would raise a KeyError otherwise)
            docinfo['numPages'] = 0

    # min and max page no
    docinfo['minPageNo'] = docinfo.get('minPageNo', 1)
    docinfo['maxPageNo'] = docinfo.get('maxPageNo', docinfo['numPages'])

    # part-of information
    partOfPath = docinfo.get('partOfPath', None)
    if partOfPath is not None:
        partOfDom = self.metadataService.getDomFromPathOrUrl(partOfPath)
        if partOfDom is not None:
            docinfo['partOfLabel'] = self.metadataService.getBibFormattedLabel(dom=partOfDom)
            docinfo['partOfUrl'] = "%s?url=%s"%(self.getDocumentViewerURL(), partOfPath)
            logging.debug("partOfLabel=%s partOfUrl=%s"%(docinfo['partOfLabel'],docinfo['partOfUrl']))

    # normalize path
    if 'imagePath' in docinfo and not docinfo['imagePath'].startswith('/'):
        docinfo['imagePath'] = '/' + docinfo['imagePath']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
683
684
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName', 'documentPath' and, if the resource is part of
    another one, 'partOfPath'.

    @param docinfo: docinfo dict (modified in place)
    @param resource: resource data from the metadata service
    @return: docinfo
    """
    logging.debug("getDocinfoFromResource: resource=%s"%(repr(resource)))
    docName = getMDText(resource.get('name', None))
    docinfo['documentName'] = docName
    docPath = getMDText(resource.get('archive-path', None))
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under the key
        # 'documentUrl' ('documentURL' is still accepted as a fallback)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath

    # is this part-of?
    partOf = resource.get('is-part-of', None)
    if partOf is not None:
        partOf = getMDText(partOf.get('archive-path', None))
        if partOf is not None:
            docinfo['partOfPath'] = partOf.strip()

    return docinfo
719
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets image path, first and last page number, text URLs, page flow,
    position of odd pages, title page and presentation URL, and fills in
    placeholder creator, title and date if docinfo has none.

    @param docinfo: docinfo dict (modified in place)
    @param texttool: texttool data (or list, of which the first entry is used)
    @return: docinfo
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir:
        if imageDir.startswith('/'):
            # absolute path
            docinfo['imagePath'] = imageDir.replace('/mpiwg/online', '', 1)

        elif docPath:
            # relative path
            fullImageDir = os.path.join(docPath, imageDir)
            docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # start and end page (for subdocuments of other documents)
    firstPage = getInt(getMDText(texttool.get('image-start-no', None)), 1)
    docinfo['minPageNo'] = firstPage

    lastPageText = getMDText(texttool.get('image-end-no', None))
    if lastPageText:
        docinfo['maxPageNo'] = getInt(lastPageText)

    # old style text URL
    oldTextUrl = getMDText(texttool.get('text', None))
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # not a URL: path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    urlNodes = texttool.get('text-url-path', None)
    if not isinstance(urlNodes, list):
        urlNodes = [urlNodes]

    for urlNode in urlNodes:
        textUrl = getMDText(urlNode)
        if not textUrl:
            continue

        atts = urlNode.get('@attr')
        if not (atts and 'repository' in atts):
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl
            continue

        repo = atts['repository']
        # use matching repository
        if self.getRepositoryType() == repo:
            docinfo['textURLPath'] = textUrl
            docinfo['textURLRepository'] = repo
            break

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (defaults to the first page number)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', firstPage))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            presentationUrl = presentation
        else:
            presentationUrl = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentationUrl

    # make sure we have at least fake DC data
    for (field, placeholder) in (('creator', '[no author found]'),
                                 ('title', '[no title found]'),
                                 ('date', '[no date found]')):
        if field not in docinfo:
            docinfo[field] = placeholder

    return docinfo
809
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as 'bib', the value of '@type' as 'bibType'
    and, for convenience, creator, title and date from the DC mapping of
    the metadata service.  bibx is accepted but not used here.

    @return: docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, '')

    return docinfo
823
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Stores the access type (attribute 'type') as 'accessType'.  For the
    types 'group' and 'institution' the lower-cased 'name' of the access
    element is stored instead.  Incomplete access data is logged and
    leaves docinfo unchanged.

    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as err:
        # missing or unexpected access data: keep docinfo as it is
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(err))

    return docinfo
841
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the directory info for path from digilib into docinfo.

    Fetches dirInfo-xml.jsp for path and stores the 'size' as 'numPages'
    and the names and indexes of the entries as 'imgFileNames' (name to
    index) and 'imgFileIndexes' (index to name).  If nothing can be
    fetched, docinfo is returned unchanged.

    @return: docinfo
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    root = ET.fromstring(txt)
    # save size
    size = root.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    nameToIndex = {}
    indexToName = {}
    for entry in root:
        fileName = entry.findtext('name')
        fileIndex = getInt(entry.findtext('index'))
        nameToIndex[fileName] = fileIndex
        indexToName[fileIndex] = fileName

    docinfo['imgFileNames'] = nameToIndex
    docinfo['imgFileIndexes'] = indexToName
    return docinfo
873
874
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml at docinfo['presentationUrl'] (directly if it is a
    URL, otherwise as a path via the digilib Texter servlet) and stores
    its author, title and date as 'creator', 'title' and 'date'.  If
    there is no URL or nothing can be read, docinfo is returned unchanged.

    @return: docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # None or empty: nothing to parse (same check as in getDocinfoFromDigilib)
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
902
903
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    Combines the explicit arguments, values from docinfo and parameters of
    self.REQUEST into one dict describing the current page: view mode and
    layers, page number/file, thumbnail batch, page counts and the search
    and highlighting parameters.

    Side effects visible in this method: when pf is given, 'pf' is replaced
    by 'pn' in self.REQUEST.form; docinfo['numPages'] is set from
    docinfo['numTextPages'] when it was 0; self.getSearchResults() is
    called for text-mode queries.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # a comma-separated viewLayer string is treated like a list
    if isinstance(viewLayer, basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # keep non-empty layers once each, in their original order
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # back to the string form
        viewLayer = ','.join(viewLayers)
    else:
        # single (non-string) value: wrap in a list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    minPageNo = docinfo.get('minPageNo', 1)

    if pf:
        # pf takes precedence over pn
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        self.REQUEST.form.pop('pf', None)
        self.REQUEST.form['pn'] = pn
    else:
        pn = getInt(pn, minPageNo)
        pf = getPfForPn(docinfo, pn)

    pageinfo['pf'] = pf
    pageinfo['pn'] = pn

    # thumbnail grid, falling back to the configured defaults
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # without an explicit start use the first page of the group around pn,
    # but nothing smaller than minPageNo
    groupNo = math.ceil(float(pn)/float(grpsize))
    grouppn = groupNo*grpsize-(grpsize-1)
    start = getInt(start, max(grouppn, minPageNo))
    pageinfo['start'] = start

    # number of pages, using numTextPages if there are no image pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        numPages = docinfo.get('numTextPages', 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    maxPageNo = docinfo.get('maxPageNo', numPages)
    logging.debug("minPageNo=%s maxPageNo=%s start=%s numPages=%s"%(minPageNo,maxPageNo,start,numPages))
    np = maxPageNo

    # table of contents paging
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, counting a partially filled last group
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=minPageNo, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageNumbers = docinfo.get('pageNumbers')
    if pageNumbers:
        # original page numbers for the current page
        pageNumber = pageNumbers.get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # search results (only in text mode)
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
1005
1006
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return dict with array of page information for one screenfull of thumbnails.

    :param start: index of current page
    :param rows: number of rows in one batch
    :param cols: number of columns in one batch
    :param pageFlowLtr: do indexes increase from left to right
    :param pageZero: is there a zeroth non-visible page
    :param minIdx: minimum index to use
    :param maxIdx: maximum index to use (0: use start + rows * cols)
    :returns: dict with
        first: first page index
        last: last page index
        batches: list of all possible batches(dict: 'start': index, 'end': index)
        pages: list for current batch of rows(list of cols(list of pages(dict: 'idx': index)))
        nextStart: first index of next batch (None if there is none)
        prevStart: first index of previous batch (None if there is none)
    """
    logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    np = maxIdx - minIdx + 1
    if pageZero:
        # correct number of pages for batching
        np += 1

    nb = int(math.ceil(np / float(grpsize)))

    # list of all batch start and end points;
    # with a zeroth page the first batch starts one index early
    ofs = minIdx - 1 if pageZero else minIdx
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    # the first screen shows the invisible zeroth page in its first slot
    firstWithZero = pageZero and start == minIdx

    # list of pages for current screen
    idx = minIdx - 1 if firstWithZero else start
    pages = []
    for _r in range(rows):
        row = []
        for _c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > minIdx:
        batch['prevStart'] = max(start - grpsize, minIdx)
    else:
        batch['prevStart'] = None

    # The zeroth page takes one slot of the first screen, so the following
    # batch starts one index earlier (same boundary as in 'batches').
    if firstWithZero:
        nextStart = start + grpsize - 1
    else:
        nextStart = start + grpsize

    if nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    logging.debug("batch: %s"%repr(batch))
    return batch
1095
1096
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return dict with information for one screenfull of data.

    :param start: index of the first element of the current batch
    :param size: number of elements per batch
    :param end: last index (0: use start + size)
    :param data: optional dict with the elements; looked up by absolute
        index if fullData, else by position in the batch starting at 0
    :param fullData: selects the lookup key for data (see above)
    :returns: dict with
        batches: list of all batches(dict: 'start': index, 'end': index)
        this: list of elements of the current batch
        prevStart: start of previous batch or None
        nextStart: start of next batch or None
        first: start
        last: end
    """
    if end == 0:
        end = start + size

    # start and end points of all batches
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)}
               for n in range(numBatches)]

    # elements of the current batch
    stop = min(start + size, end + 1)
    if not data:
        # NOTE(review): without data the placeholder is index + 1 -- confirm
        current = [i + 1 for i in range(start, stop)]
    elif fullData:
        current = [data.get(i, None) for i in range(start, stop)]
    else:
        current = [data.get(j, None) for j in range(stop - start)]

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    # NOTE(review): strict comparison, i.e. no next batch is reported when
    # start + size == end -- confirm against callers
    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {
        'batches': batches,
        'this': current,
        'prevStart': prevStart,
        'nextStart': nextStart,
        'first': start,
        'last': end,
    }
1142
1143
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return list of groups {id:, name:, uri:} on the annotation server for the user.

    Requests annotator/groups below annotationServerUrl and builds one dict
    per entry of the 'rows' list in the JSON answer. The list is empty when
    no data was returned or the answer has no 'rows'.
    """
    # add matching http(s) from our URL
    serverUrl = sslifyUrl(annotationServerUrl, self)

    groupsUrl = "%s/annotator/groups?user=%s"%(serverUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    return [{'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)}
            for r in rows]
1161
1162
# management form for changing the viewer settings,
# declared with the permission 'View management screens'
security.declareProtected('View management screens', 'changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1165
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Init document viewer.

    Stores the given settings on the viewer, derives the digilib scaler and
    viewer URLs from digilibBaseUrl, looks up the 'metadata' object as
    metadata service, sets the available layers and finally redirects to
    'manage_main' if a RESPONSE is given.

    NOTE(review): digilibBaseUrl defaults to None but is concatenated with a
    string below, so calling without it raises a TypeError.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    # comma-separated group names, normalized to lower case
    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())

    self.authgroups = groupNames

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1185
1186
def manage_AddDocumentViewerForm(self):
    """Add the viewer form: render the zpt/addDocumentViewer template in the context of self."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    return template.__of__(self)()
1191
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Add the viewer: create a documentViewer with the given id in self.

    Redirects to 'manage_main' if a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: