Context Navigation

source: documentViewer/documentViewer.py @ 613:c57d80a649ea

Last change on this file since 613:c57d80a649ea was 613:c57d80a649ea, checked in by casties, 11 years ago
CLOSED - # 281: List of thumbnails verschluckt Seite, wenn odd-scan-position gesetzt ist https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/ticket/281
File size: 46.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7
8	import xml.etree.ElementTree as ET
9
10	import os
11	import urllib
12	import logging
13	import math
14	import urlparse
15	import json
16
17	from Products.MetaDataProvider import MetaDataFolder
18
19	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
20
21
def getMDText(node):
    """Return the text payload of a MetaDataProvider metadata node.

    A dict node yields its '@text' entry (None when the entry is missing);
    every other value is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
28
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed before splitting. With cnt=0 the
    (slash-stripped) path is returned unchanged.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt == 0:
        # a slice ending at -0 is empty and would wipe out the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
35
def getPnForPf(docinfo, pf, default=0):
    """Look up the image number for image file name pf.

    Uses the docinfo['imgFileNames'] table. When the full name is not
    listed, the lookup is repeated without the file extension. Returns
    default when there is no table or no match.
    """
    if 'imgFileNames' not in docinfo:
        return default

    fileNumbers = docinfo['imgFileNames']
    pageNo = fileNumbers.get(pf, None)
    if pageNo is not None:
        return pageNo

    # no exact match: retry with the extension cut off
    dot = pf.rfind('.')
    if dot <= 0:
        # no extension to cut
        return default

    return fileNumbers.get(pf[:dot], default)
54
def getPfForPn(docinfo, pn, default=None):
    """Look up the image file name for image number pn.

    Uses the docinfo['imgFileIndexes'] table; returns default when there
    is no table or no entry for pn.
    """
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
62
63
64	##
65	## documentViewer class
66	##
67	class documentViewer(Folder):
68	"""document viewer"""
69	meta_type="Document viewer"
70
71	security=ClassSecurityInfo()
72	manage_options=Folder.manage_options+(
73	{'label':'Configuration','action':'changeDocumentViewerForm'},
74	)
75
76	metadataService = None
77	"""MetaDataFolder instance"""
78
79
80	#
81	# templates and forms
82	#
83	# viewMode templates
84	viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
85	viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
86	viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
87	viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
88	viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
89	viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())
90	# available layer types (annotator not default)
91	builtinLayers = {'text': ['dict','search','gis'],
92	'xml': None, 'image': None, 'index': ['extended']}
93	availableLayers = builtinLayers;
94	# layer templates
95	layer_text_dict = PageTemplateFile('zpt/viewer/layer_text_dict', globals())
96	layer_text_search = PageTemplateFile('zpt/viewer/layer_text_search', globals())
97	layer_text_annotator = PageTemplateFile('zpt/viewer/layer_text_annotator', globals())
98	layer_text_gis = PageTemplateFile('zpt/viewer/layer_text_gis', globals())
99	layer_text_pundit = PageTemplateFile('zpt/viewer/layer_text_pundit', globals())
100	layer_image_annotator = PageTemplateFile('zpt/viewer/layer_image_annotator', globals())
101	layer_image_search = PageTemplateFile('zpt/viewer/layer_image_search', globals())
102	layer_index_extended = PageTemplateFile('zpt/viewer/layer_index_extended', globals())
103	# toc templates
104	toc_thumbs = PageTemplateFile('zpt/viewer/toc_thumbs', globals())
105	toc_text = PageTemplateFile('zpt/viewer/toc_text', globals())
106	toc_figures = PageTemplateFile('zpt/viewer/toc_figures', globals())
107	toc_concordance = PageTemplateFile('zpt/viewer/toc_concordance', globals())
108	toc_notes = PageTemplateFile('zpt/viewer/toc_notes', globals())
109	toc_handwritten = PageTemplateFile('zpt/viewer/toc_handwritten', globals())
110	toc_none = PageTemplateFile('zpt/viewer/toc_none', globals())
111	# other templates
112	common_template = PageTemplateFile('zpt/viewer/common_template', globals())
113	info_xml = PageTemplateFile('zpt/viewer/info_xml', globals())
114	docuviewer_css = ImageFile('css/docuviewer.css',globals())
115	# make docuviewer_css refreshable for development
116	docuviewer_css.index_html = refreshingImageFileIndexHtml
117	docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
118	# make docuviewer_ie_css refreshable for development
119	#docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
120	jquery_js = ImageFile('js/jquery.js',globals())
121
122
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores the basic settings, creates the 'template' sub-folder and tries
    to put a 'fulltextclient' (MpdlXmlTextServer) and a 'zogilib' (zogiLib)
    into it. Failures of these optional parts are only logged. The digilib
    URL attributes are assigned only when digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups comes in as one comma-delimited string;
    # keep a list of stripped, lower-cased group names
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.authgroups = groups

    # the template folder is always created so template.something works
    tmpl = Folder('template')
    self['template'] = tmpl # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        tmpl['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmpl['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for 'zogilib': "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
162
163
164	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy to the fulltextclient: returns full text content of page."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
168
def getSearchResults(self, **args):
    """Proxy to the fulltextclient: loads list of search results and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
172
def getResultsPage(self, **args):
    """Proxy to the fulltextclient: returns one page of the search results."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
176
def getTextInfo(self, **args):
    """Proxy to the fulltextclient: returns document info from the text server."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
180
def getToc(self, **args):
    """Proxy to the fulltextclient: loads table of contents and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
184
def getTocPage(self, **args):
    """Proxy to the fulltextclient: returns one page of the table of contents."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
188
def getRepositoryType(self, **args):
    """Proxy to the fulltextclient: get repository type."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
192
def getTextDownloadUrl(self, **args):
    """Proxy to the fulltextclient: get URL to download the full text."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
196
def getPlacesOnPage(self, **args):
    """Proxy to the fulltextclient: get list of gis places on one page."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
200
201	# Thumb list for CoolIris Plugin
202	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
203	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list with the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text, default is auto (try text, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @return: output of the template, or an error string when the
        template folder is missing
    """
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl is only assigned in __init__ when a URL was given, so
    # the attribute may not exist at all -- use getattr like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is known, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
234
235
236	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    Show one page of the document using the template viewer_$viewMode.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @param pf: page image file name, passed on to getPageinfo
    @return: output of the template, or an error string when the template
        folder or the template for viewMode is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    if tocMode is None:
        # mode=filepath should not have toc-thumbs
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"

    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "image"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
306
def getAvailableLayers(self):
    """Return the availableLayers dict (list of available layers per viewMode)."""
    layers = self.availableLayers
    return layers
310
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
315
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    Uses docinfo['imageURL'] as base when present. Otherwise the URL is
    built from self.digilibScalerUrl and fn, where fn defaults to
    docinfo['imagePath'] if a docinfo is given.

    @param fn: image file or directory path
    @param pn: page number; only added when true
    @param dw: destination width
    @param dh: destination height
    @param docinfo: docinfo dict (optional)
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # digilibScalerUrl has no query part, so the '?' has to be added
        # here (same form as the imageURL built in getDocinfo)
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
334
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
338
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style

    return style + 'sel'
346
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts with a copy of the request form parameters and additionally
    applies param=val and/or the dict params. A value of None deletes the
    key.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as string), None removes it
    @param params: dict with more parameters to change, None values remove
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry (empty string if there is none)
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list with only empty
                # entries collapses to '' like it does for 'comma'
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
385
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return URL to the documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); values are passed through
    utf8ify and quoted with urllib.quote_plus (not escaping '/'). baseUrl
    defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble the key=value pairs
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
396
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return link to the documentviewer with parameter param set to val.

    Calls getLink() with an explicit parameter separator.
    """
    # NOTE(review): the separator is the same as getLink's default; the method
    # name suggests it may have been meant to be the HTML-escaped form -- confirm
    separator = '&'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=separator, duplicates=duplicates)
400
401
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the layers.

    newLayerString is parsed as JSON and accepted when it is an object
    with the keys 'text' and 'image'. Otherwise (or when it is None) the
    layers are autodetected: starting from a copy of builtinLayers, every
    entry of the template folder named layer_{m}_{l} adds layer l to
    mode m.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            layers = None

        # only a JSON object counts; a plain string would also pass 'in'
        if isinstance(layers, dict) and 'text' in layers and 'image' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that appending
    # below does not modify the shared builtinLayers
    detected = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            detected[mode] = None
        else:
            detected[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # maxsplit=2 keeps underscores inside the layer name
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            continue

        (m, l) = (parts[1], parts[2])
        if detected.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            detected[m] = [l]
        elif l not in detected[m]:
            # m exists -> append
            detected[m].append(l)

    self.availableLayers = detected
435
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
439
440
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # digilibBaseUrl is only assigned in __init__ when a URL was given, so
    # the attribute may not exist at all -- use getattr like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
449
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon.

    anon is returned when the security manager has no user or the user is
    named "Anonymous User".
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
457
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    Access type 'free' is always accessible. No access type, or one listed
    in self.authgroups, requires an authenticated user. Any other access
    type is denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
474
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored in the session under 'userinfo' when there
    is none yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (check if its still current?)
        return session['userinfo']

    # store in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
488
def getDocinfoJSON(self, mode, url, tocMode=None):
    """Return the docinfo from getDocinfo() serialized as a JSON string."""
    return json.dumps(self.getDocinfo(mode, url, tocMode))
496
497
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document at url, building it if needed.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match the arguments. Otherwise a new dict is assembled from index.meta
    (via self.metadataService), the full text server and digilib, stored
    in the session under 'docinfo' and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to image file)
    @param url: path or URL of the document
    @param tocMode: accepted from callers; not used in this method
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if url.startswith('/mpiwg/online/'):
            # strip the prefix from docUrl (not url) so that the
            # '/index.meta' removal above is not lost again
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    # min and max page no
    docinfo['minPageNo'] = docinfo.get('minPageNo', 1)
    # 'numPages' is missing when digilib could not be read, so it must
    # not be accessed with [] here
    docinfo['maxPageNo'] = docinfo.get('maxPageNo', docinfo.get('numPages', 0))

    # part-of information
    partOfPath = docinfo.get('partOfPath', None)
    if partOfPath is not None:
        partOfDom = self.metadataService.getDomFromPathOrUrl(partOfPath)
        if partOfDom is not None:
            docinfo['partOfLabel'] = self.metadataService.getBibFormattedLabel(dom=partOfDom)
            docinfo['partOfUrl'] = "%s?url=%s"%(self.getDocumentViewerURL(), partOfPath)
            logging.debug("partOfLabel=%s partOfUrl=%s"%(docinfo['partOfLabel'],docinfo['partOfUrl']))

    # normalize path
    if 'imagePath' in docinfo and not docinfo['imagePath'].startswith('/'):
        docinfo['imagePath'] = '/' + docinfo['imagePath']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
658
659
def getDocinfoFromResource(self, docinfo, resource):
    """Read contents of the resource element into docinfo.

    Sets 'documentName' and 'documentPath', and 'partOfPath' when the
    resource has an 'is-part-of' entry with an 'archive-path'. Without an
    'archive-path' in resource the document URL from docinfo is used as
    path unless it starts with 'http:'.

    @return: the updated docinfo
    """
    logging.debug("getDocinfoFromResource: resource=%s"%(repr(resource)))
    docName = getMDText(resource.get('name', None))
    docinfo['documentName'] = docName
    docPath = getMDText(resource.get('archive-path', None))
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the old spelling 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath

    # is this part-of?
    partOf = resource.get('is-part-of', None)
    if partOf is not None:
        partOf = getMDText(partOf.get('archive-path', None))
        if partOf is not None:
            docinfo['partOfPath'] = partOf.strip()

    return docinfo
694
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read contents of the texttool element into docinfo.

    Sets (when available) 'imagePath', 'minPageNo', 'maxPageNo',
    'textURL', 'textURLPath', 'textURLRepository', 'pageFlow', 'oddPage',
    'titlePage' and 'presentationUrl', and fills in placeholder values for
    'creator', 'title' and 'date' if they are missing.

    @param texttool: texttool data; a list is unpacked to its first entry
    @return: the updated docinfo
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir:
        if imageDir.startswith('/'):
            # absolute path
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        elif docPath:
            # relative path
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

    # start and end page (for subdocuments of other documents)
    imgStartNo = getMDText(texttool.get('image-start-no', None))
    minPageNo = getInt(imgStartNo, 1)
    docinfo['minPageNo'] = minPageNo

    imgEndNo = getMDText(texttool.get('image-end-no', None))
    if imgEndNo:
        docinfo['maxPageNo'] = getInt(imgEndNo)

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme -> path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # getMDText passes plain (non-dict) nodes through, and those
        # have no attributes to look at
        textUrlAtts = None
        if isinstance(tun, dict):
            textUrlAtts = tun.get('@attr')

        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default: first page number)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', minPageNo))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    # make sure we have at least fake DC data
    if 'creator' not in docinfo:
        docinfo['creator'] = '[no author found]'

    if 'title' not in docinfo:
        docinfo['title'] = '[no title found]'

    if 'date' not in docinfo:
        docinfo['date'] = '[no date found]'

    return docinfo
783
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' (empty string when missing).
    bibx is accepted but not used here.

    @return: the updated docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, '')

    return docinfo
797
def getDocinfoFromAccess(self, docinfo, acc):
    """Read contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'].
    For the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead. Data that does not have this structure is ignored
    (best effort), docinfo is then left unchanged.

    @return: the updated docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # missing or unexpectedly shaped access data: keep going without it
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
815
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages and the image file tables from digilib.

    Fetches digilibBaseUrl + "/dirInfo-xml.jsp?fn=" + path. Sets
    docinfo['numPages'] from the 'size' element (0 when it is empty or
    missing) and, when there are pages, 'imgFileNames' (file name -> index)
    and 'imgFileIndexes' (index -> file name). docinfo is returned
    unchanged when nothing could be fetched.

    @param path: image directory path for digilib
    @return: the updated docinfo
    """
    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirInfo = ET.fromstring(txt)
    # save size
    size = dirInfo.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if size:
        docinfo['numPages'] = int(size)
    else:
        docinfo['numPages'] = 0
        return docinfo

    # save list of image names and numbers
    imgNames = {}
    imgIndexes = {}
    for f in dirInfo:
        fn = f.findtext('name')
        if fn is None:
            # child without a name (e.g. the 'size' element read above)
            # is not a file entry
            continue

        pn = getInt(f.findtext('index'))
        imgNames[fn] = pn
        imgIndexes[pn] = fn

    docinfo['imgFileNames'] = imgNames
    docinfo['imgFileIndexes'] = imgIndexes
    return docinfo
847
848
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation info.xml.

    Reads the XML document at docinfo['presentationUrl'] and sets
    'creator', 'title' and 'date' from its author, title and date
    elements. A presentationUrl that is not a http(s) URL is fetched
    through digilibBaseUrl + "/servlet/Texter?fn=". docinfo is returned
    unchanged when there is no URL or nothing could be read.

    @return: the updated docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # nothing (or an empty document) came back; parsing would fail
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
876
877
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given page and view parameters.

    :param pn: page number (used when pf is empty)
    :param pf: page image file name; takes precedence over pn
    :param start: first page of the thumbnail batch; derived from pn if empty
    :param rows: number of thumbnail rows (defaults to self.thumbrows)
    :param cols: number of thumbnail columns (defaults to self.thumbcols)
    :param docinfo: dict with document information; an empty dict is used
        when None is given
    :param userinfo: not used by this method
    :param viewMode: stored as pageinfo['viewMode']
    :param viewLayer: comma separated string or list of layer names
    :param tocMode: stored as pageinfo['tocMode']
    :returns: dict with the page parameters, the thumbnail batch
        ('pageBatch') and search/highlight parameters read from REQUEST
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    if docinfo is None:
        # honour the declared default instead of failing on docinfo.get()
        docinfo = {}

    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers; set.add() returns None so
        # "not seen.add(l)" is always true and only records the name
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    minPageNo = docinfo.get('minPageNo', 1)

    # pf takes precedence over pn
    if pf:
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        self.REQUEST.form.pop('pf', None)
        self.REQUEST.form['pn'] = pn
    else:
        pn = getInt(pn, minPageNo)
        pf = getPfForPn(docinfo, pn)
        pageinfo['pf'] = pf

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around pn
    # (math.ceil returns a float, page numbers are kept as ints)
    grouppn = int(math.ceil(float(pn)/float(grpsize)))*grpsize-(grpsize-1)
    # but not smaller than minPageNo
    start = getInt(start, max(grouppn, minPageNo))
    pageinfo['start'] = start
    # get number of pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        # try numTextPages (cast like numPages so the arithmetic below works)
        numPages = int(docinfo.get('numTextPages', 0) or 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    maxPageNo = docinfo.get('maxPageNo', numPages)
    logging.debug("minPageNo=%s maxPageNo=%s start=%s numPages=%s"%(minPageNo,maxPageNo,start,numPages))
    np = maxPageNo

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=minPageNo, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
979
980
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return dict with array of page information for one screenfull of thumbnails.

    :param start: index of current page
    :param rows: number of rows in one batch
    :param cols: number of columns in one batch
    :param pageFlowLtr: do indexes increase from left to right
    :param pageZero: is there a zeroth non-visible page
    :param minIdx: minimum index to use
    :param maxIdx: maximum index to use (0: use start + rows * cols)
    :returns: dict with
        first: first page index
        last: last page index
        batches: list of all possible batches(dict: 'start': index, 'end': index)
        pages: list for current batch of rows(list of cols(list of pages(dict: 'idx': index)))
        nextStart: first index of next batch (None if there is none)
        prevStart: first index of previous batch (None if there is none)
    """
    logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    np = maxIdx - minIdx + 1
    if pageZero:
        # the empty zeroth slot takes part in the batching
        np += 1

    nb = int(math.ceil(np / float(grpsize)))

    # index of the very first slot: the zeroth page sits before minIdx
    if pageZero:
        ofs = minIdx - 1
    else:
        ofs = minIdx

    # list of all batch start and end points
    batch['batches'] = [{'start': i * grpsize + ofs,
                         'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
                        for i in range(nb)]

    # first slot shown on the current screen
    if pageZero and start == minIdx:
        # the first screen begins with the zeroth slot
        firstIdx = minIdx - 1
    else:
        firstIdx = start

    # list of pages for current screen
    pages = []
    idx = firstIdx
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                row.append({'idx': idx})
            else:
                # slot outside the valid range stays empty
                row.append({'idx': None})

            idx += 1

        if not pageFlowLtr:
            # right-to-left: lowest index goes to the rightmost column
            row.reverse()

        pages.append(row)

    if start > minIdx:
        batch['prevStart'] = max(start - grpsize, minIdx)
    else:
        batch['prevStart'] = None

    # the next screen starts one group after the first slot actually shown;
    # measuring from start instead would hide the last page when the
    # zeroth slot shifts the first screen
    nextIdx = firstIdx + grpsize
    if nextIdx <= maxIdx:
        batch['nextStart'] = nextIdx
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    logging.debug("batch: %s"%repr(batch))
    return batch
1069
1070
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return dict with information for one screenfull of data.

    :param start: index of the first element of the current batch (1-based)
    :param size: number of elements per batch
    :param end: index of the last element (inclusive); 0 means unknown and
        start + size is used instead
    :param data: mapping from index to element; if empty or None the
        elements of the batch are the numbers i+1 for each index i
    :param fullData: if true data is keyed by the absolute index, otherwise
        by the position inside the current batch (starting at 0)
    :returns: dict with
        batches: list of all batches(dict: 'start': index, 'end': index)
        this: list of elements in the current batch
        prevStart: first index of previous batch or None
        nextStart: first index of next batch or None
        first: start
        last: end
    """
    batch = {}
    endKnown = (end != 0)
    if not endKnown:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                # data is keyed by absolute index
                d = data.get(i, None)
            else:
                # data only holds the current batch, keyed from 0
                d = data.get(j, None)

        else:
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next batch exists as soon as its first index
    # does not exceed end (a guessed end never yields a next batch)
    if endKnown and start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1116
1117
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return list of groups on the annotation server for the user.

    :param user: user name put into the "user" query parameter
    :param annotationServerUrl: base URL of the annotation server
    :returns: list of dicts with the keys 'id', 'name' and 'uri'; empty if
        the server gave no data, no valid JSON object or no 'rows'
    """
    groups = []
    # NOTE(review): user is inserted into the query string unquoted - confirm
    # that callers only pass names that are safe in a URL
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the lookup is best-effort: a broken answer means "no groups"
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        logging.error("getAnnotatorGroupsForUser: unexpected answer from %s"%(groupsUrl))
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1132
1133
# security declaration: the name 'changeDocumentViewerForm' is protected by
# the 'View management screens' permission
security.declareProtected('View management screens','changeDocumentViewerForm')
# page template zpt/changeDocumentViewer
# (presumably the form that submits to changeDocumentViewer - TODO confirm)
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1136
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Set the configuration of the document viewer.

    :param title: title of the viewer object
    :param digilibBaseUrl: base URL of the digilib server; the scaler and
        viewer URLs are derived from it (both are None if it is None)
    :param thumbrows: default number of thumbnail rows
    :param thumbcols: default number of thumbnail columns
    :param authgroups: comma separated list of group names (stored
        stripped and lower-cased)
    :param availableLayers: passed on to setAvailableLayers
    :param RESPONSE: if given, redirects to manage_main when done
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # None is the declared default and must not fail on concatenation
        logging.warning("changeDocumentViewer: no digilibBaseUrl given")
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # keep the previous metadataService (if any) and carry on
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1156
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    # load the template and wrap it in the context of this object
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    form = template.__of__(self)
    return form()
1161
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this object.

    Redirects to manage_main if a RESPONSE is given.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: