Context Navigation

source: documentViewer/documentViewer.py @ 589:d8d6975cebcb

Last change on this file since 589:d8d6975cebcb was 589:d8d6975cebcb, checked in by casties, 11 years ago
more fixes for pf-parameter
File size: 41.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree node serialized as XML.

    The encoding argument is accepted but not handed on to the serializer.
    """
    # an earlier variant printed the node through 4Suite's Domlette into a
    # cStringIO stream using the encoding argument
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata node.

    Dict nodes yield their '@text' value (None if missing);
    any other node is returned unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed before splitting at '/'.
    A cnt of 0 (or less) returns the stripped path unchanged,
    a cnt larger than the number of segments returns ''.
    """
    segments = path.rstrip('/').split('/')
    if cnt <= 0:
        # a plain [0:-cnt] slice would be empty for cnt=0
        return '/'.join(segments)

    # number of leading segments that survive
    keep = max(len(segments) - cnt, 0)
    return '/'.join(segments[:keep])
49
def getPnForPf(docinfo, pf, default=0):
    """Return the image number for image file name pf, or default.

    Looks pf up in docinfo['imgFileNames']; if the full name is unknown
    the lookup is repeated with the file extension removed.
    """
    if 'imgFileNames' not in docinfo:
        return default

    fileNames = docinfo['imgFileNames']
    number = fileNames.get(pf, None)
    if number is not None:
        return number

    # unknown name: retry without extension
    dot = pf.rfind('.')
    if dot <= 0:
        # nothing to cut off
        return default

    return fileNames.get(pf[:dot], default)
68
69
70	##
71	## documentViewer class
72	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (None until one is assigned)
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())

    # available layer types per viewMode (annotator is not a default)
    builtinLayers = {
        'text': ['dict','search','gis'],
        'xml': None,
        'images': None,
        'index': ['extended'],
    }
    # class-level default is the very same dict object as builtinLayers
    availableLayers = builtinLayers

    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_images_annotator = PageTemplateFile('zpt/layer_images_annotator', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())

    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())

    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
125
126
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores id, title and thumbnail grid size, parses authgroups (a
    comma-separated string) into a list of lower-case group names and
    creates the 'template' sub-folder. Creation of the 'fulltextclient'
    (MpdlXmlTextServer) and 'zogilib' (zogiLib) objects and the lookup of
    the 'metadata' attribute are best effort: failures are logged only.
    The digilib URL attributes are only set when digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style
    #self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
        #templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
        #templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
166
167
168	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
172
def getSearchResults(self, **args):
    """Load the list of search results; delegated to template.fulltextclient (which stores the XML in docinfo)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
176
def getResultsPage(self, **args):
    """Return one page of the search results (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
180
def getTextInfo(self, **args):
    """Return document info from the text server (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
184
def getToc(self, **args):
    """Load the table of contents; delegated to template.fulltextclient (which stores the XML in docinfo)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
188
def getTocPage(self, **args):
    """Return one page of the table of contents (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
192
def getRepositoryType(self, **args):
    """Return the repository type reported by template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
196
def getTextDownloadUrl(self, **args):
    """Return the result of getTextDownloadUrl of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
200
def getPlacesOnPage(self, **args):
    """Return the list of gis places on one page (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
204
205	# Thumb list for CoolIris Plugin
206	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
207	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo

    Renders template.thumbs_main_rss with docinfo, pageinfo and viewMode.
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: __init__ only sets the attribute when a
    # digilibBaseUrl was given (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode selected: text if a text URL is set, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
237
238
239	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @param pf: page image file name, passed on to getPageinfo
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default toc: thumbs, except for mode=filepath
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text with dict layer
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == "auto":
        # text if the document has a text (textURLPath), else images
        # (docinfo 'textURL' is not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # dict is the default layer
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # the template is /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # render with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
304
def getAvailableLayers(self):
    """Return the availableLayers attribute (layers per viewMode)."""
    layers = self.availableLayers
    return layers
308
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
313
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    Uses docinfo['imageURL'] as base if present. Otherwise the URL is
    built from self.digilibScalerUrl and fn (falling back to
    docinfo['imagePath'] when fn is None). pn is only appended if set,
    dw and dh are always appended.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = self.digilibScalerUrl
        # start the query string properly: the Scaler URL normally has no
        # '?' yet, so a bare "fn=" would be glued onto the servlet path
        if base.endswith('?') or base.endswith('&'):
            sep = ''
        elif '?' in base:
            sep = '&'
        else:
            sep = '?'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
332
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
336
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
344
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set (value is stringified) or delete
    @param val: value for param, None deletes param
    @param params: dict of further parameters, None values delete the key
    @param duplicates: how to flatten list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry ('' if there is none); other values
        leave lists alone
    """
    # work on a copy of the request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry; an all-empty list must not
                # raise IndexError
                newParams[k] = entries[0] if entries else ''

    return newParams
383
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or parameters from dict params.

    The parameters come from getParams(); baseUrl defaults to
    getDocumentViewerURL() and paramSep separates the key=value pairs.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote the values ('/' stays unescaped) and build the query string
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
394
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val

    Same as getLink() but with the parameters separated by the HTML
    entity '&amp;', i.e. the result is meant to be embedded in markup.
    """
    # with a plain '&' this method would be indistinguishable from getLink()
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
398
399
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON and only accepted if it is an object
    with the keys 'text' and 'images'; otherwise an error is logged and
    the layers are autodetected from builtinLayers and the ids in
    self.template.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists as well so that appending
    # below doesn't change the class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if isinstance(names, list):
            layers[mode] = list(names)
        else:
            layers[mode] = names

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # layer_{m}_{l}: split into exactly three parts
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            continue

        (x, m, l) = parts
        modeLayers = layers.get(m, None)
        if modeLayers is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in modeLayers:
            # m exists -> append
            modeLayers.append(l)

    self.availableLayers = layers
433
def getAvailableLayersJson(self):
    """Return the availableLayers attribute serialized as JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
437
438
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Renders template.info_xml with the docinfo for the given mode and url.
    """
    # getattr with default: __init__ only sets the attribute when a
    # digilibBaseUrl was given (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
447
def getAuthenticatedUser(self, anon=None):
    """Return the user object of the current security manager.

    If there is no user or the user is named "Anonymous User"
    the value of anon is returned instead.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
455
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    accessType 'free' is always granted. A missing accessType or one that
    is listed in self.authgroups requires an authenticated user. Any other
    accessType is logged as error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    restricted = access is None or access in self.authgroups
    if not restricted:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
472
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored in the session under 'userinfo'
    if there is none yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (no check if it is still current)
        return session['userinfo']

    # nothing cached: start empty and remember it
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
486
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document at url, depending on mode.

    A docinfo cached in the session is reused if its mode and url match.
    Otherwise a new docinfo is assembled from index.meta (via
    self.metadataService), the text server and digilib and stored in the
    session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: location of the document, interpreted depending on mode
    @param tocMode: not used in this method
    Raises IOError if there is no index.meta for mode=texttool and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        cached = self.REQUEST.SESSION['docinfo']
        # only use it if it is still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo with own URL and digilib URLs
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl

    mds = self.metadataService
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = mds.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        # NOTE(review): the next assignment starts again from url, so a
        # trailing '/index.meta' is kept for /mpiwg/online/ URLs -- confirm
        # that this is intended
        if url.startswith('/mpiwg/online/'):
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = mds.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = mds.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = mds.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = mds.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = mds.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            # save extended version as 'bibx' TODO: ugly
            bibx = mds.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)

        # auth info
        access = mds.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = mds.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = mds.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        dri = mds.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = mds.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # no page count yet: ask digilib for the number of images
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # still no page count: use the number of text pages (text-only?)
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
631
632
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from the resource 'name' and
    docinfo['documentPath'] from 'archive-path' (with leading '/' and the
    name appended). Without archive-path the document URL stored in
    docinfo is used unless it is a http: URL. A leading '/mpiwg/online'
    is removed from the path.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # ('documentURL' is still read as fallback)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
657
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Sets (if available) imagePath, textURL, textURLPath,
    textURLRepository and presentationUrl and always pageFlow, oddPage
    and titlePage. texttool may be a list, then its first entry is used.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes carry attributes; getMDText also accepts
        # plain values which have no .get()
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
720
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Copy the contents of the bib element into docinfo.

    Stores the raw fields as 'bib', the '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' if missing).
    bibx is not used here.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # all raw bib fields
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
734
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the 'type' attribute of acc or, for the
    types 'group' and 'institution', to the lower-cased 'name' of acc.
    docinfo stays unchanged if the access data is incomplete.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: missing or oddly shaped access data is ignored
        logging.debug("getDocinfoFromAccess: no usable access type: %s"%repr(e))

    return docinfo
752
def getDocinfoFromDigilib(self, docinfo, path):
    # Reads the directory info for path from digilib's dirInfo-xml.jsp
    # into docinfo: 'numPages' from the <size> element and 'imgFileNames'
    # as dict of <name> -> <index> (as int) of the child elements.
    # Returns docinfo unchanged if no data could be fetched.
    #
    # NOTE(review): documented with comments only, like the original. In
    # Zope 2 a docstring is understood to make a method publishable via
    # URL -- confirm before adding one.
    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirInfo = ET.fromstring(txt)
    # save size
    size = dirInfo.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    for entry in dirInfo:
        fn = entry.findtext('name')
        if fn is None:
            # children without a <name> (e.g. <size> itself) are no image entries
            continue

        imgNames[fn] = getInt(entry.findtext('index'))

    docinfo['imgFileNames'] = imgNames
    return docinfo
781
782
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml at docinfo['presentationUrl'] (directly for http
    and https URLs, otherwise through digilib's Texter servlet) and sets
    docinfo 'creator', 'title' and 'date' from its author, title and date
    elements. Returns docinfo unchanged if there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url

    txt=getHttpData(metaUrl)
    if not txt:
        # None or empty response: nothing to parse
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
810
811
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param pn: page number (default 1)
    @param pf: page image file name; takes precedence over pn and
        replaces 'pf' by 'pn' in the request form
    @param start: first page of the thumbnail batch (default: start of
        the batch containing pn)
    @param rows, cols: thumbnail grid (default self.thumbrows/thumbcols)
    @param docinfo: docinfo dict of the document
    @param userinfo: not used in this method
    @param viewLayer: layer name(s) as list or comma-separated string
    Further parameters are read from self.REQUEST. If there is a 'query'
    parameter, getSearchResults() is called.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    request = self.REQUEST
    pageinfo = {'viewMode': viewMode}

    # viewLayer may come as comma-separated string
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # single value: wrap in list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # non-empty entries without duplicates, order preserved
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in viewLayers:
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # back to string
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    if not pf:
        pn = getInt(pn, 1)
    else:
        # pf takes precedence over pn
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        request.form.pop('pf', None)
        request.form['pn'] = pn

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # if start is empty use the first page of the group around pn
    groupNo = math.ceil(float(pn)/float(grpsize))
    start = getInt(start, default=(groupNo*grpsize-(grpsize-1)))
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        # partially filled last group
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
904
905
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    start: index of the first page on the current screen
    rows, cols: size of the thumbnail grid; one batch holds rows*cols pages
    pageFlowLtr: if False the pages of each row are arranged right-to-left
    pageZero: if True the batches start with a zeroth page in front of page 1
    minIdx, maxIdx: lowest and highest page index that is shown;
        maxIdx=0 is replaced by start + rows*cols

    The returned dict has the keys
    'batches': list of {'start':, 'end':} for all batches up to maxIdx
    'pages': list of rows, each a list of {'idx': page index or None}
    'prevStart', 'nextStart': start index of the neighbouring batch or None
    'first', 'last': minIdx and maxIdx
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # batches begin at page 0 if there is a zeroth page, else at page 1
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # count the slots ofs..maxIdx: with a zeroth page there is one slot more
    # than maxIdx, otherwise the last page would be missing from the batches
    # whenever maxIdx is a multiple of grpsize
    numSlots = maxIdx - ofs + 1
    nb = int(math.ceil(numSlots / float(grpsize)))

    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start': s, 'end': e})

    batch['batches'] = batches

    # grid of pages for the current screen
    if pageZero and start == 1:
        # correct beginning: the first screen starts with the zeroth page
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # slot outside the valid range stays empty
                page = {'idx': None}
            else:
                page = {'idx': idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                # right-to-left: later pages go to the front of the row
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    if start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
965
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start: index of the first element of the current batch
    size: number of elements per batch
    end: index of the last element (inclusive); end=0 is replaced by start + size
    data: optional dict with the elements; if it is empty or None the
        value i+1 is used for index i
    fullData: if True data is keyed by the absolute index (start, start+1, ...),
        else by the position inside the current batch (0, 1, ...)

    The returned dict has the keys
    'batches': list of {'start':, 'end':} for all batches up to end
    'this': list of the elements in the current batch
    'prevStart', 'nextStart': start index of the neighbouring batch or None
    'first', 'last': start and end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start': s, 'end': e})

    batch['batches'] = batches

    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
        else:
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so there is a next batch even if it only holds
    # the single element at index end
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1011
1012
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {id:, name:, uri:} on the annotation server for the user

    The groups are read from the 'rows' member of the JSON answer of
    <annotationServerUrl>/annotator/groups?user=<user>.
    Returns an empty list if there is no answer, if the answer can not be
    parsed or if it contains no 'rows'.
    """
    groups = []
    # NOTE(review): user is put into the query string unescaped -- confirm that
    # callers only pass URL-safe names
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    # presumably noExceptions makes getHttpData return an empty value on errors
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the request is best-effort, so a broken answer must not raise either
        logging.error("Unable to parse annotator groups from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        # anything but a JSON object can not contain 'rows'
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1027
1028
# form for changing the viewer settings, declared protected by the
# 'View management screens' permission
security.declareProtected('View management screens', 'changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1031
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title: new title
    digilibBaseUrl: base URL of the digilib server; the scaler and viewer URLs
        are derived from it (they are set to None if it is None)
    thumbrows, thumbcols: default size of the thumbnail grid
    authgroups: comma separated list of group names; stored stripped and lowercased
    availableLayers: passed on to setAvailableLayers
    RESPONSE: if given, redirects to manage_main when done
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # the default value can not be concatenated with the service paths
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is only logged, the other settings are kept
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1051
def manage_AddDocumentViewerForm(self):
    """returns the rendered form for adding a viewer"""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template with self before calling it
    return template.__of__(self)()
1056
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """creates a new documentViewer and stores it under id in self

    If RESPONSE is given it is redirected to manage_main afterwards.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: