Context Navigation

source: documentViewer/documentViewer.py @ 612:a79e4e4b3e37

Last change on this file since 612:a79e4e4b3e37 was 612:a79e4e4b3e37, checked in by dwinter, 11 years ago
json schnittstellen
File size: 45.5 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return a string containing node serialized as XML.

    node -- an ElementTree element.
    encoding -- kept in the signature for callers; the ElementTree call
                below does not use it (only the retired 4Suite/Domlette
                code path passed it on).
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' value (None when the key is absent);
    anything that is not a dict is returned unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    path -- '/'-separated path; trailing slashes are ignored.
    cnt -- number of segments to drop from the end. Zero or a negative
           number drops nothing (a plain ``[0:-cnt]`` slice would return
           an empty string for cnt=0).

    Returns the shortened path, or '' when cnt is at least the number of
    segments.
    """
    segments = path.rstrip('/').split('/')
    if cnt <= 0:
        return '/'.join(segments)

    # clamp at 0 so that an oversized cnt yields an empty path
    keep = max(len(segments) - cnt, 0)
    return '/'.join(segments[:keep])
49
def getPnForPf(docinfo, pf, default=0):
    """Return the image number for image file name pf, or default.

    The name is looked up in docinfo['imgFileNames']; if it is not found
    as given, the lookup is repeated with the file extension removed.
    """
    if 'imgFileNames' not in docinfo:
        return default

    fileNames = docinfo['imgFileNames']
    number = fileNames.get(pf, None)
    if number is not None:
        return number

    # retry without the extension (a dot at position 0 is not an extension)
    dotPos = pf.rfind('.')
    if dotPos <= 0:
        return default

    return fileNames.get(pf[:dotPos], default)
68
def getPfForPn(docinfo, pn, default=None):
    """Return the image file name for image number pn, or default.

    Uses the docinfo['imgFileIndexes'] mapping when docinfo has one.
    """
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
76
77
78	##
79	## documentViewer class
80	##
81	class documentViewer(Folder):
82	"""document viewer"""
# --- class-level configuration of documentViewer ---
meta_type = "Document viewer"

security = ClassSecurityInfo()
manage_options = Folder.manage_options + (
    {'label':'Configuration','action':'changeDocumentViewerForm'},
)

# MetaDataFolder instance (None until one has been assigned)
metadataService = None

#
# templates and forms
#
# viewMode templates
viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())

# available layer types per viewMode (annotator is not a default layer)
builtinLayers = {
    'text': ['dict','search','gis'],
    'xml': None,
    'image': None,
    'index': ['extended'],
}
# NOTE: this is the same dict object as builtinLayers, not a copy
availableLayers = builtinLayers

# layer templates
layer_text_dict = PageTemplateFile('zpt/viewer/layer_text_dict', globals())
layer_text_search = PageTemplateFile('zpt/viewer/layer_text_search', globals())
layer_text_annotator = PageTemplateFile('zpt/viewer/layer_text_annotator', globals())
layer_text_gis = PageTemplateFile('zpt/viewer/layer_text_gis', globals())
layer_text_pundit = PageTemplateFile('zpt/viewer/layer_text_pundit', globals())
layer_image_annotator = PageTemplateFile('zpt/viewer/layer_image_annotator', globals())
layer_image_search = PageTemplateFile('zpt/viewer/layer_image_search', globals())
layer_index_extended = PageTemplateFile('zpt/viewer/layer_index_extended', globals())

# toc templates
toc_thumbs = PageTemplateFile('zpt/viewer/toc_thumbs', globals())
toc_text = PageTemplateFile('zpt/viewer/toc_text', globals())
toc_figures = PageTemplateFile('zpt/viewer/toc_figures', globals())
toc_concordance = PageTemplateFile('zpt/viewer/toc_concordance', globals())
toc_notes = PageTemplateFile('zpt/viewer/toc_notes', globals())
toc_handwritten = PageTemplateFile('zpt/viewer/toc_handwritten', globals())
toc_none = PageTemplateFile('zpt/viewer/toc_none', globals())

# other templates
common_template = PageTemplateFile('zpt/viewer/common_template', globals())
info_xml = PageTemplateFile('zpt/viewer/info_xml', globals())

# stylesheets and scripts
docuviewer_css = ImageFile('css/docuviewer.css',globals())
# make docuviewer_css refreshable for development
docuviewer_css.index_html = refreshingImageFileIndexHtml
docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
# make docuviewer_ie_css refreshable for development
#docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
jquery_js = ImageFile('js/jquery.js',globals())
135
136
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    id, title -- Zope id and title of this object.
    imageScalerUrl -- passed to the zogiLib helper as dlServerURL.
    textServerName -- passed to the MpdlXmlTextServer helper as serverName.
    digilibBaseUrl -- base URL of digilib; the Scaler and viewer URLs are
                      derived from it when it is given.
    thumbcols, thumbrows -- stored as self.thumbcols / self.thumbrows.
    authgroups -- comma-delimited list of authorized groups.

    Failures while creating the helper objects are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style
    #self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
        #templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
        #templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for 'zogilib': "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    elif not hasattr(self, 'digilibBaseUrl'):
        # methods such as thumbs_rss test "not self.digilibBaseUrl" before
        # looking the URL up, so the attribute has to exist even when no
        # base URL was configured
        self.digilibBaseUrl = None
176
177
178	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)

def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)

def getResultsPage(self, **args):
    """returns one page of the search results (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)

def getTextInfo(self, **args):
    """returns document info from the text server (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)

def getToc(self, **args):
    """loads table of contents and stores XML in docinfo (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)

def getTocPage(self, **args):
    """returns one page of the table of contents (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)

def getRepositoryType(self, **args):
    """get repository type (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)

def getTextDownloadUrl(self, **args):
    """get URL to download the full text (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)

def getPlacesOnPage(self, **args):
    """get list of gis places on one page (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
214
215	# Thumb list for CoolIris Plugin
216	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
217	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list with the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text, default is auto (try text, else image)
    @param start: passed on to getPageinfo
    @param pn: passed on to getPageinfo
    @return: the rendered template, or an error string when the template
             folder is missing
    """
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # tolerate a missing attribute, the same way index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is set, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
248
249
250	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: the rendered template 'viewer_<viewMode>', or an error string
             when the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "auto":
        # text if there is a text, else images
        # (docinfo.get('textURL', None) not implemented yet)
        if docinfo.get('textURLPath', None):
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"
        else:
            viewMode = "image"
    elif viewMode == "text_dict":
        # legacy name for text with layer dict
        viewMode, viewLayer = "text", "dict"
    elif viewMode == 'images':
        # legacy name for image
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode
    viewerTemplate = getattr(self.template, 'viewer_%s'%viewMode, None)
    if viewerTemplate is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return viewerTemplate(docinfo=docinfo, pageinfo=pageinfo)
320
def getAvailableLayers(self):
    """Return the dict of available layers per viewMode (self.availableLayers)."""
    layers = self.availableLayers
    return layers
324
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
329
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    fn -- image path for the 'fn' parameter; only used when docinfo has
          no 'imageURL'. Defaults to docinfo['imagePath'] if docinfo is given.
    pn -- page number, appended as 'pn' when truthy.
    dw, dh -- values for the 'dw' and 'dh' parameters.
    docinfo -- optional docinfo dict; its 'imageURL' (which already
               carries the fn parameter) is used as base when present.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # digilibScalerUrl has no query part (it is built as
        # base + '/servlet/Scaler'), so the separator must be added here
        base = self.digilibScalerUrl
        if base.endswith('?') or base.endswith('&'):
            sep = ''
        elif '?' in base:
            sep = '&'
        else:
            sep = '?'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
348
def getDocumentViewerURL(self):
    """Return the URL of this instance, as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
352
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    return (style + 'sel') if idx == selected else style
360
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    param, val -- single parameter to set (val is stored as str(val)) or,
                  when val is None, to remove.
    params -- dict of further parameters; None values remove the key.
    duplicates -- how to reduce list values (coming from duplicate keys):
                  'comma' joins the non-empty entries with ',',
                  'first' keeps the first non-empty entry ('' if there is
                  none); any other true value leaves lists unchanged.

    Returns a new dict; REQUEST.form itself is not modified.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys);
        # iterate over a snapshot because values are replaced in the loop
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; an all-empty list must not
                # raise IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
399
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); values are utf8ified and quoted
    with quote_plus (not escaping '/'), then joined with paramSep.
    baseUrl defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)

    # build the "key=value" pairs of the query string
    pairs = []
    for (key, value) in urlParams.items():
        quoted = urllib.quote_plus(utf8ify(value), '/')
        pairs.append("%s=%s"%(key, quoted))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
410
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val.

    Calls getLink() with paramSep='&'.
    """
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, duplicates=duplicates)
    return self.getLink(paramSep='&', **linkArgs)
414
415
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON; it is accepted only if the result
    contains both 'text' and 'image'. Otherwise an error is logged and
    the layers are autodetected: the builtin layers plus one layer for
    every name in self.template of the form layer_{m}_{l}.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'image' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON, or JSON that is not a container
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists as well so that appending
    # below does not modify builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        layers[mode] = list(names) if names is not None else None

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (x, m, l) = parts
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
449
def getAvailableLayersJson(self):
    """Return self.availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
453
454
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    url, mode -- passed on to getDocinfo.
    The result is the template 'info_xml' rendered with the docinfo.
    """
    # tolerate a missing attribute, the same way index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
463
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon.

    anon is returned when the security manager has no user or the user
    name is "Anonymous User" (Zope's anonymous user is ignored).
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
471
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    Access type 'free' is always granted. A missing access type or one
    listed in self.authgroups is granted only to an authenticated user.
    Any other access type is logged as unknown and denied.
    """
    accessType = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%accessType)
    if accessType == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if accessType is not None and accessType not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%accessType)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
488
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored under 'userinfo' in REQUEST.SESSION when
    the session has none yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (check if its still current?)
        return session['userinfo']

    # nothing cached: store a fresh one in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
502
def getDocinfoJSON(self, mode, url, tocMode=None):
    """Return the docinfo from getDocinfo(mode, url, tocMode) as a JSON string."""
    import json

    return json.dumps(self.getDocinfo( mode, url, tocMode))
510
511
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document at url, depending on mode.

    mode -- 'texttool' (url points to document dir or index.meta),
            'imagepath' (url points to folder with images) or
            'filepath' (url points to an image file).
    url -- location of the document, interpreted according to mode.
    tocMode -- accepted but not used by this method.

    The docinfo is cached in REQUEST.SESSION['docinfo'] and reused as
    long as mode and url are unchanged.

    Raises IOError when no index.meta is found in mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if docUrl.startswith('/mpiwg/online/'):
            # strip the prefix from the already shortened docUrl so the
            # '/index.meta' removal above is not lost
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    # min and max page no
    docinfo['minPageNo'] = docinfo.get('minPageNo', 1)
    # 'numPages' may still be missing here (e.g. when the digilib lookup
    # returned nothing), so it must not be read with docinfo[...]
    docinfo['maxPageNo'] = docinfo.get('maxPageNo', docinfo.get('numPages', 0))

    # part-of information
    partOfPath = docinfo.get('partOfPath', None)
    if partOfPath is not None:
        partOfDom = self.metadataService.getDomFromPathOrUrl(partOfPath)
        if partOfDom is not None:
            docinfo['partOfLabel'] = self.metadataService.getBibFormattedLabel(dom=partOfDom)
            docinfo['partOfUrl'] = "%s?url=%s"%(self.getDocumentViewerURL(), partOfPath)
            logging.debug("partOfLabel=%s partOfUrl=%s"%(docinfo['partOfLabel'],docinfo['partOfUrl']))

    # normalize path
    if 'imagePath' in docinfo and not docinfo['imagePath'].startswith('/'):
        docinfo['imagePath'] = '/' + docinfo['imagePath']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
672
673
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] and docinfo['documentPath'] and, when
    the resource has an 'is-part-of' entry with an archive-path,
    docinfo['partOfPath']. Returns docinfo.
    """
    logging.debug("getDocinfoFromResource: resource=%s"%(repr(resource)))
    docName = getMDText(resource.get('name', None))
    docinfo['documentName'] = docName
    docPath = getMDText(resource.get('archive-path', None))
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under the key
        # 'documentUrl' and it may be None
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath

    # is this part-of?
    partOf = resource.get('is-part-of', None)
    if partOf is not None:
        partOf = getMDText(partOf.get('archive-path', None))
        if partOf is not None:
            docinfo['partOfPath'] = partOf.strip()

    return docinfo
708
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool -- texttool metadata dict, or a list whose first element is
                used.

    Fills in image path, page range, text URLs, page flow, title page and
    presentation URL as far as the metadata provides them, and makes sure
    'creator', 'title' and 'date' exist. Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir:
        if imageDir.startswith('/'):
            # absolute path
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        elif docPath:
            # relative path
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

    # start and end page (for subdocuments of other documents)
    imgStartNo = getMDText(texttool.get('image-start-no', None))
    minPageNo = getInt(imgStartNo, 1)
    docinfo['minPageNo'] = minPageNo

    imgEndNo = getMDText(texttool.get('image-end-no', None))
    if imgEndNo:
        docinfo['maxPageNo'] = getInt(imgEndNo)

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # getMDText passes non-dict nodes through unchanged, so tun
            # may be a plain string without attributes
            textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default: minPageNo)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', minPageNo))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    # make sure we have at least fake DC data
    if 'creator' not in docinfo:
        docinfo['creator'] = '[no author found]'

    if 'title' not in docinfo:
        docinfo['title'] = '[no title found]'

    if 'date' not in docinfo:
        docinfo['date'] = '[no date found]'

    return docinfo
797
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' when missing).
    bibx is accepted but not used here. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field,'')

    return docinfo
811
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the 'type' attribute of acc; for the
    types 'group' and 'institution' the lower-cased acc['name'] is used
    instead. Metadata that does not have the expected structure is
    ignored (logged at debug level). Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: missing or oddly shaped access data is not an error
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
829
def getDocinfoFromDigilib(self, docinfo, path):
    """Read page count and image file list for path from digilib into docinfo.

    Fetches <digilibBaseUrl>/dirInfo-xml.jsp?fn=<path>. Sets
    docinfo['numPages'] from the <size> element and, when there are
    pages, docinfo['imgFileNames'] (name -> index) and
    docinfo['imgFileIndexes'] (index -> name). docinfo is returned
    unchanged when no data could be fetched.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirElem = ET.fromstring(txt)
    # save size
    size = dirElem.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    imgIndexes = {}
    for f in dirElem:
        fn = f.findtext('name')
        if fn is None:
            # child without a <name> (e.g. the <size> element itself):
            # not an image entry
            continue

        pn = getInt(f.findtext('index'))
        imgNames[fn] = pn
        imgIndexes[pn] = fn

    docinfo['imgFileNames'] = imgNames
    docinfo['imgFileIndexes'] = imgIndexes
    return docinfo
861
862
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the XML at docinfo['presentationUrl'] (directly for http/https
    URLs, otherwise through the digilib Texter servlet) and stores its
    author, title and date as 'creator', 'title' and 'date'.
    docinfo is returned unchanged when there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    if not txt:
        # nothing (or an empty document) came back; do not hand it to
        # the XML parser
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
890
891
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a new dict describing the current view: view mode and layers,
    page number (pn) and page file name (pf), the thumbnail layout (rows,
    cols, groupsize, start, numgroups, pageZero, pageBatch) and the search
    and highlighting parameters taken from self.REQUEST.

    Side effects: when pf is given, 'pf' is removed from REQUEST.form and
    'pn' is set there instead; docinfo['numPages'] is set from
    docinfo['numTextPages'] when it was 0 and numTextPages is not.
    The userinfo parameter is not used in this method.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    # a comma separated string of layers is handled like a list
    if isinstance(viewLayer, basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # anything else (including None) becomes a one-element list
        layerList = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # keep the first occurrence of every non-empty layer
        known = set()
        layerList = []
        for layer in viewLayer:
            if layer and layer not in known:
                known.add(layer)
                layerList.append(layer)

        # stringify viewLayer
        viewLayer = ','.join(layerList)

    pageinfo = {
        'viewMode': viewMode,
        'viewLayers': layerList,
        'viewLayer': viewLayer,
        'tocMode': tocMode,
    }

    firstPage = docinfo.get('minPageNo', 1)

    # pf takes precedence over pn
    if pf:
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        form = self.REQUEST.form
        form.pop('pf', None)
        form['pn'] = pn
    else:
        pn = getInt(pn, firstPage)
        pf = getPfForPn(docinfo, pn)

    pageinfo['pf'] = pf
    pageinfo['pn'] = pn

    # thumbnail layout, falling back to the configured defaults
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # first page of the group containing pn, but not smaller than
    # minPageNo; handed to getInt as the default for start
    groupNo = math.ceil(float(pn)/float(grpsize))
    groupStart = groupNo*grpsize-(grpsize-1)
    start = getInt(start, max(groupStart, firstPage))
    pageinfo['start'] = start

    # get number of pages
    pageCount = int(docinfo.get('numPages', 0))
    if pageCount == 0:
        # try numTextPages
        pageCount = docinfo.get('numTextPages', 0)
        if pageCount != 0:
            docinfo['numPages'] = pageCount

    lastPage = docinfo.get('maxPageNo', pageCount)
    logging.debug("minPageNo=%s maxPageNo=%s start=%s numPages=%s"%(firstPage,lastPage,start,pageCount))

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of groups, counting an incomplete last group
    groupCount = int(lastPage / grpsize)
    if lastPage % grpsize > 0:
        groupCount += 1

    pageinfo['numgroups'] = groupCount

    flowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    zeroPage = (cols == 2 and (flowLtr != oddLeft))
    pageinfo['pageZero'] = zeroPage
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=flowLtr, pageZero=zeroPage, minIdx=firstPage, maxIdx=lastPage)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    origNumbers = docinfo.get('pageNumbers')
    if origNumbers:
        # get original page numbers
        origEntry = origNumbers.get(pn, None)
        if origEntry is not None:
            pageinfo['pageNumberOrig'] = origEntry['no']
            pageinfo['pageNumberOrigNorm'] = origEntry['non']

    # cache search results
    searchQuery = self.REQUEST.get('query',None)
    pageinfo['query'] = searchQuery
    if searchQuery and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        searchType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = searchType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=searchType, query=searchQuery, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlight = self.REQUEST.get('highlightQuery', None)
    if highlight:
        pageinfo['highlightQuery'] = highlight
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
993
994
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page information for one screenfull of thumbnails

    start: index of the first page on the current screen
    rows, cols: layout of one screen (rows*cols thumbnail slots)
    pageFlowLtr: if False the pages of every row are put in reverse order
    pageZero: the first screen starts with an empty slot (index minIdx-1)
    minIdx, maxIdx: first and last valid page index (inclusive);
        maxIdx=0 is replaced by start + rows*cols

    The returned dict contains
      batches: list of {'start':, 'end':} for all screens
      pages: list of rows, each a list of {'idx': index or None}
      prevStart, nextStart: start of the neighbouring screen or None
      first, last: minIdx and maxIdx
    """
    logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    if pageZero:
        # the empty zeroth slot shifts the start of all screens down by one
        ofs = minIdx - 1
    else:
        ofs = minIdx

    # number of slots to distribute, counted from ofs so that the zeroth
    # slot does not push the last page out of the list of batches
    numSlots = maxIdx - ofs + 1
    nb = int(math.ceil(numSlots / float(grpsize)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    # list of pages for the current screen
    zeroStart = pageZero and start == minIdx
    if zeroStart:
        # correct beginning
        idx = minIdx - 1
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx':idx}
            else:
                # slot outside the valid range stays empty
                page = {'idx':None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > minIdx:
        batch['prevStart'] = max(start - grpsize, minIdx)
    else:
        batch['prevStart'] = None

    if zeroStart:
        # the first screen really began at minIdx-1, so the next one
        # begins one page earlier than start + grpsize
        nextStart = minIdx - 1 + grpsize
    else:
        nextStart = start + grpsize

    if nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
1056
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start: index of the first element on the current screen
    size: number of elements per screen
    end: index of the last element (inclusive); 0 is replaced by start+size
    data: optional dict of elements; if it is empty or None the element
        for index i is the number i+1
    fullData: if True data is read with the indexes start, start+1, ...;
        if False it is read with the keys 0, 1, ... for this screen

    The returned dict contains
      batches: list of {'start':, 'end':} for all screens
      this: list of the elements on the current screen
      prevStart, nextStart: start of the neighbouring screen or None
      first, last: start and end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start+size, end+1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                # data only holds this screen, counted from 0
                d = data.get(j, None)

        else:
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is a valid index, so a next screen starting exactly at end exists
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1102
1103
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:} on the annotation server for the user

    Requests <annotationServerUrl>/annotator/groups?user=<user> and reads
    the 'rows' member of the JSON answer. Every row becomes a dict with
    the keys 'id', 'name' and 'uri' (None for members the row lacks).

    Returns an empty list if the request returns no data, if the answer
    is not a JSON object or if it has no 'rows'.
    """
    groups = []
    # quote the user name so that characters like space, & or # can not
    # break or extend the query string; quote() needs a byte string
    userName = "%s"%(user,)
    if isinstance(userName, unicode):
        userName = userName.encode('utf-8')

    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,urllib.quote(userName, ''))
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the request is best-effort, so a broken answer means no groups
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        return groups

    for r in res.get('rows', None) or []:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1118
1119
# the change form is declared as protected by 'View management screens'
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    Stores the given settings on the viewer: title, digilib base URL (and
    the scaler and viewer URLs built from it), default thumbnail rows and
    columns, the list of authorized groups and the available layers.
    Redirects to manage_main if RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    # servlet and viewer page are addressed below the base URL
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # authgroups is a comma separated string; names are stored trimmed and lower case
    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())

    self.authgroups = groupNames
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1142
def manage_AddDocumentViewerForm(self):
    """add the viewer form

    Renders the page template zpt/addDocumentViewer in the context of self
    and returns the result.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = template.__of__(self)
    return boundTemplate()
1147
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and settings, stores it in
    self under that id and redirects to manage_main if RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: