Context Navigation

source: documentViewer/documentViewer.py @ 610:0488cd12355b

Last change on this file since 610:0488cd12355b was 610:0488cd12355b, checked in by casties, 12 years ago
gis mode works again.
File size: 45.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the XML as a string.

    The encoding argument is accepted but not passed on to ElementTree.
    """
    # The former 4Suite implementation printed the node into a cStringIO
    # stream with Ft.Xml.Domlette.Print(); ElementTree needs a single call.
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata dict node.

    Anything that is not a dict is handed back unchanged; a dict without
    an '@text' key yields None.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed."""
    # trailing slashes would otherwise count as an (empty) segment
    segments = path.rstrip('/').split('/')
    shortened = segments[0:-cnt]
    return '/'.join(shortened)
49
def getPnForPf(docinfo, pf, default=0):
    """Return the image number stored for image file name pf, or default.

    The name is looked up in docinfo['imgFileNames'] as given first and,
    if that fails, once more with its extension cut off.
    """
    if 'imgFileNames' not in docinfo:
        return default

    names = docinfo['imgFileNames']
    number = names.get(pf, None)
    if number is not None:
        return number

    # try to cut the extension
    dot = pf.rfind('.')
    if dot <= 0:
        # no extension to cut
        return default

    return names.get(pf[:dot], default)
68
def getPfForPn(docinfo, pn, default=None):
    """Return the image file name stored for image number pn, or default."""
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
76
77
78	##
79	## documentViewer class
80	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # management tabs: everything Folder offers plus a 'Configuration' tab
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # set in __init__ from the attribute named 'metadata' (if it can be found)
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
    viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
    viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (None: no layers for that mode)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'image': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/viewer/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/viewer/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/viewer/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/viewer/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/viewer/layer_text_pundit', globals())
    layer_image_annotator = PageTemplateFile('zpt/viewer/layer_image_annotator', globals())
    layer_image_search = PageTemplateFile('zpt/viewer/layer_image_search', globals())
    layer_index_extended = PageTemplateFile('zpt/viewer/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/viewer/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/viewer/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/viewer/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/viewer/toc_concordance', globals())
    toc_notes = PageTemplateFile('zpt/viewer/toc_notes', globals())
    toc_handwritten = PageTemplateFile('zpt/viewer/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/viewer/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/viewer/common_template', globals())
    info_xml = PageTemplateFile('zpt/viewer/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    # (currently disabled)
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
135
136
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer and its 'template' sub-folder.

    Tries to create a 'fulltextclient' text server and a 'zogilib' object
    inside the template folder and to locate the 'metadata' service; each of
    these steps only logs an error when it fails.  The digilib URLs are set
    only if digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is a comma-delimited list of authorized groups
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    tmpl = Folder('template')
    self['template'] = tmpl  # Zope-2.12 style (old style: self._setObject)

    # full text server client
    try:
        import MpdlXmlTextServer
        client = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        tmpl['fulltextclient'] = client
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        viewerLib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tmpl['zogilib'] = viewerLib
    except Exception as e:
        logging.error("Unable to create zogiLib for 'zogilib': "+str(e))

    # metadata service: assume the MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
176
177
178	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page.

    Proxy: all keyword arguments go to template.fulltextclient.getTextPage.
    """
    client = self.template.fulltextclient
    return client.getTextPage(**args)
182
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo.

    Proxy: all keyword arguments go to template.fulltextclient.getSearchResults.
    """
    client = self.template.fulltextclient
    return client.getSearchResults(**args)
186
def getResultsPage(self, **args):
    """Return one page of the search results.

    Proxy: all keyword arguments go to template.fulltextclient.getResultsPage.
    """
    client = self.template.fulltextclient
    return client.getResultsPage(**args)
190
def getTextInfo(self, **args):
    """Return document info from the text server.

    Proxy: all keyword arguments go to template.fulltextclient.getTextInfo.
    """
    client = self.template.fulltextclient
    return client.getTextInfo(**args)
194
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo.

    Proxy: all keyword arguments go to template.fulltextclient.getToc.
    """
    client = self.template.fulltextclient
    return client.getToc(**args)
198
def getTocPage(self, **args):
    """Return one page of the table of contents.

    Proxy: all keyword arguments go to template.fulltextclient.getTocPage.
    """
    client = self.template.fulltextclient
    return client.getTocPage(**args)
202
def getRepositoryType(self, **args):
    """Return the repository type.

    Proxy: all keyword arguments go to template.fulltextclient.getRepositoryType.
    """
    client = self.template.fulltextclient
    return client.getRepositoryType(**args)
206
def getTextDownloadUrl(self, **args):
    """Return the URL for downloading the full text.

    Proxy: all keyword arguments go to template.fulltextclient.getTextDownloadUrl.
    """
    client = self.template.fulltextclient
    return client.getTextDownloadUrl(**args)
210
def getPlacesOnPage(self, **args):
    """Return the list of gis places on one page.

    Proxy: all keyword arguments go to template.fulltextclient.getPlacesOnPage.
    """
    client = self.template.fulltextclient
    return client.getPlacesOnPage(**args)
214
215	# Thumb list for CoolIris Plugin
216	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
217	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list through the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text, default is auto (try text, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo

    Returns the rendered template, or an error string if the template
    folder is missing.
    """
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl is only set by __init__ when a URL was given, so it
    # has to be read defensively (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
248
249
250	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """Show a page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)

    Returns the rendered template 'viewer_<viewMode>' or an error string
    if the template folder or the template is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default tocMode: mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy fix
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "image"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
320
def getAvailableLayers(self):
    """Return the dict holding the list of available layers per viewMode."""
    layers = self.availableLayers
    return layers
324
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
329
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    @param fn: image path; defaults to docinfo['imagePath'] if docinfo is given
    @param pn: page number; only added to the URL if it is true
    @param dw: image width parameter
    @param dh: image height parameter
    @param docinfo: optional docinfo dict; its 'imageURL' (which already
        carries the fn parameter) is used as the base URL when present
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # the plain scaler URL has no query string yet, so it must be
        # started with '?' (same form as the imageURL built in getDocinfo)
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
348
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
352
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
360
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the current request form parameters and
    additionally applies param=val and/or the dict params.  A value of None
    deletes the key.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str), None removes it
    @param params: dict of further parameters to change (values stored as
        given), None values remove the key
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry; any other true value leaves lists alone
    """
    # copy existing request params
    newParams=self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # collapses to '' (as it does for 'comma')
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
399
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); baseUrl defaults to the URL of
    this instance and paramSep separates the key=value pairs.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values (not escaping '/') and collect the key=value pairs
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
410
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link like getLink() but with '&amp;' between the parameters.

    Meant for URLs that are embedded in HTML/XML where a bare '&' must be
    written as an entity.  All arguments are passed on to getLink().
    """
    # NOTE(review): the separator was a plain '&' here, which made this
    # method identical to getLink(); '&amp;' matches the method name.
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
414
415
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the layers.

    newLayerString is parsed as JSON and used if the result contains the
    keys 'text' and 'image'.  Otherwise (or if it is None) the builtin
    layers are used, extended by every entry of the template folder whose
    name has the form layer_{m}_{l} for layer l in mode m.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'image' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect below
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists as well so that appending
    # below does not modify the class-level builtinLayers
    available = {}
    for (mode, layers) in self.builtinLayers.items():
        if layers is None:
            available[mode] = None
        else:
            available[mode] = list(layers)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (x, m, l) = parts
        modeLayers = available.get(m)
        if modeLayers is None:
            # mode m doesn't exist or has no layers yet -> new list
            available[m] = [l]
        elif l not in modeLayers:
            # m exists -> append
            modeLayers.append(l)

    self.availableLayers = available
449
def getAvailableLayersJson(self):
    """Return the available layers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
453
454
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # digilibBaseUrl is only set by __init__ when a URL was given, so it
    # has to be read defensively (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
463
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon if there is none.

    A user whose name is "Anonymous User" (Zope's anonymous user) counts
    as not authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
471
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted.  A missing access type or one of
    the configured authgroups requires an authenticated user.  Any other
    access type is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
488
def getUserinfo(self):
    """Return the userinfo dict kept in the session (created on first use)."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (TODO: check if it is still current?)
        return session['userinfo']

    # nothing cached yet: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
502
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document, depending on mode.

    A docinfo cached in the session is reused if its mode and url match.
    Otherwise a new docinfo is assembled from index.meta (via the metadata
    service), the full text server and digilib, stored in the session and
    returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to image file)
    @param url: url which contains display information
    @param tocMode: not used here
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        # NOTE(review): this restarts from url, so a trailing '/index.meta'
        # is kept and the result has no leading '/' -- confirm intended
        if url.startswith('/mpiwg/online/'):
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightInfo = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightInfo:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightInfo))
            docinfo['copyright'] = copyrightInfo

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    # min and max page no
    docinfo['minPageNo'] = docinfo.get('minPageNo', 1)
    # numPages can still be missing here (digilib request failed and no
    # text pages), so it must not be read with []
    docinfo['maxPageNo'] = docinfo.get('maxPageNo', docinfo.get('numPages', 0))

    # part-of information
    partOfPath = docinfo.get('partOfPath', None)
    if partOfPath is not None:
        partOfDom = self.metadataService.getDomFromPathOrUrl(partOfPath)
        if partOfDom is not None:
            docinfo['partOfLabel'] = self.metadataService.getBibFormattedLabel(dom=partOfDom)
            docinfo['partOfUrl'] = "%s?url=%s"%(self.getDocumentViewerURL(), partOfPath)
            logging.debug("partOfLabel=%s partOfUrl=%s"%(docinfo['partOfLabel'],docinfo['partOfUrl']))

    # normalize path
    if 'imagePath' in docinfo and not docinfo['imagePath'].startswith('/'):
        docinfo['imagePath'] = '/' + docinfo['imagePath']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
663
664
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName' and 'documentPath' and, if the resource is part of
    another one, 'partOfPath'.  Returns docinfo.
    """
    logging.debug("getDocinfoFromResource: resource=%s"%(repr(resource)))
    docName = getMDText(resource.get('name', None))
    docinfo['documentName'] = docName
    docPath = getMDText(resource.get('archive-path', None))
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; the key is 'documentUrl' as stored by
        # getDocinfo
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath

    # is this part-of?
    partOf = resource.get('is-part-of', None)
    if partOf is not None:
        partOf = getMDText(partOf.get('archive-path', None))
        if partOf is not None:
            docinfo['partOfPath'] = partOf.strip()

    return docinfo
699
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool may be a dict or a list of dicts (only the first is used).
    Sets image path, page range, text URLs, page flow, title page and
    presentation URL where available, and placeholder creator/title/date
    entries if those are still missing.  Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir:
        if imageDir.startswith('/'):
            # absolute path
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        elif docPath:
            # relative path
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

    # start and end page (for subdocuments of other documents)
    imgStartNo = getMDText(texttool.get('image-start-no', None))
    minPageNo = getInt(imgStartNo, 1)
    docinfo['minPageNo'] = minPageNo

    imgEndNo = getMDText(texttool.get('image-end-no', None))
    if imgEndNo:
        docinfo['maxPageNo'] = getInt(imgEndNo)

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme: not a URL but a path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # a node may be a plain string (see getMDText), which has no
            # attributes
            textUrlAtts = None
            if isinstance(tun, dict):
                textUrlAtts = tun.get('@attr')

            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default: first page)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', minPageNo))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    # make sure we have at least fake DC data
    if 'creator' not in docinfo:
        docinfo['creator'] = '[no author found]'

    if 'title' not in docinfo:
        docinfo['title'] = '[no title found]'

    if 'date' not in docinfo:
        docinfo['date'] = '[no date found]'

    return docinfo
788
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as 'bib', the bib type as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' (empty string if missing).
    bibx is not used here.  Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
802
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'];
    for the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead.  Incomplete or malformed access data leaves docinfo
    unchanged.  Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing keys or unexpected structure -> no access type
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
820
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the directory info for path from digilib into docinfo.

    Requests dirInfo-xml.jsp from the digilib server.  Sets 'numPages'
    from the <size> element and, if the size is not empty, the maps
    'imgFileNames' (file name -> index) and 'imgFileIndexes' (index ->
    file name).  If no data could be fetched docinfo is returned unchanged.
    """
    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirInfo = ET.fromstring(txt)
    # save size
    size = dirInfo.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    imgIndexes = {}
    for f in dirInfo:
        fn = f.findtext('name')
        if fn is None:
            # child without a name (e.g. the <size> element itself) is
            # not a file entry
            continue

        pn = getInt(f.findtext('index'))
        imgNames[fn] = pn
        imgIndexes[pn] = fn

    docinfo['imgFileNames'] = imgNames
    docinfo['imgFileIndexes'] = imgIndexes
    return docinfo
852
853
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml at docinfo['presentationUrl'] (directly for http
    URLs, through the digilib Texter servlet otherwise) and sets 'creator',
    'title' and 'date'.  If there is no URL or no data could be fetched,
    docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url

    txt=getHttpData(metaUrl)
    if not txt:
        # None and empty responses alike cannot be parsed (same check as
        # in getDocinfoFromDigilib)
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
881
882
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo dict built from the given parameters, docinfo and the request

    pf (image file name) takes precedence over pn (page number); the
    value that is not given is looked up in docinfo. rows and cols fall
    back to self.thumbrows and self.thumbcols. viewLayer may be a
    comma-separated string or a list; it is stored both as a string
    ('viewLayer') and as a list ('viewLayers').

    Side effects: when pf is given the request form gets 'pn' instead
    of 'pf'; docinfo['numPages'] is set from 'numTextPages' when it was
    missing or 0. userinfo is not used in this method.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # normalize viewLayer: a string is split at commas first
    layerSpec = viewLayer
    if isinstance(layerSpec, basestring):
        layerSpec = layerSpec.split(',')

    if not isinstance(layerSpec, list):
        # single value: wrap it in a list
        pageinfo['viewLayers'] = [layerSpec]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%layerSpec)
        # drop empty entries and duplicates, keeping first occurrences in order
        uniqueLayers = []
        seenLayers = set()
        for layer in layerSpec:
            if layer and layer not in seenLayers:
                seenLayers.add(layer)
                uniqueLayers.append(layer)

        pageinfo['viewLayers'] = uniqueLayers
        # string form of the cleaned list
        layerSpec = ','.join(uniqueLayers)

    pageinfo['viewLayer'] = layerSpec
    pageinfo['tocMode'] = tocMode

    firstPageNo = docinfo.get('minPageNo', 1)

    # a file name beats a page number
    if not pf:
        pn = getInt(pn, firstPageNo)
        pf = getPfForPn(docinfo, pn)
    else:
        pn = getPnForPf(docinfo, pf)
        # new URLs are created from the request params, so swap pf for pn there
        self.REQUEST.form.pop('pf', None)
        self.REQUEST.form['pn'] = pn

    pageinfo['pf'] = pf
    pageinfo['pn'] = pn

    # thumbnail grid
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    groupSize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = groupSize

    # default start: first page of the group containing pn, at least firstPageNo
    groupNo = math.ceil(float(pn)/float(groupSize))
    groupStart = groupNo*groupSize-(groupSize-1)
    start = getInt(start, max(groupStart, firstPageNo))
    pageinfo['start'] = start

    # number of pages, falling back to the number of text pages
    pageCount = int(docinfo.get('numPages', 0))
    if pageCount == 0:
        pageCount = docinfo.get('numTextPages', 0)
        if pageCount != 0:
            docinfo['numPages'] = pageCount

    lastPageNo = docinfo.get('maxPageNo', pageCount)
    logging.debug("minPageNo=%s maxPageNo=%s start=%s numPages=%s"%(firstPageNo,lastPageNo,start,pageCount))

    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))

    # number of thumbnail groups, rounded up
    groupCount = int(lastPageNo / groupSize)
    if lastPageNo % groupSize > 0:
        groupCount += 1

    pageinfo['numgroups'] = groupCount

    flowIsLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddIsLeft = docinfo.get('oddPage', 'left') != 'right'
    # two columns get an extra zeroth page when flow and odd-page side disagree
    pageZero = (cols == 2 and (flowIsLtr != oddIsLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=flowIsLtr, pageZero=pageZero, minIdx=firstPageNo, maxIdx=lastPageNo)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')

    # original page numbers, if docinfo has them
    origNumbers = docinfo.get('pageNumbers')
    if origNumbers:
        origEntry = origNumbers.get(pn, None)
        if origEntry is not None:
            pageinfo['pageNumberOrig'] = origEntry['no']
            pageinfo['pageNumberOrigNorm'] = origEntry['non']

    # search results are only fetched in text mode
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
984
985
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page information for one screenfull of thumbnails

    start: index of the first page on this screen
    rows, cols: size of the thumbnail grid
    pageFlowLtr: if False every row is filled from right to left
    pageZero: if True an empty slot is put before page minIdx
    minIdx, maxIdx: first and last valid page index (maxIdx=0 means
        start + rows * cols)

    The returned dict has the keys
      'batches': list of {'start', 'end'} for every screen
      'pages': list of rows, each a list of {'idx': n} ({'idx': None}
          for slots outside minIdx..maxIdx)
      'prevStart', 'nextStart': start index of the neighbouring
          screens or None
      'first', 'last': minIdx and maxIdx
    """
    logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # with a zeroth page all batches begin one slot before minIdx
    if pageZero:
        ofs = minIdx - 1
    else:
        ofs = minIdx

    # count slots from ofs so the empty zeroth slot is included;
    # otherwise the last page drops out of the list when the page
    # count is a multiple of grpsize
    np = maxIdx - ofs + 1
    nb = int(math.ceil(np / float(grpsize)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    if pageZero and start == minIdx:
        # first screen begins with the empty zeroth slot
        idx = minIdx - 1
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > minIdx:
        batch['prevStart'] = max(start - grpsize, minIdx)
    else:
        batch['prevStart'] = None

    # idx is now the first index after this screen; using it instead of
    # start + grpsize keeps the page after a zeroth-slot screen reachable
    if idx <= maxIdx:
        batch['nextStart'] = idx
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
1047
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start: index of the first element on this screen
    size: number of elements per screen
    end: index of the last element (inclusive); 0 means unknown and
        start + size is used instead
    data: optional mapping with the elements; if it is empty or None
        the element for index i is i+1
    fullData: if True data is keyed by absolute index, otherwise by
        position on this screen starting at 0

    The returned dict has the keys
      'batches': list of {'start', 'end'} for every screen
      'this': list of elements on this screen
      'prevStart', 'nextStart': start index of the neighbouring
          screens or None
      'first', 'last': start and end
    """
    batch = {}
    endGiven = (end != 0)
    if not endGiven:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start+size, end+1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
                j += 1

        else:
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next screen exists as soon as start + size
    # reaches end; an unknown end still gives no next screen
    if endGiven and start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1093
1094
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {id:, name:, uri:} on the annotation server for the user

    The list is empty when the server returns no data or the JSON
    answer has no 'rows' member.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    # reduce every row to the three members we care about
    groups = []
    for row in rows:
        group = {}
        group['id'] = row.get('id', None)
        group['name'] = row.get('name', None)
        group['uri'] = row.get('uri', None)
        groups.append(group)

    return groups
1109
1110
# the change form is a page template, protected by the
# 'View management screens' permission
security.declareProtected(
    'View management screens',
    'changeDocumentViewerForm'
)
changeDocumentViewerForm = PageTemplateFile(
    'zpt/changeDocumentViewer',
    globals()
)
1113
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    Stores the given settings on the viewer. The scaler and viewer
    URLs are derived from digilibBaseUrl, authgroups is a
    comma-separated string that is stored as a list of stripped,
    lower-cased names. Redirects to manage_main if RESPONSE is given.
    """
    self.title = title

    # digilib URLs
    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    # thumbnail grid
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groupNames = []
    for groupName in authgroups.split(','):
        groupNames.append(groupName.strip().lower())

    self.authgroups = groupNames

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1133
def manage_AddDocumentViewerForm(self):
    """add the viewer form

    Renders the page template zpt/addDocumentViewer in the context of
    self and returns the result.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = template.__of__(self)
    return boundTemplate()
1138
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and settings and stores
    it in self under that id. Redirects to manage_main if RESPONSE is
    given.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: