Context Navigation

source: documentViewer/documentViewer.py @ 590:ed4485d2748e

Last change on this file since 590:ed4485d2748e was 590:ed4485d2748e, checked in by casties, 11 years ago
viewMode "images" changed to "image".
File size: 41.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to an XML string.

    The ``encoding`` parameter is kept in the signature (it was used by the
    former 4Suite based implementation) but is not passed on to ElementTree.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the text of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' entry (None when it is missing); any
    other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are ignored before the path is split on '/'.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[0:-cnt])
49
def getPnForPf(docinfo, pf, default=0):
    """Look up the image number for an image file name.

    Uses the docinfo['imgFileNames'] mapping. The name is tried as given
    and, if that fails, once more with its extension cut off. Returns
    default when there is no mapping or no match.
    """
    if 'imgFileNames' not in docinfo:
        return default

    names = docinfo['imgFileNames']
    pn = names.get(pf, None)
    if pn is not None:
        return pn

    # no direct hit: retry without the file extension (if there is one)
    dot = pf.rfind('.')
    if dot > 0:
        return names.get(pf[:dot], default)

    return default
68
69
70	##
71	## documentViewer class
72	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (set in __init__ when available)
    metadataService = None

    #
    # templates and forms
    #
    # one template per viewMode
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_image = PageTemplateFile('zpt/viewer_image', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())
    # layer types available per viewMode (annotator is not a default)
    builtinLayers = {
        'text': ['dict', 'search', 'gis'],
        'xml': None,
        'image': None,
        'index': ['extended'],
    }
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers
    # one template per layer
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_image_annotator = PageTemplateFile('zpt/layer_image_annotator', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # table-of-contents templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # remaining templates and static files
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css', globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
125
126
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="",
             digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """Initialize the document viewer.

    @param id: id of the viewer object
    @param imageScalerUrl: passed as dlServerURL to the zogiLib instance
    @param textServerName: passed as serverName to the MpdlXmlTextServer
    @param title: title of the viewer object
    @param digilibBaseUrl: base URL of digilib; when given, the Scaler and
        viewer URLs are derived from it. When None, none of the three
        digilib URL attributes is set here.
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma separated list of authorized groups
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder  # Zope-2.12 style
    #self._setObject('template',templateFolder) # old style

    # the helper objects are optional: a failure is logged, not raised
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
        #templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
        #templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
166
167
168	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
172
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
176
def getResultsPage(self, **args):
    """Return one page of the search results.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
180
def getTextInfo(self, **args):
    """Return document info from the text server.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
184
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
188
def getTocPage(self, **args):
    """Return one page of the table of contents.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
192
def getRepositoryType(self, **args):
    """Return the repository type.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
196
def getTextDownloadUrl(self, **args):
    """Return the result of the text server's getTextDownloadUrl.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
200
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page.

    Proxy: all keyword arguments are forwarded to the 'fulltextclient'
    object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
204
# RSS thumbnail list for the CoolIris plugin:
# page template plus 'View' permission for the thumbs_rss method
thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
security.declareProtected('View', 'thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list through the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text,
        default is auto (text if the document has a text, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @return: output of the template, or an error string when the template
        folder is missing
    """
    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl may never have been set (see __init__), so look it up
    # with a default instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode selected: use text when a text URL is present
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
238
239
# index_html requires the 'View' permission
security.declareProtected('View', 'index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """Show a page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start, pn, pf: passed on to getPageinfo
    @return: output of the template 'viewer_<viewMode>' from the template
        folder, or an error string if the folder or template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default tocMode: mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode, url=url, tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "auto":
        # auto viewMode: text if there is a text, else image
        if docinfo.get('textURLPath', None):
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"
        else:
            viewMode = "image"
    elif viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        # TODO: error page?
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
310
def getAvailableLayers(self):
    """Return the dict that maps each viewMode to its list of available layers."""
    layers = self.availableLayers
    return layers
314
def findDigilibUrl(self):
    """Ask the 'zogilib' object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
319
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return the URL to the digilib Scaler with parameters.

    @param fn: image path; only used when docinfo has no 'imageURL'.
        Defaults to docinfo['imagePath'] when docinfo is given.
    @param pn: page number; appended as 'pn' only when truthy
    @param dw: value for the 'dw' parameter
    @param dh: value for the 'dh' parameter
    @param docinfo: optional docinfo dict; its 'imageURL' (which already
        carries the fn parameter) is preferred as base of the URL
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # start the query string with '?' like the imageURLs built in getDocinfo
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
338
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
342
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
350
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the current request's form parameters and applies
    the single change param=val and/or all changes from the dict params.
    A value of None deletes the key.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str), None removes param
    @param params: dict of further parameters to change (values stored as
        given), a None value removes the key
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with commas, 'first'
        takes the first non-empty entry ('' if all entries are empty).
        Any other true value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; avoid IndexError if there is none
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
389
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with changed parameters.

    The parameters come from getParams (current request parameters with
    param set to val and/or the changes from dict params applied).

    @param baseUrl: URL in front of the query string, defaults to the URL
        of this viewer
    @param paramSep: separator between the key=value pairs
    @param duplicates: passed on to getParams
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values (not escaping '/') and assemble into query string
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
400
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the document viewer with parameter param set to val.

    Thin wrapper around getLink with a fixed paramSep.
    """
    # NOTE(review): the separator passed here is a plain '&', the same as
    # getLink's default - confirm whether an escaped ampersand was intended
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
404
405
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the layers.

    @param newLayerString: JSON object mapping viewMode to list of layers.
        It is accepted only if it has the keys 'text' and 'image';
        otherwise an error is logged and the layers are autodetected.

    Autodetection starts with a copy of builtinLayers and adds a layer l
    for mode m for every id in the template folder of the form
    layer_{m}_{l}. Ids with more or fewer than three '_'-separated parts
    are ignored.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            # not a string or not valid JSON
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'image' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that adding layers
    # below does not change the shared class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        layers[mode] = list(names) if isinstance(names, list) else names

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            continue

        (x, m, l) = parts
        if not layers.get(m):
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
439
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
443
444
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of the 'info_xml' template from the template folder,
        called with the docinfo for mode and url
    """
    # digilibBaseUrl may never have been set (see __init__), so look it up
    # with a default instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
453
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object.

    Zope's anonymous user (user name "Anonymous User") does not count as
    authenticated; in that case, or when there is no user at all, the
    value of anon is returned.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
461
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type or one of
    the configured authgroups requires an authenticated user. Any other
    access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = self.getAuthenticatedUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        return user is not None

    logging.error("documentViewer (accessOK) unknown access type %s"%access)
    return False
478
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored in the session under 'userinfo' when there
    is none yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (check if its still current?)
        return session['userinfo']

    # nothing cached: store a fresh dict in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
492
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document, depending on mode.

    The docinfo cached in the session is reused when its mode and url match.
    Otherwise a new docinfo is assembled from the index.meta metadata
    (via metadataService), the text server and digilib, and stored in the
    session.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to image file)
    @param url: url or path of the document
    @param tocMode: not used in this method
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if docUrl.startswith('/mpiwg/online/'):
            # strip the prefix from docUrl (not url) to keep index.meta removed
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
637
638
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from the resource's 'name' and
    docinfo['documentPath'] from its 'archive-path'. Without an
    archive-path the document URL in docinfo is used as path, unless it
    starts with 'http:'. A leading '/mpiwg/online' is removed from the path.

    @return: the updated docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the old spelling 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
663
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    May set 'imagePath', 'textURL', 'textURLPath', 'textURLRepository' and
    'presentationUrl'; always sets 'pageFlow', 'oddPage' and 'titlePage'.

    @param texttool: texttool metadata (dict, or list of dicts of which
        only the first is used)
    @return: the updated docinfo
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # not a URL (no scheme): path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes can carry attributes; plain values have none
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if textUrlAtts and 'repository' in textUrlAtts:
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
726
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as 'bib', the bib '@type' as 'bibType' and,
    for convenience, 'creator', 'title' and 'date' from the DC mapping of
    bib ('' when missing). bibx is not used here.

    @return: the updated docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
740
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the 'type' attribute of acc. For the
    types 'group' and 'institution' the lower-cased 'name' of acc is used
    instead. docinfo is left unchanged when acc has no usable type.

    @return: the (possibly updated) docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # incomplete or unexpected access data: keep docinfo as it is
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
758
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages and the image file names from digilib.

    Fetches dirInfo-xml.jsp for path from the digilib server. Sets
    docinfo['numPages'] from the <size> element (0 when it is missing or
    empty) and, when there is a size, docinfo['imgFileNames'] as a mapping
    from the <name> of each child element to its <index>.

    @return: the updated docinfo (unchanged when nothing could be fetched)
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirInfo = ET.fromstring(txt)
    # save size
    size = dirInfo.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    for entry in dirInfo:
        fn = entry.findtext('name')
        if fn is None:
            # not a file entry (e.g. the <size> element itself)
            continue

        imgNames[fn] = getInt(entry.findtext('index'))

    docinfo['imgFileNames'] = imgNames
    return docinfo
787
788
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the XML behind docinfo['presentationUrl'] and stores the text of
    its first author, title and date elements as 'creator', 'title' and
    'date'. docinfo is returned unchanged when there is no URL or nothing
    could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: read through the digilib Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
816
817
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param pn: page number (default 1)
    @param pf: image file name of the page; takes precedence over pn
    @param start: first page of the thumbnail batch (default: start of the
        batch around pn)
    @param rows, cols: thumbnail grid size (default: thumbrows/thumbcols)
    @param docinfo: docinfo dict of the document
    @param userinfo: not used in this method
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: layer name, comma separated names or list of names
    @param tocMode: stored as pageinfo['tocMode']

    Side effects: with pf, 'pf' is replaced by 'pn' in the request form;
    docinfo['numPages'] is set from 'numTextPages' when it was 0; search
    results are loaded through getSearchResults when there is a query.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}
    # split viewLayer if necessary
    if isinstance(viewLayer, basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save list of non-empty layers without duplicates in viewLayers
        uniqueLayers = []
        for layer in viewLayer:
            if layer and layer not in uniqueLayers:
                uniqueLayers.append(layer)

        pageinfo['viewLayers'] = uniqueLayers
        # stringify viewLayer
        viewLayer = ','.join(uniqueLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # pf takes precedence over pn
    if pf:
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        form = self.REQUEST.form
        form.pop('pf', None)
        form['pn'] = pn
    else:
        pn = getInt(pn, 1)

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    # if start is empty use the start of the batch around pn
    batchStart = math.ceil(float(pn)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=batchStart)
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
910
911
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails.

    start -- index of the first page of the screen
    rows, cols -- size of the thumbnail grid; one batch holds rows*cols pages
    pageFlowLtr -- if False the pages of every row are filled right-to-left
    pageZero -- if True batches are counted from index 0 instead of 1 and a
        start of 1 is displayed beginning at index 0
    minIdx, maxIdx -- inclusive range of valid page indexes, cells outside of
        it get idx None. maxIdx=0 is replaced by start+rows*cols.

    The returned dict has the keys
    batches -- list of {'start':, 'end':} for all batches up to maxIdx
    pages -- list of rows, every row is a list of {'idx': index-or-None}
    prevStart, nextStart -- start index of the neighbouring screen or None
    first, last -- minIdx and maxIdx
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # index of the very first cell of the first batch
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # number of batches needed to cover ofs..maxIdx (inclusive); counting
    # from ofs makes sure the last page is covered when batches begin at 0
    nb = int(math.ceil((maxIdx - ofs + 1) / float(grpsize)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start': s, 'end': e})

    # with a zeroth page the first screen begins at index 0
    if pageZero and start == 1:
        firstIdx = 0
    else:
        firstIdx = start

    pages = []
    idx = firstIdx
    for _r in range(rows):
        row = []
        for _c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx': idx}
            else:
                # cell without a page
                page = {'idx': None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        prevStart = max(start - grpsize, 1)
    else:
        prevStart = None

    # the next screen begins directly after the last cell of this screen,
    # so it has to be counted from the index that was really displayed first
    nextStart = firstIdx + grpsize
    if nextStart > maxIdx:
        nextStart = None

    batch = {}
    batch['batches'] = batches
    batch['pages'] = pages
    batch['prevStart'] = prevStart
    batch['nextStart'] = nextStart
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
971
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start -- index of the first element of this batch
    size -- number of elements per batch
    end -- index of the last element (inclusive), 0 is replaced by start+size
    data -- object with a get(key, default) method that holds the elements;
        if it is empty or None the element for index i is the number i+1
    fullData -- if True data is looked up by the absolute index (start, ...),
        otherwise by the position inside this batch (0, 1, ...)

    The returned dict has the keys
    batches -- list of {'start':, 'end':} for all batches up to end
    this -- list of the elements of this batch
    prevStart, nextStart -- start index of the neighbouring batch or None
    first, last -- start and end
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        batches.append({'start': i * size + 1, 'end': min((i + 1) * size, end)})

    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if not data:
            d = i + 1
        elif fullData:
            d = data.get(i, None)
        else:
            d = data.get(j, None)

        this.append(d)

    if start > 1:
        prevStart = max(start - size, 1)
    else:
        prevStart = None

    # end is inclusive, so there is a next batch as soon as its first
    # element (start + size) is not beyond end
    if start + size <= end:
        nextStart = start + size
    else:
        nextStart = None

    batch = {}
    batch['batches'] = batches
    batch['this'] = this
    batch['prevStart'] = prevStart
    batch['nextStart'] = nextStart
    batch['first'] = start
    batch['last'] = end
    return batch
1017
1018
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:, uri:} on the annotation server for the user.

    user -- user that is sent as "user" query parameter
    annotationServerUrl -- base URL of the annotation server

    Returns an empty list if the server sends no data, if the data is not
    a JSON object or if the object has no "rows".
    """
    groups = []
    # percent-encode the user so that characters like "&", "#" or " " can
    # neither break nor extend the query string
    if isinstance(user, unicode):
        userParam = user.encode('utf-8')
    else:
        userParam = str(user)

    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl, urllib.quote(userParam, ''))
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    # the request is made with noExceptions, so a broken answer should not
    # raise here either
    try:
        res = json.loads(data)
    except ValueError as e:
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1033
1034
# the form is declared protected by the 'View management screens' permission
security.declareProtected('View management screens', 'changeDocumentViewerForm')
# page template file zpt/changeDocumentViewer
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1037
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title -- new title
    digilibBaseUrl -- base URL of digilib; the scaler and viewer URLs are
        derived from it, without a base URL both are set to None
    thumbrows, thumbcols -- stored as thumbrows and thumbcols
    authgroups -- comma separated group names, stored stripped and lowercased
    availableLayers -- passed on to setAvailableLayers
    RESPONSE -- if given it is redirected to manage_main
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # nothing to derive the URLs from (None is the default value)
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata object is only logged, the other settings are kept
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1057
def manage_AddDocumentViewerForm(self):
    """add the viewer form

    Loads the page template zpt/addDocumentViewer, wraps it with
    __of__(self) and returns the result of calling it.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    wrapped = template.__of__(self)
    return wrapped()
1062
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer from the given parameters and stores it in
    self under id. If RESPONSE is given it is redirected to manage_main.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: