Context Navigation

source: documentViewer/documentViewer.py @ 607:cb5a9c4f5e3a

Last change on this file since 607:cb5a9c4f5e3a was 607:cb5a9c4f5e3a, checked in by casties, 11 years ago
CLOSED - # 268: display of subdocuments https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/ticket/268
File size: 44.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to XML.

    Returns whatever ``ET.tostring(node)`` produces. The ``encoding``
    argument is accepted for interface compatibility but is not forwarded
    to ElementTree.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' value (None when absent); anything that
    is not a dict is returned unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` '/'-separated segments removed.

    Trailing slashes are stripped before the path is split.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
49
def getPnForPf(docinfo, pf, default=0):
    """Return the image number for image file name ``pf``.

    Looks ``pf`` up in ``docinfo['imgFileNames']``; if it is not found the
    lookup is retried without the file extension. ``default`` is returned
    when there is no name table, no extension to cut, or no match.
    """
    if 'imgFileNames' not in docinfo:
        return default

    names = docinfo['imgFileNames']
    number = names.get(pf, None)
    if number is not None:
        return number

    # no direct hit: retry with the extension removed
    dot = pf.rfind('.')
    if dot <= 0:
        # no extension to cut
        return default

    return names.get(pf[:dot], default)
68
def getPfForPn(docinfo, pn, default=None):
    """Return the image file name for image number ``pn``.

    The name is taken from ``docinfo['imgFileIndexes']``; ``default`` is
    returned when the table or the entry is missing.
    """
    if 'imgFileIndexes' not in docinfo:
        return default

    return docinfo['imgFileIndexes'].get(pn, default)
76
77
78	##
79	## documentViewer class
80	##
class documentViewer(Folder):
    """document viewer"""
    # Zope meta type name of this product class
    meta_type="Document viewer"

    # security declarations for the methods below are registered on this object
    security=ClassSecurityInfo()
    # adds a "Configuration" tab (action changeDocumentViewerForm) to the Folder tabs
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # class-level default; __init__ tries to replace it with the 'metadata' object
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates (index_html looks up 'viewer_<viewMode>' on self.template)
    viewer_text = PageTemplateFile('zpt/viewer/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer/viewer_xml', globals())
    viewer_image = PageTemplateFile('zpt/viewer/viewer_image', globals())
    viewer_index = PageTemplateFile('zpt/viewer/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (or None)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'image': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates (named layer_<viewMode>_<layer>)
    layer_text_dict = PageTemplateFile('zpt/viewer/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/viewer/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/viewer/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/viewer/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/viewer/layer_text_pundit', globals())
    layer_image_annotator = PageTemplateFile('zpt/viewer/layer_image_annotator', globals())
    layer_image_search = PageTemplateFile('zpt/viewer/layer_image_search', globals())
    layer_index_extended = PageTemplateFile('zpt/viewer/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/viewer/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/viewer/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/viewer/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/viewer/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/viewer/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/viewer/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/viewer/common_template', globals())
    info_xml = PageTemplateFile('zpt/viewer/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
134
135
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores id, title and thumbnail grid size, parses ``authgroups`` (a
    comma-delimited string) into a list of lower-cased group names, and
    creates a 'template' sub-folder. It then tries to add a
    'fulltextclient' (MpdlXmlTextServer) and a 'zogilib' (zogiLib) object
    to that folder; failures are logged, not raised.

    The digilib URL attributes (digilibBaseUrl, digilibScalerUrl,
    digilibViewerUrl) are only set when ``digilibBaseUrl`` is given.
    """
    self.id=id
    self.title=title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style
    #self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
        #templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        # "as" form: valid in Python 2.6+ and Python 3
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
        #templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for 'zogilib': "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        # NOTE(review): presumably relies on acquisition, which may not be
        # available yet inside __init__ -- confirm this lookup can succeed.
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
175
176
177	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page.

    All keyword arguments are passed on to template.fulltextclient.getTextPage.
    """
    client = self.template.fulltextclient
    return client.getTextPage(**args)
181
def getSearchResults(self, **args):
    """Load the list of search results (the text server stores the XML in docinfo).

    All keyword arguments are passed on to template.fulltextclient.getSearchResults.
    """
    client = self.template.fulltextclient
    return client.getSearchResults(**args)
185
def getResultsPage(self, **args):
    """Return one page of the search results.

    All keyword arguments are passed on to template.fulltextclient.getResultsPage.
    """
    client = self.template.fulltextclient
    return client.getResultsPage(**args)
189
def getTextInfo(self, **args):
    """Return document info from the text server.

    All keyword arguments are passed on to template.fulltextclient.getTextInfo.
    """
    client = self.template.fulltextclient
    return client.getTextInfo(**args)
193
def getToc(self, **args):
    """Load the table of contents (the text server stores the XML in docinfo).

    All keyword arguments are passed on to template.fulltextclient.getToc.
    """
    client = self.template.fulltextclient
    return client.getToc(**args)
197
def getTocPage(self, **args):
    """Return one page of the table of contents.

    All keyword arguments are passed on to template.fulltextclient.getTocPage.
    """
    client = self.template.fulltextclient
    return client.getTocPage(**args)
201
def getRepositoryType(self, **args):
    """Return the repository type reported by the text server.

    All keyword arguments are passed on to template.fulltextclient.getRepositoryType.
    """
    client = self.template.fulltextclient
    return client.getRepositoryType(**args)
205
def getTextDownloadUrl(self, **args):
    """Return the result of the text server's getTextDownloadUrl.

    All keyword arguments are passed on to
    template.fulltextclient.getTextDownloadUrl (presumably yielding the
    download URL of the text -- confirm against the text server class).
    """
    client = self.template.fulltextclient
    return client.getTextDownloadUrl(**args)
209
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page.

    All keyword arguments are passed on to template.fulltextclient.getPlacesOnPage.
    """
    client = self.template.fulltextclient
    return client.getPlacesOnPage(**args)
213
214	# Thumb list for CoolIris Plugin
215	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
216	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list (RSS template 'thumbs_main_rss').

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: image: display images, text: display text, default is auto (try text, else image)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @return: rendered template, or an error string if the template folder is missing
    '''

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # the attribute only exists if __init__ was given a digilibBaseUrl,
    # so read it defensively (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is known, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="image"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
247
248
249	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode=None, start=None, pn=None, pf=None):
    """
    show page

    Normalises tocMode, viewMode and viewLayer, builds docinfo, userinfo
    and pageinfo, and renders the template 'viewer_<viewMode>' found on
    self.template.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'image': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    @param pf: page image file name, passed on to getPageinfo
    @return: rendered page, or an error string when the template folder or
        the viewMode template is missing
    """

    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl may never have been set; look it up via zogilib or use the default
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        if mode == "filepath":
            tocMode = "none"
        else:
            tocMode = "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    # auto viewMode: text if there is a text else images
    if viewMode=="auto":
        if docinfo.get('textURLPath', None):
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"
        else:
            viewMode = "image"

    elif viewMode == "text_dict":
        # legacy fix
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == 'images':
        # legacy fix
        viewMode = 'image'
        self.REQUEST['viewMode'] = 'image'

    # save viewLayer in userinfo
    # NOTE(review): this also stores None, replacing a previously saved layer -- confirm intended
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
319
def getAvailableLayers(self):
    """Return the availableLayers attribute (viewMode -> list of layer names)."""
    layers = self.availableLayers
    return layers
323
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
328
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    Uses docinfo['imageURL'] as base when available (it already carries
    the 'fn' parameter). Otherwise the URL is built from
    self.digilibScalerUrl and ``fn``, where ``fn`` falls back to
    docinfo['imagePath'].

    @param fn: image path for the 'fn' parameter
    @param pn: page number; appended as 'pn' only when truthy
    @param dw: value of the 'dw' parameter
    @param dh: value of the 'dh' parameter
    @param docinfo: optional docinfo dict
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # start the query string with '?' so the '&'-joined parameters
        # below form a valid URL (same pattern as in getDocinfo)
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
347
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
351
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
359
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set or delete
    @param val: new value for ``param`` (stored as str); None deletes it
    @param params: dict of further parameters; a None value deletes the key
    @param duplicates: how to collapse list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry (or '' when there is none); any other value
        leaves lists unchanged
    @return: new dict; the request's form dict itself is not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys);
        # iterate over a snapshot because entries are replaced in the loop
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; '' if all entries are empty
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
398
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with modified parameters.

    The parameters come from getParams(param, val, params, duplicates).
    Each value is passed through utf8ify and URL-quoted (keeping '/'),
    and the pairs are joined with ``paramSep``. ``baseUrl`` defaults to
    getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
409
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the document viewer with parameter ``param`` set to ``val``.

    Delegates to getLink with paramSep='&'.
    """
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, duplicates=duplicates)
    return self.getLink(paramSep='&', **linkArgs)
413
414
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON and accepted only if it is a JSON
    object containing the keys 'text' and 'image'; otherwise an error is
    logged and the layers are autodetected.

    Autodetection starts from a copy of builtinLayers and adds a layer for
    every id in self.template that consists of exactly three '_'-separated
    parts starting with 'layer'.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'image' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that appending below
    # never modifies the class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    self.availableLayers = layers
    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist (or has no layers yet) -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)
448
def getAvailableLayersJson(self):
    """Return the availableLayers attribute serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
452
453
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Builds docinfo for (mode, url) and renders the template 'info_xml'
    found on self.template with it.
    """
    # the attribute only exists if __init__ was given a digilibBaseUrl,
    # so read it defensively (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
462
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or ``anon`` if there is none.

    Zope's "Anonymous User" counts as not authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
470
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type, or one
    listed in self.authgroups, is granted only to authenticated users.
    Any other access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
487
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    If the session has no 'userinfo' entry yet, a new empty dict is
    stored there and returned.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # nothing cached: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
501
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    The docinfo dict is cached in the session under 'docinfo' and reused
    as long as mode and url match. Otherwise a new dict is assembled from
    the index.meta data supplied by self.metadataService, the text server
    and digilib, and stored in the session.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to a single image file)
    @param url: document location, interpreted according to mode
    @param tocMode: not used here
    @return: docinfo dict
    @raise IOError: if no index.meta is found for mode='texttool'
    @raise ValueError: if mode is unknown
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if url.startswith('/mpiwg/online/'):
            # NOTE(review): this starts again from url, so '/index.meta' is
            # kept and the leading '/' is dropped -- confirm this is intended
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgPath = docinfo['imagePath'].replace('/mpiwg/online', '', 1)
            logging.debug("imgpath=%s"%imgPath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgPath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    # min and max page no
    docinfo['minPageNo'] = docinfo.get('minPageNo', 1)
    # 'numPages' may still be missing (e.g. digilib could not be reached),
    # so it must not be read with [] here
    docinfo['maxPageNo'] = docinfo.get('maxPageNo', docinfo.get('numPages', 0))

    # normalize path
    if 'imagePath' in docinfo and not docinfo['imagePath'].startswith('/'):
        docinfo['imagePath'] = '/' + docinfo['imagePath']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
653
654
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute
    and extended by the document name). Without an archive-path the
    document URL from docinfo is used, unless it starts with 'http:'.
    A leading '/mpiwg/online' is removed from the path.

    @return: the updated docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # ('documentURL' is still accepted as fallback)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
679
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Fills in imagePath, minPageNo, maxPageNo, textURL, textURLPath,
    textURLRepository, pageFlow, oddPage, titlePage and presentationUrl
    as far as the texttool data provides them, and makes sure creator,
    title and date have at least placeholder values.

    @param docinfo: docinfo dict to update
    @param texttool: texttool metadata dict, or a list whose first element is used
    @return: the updated docinfo
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir:
        if imageDir.startswith('/'):
            # absolute path
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        elif docPath:
            # relative path
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

    # start and end page (for subdocuments of other documents)
    imgStartNo = getMDText(texttool.get('image-start-no', None))
    minPageNo = getInt(imgStartNo, 1)
    docinfo['minPageNo'] = minPageNo

    imgEndNo = getMDText(texttool.get('image-end-no', None))
    if imgEndNo:
        docinfo['maxPageNo'] = getInt(imgEndNo)

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, so not a URL: treat as path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # a node may be a plain string (see getMDText); only dict nodes
            # can carry attributes
            textUrlAtts = None
            if isinstance(tun, dict):
                textUrlAtts = tun.get('@attr')

            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default: minPageNo)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', minPageNo))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    # make sure we have at least fake DC data
    if 'creator' not in docinfo:
        docinfo['creator'] = '[no author found]'

    if 'title' not in docinfo:
        docinfo['title'] = '[no title found]'

    if 'date' not in docinfo:
        docinfo['date'] = '[no date found]'

    return docinfo
768
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Copy the contents of the bib element into docinfo.

    Stores the raw bib dict as docinfo['bib'], its '@type' as
    docinfo['bibType'], and the DC-mapped creator, title and date
    (from metadataService.getDCMappedData, '' when missing).
    ``bibx`` is accepted but not used here.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
782
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the 'type' attribute of the access
    element; for the types 'group' and 'institution' the lower-cased
    'name' of the element is used instead. docinfo is left unchanged when
    the access data does not have the expected structure.

    @return: the (possibly updated) docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing or oddly shaped access data is ignored
        logging.debug("getDocinfoFromAccess: no usable access type (%s)"%repr(e))

    return docinfo
800
def getDocinfoFromDigilib(self, docinfo, path):
    # Reads the number of pages and the image file names for directory
    # ``path`` from digilib's dirInfo-xml.jsp into docinfo.
    #
    # Sets docinfo['numPages'] from the <size> element (0 if empty or
    # missing) and, when there are pages, docinfo['imgFileNames']
    # (name -> index) and docinfo['imgFileIndexes'] (index -> name).
    # Returns docinfo unchanged if no data could be fetched.
    #
    # NOTE(review): documented with comments rather than a docstring on
    # purpose -- the original has no docstring, and Zope's publisher is
    # understood to expose only methods that have one; confirm before
    # adding a docstring.
    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirElem = ET.fromstring(txt)
    # save size
    size = dirElem.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if size:
        docinfo['numPages'] = int(size)
    else:
        docinfo['numPages'] = 0
        return docinfo

    # save list of image names and numbers
    imgNames = {}
    imgIndexes = {}
    for f in dirElem:
        fn = f.findtext('name')
        if fn is None:
            # child without a <name> (e.g. the <size> element itself)
            # is not a file entry
            continue

        pn = getInt(f.findtext('index'))
        imgNames[fn] = pn
        imgIndexes[pn] = fn

    docinfo['imgFileNames'] = imgNames
    docinfo['imgFileIndexes'] = imgIndexes
    return docinfo
832
833
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Fetches the XML at docinfo['presentationUrl'] (directly for http://
    URLs, otherwise through digilib's Texter servlet) and copies the text
    of the first author, title and date elements into docinfo['creator'],
    docinfo['title'] and docinfo['date'].

    @return: the updated docinfo; unchanged if there is no presentation
        URL or nothing could be read
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url

    txt=getHttpData(metaUrl)
    # treat an empty response like a failed request (as getDocinfoFromDigilib
    # does); parsing an empty string would raise
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
861
862
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Collects the per-request page parameters in a dict:

    pn, pf: page number and page file name; pf takes precedence over pn.
    start, rows, cols: thumbnail grid parameters (rows and cols default
        to self.thumbrows and self.thumbcols).
    docinfo: dict with document information (minPageNo, maxPageNo,
        numPages, numTextPages, pageFlow, oddPage, pageNumbers are read).
    userinfo: not used here.
    viewMode, tocMode: stored unchanged.
    viewLayer: comma-separated string or list of layer names; stored as
        string in 'viewLayer' and as list in 'viewLayers'.

    Side effects: if pf is given, 'pf' is replaced by 'pn' in
    self.REQUEST.form; docinfo['numPages'] is set from numTextPages if
    numPages is 0; search results are loaded via self.getSearchResults
    if there is a query and viewMode is 'text'.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    if docinfo is None:
        # the signature allows omitting docinfo, so don't fail on None.get()
        docinfo = {}

    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers, keeping the first occurrence
        # of every non-empty layer name
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    minPageNo = docinfo.get('minPageNo', 1)

    # pf takes precedence over pn
    if pf:
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
        # replace pf in request params (used for creating new URLs)
        self.REQUEST.form.pop('pf', None)
        self.REQUEST.form['pn'] = pn
    else:
        pn = getInt(pn, minPageNo)
        pf = getPfForPn(docinfo, pn)
        pageinfo['pf'] = pf

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use one around pn
    # (int() because math.ceil returns a float in Python 2)
    grouppn = int(math.ceil(float(pn)/float(grpsize)))*grpsize-(grpsize-1)
    # but not smaller than minPageNo
    start = getInt(start, max(grouppn, minPageNo))
    pageinfo['start'] = start
    # get number of pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        # try numTextPages (normalized to a number like numPages above)
        numPages = getInt(docinfo.get('numTextPages', 0), 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    maxPageNo = docinfo.get('maxPageNo', numPages)
    logging.debug("minPageNo=%s maxPageNo=%s start=%s numPages=%s"%(minPageNo,maxPageNo,start,numPages))
    np = maxPageNo

    # table of contents paging
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups (rounded up)
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=minPageNo, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # search results
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query and viewMode == 'text':
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
964
965
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page information for one screenfull of thumbnails

    start: index of the first page of the current screen.
    rows, cols: size of the thumbnail grid (rows*cols slots per screen).
    pageFlowLtr: if false the pages of each row are put in reverse order.
    pageZero: if true the grid starts with an empty slot before minIdx.
    minIdx, maxIdx: first and last valid page index (maxIdx=0 is
        replaced by start+rows*cols).

    The returned dict has the keys
    'batches': list of {'start':, 'end':} for all screens,
    'pages': list of rows of {'idx':} (idx is None for empty slots),
    'prevStart', 'nextStart': start of the neighbouring screen or None,
    'first', 'last': minIdx and maxIdx.
    """
    logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # with a zeroth page the grid starts one slot before minIdx
    if pageZero:
        ofs = minIdx - 1
    else:
        ofs = minIdx

    if maxIdx < minIdx:
        # no pages at all
        nb = 0
    else:
        # count the slots from ofs (not minIdx) so that the extra zeroth
        # slot can not push the last page out of the last batch
        nb = int(math.ceil((maxIdx - ofs + 1) / float(grpsize)))

    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    if pageZero and start == minIdx:
        # correct beginning
        firstIdx = minIdx - 1
    else:
        firstIdx = start

    pages = []
    idx = firstIdx
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # empty slot
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > minIdx:
        batch['prevStart'] = max(start - grpsize, minIdx)
    else:
        batch['prevStart'] = None

    # the next screen starts after the last slot shown here; this is
    # firstIdx (not start) + grpsize, otherwise a page is skipped after
    # a screen that begins with the zeroth slot
    if firstIdx + grpsize <= maxIdx:
        batch['nextStart'] = firstIdx + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
1027
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start: index of the first element of this batch.
    size: number of elements per batch.
    end: index of the last element (inclusive); 0 is replaced by
        start+size.
    data: dict with the elements. If fullData is true it is read with
        the element index (start, start+1, ...), otherwise with the
        position in this batch (0, 1, ...). Without data the entries
        are index+1.

    The returned dict has the keys
    'batches': list of {'start':, 'end':} for all batches,
    'this': list of the elements of this batch,
    'prevStart', 'nextStart': start of the neighbouring batch or None,
    'first', 'last': start and end.
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start+size, end+1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
                j += 1

        else:
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so there is a next batch as long as its first
    # element is not past end
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1073
1074
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:} on the annotation server for the user

    Reads <annotationServerUrl>/annotator/groups?user=<user> and returns
    a list of dicts with the keys 'id', 'name' and 'uri' taken from the
    'rows' of the JSON response. Returns an empty list if the server
    gives no data, no usable JSON or no rows.
    """
    groups = []
    # the user name becomes part of the query string and has to be escaped
    try:
        userParam = urllib.quote(str(user))
    except UnicodeError:
        # unicode name with non-ASCII characters
        userParam = urllib.quote(user.encode('utf-8'))

    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,userParam)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    # the request is best-effort (noExceptions), so a broken answer
    # should not raise either
    try:
        res = json.loads(data)
    except ValueError as e:
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        return groups

    rows = res.get('rows', None)
    if not rows:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1089
1090
# management form (page template zpt/changeDocumentViewer), protected by
# the 'View management screens' permission
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1093
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title: new title.
    digilibBaseUrl: base URL of the digilib server; the scaler and
        viewer URLs are derived from it. None leaves the current
        digilib URLs unchanged.
    thumbrows, thumbcols: default size of the thumbnail grid.
    authgroups: comma-separated group names (stored stripped and in
        lower case as list).
    availableLayers: passed on to self.setAvailableLayers.
    RESPONSE: if given, redirects to 'manage_main'.
    """
    self.title=title
    if digilibBaseUrl is not None:
        # None + string would raise a TypeError after the object was
        # already partly changed
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1113
def manage_AddDocumentViewerForm(self):
    """returns the rendered form for adding a viewer"""
    # load the page template and wrap it with __of__(self) before calling it
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundForm = formTemplate.__of__(self)
    return boundForm()
1118
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """creates a new documentViewer and adds it under the given id"""
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName)
    self._setObject(id, viewer)

    # without RESPONSE there is nothing more to do
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: