Context Navigation

source: documentViewer/documentViewer.py @ 587:6000c7e24d8a

Last change on this file since 587:6000c7e24d8a was 587:6000c7e24d8a, checked in by casties, 11 years ago
new parameter "pf" to specify image file name. (still some issues)
File size: 41.5 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The encoding argument is accepted but not passed on to ElementTree; it
    was only used by the former 4Suite code path kept below for reference.
    """
    # 4Suite:
    # stream = cStringIO.StringIO()
    # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    # s = stream.getvalue()
    # stream.close()
    return ET.tostring(node)
35
def getMDText(node):
    """Return the text of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' entry (None when it is missing); any
    other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed."""
    # trailing slashes must not count as an (empty) last segment
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
49
def getPnForPf(docinfo, pf):
    """Return the image number for image file name pf, or 0 if unknown.

    pf is looked up in docinfo['imgFileNames'] (a dict mapping file name to
    image number). When the full name is not found the lookup is retried
    with the file extension cut off.

    Always returns 0 (never None) when nothing matches, including when
    docinfo is None/empty or pf is empty, so callers can safely do
    arithmetic on the result.
    """
    if not docinfo or not pf:
        return 0

    names = docinfo.get('imgFileNames')
    if not names:
        return 0

    pn = names.get(pf, None)
    if pn is None:
        # try again without the extension
        dot = pf.rfind('.')
        if dot > 0:
            pn = names.get(pf[:dot], None)

    if pn is None:
        return 0

    return pn
65
66
67	##
68	## documentViewer class
69	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # add a 'Configuration' tab to the Folder management tabs
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (None: no layers for that mode)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_images_annotator = PageTemplateFile('zpt/layer_images_annotator', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
122
123
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and the thumbnail grid size, parses authgroups (a
    comma-delimited string) into a list of lower-cased group names and
    creates the 'template' sub-folder. Creating the 'fulltextclient' and
    'zogilib' helper objects and looking up the 'metadata' folder are best
    effort: failures are logged and otherwise ignored.

    The digilib URLs are only set when digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    tmplFolder = Folder('template')
    self['template'] = tmplFolder # Zope-2.12 style
    #self._setObject('template',tmplFolder) # old style

    try:
        import MpdlXmlTextServer
        tmplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
163
164
165	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy for template.fulltextclient.getTextPage: returns full text content of page."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
169
def getSearchResults(self, **args):
    """Proxy for template.fulltextclient.getSearchResults: loads list of search results and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
173
def getResultsPage(self, **args):
    """Proxy for template.fulltextclient.getResultsPage: returns one page of the search results."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
177
def getTextInfo(self, **args):
    """Proxy for template.fulltextclient.getTextInfo: returns document info from the text server."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
181
def getToc(self, **args):
    """Proxy for template.fulltextclient.getToc: loads table of contents and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
185
def getTocPage(self, **args):
    """Proxy for template.fulltextclient.getTocPage: returns one page of the table of contents."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
189
def getRepositoryType(self, **args):
    """Proxy for template.fulltextclient.getRepositoryType: get repository type."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
193
def getTextDownloadUrl(self, **args):
    """Proxy for template.fulltextclient.getTextDownloadUrl: returns whatever the text server reports as text download URL."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
197
def getPlacesOnPage(self, **args):
    """Proxy for template.fulltextclient.getPlacesOnPage: get list of gis places on one page."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
201
202	# Thumb list for CoolIris Plugin
203	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
204	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list with the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed on to getPageinfo
    @param pn: page number, passed on to getPageinfo
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set in __init__ when a value was given, so it
    # may be missing altogether (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,pn=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if "textURL" in docinfo or docinfo.get('textURLPath',None): # text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
234
235
236	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=None,pn=None,pf=None):
    """Show one page of a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start, pn, pf: passed on to getPageinfo

    Returns the output of template 'viewer_<viewMode>' called with docinfo
    and pageinfo, or a plain error string when the template folder or the
    viewer template is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s pf=%s"%(mode,url,viewMode,viewLayer,start,pn,pf))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, pn=pn, pf=pf, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    viewerTemplate = getattr(self.template, 'viewer_%s'%viewMode, None)
    if viewerTemplate is not None:
        # and execute with parameters
        return viewerTemplate(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
301
def getAvailableLayers(self):
    """Return the availableLayers dict (viewMode -> list of layer names)."""
    layers = self.availableLayers
    return layers
305
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
310
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    When docinfo has an 'imageURL' it is used as the base (it already
    carries the fn parameter). Otherwise the URL is built from
    self.digilibScalerUrl and fn, where fn defaults to docinfo['imagePath']
    (or '' when missing) if docinfo is given.

    @param fn: digilib file/directory name
    @param pn: page number; omitted from the URL when empty
    @param dw, dh: destination width and height
    @param docinfo: optional docinfo dict
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = self.digilibScalerUrl
        # the query string needs a separator between scaler URL and "fn="
        # (the plain concatenation used before produced ".../Scalerfn=...")
        if '?' not in base:
            sep = '?'
        elif base.endswith('?') or base.endswith('&'):
            sep = ''
        else:
            sep = '&'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
329
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
333
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if not idx == selected:
        return style

    return style + 'sel'
341
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the current request's form parameters, then
    applies param=val and/or the entries of dict params. A value of None
    deletes the key.

    @param param: name of a single parameter to set or remove
    @param val: new value for param (stored as str); None removes param
    @param params: dict of further parameters; None values remove the key
    @param duplicates: how to collapse list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' when all entries are empty); any other
        true value leaves lists untouched
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; all-empty lists used to
                # raise IndexError here
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
380
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with param set to val and/or the entries of dict params.

    The parameters come from getParams(); values are utf8ified and quoted
    (not escaping '/') and joined with paramSep. baseUrl defaults to the URL
    of this instance.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
391
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments with the fixed parameter separator below."""
    # NOTE(review): the separator is a plain '&' here; given the method
    # name an HTML-escaped ampersand may have been intended -- confirm
    options = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**options)
395
396
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect available layers.

    newLayerString is parsed as JSON and used as-is when the result
    contains both 'text' and 'images'. Otherwise (or when it is None) the
    layers are autodetected: the builtin layers plus one layer per template
    in self.template named layer_{m}_{l} (layer l in mode m).

    The builtin layer lists are copied, so autodetection never modifies the
    class-level builtinLayers.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers (copy the lists, they get appended to)
    layers = {}
    for (m, ls) in self.builtinLayers.items():
        if ls is None:
            layers[m] = None
        else:
            layers[m] = list(ls)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_', 2)
        if len(parts) != 3:
            # no layer name after the mode
            continue

        (x, m, l) = parts
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
430
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
434
435
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    Builds docinfo for mode and url and renders it with the 'info_xml'
    template from the template folder.
    """
    # digilibBaseUrl is only set in __init__ when a value was given, so it
    # may be missing altogether (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
444
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon for Zope's anonymous user (or no user)."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
452
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type or one
    listed in self.authgroups requires an authenticated user. Any other
    access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
469
def getUserinfo(self):
    """Return the userinfo dict cached in the session, creating and storing an empty one if missing."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if not session.has_key('userinfo'):
        # nothing cached yet: store a fresh dict in session
        fresh = {}
        session['userinfo'] = fresh
        return fresh

    # look for cached userinfo in session
    # check if its still current?
    return session['userinfo']
483
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode.

    Returns the docinfo dict cached in the session when its 'mode' and
    'url' match the arguments. Otherwise a new docinfo is built from the
    index.meta metadata (via self.metadataService), the full text server
    and digilib, stored in the session and returned.

    mode is one of 'texttool' (url points to the document directory or its
    index.meta), 'imagepath' (url points to a folder with images) or
    'filepath' (url points to an image file).
    tocMode is accepted but not used in this method.

    Raises IOError when no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        # NOTE(review): the replace below starts again from url, so the
        # '/index.meta' removal above is lost for urls starting with
        # /mpiwg/online/ -- confirm this is intended
        if url.startswith('/mpiwg/online/'):
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
628
629
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute,
    with the document name appended when missing). Without an archive-path
    the document URL already stored in docinfo is used, unless it starts
    with 'http:'. A leading '/mpiwg/online' is removed from the path.

    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # (the formerly used key 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
654
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool is a metadata dict (or a list, of which the first entry is
    used). Depending on its contents this sets 'imagePath', 'textURL',
    'textURLPath', 'textURLRepository' and 'presentationUrl', and always
    sets 'pageFlow', 'oddPage' and 'titlePage' (with defaults 'ltr',
    'left' and 1).

    Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "": # no scheme -> not a URL
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes can carry attributes; plain-text nodes (which
        # getMDText passes through unchanged) have none
        textUrlAtts = None
        if isinstance(tun, dict):
            textUrlAtts = tun.get('@attr')

        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
717
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' when missing). bibx is
    accepted but not used here. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
731
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the type attribute of acc; for types
    'group' and 'institution' the lower-cased name of acc is used instead.
    Incomplete or unexpected access data is ignored (best effort) and
    leaves docinfo unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # missing attribute/name or unexpected structure: no access info
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
749
def getDocinfoFromDigilib(self, docinfo, path):
    """Read directory information for path from digilib into docinfo.

    Fetches <digilibBaseUrl>/dirInfo-xml.jsp?fn=<path> and sets
    docinfo['numPages'] from the <size> element (0 when empty or missing).
    When there are pages, docinfo['imgFileNames'] maps the <name> of each
    child element to its <index> as int.

    Returns docinfo (unchanged when nothing could be fetched).
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirNode = ET.fromstring(txt)
    # save size
    size = dirNode.findtext('size')
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    if not size:
        docinfo['numPages'] = 0
        return docinfo

    docinfo['numPages'] = int(size)

    # save list of image names and numbers
    imgNames = {}
    for f in dirNode:
        fn = f.findtext('name')
        if fn is None:
            # child without a name (e.g. the <size> element itself)
            continue

        imgNames[fn] = getInt(f.findtext('index'))

    docinfo['imgFileNames'] = imgNames
    return docinfo
778
779
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the XML at docinfo['presentationUrl'] (directly when it is an
    http(s) URL, else through the digilib Texter servlet) and sets
    docinfo['creator'], ['title'] and ['date'] from the first author,
    title and date elements.

    Returns docinfo (unchanged when there is no URL or nothing was read).
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    if not txt:
        # nothing (or an empty document) was returned; empty text can
        # not be parsed either
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
807
808
def getPageinfo(self, pn=None, pf=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters.

    Builds a new pageinfo dict for the current request.

    pn: page number (default 1); ignored when pf is given.
    pf: image file name; takes precedence over pn, which is then taken
        from getPnForPf(docinfo, pf).
    start: first page of the thumbnail batch; defaults to the start of
        the group that contains pn.
    rows, cols: thumbnail grid size; default to self.thumbrows and
        self.thumbcols.
    docinfo: document info dict; it is read with .get() below, so it must
        not be None. Its 'numPages' is set from 'numTextPages' when it
        was 0.
    userinfo: accepted but not used in this method.
    viewMode, tocMode: stored in pageinfo as given.
    viewLayer: layer name, comma-separated names or list of names; stored
        as list in 'viewLayers' and as string in 'viewLayer'.

    Further parameters (tocPageSize, characterNormalization,
    resultPageSize, query, queryType, resultStart, highlightQuery,
    highlightElement, highlightElementPos) are read from self.REQUEST.
    When a query is present getSearchResults() is called.
    """
    logging.debug("getPageInfo(pn=%s, pf=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(pn,pf,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so "not seen.add(l)" is always true
        # and only records l as seen)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    #pageinfo['current'] = current
    # pf takes precedence over pn
    if pf:
        pageinfo['pf'] = pf
        pn = getPnForPf(docinfo, pf)
    else:
        pn = getInt(pn, 1)

    pageinfo['pn'] = pn
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the start of the group around pn
    start = getInt(start, default=(math.ceil(float(pn)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(pn, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
900
901
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return the thumbnail grid and paging data for one screen of pages.

    start       -- index of the first page on the current screen
    rows, cols  -- size of the thumbnail grid
    pageFlowLtr -- if false every grid row is filled right-to-left
    pageZero    -- if true screens are counted from an (empty) page 0
    minIdx      -- smallest valid page index
    maxIdx      -- largest valid page index; 0 means start + rows * cols

    The result is a dict with the keys
    'batches'   -- list of {'start':, 'end':} for every screen
    'pages'     -- list of rows, each a list of {'idx': n} cells where n is
                   None for positions outside minIdx..maxIdx
    'prevStart' -- start of the previous screen or None
    'nextStart' -- start of the next screen or None
    'first'     -- minIdx
    'last'      -- maxIdx
    """
    perScreen = rows * cols
    if maxIdx == 0:
        # no upper bound given: assume it lies one screen after start
        maxIdx = start + perScreen

    # screens are counted from 0 when there is a zeroth page, else from 1
    offset = 0 if pageZero else 1
    numScreens = int(math.ceil(maxIdx / float(perScreen)))
    allBatches = [
        {'start': n * perScreen + offset,
         'end': min((n + 1) * perScreen + offset - 1, maxIdx)}
        for n in range(numScreens)
    ]

    # the very first screen begins on the zeroth page
    current = 0 if (pageZero and start == 1) else start
    grid = []
    for _ in range(rows):
        cells = []
        for pos in range(current, current + cols):
            inRange = minIdx <= pos <= maxIdx
            cells.append({'idx': pos if inRange else None})

        if not pageFlowLtr:
            # right-to-left flow shows the lowest index rightmost
            cells.reverse()

        grid.append(cells)
        current += cols

    return {
        'batches': allBatches,
        'pages': grid,
        'prevStart': max(start - perScreen, 1) if start > 1 else None,
        'nextStart': start + perScreen if start + perScreen <= maxIdx else None,
        'first': minIdx,
        'last': maxIdx,
    }
961
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return paging information and the items for one screen of data.

    start    -- index of the first item of the current batch
    size     -- number of items per batch
    end      -- index of the last item (inclusive); 0 means start + size
    data     -- optional mapping that is queried with .get()
    fullData -- if true data is keyed by the absolute item index, otherwise
                by the position inside the current batch (starting at 0)

    The result is a dict with the keys
    'batches'   -- list of {'start':, 'end':} for every batch
    'this'      -- list of the items of the current batch
    'prevStart' -- start of the previous batch or None
    'nextStart' -- start of the next batch or None
    'first'     -- start
    'last'      -- end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [
        {'start': i * size + 1, 'end': min((i + 1) * size, end)}
        for i in range(nb)
    ]

    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
        else:
            # without data the entry is just the index plus one
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a batch that starts exactly at end still holds
    # one item and needs a next link (same test as in getPageBatch)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1007
1008
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups on the annotation server for the user.

    Requests <annotationServerUrl>/annotator/groups?user=<user> and turns
    every entry of the 'rows' list in the JSON answer into a dict with the
    keys 'id', 'name' and 'uri' (None for fields missing in the entry).

    Returns an empty list when there is no answer, when the answer cannot
    be parsed as a JSON object or when it contains no 'rows'.
    """
    groups = []
    # NOTE(review): user is put into the query string unquoted -- confirm
    # that callers only pass URL-safe names.
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the request is made with noExceptions, so an unparsable answer
        # should not break the caller either
        logging.error("Unable to parse annotation groups from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        # only a JSON object can carry the 'rows' list
        return groups

    for r in res.get('rows', None) or []:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1023
1024
# Protect the settings form with the 'View management screens' permission
# (security is presumably the class's ClassSecurityInfo, declared outside
# this section -- TODO confirm).
security.declareProtected('View management screens','changeDocumentViewerForm')
# Page template for the form, loaded from zpt/changeDocumentViewer.
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1027
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Store new settings for the document viewer.

    title           -- title of the viewer object
    digilibBaseUrl  -- base URL of digilib; the scaler and viewer URLs are
                       derived from it (both are None if it is None)
    thumbrows       -- stored as self.thumbrows
    thumbcols       -- stored as self.thumbcols
    authgroups      -- comma separated group names, stored stripped and
                       lower-cased as a list
    availableLayers -- passed on to self.setAvailableLayers()
    RESPONSE        -- if given, redirected to manage_main at the end
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # the default value must not fail on string concatenation
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # metadataService keeps its previous value (if any) in this case
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1047
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer.

    Loads the page template zpt/addDocumentViewer, wraps it in the context
    of self and returns the result of calling it.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    return template.__of__(self)()
1052
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and store it in self under the given id.

    Redirects RESPONSE to manage_main afterwards if a RESPONSE is given.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: