Context Navigation

source: documentViewer/documentViewer.py @ 585:83eeed69793f

Last change on this file since 585:83eeed69793f was 585:83eeed69793f, checked in by casties, 11 years ago
new annotator layer for images.
File size: 40.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as an XML string.

    node: ElementTree element to serialize.
    encoding: kept in the signature for callers that still pass it (a
        leftover of the former 4Suite implementation); it is not handed
        to ElementTree.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the text of a MetaDataProvider metadata node.

    A dict node yields its '@text' entry (None when it has none); any
    other value is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are dropped before the segments are counted.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
49
50
51	##
52	## documentViewer class
53	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # management tabs: everything Folder offers plus a "Configuration" tab
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # class-level default; __init__ tries to replace it with the object
    # found under the name 'metadata'
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names, or None when the mode has no layers
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE(review): this is the same dict object as builtinLayers, not a copy,
    # so in-place changes to one are visible through the other
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_images_annotator = PageTemplateFile('zpt/layer_images_annotator', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    # (currently disabled, unlike docuviewer_css above)
    #docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
106
107
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    id, title: stored as-is on the instance.
    imageScalerUrl: passed to the zogiLib helper as dlServerURL.
    textServerName: passed to the MpdlXmlTextServer helper as serverName.
    digilibBaseUrl: when given, also used to derive the Scaler and
        viewer URLs; when None none of the three attributes is set.
    thumbcols, thumbrows: stored as-is on the instance.
    authgroups: comma-separated names of the authorized groups.

    Failures while creating the helper objects are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # keep the authorized groups as a list of trimmed, lower-case names
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create the template folder so template.something can always be used
    tmpl = Folder('template')
    self['template'] = tmpl  # Zope-2.12 style (formerly _setObject)

    try:
        import MpdlXmlTextServer
        tmpl['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmpl['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    try:
        # assume the MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
147
148
149	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
153
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
157
def getResultsPage(self, **args):
    """Return one page of the search results (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
161
def getTextInfo(self, **args):
    """Return document info from the text server (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
165
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
169
def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
173
def getRepositoryType(self, **args):
    """Return the repository type reported by fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
177
def getTextDownloadUrl(self, **args):
    """Return the result of fulltextclient's getTextDownloadUrl for the given arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
181
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page (delegates to fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
185
186	# Thumb list for CoolIris Plugin
187	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
188	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list through the thumbs_main_rss template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text'
        when docinfo knows a text URL and 'images' otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: result of calling template/thumbs_main_rss
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ only sets digilibBaseUrl when one was given, so read it
    # defensively (as index_html does) instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: text URL present -> text view, otherwise images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
218
219
220	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    Show one page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: output of template/viewer_$viewMode, or an error string when
        the template folder or the viewMode template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    if tocMode is None:
        # mode=filepath should not have toc-thumbs
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text view with dict layer
        viewMode = "text"
        viewLayer = "dict"
    elif viewMode == "auto":
        # text if there is a text, images otherwise
        # (docinfo 'textURL' is not taken into account yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
285
def getAvailableLayers(self):
    """Return the dict that lists the available layers per viewMode."""
    layers = self.availableLayers
    return layers
289
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
294
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    fn: image path; when None it is taken from docinfo['imagePath'].
    pn: page number, only added when truthy.
    dw, dh: requested width and height.
    docinfo: optional docinfo dict; its 'imageURL' entry (which already
        carries the fn parameter) is preferred over self.digilibScalerUrl.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # digilibScalerUrl has no query string of its own, so the '?' has
        # to be added here (same form as the imageURL built in getDocinfo)
        url = "%s?fn=%s"%(self.digilibScalerUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
313
def getDocumentViewerURL(self):
    """Return the absolute URL of this documentViewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
317
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
325
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the current request form and applies the
    changes given as param=val and/or as the dict params. A value of
    None deletes the key.

    param, val: single parameter to set (val is stored as str(val)).
    params: dict of parameters to set; values are stored unchanged.
    duplicates: how to collapse list values (from repeated keys):
        'comma' joins the non-empty entries with ',', 'first' keeps the
        first non-empty entry ('' when all entries are empty); any other
        value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # a list with only empty entries used to raise IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
364
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with param set to val and/or the entries of dict params.

    baseUrl: URL to append the query string to; defaults to the URL of
        this instance.
    paramSep: separator put between the key=value pairs.
    duplicates: passed on to getParams.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (k, v) in urlParams.items():
        # quote the value, but leave '/' unescaped
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
375
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with param set to val (getLink with paramSep='&')."""
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
379
380
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the available layers.

    newLayerString is parsed as JSON and accepted when it is an object
    that has both a 'text' and an 'images' entry. Otherwise (or when no
    string is given) the layers are detected: the builtin layers plus one
    layer l in mode m for every entry of the template folder named
    layer_{m}_{l}. Names with more or fewer parts are ignored.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (ValueError, TypeError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that appending
    # below never changes the shared builtinLayers
    available = {}
    for (mode, builtin) in self.builtinLayers.items():
        if builtin is None:
            available[mode] = None
        else:
            available[mode] = list(builtin)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        known = available.get(m, None)
        if known is None:
            # mode m doesn't exist or has no layers yet -> new list
            available[m] = [l]
        elif l not in known:
            # m exists -> append
            known.append(l)

    self.availableLayers = available
414
def getAvailableLayersJson(self):
    """Return the available layers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
418
419
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    url, mode: passed to getDocinfo to locate the document.
    Returns the output of template/info_xml for that docinfo.
    """
    # __init__ only sets digilibBaseUrl when one was given, so read it
    # defensively (as index_html does) instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
428
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon when nobody is logged in.

    Zope's "Anonymous User" is treated like no user at all.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
436
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type or one of
    the authgroups requires an authenticated user. Any other access type
    is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
453
454
def getUserinfo(self):
    """Return the userinfo dict kept in the session, creating an empty one on first use."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (TODO: check if it is still current?)
        return session['userinfo']

    # nothing cached yet: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
468
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to an image file).
    url: location of the document, interpreted according to mode.
    tocMode: accepted but not used in this method.

    The docinfo dict is cached in the session and reused as long as mode
    and url are unchanged.

    Raises IOError when mode is 'texttool' and no index.meta is found,
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    # NOTE(review): __init__ only sets the three digilib attributes when a
    # digilibBaseUrl was given -- confirm they always exist here
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        # NOTE(review): this starts again from url, so the '/index.meta'
        # removal above is discarded for such paths -- confirm intended
        if url.startswith('/mpiwg/online/'):
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
613
614
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from the resource's 'name' and
    docinfo['documentPath'] from its 'archive-path' (made absolute and
    extended by the name when it does not end with it). Without an
    archive-path the document URL from docinfo is used instead, unless it
    is an http: URL. A leading '/mpiwg/online' is removed from the path.

    Returns the updated docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it as 'documentUrl';
        # the old spelling 'documentURL' is still honoured if present
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
639
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool: texttool metadata dict, or a list of which only the first
        entry is used.

    Depending on what the element contains this sets 'imagePath',
    'textURL', 'textURLPath' (plus 'textURLRepository'), 'pageFlow',
    'oddPage', 'titlePage' and 'presentationUrl' in docinfo.

    Returns the updated docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, i.e. not a URL: relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # a node may be a plain string (see getMDText) and then has no attributes
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
702
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' when missing). bibx is
    accepted but not used here.

    Returns the updated docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # keep all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
716
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the element's type attribute, or, for
    the types 'group' and 'institution', to the lower-cased 'name' entry.
    An element without the expected entries leaves docinfo unchanged.

    Returns the updated docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: incomplete access data is ignored, but only lookup
        # problems are swallowed here
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
734
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of images in path from digilib into docinfo['numPages'].

    Requests dirInfo-xml.jsp below digilibBaseUrl for path and uses the
    text of its "size" element (0 when that is empty). When nothing can
    be fetched the error is logged and docinfo is returned unchanged.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
753
754
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the document at docinfo['presentationUrl'] (directly when it is
    an http:// URL, otherwise through the Texter servlet below
    digilibBaseUrl) and stores the text of its author, title and date
    elements as 'creator', 'title' and 'date'. When there is no URL or
    nothing can be read the error is logged and docinfo is returned
    unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through the Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
783
784
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    current: current page number (converted with getInt).
    start: first page of the thumbnail group; when empty, the first page
        of the group that contains current is used.
    rows, cols: thumbnail grid size, defaulting to self.thumbrows and
        self.thumbcols.
    docinfo: docinfo dict of the document; may get 'numPages' set from
        'numTextPages'.
    userinfo: accepted but not used in this method.
    viewMode, tocMode: stored unchanged in pageinfo.
    viewLayer: layer name(s) as comma-separated string or list.

    Further settings (tocPageSize, characterNormalization, query,
    highlighting, ...) are read from self.REQUEST. When a query is given
    getSearchResults is called with the new pageinfo.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so "not seen.add(l)" is always true and
        # only serves to record l; empty names are dropped, order is kept)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the one around current
    # (the default is the first page of the group that contains current)
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # return value is not used here
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
870
871
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    start: index of the first page on this screen.
    rows, cols: size of the thumbnail grid.
    pageFlowLtr: when False each row is filled from right to left.
    pageZero: when True batches start at index 0 instead of 1, and a
        screen starting at page 1 begins with an (empty) page 0.
    minIdx, maxIdx: valid index range; maxIdx=0 means start + rows*cols.

    The result has the keys 'batches' (start/end of every batch),
    'pages' (rows of {'idx': n}, with idx None outside the valid range),
    'prevStart', 'nextStart' (None at the ends), 'first' and 'last'.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # start and end points of all batches
    ofs = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    batches = [{'start': i * grpsize + ofs,
                'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
               for i in range(numBatches)]

    # page indexes of this screen, row by row
    idx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            inRange = not (idx < minIdx or idx > maxIdx)
            row.append({'idx': idx if inRange else None})
            idx += 1

        if not pageFlowLtr:
            # right-to-left flow: first page of the row goes last
            row.reverse()

        pages.append(row)

    batch = {'batches': batches, 'pages': pages, 'first': minIdx, 'last': maxIdx}
    batch['prevStart'] = max(start - grpsize, 1) if start > 1 else None
    batch['nextStart'] = start + grpsize if start + grpsize <= maxIdx else None
    return batch
931
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    Parameters:
    start    -- index of the first element on this screen
    size     -- number of elements per screen
    end      -- index of the last element (inclusive); 0 means start + size
    data     -- optional dict of elements; if empty or None the list 'this'
                is filled with the numbers i+1 instead
    fullData -- if true data is keyed by the absolute index (start, start+1,
                ...), otherwise by the position on this screen (0, 1, ...)

    Returns a dict with the keys
    'batches'   -- list of {'start':, 'end':} for every screen up to end
    'this'      -- list of the elements on this screen
    'prevStart' -- start index of the previous screen or None
    'nextStart' -- start index of the next screen or None
    'first'     -- start
    'last'      -- end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch
    this = []
    for pos, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                # data only holds this screen, keyed from 0
                d = data.get(pos, None)
        else:
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next screen exists as soon as start+size <= end
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
977
978
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:, uri:} on the annotation server for the user

    Fetches <annotationServerUrl>/annotator/groups?user=<user> and parses the
    answer as JSON. Returns an empty list when no data comes back or when the
    answer has no 'rows' entry.
    """
    requestUrl = "%s/annotator/groups?user=%s" % (annotationServerUrl, user)
    # noExceptions=True: the fetch is called in no-exception mode
    response = getHttpData(url=requestUrl, noExceptions=True)
    if not response:
        return []

    answer = json.loads(response)
    entries = answer.get('rows', None)
    if entries is None:
        return []

    # keep only the three fields of interest; missing fields become None
    return [{'id': entry.get('id', None),
             'name': entry.get('name', None),
             'uri': entry.get('uri', None)}
            for entry in entries]
993
994
# the configuration form requires the 'View management screens' permission
security.declareProtected(
    'View management screens',
    'changeDocumentViewerForm',
)
# page template of the form behind the 'Configuration' management tab
changeDocumentViewerForm = PageTemplateFile(
    'zpt/changeDocumentViewer',
    globals(),
)
997
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', availableLayers=None, RESPONSE=None):
    """init document viewer

    Stores the given configuration on the viewer object.

    Parameters:
    title           -- title of the viewer
    digilibBaseUrl  -- base URL of the digilib server; the scaler and viewer
                       URLs are derived from it by string concatenation, so
                       it has to be a string (the default None raises a
                       TypeError)
    thumbrows       -- number of rows in the thumbnail view
    thumbcols       -- number of columns in the thumbnail view
    authgroups      -- comma-separated group names; stored as a list of
                       stripped, lower-cased names
    availableLayers -- passed on to setAvailableLayers()
    RESPONSE        -- if given, redirected to 'manage_main' afterwards
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # "as" form: valid from Python 2.6 on and required by Python 3.
        # A missing metadata folder is only logged, configuration continues.
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1017
def manage_AddDocumentViewerForm(self):
    """add the viewer form

    Loads the 'zpt/addDocumentViewer' page template, binds it to self
    and returns the result of calling it.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to self before calling it
    boundTemplate = template.__of__(self)
    return boundTemplate()
1022
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and settings, stores it in
    self under that id and, if RESPONSE is given, redirects to 'manage_main'.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        # no response object: nothing to redirect
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: