Context Navigation

source: documentViewer/documentViewer.py @ 578:024b75162437

Last change on this file since 578:024b75162437 was 578:024b75162437, checked in by casties, 12 years ago
displays context data on index page
File size: 42.5 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return a string containing node serialized as XML.

    node: ElementTree element to serialize.
    encoding: accepted for backward compatibility but not used here; the
    serializer is called with its default encoding.
    """
    # ElementTree replaces the former 4Suite (Domlette) serialization
    xml = ET.tostring(node)
    return xml
35
def getMDText(node):
    """Return the '@text' content of a MetaDataProvider metadata node.

    A dict node yields its '@text' entry (None when missing); any other
    value is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    Returns a dict with the raw header ('ua'), the boolean flags 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML', and
    the version strings 'versIE', 'versFirefox', 'versSafariChrome' and
    'versOpera' (empty when the browser was not detected).

    A missing User-Agent header is treated like an empty string instead of
    raising. Each detection step only ignores IndexError, i.e. an agent
    string that does not have the expected shape.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # parse a string even if the header is missing; 'ua' keeps the raw value
    agent = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # Safari or Chrome identification
    try:
        nav = agent[agent.find('('):]
        nav1 = agent[agent.find(')'):]
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        # these two lookups are guards only: they abort the detection when
        # the agent string lacks the expected "; " and " " separated parts
        nav.split("; ")[1]
        nav1.split(" ")[2]
        ie2 = nav3.split(" ")[1]
        ie3 = nav3.split(" ")[2]
        if "Safari" in ie3:
            bt['versSafariChrome'] = ie2.split("/")[1]
    except IndexError:
        pass

    # IE identification
    try:
        ie = agent[agent.find('('):].split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification
    try:
        # third space-separated token after the first ')'; computed here so
        # this step does not depend on variables of the Safari step
        token = agent[agent.find(')'):].split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            nav = agent[agent.find('('):]
            nav1 = nav[nav.find(')'):]
            bt['versOpera'] = nav1.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    A trailing '/' is removed first. With cnt <= 0 the (stripped) path is
    returned unshortened instead of an empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [:-0] would be an empty slice and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
110
111	##
112	## documentViewer class
113	##
114	class documentViewer(Folder):
115	"""document viewer"""
116	meta_type="Document viewer"
117
118	security=ClassSecurityInfo()
119	manage_options=Folder.manage_options+(
120	{'label':'Configuration','action':'changeDocumentViewerForm'},
121	)
122
123	metadataService = None
124	"""MetaDataFolder instance"""
125
126
127	#
128	# templates and forms
129	#
130	# viewMode templates
131	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
132	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
133	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
134	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
135	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
136	# available layer types (annotator not default)
137	builtinLayers = {'text': ['dict','search','gis'],
138	'xml': None, 'images': None, 'index': ['extended']}
139	availableLayers = builtinLayers;
140	# layer templates
141	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
142	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
143	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
144	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
145	layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
146	layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
147	# toc templates
148	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
149	toc_text = PageTemplateFile('zpt/toc_text', globals())
150	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
151	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
152	toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
153	toc_none = PageTemplateFile('zpt/toc_none', globals())
154	# other templates
155	common_template = PageTemplateFile('zpt/common_template', globals())
156	info_xml = PageTemplateFile('zpt/info_xml', globals())
157	docuviewer_css = ImageFile('css/docuviewer.css',globals())
158	# make docuviewer_css refreshable for development
159	docuviewer_css.index_html = refreshingImageFileIndexHtml
160	docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
161	# make docuviewer_ie_css refreshable for development
162	docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
163	jquery_js = ImageFile('js/jquery.js',globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialize the document viewer.

    Creates the 'template' sub-folder and tries to add a text server client
    ('fulltextclient') and a zogiLib instance ('zogilib') to it; failures
    are logged and otherwise ignored. The digilib URLs are only set when
    digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
205
206
207	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """Return the full text content of a page (proxy for fulltextclient.getTextPage)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**kwargs)
211
def getSearchResults(self, **kwargs):
    """Load the list of search results and store the XML in docinfo (proxy for fulltextclient.getSearchResults)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**kwargs)
215
def getResultsPage(self, **kwargs):
    """Return one page of the search results (proxy for fulltextclient.getResultsPage)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**kwargs)
219
def getTextInfo(self, **kwargs):
    """Return document info from the text server (proxy for fulltextclient.getTextInfo)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**kwargs)
223
def getToc(self, **kwargs):
    """Load the table of contents and store the XML in docinfo (proxy for fulltextclient.getToc)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**kwargs)
227
def getTocPage(self, **kwargs):
    """Return one page of the table of contents (proxy for fulltextclient.getTocPage)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**kwargs)
231
def getRepositoryType(self, **kwargs):
    """Return the repository type (proxy for fulltextclient.getRepositoryType)."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**kwargs)
235
def getTextDownloadUrl(self, **kwargs):
    """Return the result of the text server's getTextDownloadUrl (proxy for fulltextclient.getTextDownloadUrl)."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**kwargs)
239
def getPlacesOnPage(self, **kwargs):
    """Return the list of GIS places on one page (proxy for fulltextclient.getPlacesOnPage)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**kwargs)
243
244	# Thumb list for CoolIris Plugin
245	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
246	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list with the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute is not set when the viewer was created without
    # a digilibBaseUrl (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text URL set -> show text, otherwise images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
276
277
278	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    Show a page of the document using the template 'viewer_<viewMode>'.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page

    Returns the rendered template, or an error string when the template
    folder or the template for viewMode is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    if tocMode is None:
        # mode=filepath should not have toc-thumbs
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy fix
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo 'textURL' is not taken into account here yet)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
343
def getAvailableLayers(self):
    """Return the availableLayers dict (list of available layers per viewMode)."""
    layers = self.availableLayers
    return layers
347
def getBrowser(self):
    """Return the browser information dict produced by browserCheck (also logged at debug level)."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
353
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
358
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    fn: image path; when None it is taken from docinfo['imagePath'].
    pn: page number, only appended when true.
    dw, dh: requested width and height.
    docinfo: optional docinfo dict; its 'imageURL' (which already carries
        the fn parameter) is used as base URL when present.

    Without an imageURL the URL is built from self.digilibScalerUrl. The
    query separator ('?' or '&') is inserted as needed, so the configured
    Scaler URL works with or without a trailing '?'.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        url = self.digilibScalerUrl
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        # start or continue the query string
        if '?' not in url:
            url += '?'
        elif not url.endswith(('?', '&')):
            url += '&'

        url += "fn=%s"%fn

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
377
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
381
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style

    return style + 'sel'
389
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request's form parameters and additionally
    applies param=val and the entries of dict params. A value of None
    deletes the key.

    duplicates: how to collapse list values (coming from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' when there is none). Any other true value
        leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry; all-empty lists give ''
                newParams[k] = entries[0] if entries else ''

    return newParams
428
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or with the parameters from dict params.

    The parameters come from getParams; values are quoted (leaving '/'
    unescaped) and joined with paramSep. baseUrl defaults to the URL of
    this viewer.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
439
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val (delegates to getLink)."""
    # NOTE(review): this passes the same '&' separator that getLink uses by
    # default; the method name suggests '&amp;' may have been intended -- confirm
    separator = '&'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=separator, duplicates=duplicates)
443
444
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the available layers.

    newLayerString is parsed as JSON and used when the result contains both
    'text' and 'images'; otherwise an error is logged and autodetection
    runs. Autodetection starts with a copy of builtinLayers and adds every
    template whose id has the form layer_{m}_{l} as layer l of mode m
    (l may itself contain '_').
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that appending below
    # never modifies the shared builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    self.availableLayers = layers
    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)
478
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
482
483
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered by the 'info_xml' template).

    url, mode: passed to getDocinfo to load the document information.
    """
    # getattr: the attribute is not set when the viewer was created without
    # a digilibBaseUrl (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
492
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon when there is no user or only Zope's "Anonymous User"."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
500
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    Access type 'free' is always granted. A missing access type or one of
    the configured authgroups requires an authenticated user. Any other
    access type is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
517
518
def getUserinfo(self):
    """Return the userinfo dict kept in the session (a new empty dict is stored when there is none)."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # TODO: check if it is still current?
        return session['userinfo']

    # store a fresh one in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
532
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url, depending on mode.

    mode: 'texttool' (url points to the document dir or index.meta),
        'imagepath' (url points to a folder with images) or 'filepath'
        (url points to an image file).
    url: path or URL of the document.
    tocMode: not used in this method.

    A docinfo stored in the session is reused when its mode and url match;
    otherwise a new one is built from the index.meta metadata and digilib
    and stored in the session.

    Raises IOError when no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        docinfo = session['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl

    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        md = self.metadataService

        # document directory name and path
        resource = md.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = md.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = md.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            # save extended version as 'bibx' TODO: ugly
            bibx = md.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)

        # auth info
        access = md.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = md.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = md.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = md.getDRI(dom=metaDom, type='escidoc')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = md.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages again
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        # replace with numTextPages (text-only?)
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
673
674
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the name if it does not end with it). Without an
    archive-path the document URL from docinfo is used, unless it is an
    http URL. A leading '/mpiwg/online' is removed from the path.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # ('documentURL' is only accepted for older docinfo dicts)
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
699
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool: dict of texttool metadata, or a list of which the first
        element is used.

    Sets 'imagePath', 'textURL', 'textURLPath' (and 'textURLRepository'),
    'pageFlow', 'oddPage', 'titlePage' and 'presentationUrl' in docinfo as
    far as the corresponding texttool entries exist.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme -> path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # a plain-text node (not a dict) carries no attributes
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
762
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as 'bib', the bib '@type' as 'bibType' and
    the DC-mapped 'creator', 'title' and 'date' (default ''). bibx is not
    used here.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
776
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'];
    for the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead. Access data that does not have this structure is logged
    and leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: incomplete access data just sets no access type
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
794
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages for the image directory path from digilib's dirInfo-xml.jsp.

    Sets docinfo['numPages'] from the <size> element of the response
    (0 when it is empty). docinfo is returned unchanged, after logging an
    error, when no data could be fetched.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
813
814
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml behind docinfo['presentationUrl'] (directly for
    http/https URLs, else through digilib's Texter servlet) and sets
    'creator', 'title' and 'date' in docinfo from its author, title and
    date elements. docinfo is returned unchanged, after logging an error,
    when there is no URL or no data could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # also covers an empty response, which can not be parsed as XML
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
843
844
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current: current page number (stored as 'current' and 'pn').
    start: first page of the thumbnail group; when empty it is derived from
        current and the group size.
    rows, cols: thumbnail grid size, defaulting to thumbrows and thumbcols.
    docinfo: docinfo dict of the document; 'numPages' is set in it from
        'numTextPages' when it is 0.
    userinfo: not used in this method.
    viewMode, tocMode: stored in pageinfo.
    viewLayer: layer name, comma-separated string or list of layers.

    Request parameters for table of contents, search and highlighting are
    also read into pageinfo; a query triggers getSearchResults.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    request = self.REQUEST
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    # if start is empty use the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    numgroups = int(np / grpsize)
    pageinfo['numgroups'] = numgroups
    if np % grpsize > 0:
        pageinfo['numgroups'] = numgroups + 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
930
931
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict describing one screenful (rows x cols) of thumbnails.

    Keys of the result:
    batches   -- list of {'start':, 'end':} for every screen of the document
    pages     -- list of rows, each a list of {'idx': n} cells; idx is None
                 for cells outside minIdx..maxIdx
    prevStart -- start index of the previous screen or None
    nextStart -- start index of the next screen or None
    first     -- minIdx
    last      -- maxIdx

    A maxIdx of 0 is replaced by start + rows * cols. With pageZero the
    batch list counts from 0 and the first screen (start == 1) begins at
    index 0. Rows are filled from the right unless pageFlowLtr is true.
    """
    perScreen = rows * cols
    if maxIdx == 0:
        maxIdx = start + perScreen

    # start and end index of every screen
    firstIdx = 0 if pageZero else 1
    numScreens = int(math.ceil(maxIdx / float(perScreen)))
    screens = [{'start': n * perScreen + firstIdx,
                'end': min((n + 1) * perScreen + firstIdx - 1, maxIdx)}
               for n in range(numScreens)]

    # grid of cells for the current screen; the first screen is shifted
    # by one cell when a zeroth page is requested
    cursor = 0 if (pageZero and start == 1) else start
    grid = []
    for _ in range(rows):
        cells = []
        for _ in range(cols):
            inside = minIdx <= cursor <= maxIdx
            cells.append({'idx': cursor if inside else None})
            cursor += 1

        if not pageFlowLtr:
            # right-to-left page flow: first page of the row goes rightmost
            cells.reverse()

        grid.append(cells)

    following = start + perScreen
    return {
        'batches': screens,
        'pages': grid,
        'prevStart': max(start - perScreen, 1) if start > 1 else None,
        'nextStart': following if following <= maxIdx else None,
        'first': minIdx,
        'last': maxIdx,
    }
991
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start    -- index of the first element of this batch (batches are
                counted from index 1)
    size     -- number of elements per batch
    end      -- index of the last element; 0 is replaced by start + size
    data     -- optional mapping the elements are read from with data.get()
    fullData -- if true data is keyed by the absolute index, otherwise by
                the 0-based position inside this batch

    Keys of the result:
    batches   -- list of {'start':, 'end':} for every batch
    this      -- list of the elements of the current batch
    prevStart -- start index of the previous batch or None
    nextStart -- start index of the next batch or None
    first     -- start
    last      -- end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch
    this = []
    for pos, idx in enumerate(range(start, min(start + size, end + 1))):
        if data:
            d = data.get(idx if fullData else pos, None)
        else:
            # NOTE(review): without data the element is idx + 1, not idx;
            # kept as is -- confirm against the templates using 'this'
            d = idx + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is the index of the last element, so another batch exists as soon
    # as start + size does not exceed it (a strict '<' would hide a final
    # batch that holds a single element)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1037
1038
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {id:, name:, uri:} on the annotation server for the user.

    Fetches <annotationServerUrl>/annotator/groups?user=<user> and reads the
    'rows' member of the JSON answer. The list is empty when no data came
    back or the answer has no 'rows'.
    """
    # NOTE(review): user is put into the query string without URL-escaping
    response = getHttpData(url="%s/annotator/groups?user=%s"%(annotationServerUrl,user), noExceptions=True)
    if not response:
        return []

    entries = json.loads(response).get('rows', None)
    if entries is None:
        return []

    # keep only the three fields we use; missing ones become None
    return [{'id': entry.get('id', None),
             'name': entry.get('name', None),
             'uri': entry.get('uri', None)}
            for entry in entries]
1053
1054
# settings form of the viewer: declared under the 'View management screens'
# permission and loaded from the page template zpt/changeDocumentViewer
security.declareProtected('View management screens', 'changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1057
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    Stores the given settings on self:
    title           -- title of the viewer
    digilibBaseUrl  -- base URL of digilib; the scaler and viewer URLs are
                       derived from it (both are None if it is None)
    thumbrows       -- rows of the thumbnail grid
    thumbcols       -- columns of the thumbnail grid
    authgroups      -- comma separated group names, saved stripped and
                       lower-cased as a list
    availableLayers -- handed to self.setAvailableLayers()
    RESPONSE        -- if given, redirected to 'manage_main' at the end
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # None is the declared default; concatenating it with the servlet
        # paths would raise TypeError, so leave the derived URLs unset too
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is logged but does not abort the update
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1077
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to self via __of__ and call it to get the page
    return template.__of__(self)()
1082
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """create a documentViewer with the given id and add it to self.

    If RESPONSE is given it is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: