Context Navigation

source: documentViewer/documentViewer.py @ 543:6cdc31e9ed8e

Last change on this file since 543:6cdc31e9ed8e was 543:6cdc31e9ed8e, checked in by casties, 12 years ago
fixed problem with dict-mode in default view. added configurable footer and logo in site_template.pt
File size: 39.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """returns a string containing node as XML.

    node is an ElementTree element. The encoding argument is accepted
    but not passed on to ET.tostring().
    """
    # ElementTree serialization replaced the former 4Suite
    # (Ft.Xml.Domlette.Print into a cStringIO stream) implementation.
    xmlText = ET.tostring(node)
    return xmlText
35
def browserCheck(self):
    """check the browsers request to find out the browser type.

    Reads the User-Agent header from self.REQUEST and returns a dict with
    the keys 'ua' (raw header value), 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' and the version strings 'versIE',
    'versFirefox', 'versSafariChrome' and 'versOpera' (empty string if the
    browser was not detected).
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the checks below
    ua = rawUa or ""
    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # UA string from the first '(' and from the first ')' onwards.
    # str.find() returns -1 if the character is missing, in which case the
    # slice is just the last character and the checks below do not match.
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification:
    # looks at the tokens after the second parenthesized group
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # UA string does not have the expected shape
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification
    # (the product token is computed here instead of relying on a variable
    # left over from the Safari check)
    try:
        product = nav1.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters of the version are kept
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            afterParen = nav[nav.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """returns pathname shortened by cnt.

    Trailing slashes are removed first, then the last cnt
    '/'-separated segments are dropped.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
103
104	##
105	## documentViewer class
106	##
class documentViewer(Folder):
    """document viewer"""
    # Zope meta type of this class
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # standard Folder management tabs plus a "Configuration" tab
    # that points to changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # metadata provider; __init__ and changeDocumentViewer set it from
    # the attribute named 'metadata'
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types: maps viewMode to a list of layer names (or None)
    builtinLayers = {'text': ['dict','search','gis','annotator'],
                     'xml': None, 'images': None, 'index': None}
    # NOTE(review): this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates (named layer_{viewMode}_{layer})
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
154
155
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """init document viewer.

    Stores the basic settings, creates the 'template' sub-folder and tries
    to put a text server ('fulltextclient') and a zogiLib ('zogilib')
    object into it. Failures of the optional parts are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for groupName in authgroups.split(','):
        groups.append(groupName.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    tplFolder = Folder('template')
    self['template'] = tplFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        fulltextClient = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        tplFolder['fulltextclient'] = fulltextClient
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib for the image display
    try:
        from Products.zogiLib.zogiLib import zogiLib
        imageLib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tplFolder['zogilib'] = imageLib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    # the attribute is only created when a URL was given
    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (proxy for template.fulltextclient.getTextPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
199
def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo (proxy for template.fulltextclient.getSearchResults)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
203
def getResultsPage(self, **args):
    """returns one page of the search results (proxy for template.fulltextclient.getResultsPage)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
207
def getTextInfo(self, **args):
    """returns document info from the text server (proxy for template.fulltextclient.getTextInfo)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
211
def getToc(self, **args):
    """loads table of contents and stores XML in docinfo (proxy for template.fulltextclient.getToc)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
215
def getTocPage(self, **args):
    """returns one page of the table of contents (proxy for template.fulltextclient.getTocPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
219
def getPlacesOnPage(self, **args):
    """get list of gis places on one page (proxy for template.fulltextclient.getPlacesOnPage)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
223
224	#WTF?
225	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
226	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it: renders the template 'thumbs_main_rss' for the given document
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current page
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ only creates digilibBaseUrl when a URL was given, so the
    # attribute may be missing: use getattr (as index_html does) instead of
    # failing with an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is configured, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
258
259
260	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewLayer=None, tocMode="thumbs", start=1, pn=1):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current page
    @return: output of the template viewer_$viewMode or an error string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # find a digilib URL if none is configured
    if not getattr(self, 'digilibBaseUrl', None):
        foundUrl = self.findDigilibUrl()
        self.digilibBaseUrl = foundUrl or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode, url=url, tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text with layer dict
        viewMode = "text"
        viewLayer = "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo.get('textURL', None) not implemented yet)
        hasText = docinfo.get('textURLPath', None)
        if not hasText:
            viewMode = "images"
        else:
            viewMode = "text"
            # use layer dict as default unless a layer was given or saved
            if viewLayer is None and 'viewLayer' not in userinfo:
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        # execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    # TODO: error page?
    logging.error("No template for viewMode=%s!"%viewMode)
    return "No template for viewMode=%s!"%viewMode
318
319	#WTF?
def generateMarks(self,mk):
    # Returns one "mk=<value>" item per mark, concatenated without any
    # separator. mk may be None (gives ""), a single value or a list.
    # NOTE(review): deliberately left without a docstring; presumably the
    # missing docstring keeps Zope from publishing this method -- confirm.
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
329
330
def getAvailableLayers(self):
    """returns dict with list of available layers per viewMode (the availableLayers attribute itself, not a copy)"""
    layers = self.availableLayers
    return layers
334
def getBrowser(self):
    """returns the browser information dict from browserCheck() for the current request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
340
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib (template.zogilib.getDLBaseUrl())"""
    return self.template.zogilib.getDLBaseUrl()
345
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params.

    Uses docinfo['imageURL'] as base if available. Otherwise the URL is
    built from self.digilibBaseUrl and fn, where fn defaults to
    docinfo['imagePath'] (or an empty string if there is no docinfo).
    pn is only appended if it is true; dw and dh are always appended.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None:
            # fall back to the document's image path;
            # never put a literal "None" into the URL
            fn = ''
            if docinfo is not None:
                fn = docinfo.get('imagePath', '')

        url = "%s/servlet/Scaler?fn=%s"%(self.digilibBaseUrl, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
364
def getDocumentViewerURL(self):
    """returns the URL of this instance (self.absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
368
def getStyle(self, idx, selected, style=""):
    """returns style with 'sel' appended if idx equals selected, otherwise style unchanged."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
376
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry (empty string if all entries are empty); any other
    value leaves lists untouched. The request form itself is not modified.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry
                # (no IndexError if all entries are empty)
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
415
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); values are passed through
    utf8ify() and urllib.quote_plus() (keeping '/'), keys are used as
    they are. baseUrl defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        quoted = urllib.quote_plus(utf8ify(v), '/')
        pairs.append("%s=%s"%(k, quoted))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
426
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val.

    Same as getLink() but the parameters are separated by the HTML entity
    for the ampersand so the link can be put into HTML/XML markup.
    """
    # the separator is the five-character entity (ampersand, a, m, p, semicolon)
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
430
431
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.
    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON.

    The JSON value is only accepted if it contains the keys 'text' and
    'images'; otherwise an error is logged and the layers are autodetected
    from builtinLayers and the names in the template folder."""
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers
    # (copy the lists so the class-level builtinLayers are never modified)
    layers = {}
    for (m, builtin) in self.builtinLayers.items():
        if builtin is None:
            layers[m] = None
        else:
            layers[m] = list(builtin)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # layer_{m}_{l}: the layer name l may itself contain '_'
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not a well-formed layer template name
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
465
def getAvailableLayersJson(self):
    """returns available layers as JSON string (json.dumps of the availableLayers attribute)."""
    layers = self.availableLayers
    return json.dumps(layers)
469
470
def getInfo_xml(self,url,mode):
    """returns info about the document as XML.

    Gets the docinfo for url and mode and renders it with the template
    'info_xml'."""
    # __init__ only creates digilibBaseUrl when a URL was given, so the
    # attribute may be missing: use getattr (as index_html does) instead of
    # failing with an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
479
def getAuthenticatedUser(self, anon=None):
    """returns the authenticated user object or anon (default None) if
    there is no user or the user name is "Anonymous User"."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
487
def isAccessible(self, docinfo):
    """returns if access to the resource is granted.

    Access type 'free' is always granted. A missing access type or one of
    self.authgroups requires an authenticated user. Any other access type
    is logged as unknown and denied."""
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
504
505
def getUserinfo(self):
    """returns userinfo object: the dict stored under 'userinfo' in the
    session; a new empty dict is stored there first if there is none."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # store new userinfo in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
519
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode.

    docinfo is a dict describing the document. It is cached in the session
    under 'docinfo' and reused if its 'mode' and 'url' match the arguments.

    mode: 'texttool' (url points to document dir or index.meta),
          'imagepath' (url points to folder with images) or
          'filepath' (url points to image file).
    url: location of the document, interpreted according to mode.
    tocMode: passed as mode to getTextInfo() if the document has a 'textURLPath'.

    Raises IOError if no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    # NOTE(review): reads self.digilibBaseUrl directly; index_html makes sure it is set before calling this
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # note the spelling of the key: 'documentUrl'
    docinfo['documentUrl'] = docUrl
    # process index.meta contents (only if the root element is 'resource')
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx'
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc-test')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    if docinfo.get('imagePath', None):
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
    else:
        # imagePath still missing? try "./pageimg"
        imgPath = os.path.join(docUrl, 'pageimg')
        docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
        if docinfo.get('numPages', 0) > 0:
            # there are pages
            docinfo['imagePath'] = imgPath
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
645
646
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo.

    Sets docinfo['documentName'] and docinfo['documentPath']. The path is
    taken from the resource's 'archive-path' (with the document name
    appended if missing) or, without archive-path, from the document URL in
    docinfo unless that starts with 'http:'. A leading '/mpiwg/online' is
    removed from the path. Returns docinfo."""
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath
        # getDocinfo() stores the URL under 'documentUrl'; the spelling
        # 'documentURL' is still accepted as fallback
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
671
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' where
    the texttool data (and docinfo['documentPath']) provide them, and always
    sets 'pageFlow', 'oddPage' and 'titlePage'. Returns docinfo."""
    docPath = docinfo.get('documentPath', None)

    # image dir (relative to the document path)
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL -> path relative to the document path
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (default 1)
    docinfo['titlePage'] = texttool.get('title-scan-no', 1)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
714
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """reads contents of bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    fields 'creator', 'title' and 'date' from the DC mapping provided by
    the metadata service (empty string if missing). bibx is not used here.
    Returns docinfo."""
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
728
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo.

    Sets docinfo['accessType'] to acc['@attr']['type'] or, for the types
    'group' and 'institution', to the lower-cased acc['name']. Incomplete
    access data is logged and leaves docinfo unchanged. Returns docinfo."""
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing or malformed fields must not stop the viewer,
        # but only the expected lookup errors are ignored
        logging.warning("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
746
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches dirInfo-xml.jsp for path from the digilib server and stores
    # the content of its "size" element as docinfo['numPages'] (0 if empty).
    # docinfo is returned unchanged if no data could be fetched.
    # NOTE(review): deliberately left without a docstring; presumably the
    # missing docstring keeps Zope from publishing this method -- confirm.
    infoUrl = "".join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    sizeText = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%sizeText)
    docinfo['numPages'] = int(sizeText) if sizeText else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
765
766
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools.

    Reads the XML at docinfo['presentationUrl'] (via the digilib Texter
    servlet unless it starts with "http://") and stores the text of the
    first author, title and date elements as 'creator', 'title' and 'date'.
    docinfo is returned unchanged if there is no URL or no data."""
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: read through digilib
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
795
796
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters.

    pageinfo is a new dict with the display parameters of the current page:
    view mode and layers, page number, thumbnail grid (rows, cols, start,
    pageBatch), table of contents and search parameters from the request.

    current: current page number (stored as 'current' and 'pn').
    start: first page of the thumbnail group; computed from current if empty.
    rows, cols: thumbnail grid size; default to self.thumbrows / self.thumbcols.
    docinfo: document info dict; 'numPages' may be set from 'numTextPages'.
             NOTE(review): the default None would fail at docinfo.get() below.
    userinfo: not used in this method.
    viewLayer: layer name(s) as comma-separated string or list.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so "not seen.add(l)" is always true and
        # just records l; empty names are dropped, order is kept)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    # presumably getInt converts to int with a fallback default -- helper not visible here
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups (rounded up)
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # the return value is not used here; the call is made for its effect on pageinfo/docinfo
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
882
883
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails.

    The dict contains 'batches' (start and end of every group of rows*cols
    pages up to maxIdx), 'pages' (rows of {'idx': n} cells, idx None outside
    minIdx..maxIdx, reversed per row if not pageFlowLtr), 'prevStart' and
    'nextStart' (None if there is no previous/next group).
    maxIdx=0 means start plus one group."""
    pageSize = rows * cols
    if maxIdx == 0:
        maxIdx = start + pageSize

    # list of all batch start and end points
    # (numbering starts at 0 if there is a zeroth page)
    offset = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(pageSize)))
    batches = []
    for n in range(numBatches):
        first = n * pageSize + offset
        last = min((n + 1) * pageSize + offset - 1, maxIdx)
        batches.append({'start': first, 'end': last})

    # the first screen starts with the zeroth page if there is one
    firstIdx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        cells = []
        for idx in range(firstIdx + r * cols, firstIdx + (r + 1) * cols):
            if minIdx <= idx <= maxIdx:
                cells.append({'idx': idx})
            else:
                # empty cell
                cells.append({'idx': None})

        if not pageFlowLtr:
            # right-to-left: highest page number first
            cells.reverse()

        pages.append(cells)

    prevStart = None
    if start > 1:
        prevStart = max(start - pageSize, 1)

    nextStart = None
    if start + pageSize <= maxIdx:
        nextStart = start + pageSize

    return {'batches': batches, 'pages': pages, 'prevStart': prevStart, 'nextStart': nextStart}
941
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    The dict contains 'batches' (start and end of every batch of size
    elements up to end), 'this' (the elements of the batch beginning at
    start), 'prevStart' and 'nextStart' (None if there is no previous/next
    batch). end=0 means start plus size.

    data is looked up with data.get(): by element number if fullData is
    true, else by position in the batch starting at 0. Without data the
    value i+1 is used for element i.
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    for (j, i) in enumerate(range(start, min(start+size, end+1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)

        else:
            # NOTE(review): placeholder value is i+1, not i -- intent unclear
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # "<=" as in getPageBatch: element number end still exists, so a batch
    # starting exactly at end must be reachable
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
986
987
988	security.declareProtected('View management screens','changeDocumentViewerForm')
989	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
990
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', availableLayers=None, RESPONSE=None):
    """change the configuration of the document viewer.

    Stores the given settings, looks up the metadata service again and
    updates the available layers with setAvailableLayers(availableLayers).
    Redirects to 'manage_main' if RESPONSE is given."""
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for groupName in authgroups.split(','):
        groups.append(groupName.strip().lower())
    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1008
def manage_AddDocumentViewerForm(self):
    """add the viewer form: renders the template zpt/addDocumentViewer in the context of self"""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
1013
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """add the viewer: creates a documentViewer with the given id in self
    and redirects to 'manage_main' if RESPONSE is given"""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: