Context Navigation

source: documentViewer/documentViewer.py @ 548:7acc919f52ff

Last change on this file since 548:7acc919f52ff was 548:7acc919f52ff, checked in by casties, 12 years ago
mode=filepath works again
File size: 40.0 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The ``encoding`` argument is accepted for compatibility but is not
    passed on to ``ET.tostring``.
    """
    # the former 4Suite (Domlette.Print) serialization is no longer used
    return ET.tostring(node)
35
def browserCheck(self):
    """Inspect the request's User-Agent header and describe the browser.

    Reads ``HTTP_USER_AGENT`` from ``self.REQUEST`` and returns a dict with
    the keys ``ua`` (the raw header value), ``isIE``, ``isN4``,
    ``versFirefox``, ``versIE``, ``versSafariChrome``, ``versOpera``,
    ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac`` and ``staticHTML``.
    Version entries stay ``""`` when the header does not have the layout
    the parser expects.  A missing header is treated like an empty one.
    """
    raw_ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # requests without a User-Agent header must not crash the parser
    ua = raw_ua or ""

    bt = {}
    bt['ua'] = raw_ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Header tail from the first '(' and from the first ')'.
    # str.find() returns -1 on a miss, so the slice is then the last character.
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]
    paren_fields = nav.split("; ")
    tail_tokens = nav1.split(" ")

    # Safari or Chrome identification: look at the tokens following the
    # second parenthesised group ("... (KHTML, like Gecko) Version/x Safari/y")
    nav2 = nav1[nav1.find('('):]
    nav3 = nav2[nav2.find(')'):]
    safari_tokens = nav3.split(" ")
    # the first two length checks mirror lookups the previous parser did
    # before reaching the Safari test, so the same headers are accepted
    if (len(paren_fields) > 1 and len(tail_tokens) > 2
            and len(safari_tokens) > 2 and "Safari" in safari_tokens[2]):
        version = safari_tokens[1].split("/")
        if len(version) > 1:
            bt['versSafariChrome'] = version[1]

    # IE identification: second ';'-separated field inside the parentheses
    if len(paren_fields) > 1 and "MSIE" in paren_fields[1]:
        ie_parts = paren_fields[1].split(" ")
        if len(ie_parts) > 1:
            bt['versIE'] = ie_parts[1]

    # Firefox identification: third space-separated token after the first ')'.
    # This used to depend on a variable left over from the Safari section and
    # was skipped whenever that section bailed out early.
    if len(tail_tokens) > 2 and "Firefox" in tail_tokens[2]:
        ff_parts = tail_tokens[2].split("/")
        if len(ff_parts) > 1:
            logging.debug("FIREFOX: %s"%(ff_parts[1]))
            # only the first three characters of the version are kept
            bt['versFirefox'] = ff_parts[1][0:3]

    # Opera identification
    if "Opera" in ua:
        opera_parts = nav[nav.find(')'):].split("/")
        if len(opera_parts) > 2:
            bt['versOpera'] = opera_parts[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` '/'-separated segments removed.

    Trailing slashes are dropped before the path is split.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[0:-cnt]
    return '/'.join(kept)
103
104	##
105	## documentViewer class
106	##
107	class documentViewer(Folder):
108	"""document viewer"""
meta_type = "Document viewer"

security = ClassSecurityInfo()
# add a "Configuration" tab to the standard Folder management tabs
manage_options = Folder.manage_options + (
    {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

# MetaDataFolder instance
metadataService = None

#
# templates and forms
#
# viewMode templates
viewer_text = PageTemplateFile('zpt/viewer_text', globals())
viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
viewer_images = PageTemplateFile('zpt/viewer_images', globals())
viewer_index = PageTemplateFile('zpt/viewer_index', globals())
viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
# available layer types (annotator not default)
builtinLayers = {'text': ['dict','search','gis'],
                 'xml': None, 'images': None, 'index': None}
# default: the available layers are the builtin ones (same dict object)
availableLayers = builtinLayers
# layer templates
layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
# toc templates
toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
toc_text = PageTemplateFile('zpt/toc_text', globals())
toc_figures = PageTemplateFile('zpt/toc_figures', globals())
toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
toc_none = PageTemplateFile('zpt/toc_none', globals())
# other templates
common_template = PageTemplateFile('zpt/common_template', globals())
info_xml = PageTemplateFile('zpt/info_xml', globals())
docuviewer_css = ImageFile('css/docuviewer.css',globals())
# make docuviewer_css refreshable for development
docuviewer_css.index_html = refreshingImageFileIndexHtml
docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
# make docuviewer_ie_css refreshable for development
docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
jquery_js = ImageFile('js/jquery.js',globals())
154
155
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses ``authgroups``
    (comma-separated) into a list of lower-cased group names and creates
    the 'template' sub-folder with a 'fulltextclient' and a 'zogilib'
    object.  Failures while creating those helpers or while looking up
    the 'metadata' object are logged, not raised.  ``digilibBaseUrl`` is
    only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        templateFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    # metadata service
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)

def getSearchResults(self, **args):
    """Load the list of search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)

def getResultsPage(self, **args):
    """Return one page of the search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)

def getTextInfo(self, **args):
    """Return document info from the text server (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)

def getToc(self, **args):
    """Load the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)

def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)

def getPlacesOnPage(self, **args):
    """Return the list of gis places on one page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
223
224	# Thumb list for CoolIris Plugin
225	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
226	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list as RSS (thumb list for the CoolIris plugin).

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; with 'auto', 'text' is
        chosen when docinfo has a text URL, otherwise 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: result of the template/thumbs_main_rss page template
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # The attribute may not exist at all (it is only assigned when a value
    # was configured), so read it defensively like index_html does.
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text URL is set -> text view
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
256
257
258	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """Show a page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: result of the template/viewer_<viewMode> page template, or an
        error string if the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default toc: none for mode=filepath, thumbs otherwise
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text view with dict layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text, else images
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
323
def getAvailableLayers(self):
    """Return the dict that maps each viewMode to its list of available layers."""
    layers = self.availableLayers
    return layers
327
def getBrowser(self):
    """Return the browser description dict produced by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
333
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib (getDLBaseUrl)."""
    return self.template.zogilib.getDLBaseUrl()
338
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    If ``docinfo`` has an 'imageURL' that URL is used as the base;
    otherwise the base is built from ``self.digilibBaseUrl`` and ``fn``
    (``fn`` defaults to docinfo's 'imagePath' when docinfo is given).
    ``pn`` is appended only when it is truthy; ``dw`` and ``dh`` are
    always appended.
    """
    url = docinfo.get('imageURL', None) if docinfo is not None else None

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    if pn:
        url = url + "&pn=%s"%pn

    return url + "&dw=%s&dh=%s"%(dw,dh)
357
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
361
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
369
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request form (``self.REQUEST.form``) and
    then applies ``param=val`` and the entries of dict ``params``.  A value
    of None deletes the key.  ``val`` is stored as ``str(val)``; values
    from ``params`` are stored unchanged.

    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry ('' when every entry is empty); any other
        value leaves list values untouched
    @return: the new parameter dict
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # a list of only empty entries yields '' instead of IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
408
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter ``param`` set to ``val`` or parameters from dict ``params``.

    The parameters come from getParams; values are passed through utf8ify
    and quoted with urllib.quote_plus (not escaping '/').  ``baseUrl``
    defaults to getDocumentViewerURL(); ``paramSep`` joins the key=value
    pairs.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
419
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter ``param`` set to ``val``.

    Calls getLink with the separator shown below.
    """
    # NOTE(review): the method name suggests an HTML-escaped '&amp;' separator,
    # but the separator visible here is a plain '&' -- confirm against the repository.
    separator = '&'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=separator, duplicates=duplicates)
423
424
def setAvailableLayers(self, newLayerString=None):
    """Set ``self.availableLayers`` from a JSON string or by autodetection.

    ``newLayerString`` is parsed as JSON and accepted when it is an object
    containing the keys 'text' and 'images'.  Otherwise (or when it is
    None) the layers are autodetected: start with a copy of
    ``self.builtinLayers`` and add a layer l to mode m for every name of
    the form ``layer_{m}_{l}`` found when iterating ``self.template``.

    @param newLayerString: JSON string describing the layers, or None to autodetect
    """
    import copy

    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # Start with builtin layers.  Deep copy: the per-mode lists are extended
    # below and must not be shared with the class-level builtinLayers.
    layers = copy.deepcopy(self.builtinLayers)
    self.availableLayers = layers
    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # maxsplit=2 so a layer name may itself contain '_'
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not of the form layer_{m}_{l}
            continue

        (x, m, l) = parts
        modeLayers = layers.get(m)
        if modeLayers is None:
            # mode m doesn't exist (or has no layers yet) -> new list
            layers[m] = [l]
        elif l not in modeLayers:
            # m exists -> append
            modeLayers.append(l)
458
def getAvailableLayersJson(self):
    """Return ``self.availableLayers`` serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
462
463
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of the template/info_xml page template called with docinfo
    """
    # The attribute may not exist at all (it is only assigned when a value
    # was configured), so read it defensively like index_html does.
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
472
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or ``anon`` for Zope's anonymous user.

    ``anon`` is also returned when the security manager reports no user.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
480
def isAccessible(self, docinfo):
    """Return whether access to the resource described by ``docinfo`` is granted.

    Access type 'free' is always granted.  A missing access type, or one
    listed in ``self.authgroups``, is granted only to an authenticated
    (logged-in) user.  Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
497
498
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    A new empty dict is stored in the session under 'userinfo' when
    none is there yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # store in session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
512
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document, depending on ``mode``.

    A docinfo cached in the session is returned when its 'mode' and 'url'
    match.  Otherwise a new docinfo is assembled from the index.meta DOM
    (resource, texttool, bib, access, attribution, copyright, DRI), the
    number of pages is determined via digilib, and the result is stored in
    the session under 'docinfo'.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to image file)
    @param url: url which contains display information
    @param tocMode: passed as mode to getTextInfo
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    metadata = self.metadataService
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = metadata.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imagePath'] = url
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + url
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = metadata.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = metadata.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = metadata.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            # save extended version as 'bibx' TODO: ugly
            bibx = metadata.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)

        # auth info
        access = metadata.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = metadata.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightInfo = metadata.getCopyrightData(dom=metaDom)
        if copyrightInfo:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightInfo))
            docinfo['copyright'] = copyrightInfo

        # DRI (permanent ID)
        dri = metadata.getDRI(dom=metaDom, type='escidoc-test')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages again: fall back to number of text pages (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
643
644
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName' (resource 'name') and 'documentPath'.  The path
    comes from the resource's 'archive-path' (made absolute, with the
    document name appended if missing) or, without an archive path, from
    docinfo's document URL unless that starts with 'http:'.  A leading
    '/mpiwg/online' is removed from the path.

    @param docinfo: docinfo dict (modified in place)
    @param resource: dict-like resource data
    @return: docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # Use docUrl as docPath.  getDocinfo stores it under 'documentUrl';
        # the former lookup of 'documentURL' raised KeyError.  The old
        # spelling is still honoured in case a caller provides it.
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
669
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' when
    the corresponding texttool entries (and, for all but 'textURLPath', a
    'documentPath' in docinfo) are present; always sets 'pageFlow',
    'oddPage' and 'titlePage' (defaults 'ltr', 'left', 1).

    @param docinfo: docinfo dict (modified in place)
    @param texttool: dict-like texttool data
    @return: docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme -> path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (default 1)
    docinfo['titlePage'] = texttool.get('title-scan-no', 1)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
712
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib data as 'bib', its '@type' as 'bibType', and
    'creator', 'title' and 'date' from the DC mapping provided by
    ``self.metadataService.getDCMappedData``.  ``bibx`` is not used here.

    @return: docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,'')

    return docinfo
726
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Takes the access type from ``acc['@attr']['type']``; for the types
    'group' and 'institution' the lower-cased ``acc['name']`` is used
    instead.  The result is stored as docinfo['accessType'].  If the
    expected entries are missing, docinfo is left unchanged.

    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: incomplete access data is ignored, but only lookup
        # errors are swallowed (not e.g. KeyboardInterrupt)
        logging.debug("getDocinfoFromAccess: no usable access info: %s"%repr(e))

    return docinfo
744
def getDocinfoFromDigilib(self, docinfo, path):
    """Set docinfo['numPages'] from digilib's directory info for ``path``.

    Fetches ``dirInfo-xml.jsp`` below ``self.digilibBaseUrl`` and reads
    the text of its 'size' element; an empty size yields 0.  When no
    data could be fetched docinfo is returned unchanged.

    @return: docinfo
    """
    infoUrl = "".join((self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path))
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
763
764
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml referenced by docinfo['presentationUrl'] (either a
    "http://" URL or a path that is fetched through digilib's Texter
    servlet) and stores the text of its first 'author', 'title' and 'date'
    elements as 'creator', 'title' and 'date'.  Without a presentation
    URL, or when nothing could be fetched, docinfo is returned unchanged.

    @return: docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    # an empty response is treated like a missing one (as in
    # getDocinfoFromDigilib) instead of being handed to the XML parser
    if not txt:
        logging.error("Unable to read info.xml from %s"%(metaUrl))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
793
794
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    Combines the arguments with request parameters (tocPageSize,
    characterNormalization, resultPageSize, query, queryType,
    resultStart, highlightQuery, highlightElement, highlightElementPos)
    and docinfo (numPages/numTextPages, pageFlow, oddPage, pageNumbers).
    ``viewLayer`` may be a comma-separated string or a list; the unique
    non-empty layers are stored as 'viewLayers'.  ``rows``/``cols``
    default to ``self.thumbrows``/``self.thumbcols``.  When a query is
    present getSearchResults is called.  docinfo['numPages'] is updated
    from 'numTextPages' when it is 0.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        seen = set()
        uniqueLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                uniqueLayers.append(layer)

        pageinfo['viewLayers'] = uniqueLayers
        # stringify viewLayer
        viewLayer = ','.join(uniqueLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        # try numTextPages
        numPages = docinfo.get('numTextPages', 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    numGroups = int(numPages / grpsize)
    if numPages % grpsize > 0:
        numGroups += 1

    pageinfo['numgroups'] = numGroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
880
881
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with page information for one screenful of thumbnails.

    Keys of the result:
    'batches': list of {'start', 'end'} for all groups of rows*cols pages
        up to maxIdx (numbering starts at 0 when pageZero is set, else 1),
    'pages': ``rows`` lists of ``cols`` dicts {'idx': n}, with idx None for
        positions outside minIdx..maxIdx; rows are reversed when
        pageFlowLtr is false,
    'prevStart' / 'nextStart': start of the neighbouring group or None.

    A maxIdx of 0 is replaced by start + rows*cols.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    ofs = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    batches = [{'start': n * grpsize + ofs,
                'end': min((n + 1) * grpsize + ofs - 1, maxIdx)}
               for n in range(numBatches)]

    # with a zeroth page the first screen starts at index 0
    firstIdx = 0 if (pageZero and start == 1) else start

    pages = []
    for r in range(rows):
        rowStart = firstIdx + r * cols
        row = [{'idx': (n if minIdx <= n <= maxIdx else None)}
               for n in range(rowStart, rowStart + cols)]
        if not pageFlowLtr:
            # right-to-left page flow: first page goes to the right
            row.reverse()

        pages.append(row)

    batch = {'batches': batches, 'pages': pages}
    batch['prevStart'] = max(start - grpsize, 1) if start > 1 else None
    batch['nextStart'] = start + grpsize if start + grpsize <= maxIdx else None
    return batch
939
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    Keys of the result:
    'batches': list of {'start', 'end'} for all groups of ``size`` up to ``end``,
    'this': the elements for indexes start .. min(start+size-1, end):
        taken from ``data`` by index (fullData) or by position 0, 1, ...
        (not fullData); without data the value is index+1,
    'prevStart' / 'nextStart': start of the neighbouring group or None.

    An ``end`` of 0 is replaced by start + size.
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)}
               for n in range(numBatches)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end+1))
    if not data:
        this = [i + 1 for i in indexes]
    elif fullData:
        this = [data.get(i, None) for i in indexes]
    else:
        this = [data.get(pos, None) for pos in range(len(indexes))]

    batch = {'batches': batches, 'this': this}
    batch['prevStart'] = max(start - size, 1) if start > 1 else None
    batch['nextStart'] = start + size if start + size < end else None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
984
985
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups {id:, name:, uri:} on the annotation server for the user.

    Fetches ``<annotationServerUrl>/annotator/groups?user=<user>`` and
    reads the 'rows' of the JSON answer.  The list is empty when nothing
    was fetched or the answer has no 'rows'.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    return [{'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)}
            for r in rows]
1000
1001
1002	security.declareProtected('View management screens','changeDocumentViewerForm')
1003	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1004
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbrows and thumbcols as given, parses
    ``authgroups`` (comma-separated) into a list of lower-cased names,
    looks up the 'metadata' object again (failure is logged) and passes
    ``availableLayers`` to setAvailableLayers.  Redirects to 'manage_main'
    when a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1022
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = template.__of__(self)
    return boundTemplate()
1027
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: