Context Navigation

source: documentViewer/documentViewer.py @ 560:04c330b92cab

Last change on this file since 560:04c330b92cab was 560:04c330b92cab, checked in by casties, 12 years ago
uses escidoc-dri (instead of -test). updated default license display for access!=free.
File size: 40.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the XML as a string.

    The ``encoding`` argument is accepted but not passed on:
    ``ET.tostring`` is called with its default encoding.
    """
    xmlText = ET.tostring(node)
    return xmlText
35
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads ``HTTP_USER_AGENT`` from ``self.REQUEST`` and returns a dict with:

    * ``ua`` -- the raw header value (may be None),
    * ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac`` --
      boolean flags derived from substrings of the header,
    * ``versIE``, ``versFirefox``, ``versSafariChrome``, ``versOpera`` --
      version strings, empty when the browser was not recognised,
    * ``staticHTML`` -- always False.

    A missing header is parsed as an empty string instead of raising, and
    every version probe works on its own (the Firefox probe no longer relies
    on a variable that was only set when the Safari probe got far enough).
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # no User-Agent header at all: parse an empty string, report the raw value
    agent = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # NOTE: str.find() yields -1 when the character is absent, so these
    # slices then hold only the last character; the probes below simply do
    # not match in that case.
    fromOpen = agent[agent.find('('):]     # "(platform; ...) rest"
    fromClose = agent[agent.find(')'):]    # ") rest"

    # Safari or Chrome: ") <Product>/<version> Safari/..." after the
    # second parenthesised group
    try:
        secondGroup = fromClose[fromClose.find('('):]
        afterSecond = secondGroup[secondGroup.find(')'):]
        tokens = afterSecond.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # header does not have the expected shape
        pass

    # IE: second "; "-separated field of the first group is "MSIE <version>"
    try:
        field = fromOpen.split("; ")[1]
        if "MSIE" in field:
            bt['versIE'] = field.split(" ")[1]
    except IndexError:
        pass

    # Firefox: third space-separated token after the first group
    try:
        product = fromClose.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: third "/"-separated piece after the first group
    try:
        if "Opera" in agent:
            afterFirst = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = afterFirst.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` '/'-separated segments removed.

    Trailing slashes are dropped before the path is split.
    """
    segments = path.rstrip('/').split('/')
    remaining = segments[0:-cnt]
    return '/'.join(remaining)
103
104	##
105	## documentViewer class
106	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    # security declarations for this class (see the declareProtected calls
    # in front of the published methods)
    security=ClassSecurityInfo()
    # management tabs: the Folder tabs plus a "Configuration" tab
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance; None until one has been assigned
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates (index_html looks them up as 'viewer_<viewMode>')
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types (annotator not default):
    # maps viewMode -> list of layer names, or None when the mode has no layers
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates (named layer_<viewMode>_<layer>)
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
156
157
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses ``authgroups``
    (comma-separated) into a lower-cased list, and creates a 'template'
    sub-folder holding a 'fulltextclient' and a 'zogilib' object. Failure to
    create either helper, or to find the 'metadata' attribute, is logged and
    otherwise ignored. ``digilibBaseUrl`` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # full text server client
    try:
        import MpdlXmlTextServer
        templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        templateFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service: assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
195
196
197	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
201
def getSearchResults(self, **args):
    """Load the search results via template.fulltextclient and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
205
def getResultsPage(self, **args):
    """Return one page of the search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
209
def getTextInfo(self, **args):
    """Return document info from the text server (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
213
def getToc(self, **args):
    """Load the table of contents via template.fulltextclient and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
217
def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
221
def getTextDownloadUrl(self, **args):
    """Return the result of template.fulltextclient.getTextDownloadUrl for the given arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
225
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
229
230	# Thumb list for CoolIris Plugin
231	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
232	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list with the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto' (default); 'auto' becomes
        'text' when docinfo has a text URL and 'images' otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    '''

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when a value was given, so a
    # plain attribute access could raise AttributeError here
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text URL set -> text view, else image view
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
262
263
264	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: output of the template 'viewer_<viewMode>', or an error string
        when the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default toc: none for mode=filepath, thumbs for everything else
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text view with dict layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text else images
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
329
def getAvailableLayers(self):
    """Return the dict that lists the available layers for each viewMode."""
    layers = self.availableLayers
    return layers
333
def getBrowser(self):
    """Return the browser information dict produced by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
339
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
344
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with page and size parameters.

    Uses docinfo['imageURL'] as the base when present. Otherwise the URL is
    built from self.digilibBaseUrl and ``fn`` (falling back to
    docinfo['imagePath'] when ``fn`` is None). ``pn`` is appended only when
    truthy; ``dw`` and ``dh`` are always appended.
    """
    haveDocinfo = docinfo is not None
    base = docinfo.get('imageURL', None) if haveDocinfo else None

    if base is None:
        # no ready-made image URL: assemble Scaler URL with file name
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        base += "fn=%s"%fn

    pieces = [base]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
363
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
367
def getStyle(self, idx, selected, style=""):
    """Return ``style`` with 'sel' appended when ``idx`` equals ``selected``, else ``style`` unchanged."""
    if not (idx == selected):
        return style
    return style + 'sel'
375
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Starts from a copy of the request's form parameters, then applies
    param=val and the entries of dict params. A value of None deletes the key.

    @param param: name of a single parameter to change
    @param val: its new value (stored as str); None removes the parameter
    @param params: dict of further parameters; None values remove the key,
        other values are stored unchanged
    @param duplicates: how to collapse list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the first
        non-empty entry (or '' when all entries are empty); any other truthy
        value leaves lists untouched
    """
    # copy existing request params
    newParams=self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # used to raise IndexError here
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
414
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with parameter ``param`` set to ``val`` or from dict ``params``.

    The query string is built from getParams(); values are UTF-8 encoded and
    quoted with quote_plus (keeping '/'), keys are used as they are. When
    ``baseUrl`` is None the viewer's own URL is used.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
425
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the document viewer with parameter ``param`` set to ``val``.

    Calls getLink() with paramSep='&'.
    """
    # NOTE(review): the separator passed here equals getLink's default; given
    # the method name it was presumably meant to be the HTML-escaped
    # ampersand -- confirm against the repository before changing.
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
429
430
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    newLayerString is parsed as JSON and used as-is when it contains the
    keys 'text' and 'images'. Otherwise (or when it is None) the layers are
    autodetected: start from a copy of builtinLayers and add one layer for
    every id in self.template of the form layer_{m}_{l} (layer l in mode m).
    Ids that do not have exactly these three '_'-separated parts are ignored.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON, or JSON that is not a container
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the per-mode lists as well so that
    # appending below never changes the class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if isinstance(names, list):
            layers[mode] = list(names)
        else:
            layers[mode] = names

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m is new or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
464
def getAvailableLayersJson(self):
    """Return the available layers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
468
469
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Loads docinfo for mode/url and renders it with the 'info_xml' template.
    """
    # digilibBaseUrl is only set by __init__ when a value was given, so a
    # plain attribute access could raise AttributeError here
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
478
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or ``anon`` when there is none.

    A user whose name is "Anonymous User" counts as not authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
486
def isAccessible(self, docinfo):
    """Return True when access to the resource described by docinfo is granted.

    accessType 'free' is always accessible. A missing accessType or one
    listed in self.authgroups requires an authenticated user. Any other
    accessType is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
503
504
def getUserinfo(self):
    """Return the userinfo dict kept in the session, creating an empty one if needed."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        return session['userinfo']

    # nothing cached yet: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
518
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    Returns the docinfo dict cached in the session when its 'mode' and 'url'
    match the arguments. Otherwise a new docinfo is assembled from the
    index.meta DOM (via self.metadataService), the text server and digilib,
    stored in the session and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to a single image file)
    @param url: document location, interpreted according to mode
    @param tocMode: passed to getTextInfo as mode
    @raise IOError: mode is 'texttool' and no index.meta DOM was found
    @raise ValueError: mode is not one of the three known modes
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current (same mode and url)
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imagePath'] = url
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + url
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents (only when the root element is 'resource')
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages: still unknown -> ask digilib
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages again: fall back to the number of text pages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
649
650
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute and
    ending in the document name). Without an archive-path the document URL
    from docinfo is used as path unless it starts with 'http:'. A leading
    '/mpiwg/online' is removed from the path.

    @return: the updated docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo() stores it as 'documentUrl';
        # the former lookup of 'documentURL' raised KeyError (the old
        # spelling is still accepted as fallback)
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
675
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Copy the settings of the texttool element into docinfo and return it.

    Paths for image directory, old-style text URL and presentation are only
    stored when docinfo has a 'documentPath'; relative values are joined to
    it. 'pageFlow', 'oddPage' and 'titlePage' are always set, using the
    defaults 'ltr', 'left' and 1.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        docinfo['imagePath'] = os.path.join(docPath, imageDir).replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        hasScheme = urlparse.urlparse(oldTextUrl)[0] != ""
        if not hasScheme:
            # not a URL: treat as path below the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow, position of odd pages, number of title page
    for (key, source, default) in (('pageFlow', 'page-flow', 'ltr'),
                                   ('oddPage', 'odd-scan-position', 'left'),
                                   ('titlePage', 'title-scan-no', 1)):
        docinfo[key] = texttool.get(source, default)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
718
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Copy the bib element into docinfo and return docinfo.

    Stores the raw fields as 'bib', the '@type' field as 'bibType', and the
    DC-mapped 'creator', 'title' and 'date' (empty string when missing).
    ``bibx`` is accepted but not used here.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
732
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the access type found in
    acc['@attr']['type']. For the types 'group' and 'institution' the
    lower-cased acc['name'] is used instead. Incomplete access data leaves
    docinfo unchanged.

    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing or malformed entries just mean "no access
        # info" (only lookup errors are ignored, nothing else is swallowed)
        logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
750
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp for the directory `path` and stores the
    # text of its <size> element as docinfo['numPages'] (0 when empty).
    # Returns docinfo; when no data could be fetched docinfo is unchanged.
    # NOTE(review): the original has no docstring; Zope 2 presumably refuses
    # to publish methods without one, so none is added here -- confirm.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
769
770
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Read DC-like bibliographical information from the presentation info.xml.

    Uses docinfo['presentationUrl']: a value starting with "http://" is
    fetched directly, anything else through digilib's Texter servlet. Sets
    'creator', 'title' and 'date' in docinfo from the first author, title and
    date elements. Returns docinfo unchanged when there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
799
800
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a new dict describing the current page view: view mode and
    layers, current page number, thumbnail grid (rows, cols, start, page
    batch), and the search and highlight parameters read from the request.

    @param current: current page number (converted with getInt)
    @param start: first page of the thumbnail group; defaults to the start
        of the group that contains current
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: document info dict; 'numPages' is filled in from
        'numTextPages' when it is 0
    @param userinfo: not used in this method
    @param viewLayer: layer name, comma-separated names, or list of names
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so the last clause only records the name)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the start of the group around current
    # NOTE(review): math.ceil returns a float in Python 2, so the default
    # passed to getInt is a float -- confirm getInt converts its default
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
886
887
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    Keys of the result:
    'batches'   -- list of {'start', 'end'} for every group of rows*cols pages
                   up to maxIdx (groups start at 0 when pageZero, else at 1),
    'pages'     -- rows x cols grid of {'idx': n}; idx is None for positions
                   outside minIdx..maxIdx; rows are reversed when
                   pageFlowLtr is false,
    'prevStart' -- start of the previous group or None,
    'nextStart' -- start of the next group or None.

    maxIdx=0 means start + rows*cols.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    ofs = 0 if pageZero else 1
    batches = [{'start': i * grpsize + ofs,
                'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
               for i in range(numBatches)]

    # with a zeroth page the very first screen begins at index 0
    first = 0 if (pageZero and start == 1) else start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            idx = first + r * cols + c
            inRange = (idx >= minIdx) and (idx <= maxIdx)
            row.append({'idx': idx if inRange else None})

        if not pageFlowLtr:
            # right-to-left: first page of the row goes to the right
            row.reverse()

        pages.append(row)

    prevStart = max(start - grpsize, 1) if start > 1 else None
    nextStart = (start + grpsize) if (start + grpsize <= maxIdx) else None

    return {'batches': batches,
            'pages': pages,
            'prevStart': prevStart,
            'nextStart': nextStart}
945
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return paging information for one screenful of data.

    start    -- 1-based index of the first item on this screen
    size     -- number of items per screen
    end      -- index of the last item (inclusive); 0 means start + size
    data     -- optional mapping of items (anything with a get() method)
    fullData -- if true, data is looked up by absolute index; otherwise by
                position on this screen, counting from 0

    The returned dict has the keys
    'batches'   -- list of {'start':, 'end':} for every screen up to end
    'this'      -- items of this screen (index + 1 for each position when
                   no data is given)
    'prevStart' -- start index of the previous screen, or None
    'nextStart' -- start index of the next screen, or None
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [
        {'start': i * size + 1, 'end': min((i + 1) * size, end)}
        for i in range(nb)
    ]

    # list of elements in this batch (end is inclusive)
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
        else:
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so the next screen exists as soon as its first
    # index does not exceed end (a strict '<' hides a single trailing item)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s", start, size, end, repr(batch))
    return batch
990
991
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the user's groups on the annotation server.

    The result is a list of dicts with the keys 'id', 'name' and 'uri'
    (None where the server response lacks a value). It is empty if the
    request yielded no data or the response has no 'rows' member.
    """
    # NOTE(review): user is put into the query string unescaped -- confirm
    # that callers only pass URL-safe user names
    response = getHttpData(url="%s/annotator/groups?user=%s"%(annotationServerUrl,user), noExceptions=True)
    if not response:
        return []

    rows = json.loads(response).get('rows', None)
    if rows is None:
        return []

    return [
        {'id': row.get('id', None), 'name': row.get('name', None), 'uri': row.get('uri', None)}
        for row in rows
    ]
1006
1007
# the configuration form requires the 'View management screens' permission
# NOTE(review): no security declaration for changeDocumentViewer itself is
# visible here -- confirm it is protected elsewhere
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Store the viewer configuration on this object.

    authgroups is a comma-separated string; it is kept as a list of
    stripped, lower-cased names. If RESPONSE is given the request is
    redirected to 'manage_main' afterwards.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())

    self.authgroups = groupNames

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        # a missing metadata folder is only logged
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1028
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before calling it
    return addForm.__of__(self)()
1033
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer called id and add it to self.

    If RESPONSE is given the request is redirected to 'manage_main'.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: