Context Navigation

source: documentViewer/documentViewer.py @ 571:f1906951be2a

Last change on this file since 571:f1906951be2a was 571:f1906951be2a, checked in by casties, 12 years ago
nicer batching of tocs
File size: 42.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of an ElementTree node.

    ``encoding`` is part of the signature but is not passed on; the node
    is serialized with the defaults of ``ET.tostring``.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' value (None when the key is absent);
    any other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def browserCheck(self):
    """Inspect the request's User-Agent header and classify the browser.

    Reads ``self.REQUEST.get_header("HTTP_USER_AGENT")`` and returns a dict with

    * 'ua': the raw header value (may be None),
    * boolean flags 'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac',
      'staticHTML' (always False),
    * version strings 'versIE', 'versFirefox', 'versSafariChrome',
      'versOpera', left as "" when the header does not have the expected
      layout.

    A missing header is analysed like an empty string instead of raising.
    Each version probe is independent: the Firefox probe no longer depends
    on a variable assigned by the Safari/Chrome probe, and the Safari/Chrome
    probe no longer aborts when the first parenthesis lacks a "; " separator.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }
    # analyse a string even when the client sent no User-Agent header
    agent = ua or ""

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # Tails of the header starting at the first '(' and at the first ')'.
    # find() returns -1 when the character is missing, which makes the slice
    # start at the last character, exactly as the string.find() based code did.
    fromOpen = agent[agent.find('('):]
    fromClose = agent[agent.find(')'):]

    # Safari or Chrome identification: tokens after the second ')'
    try:
        secondOpen = fromClose[fromClose.find('('):]
        secondClose = secondOpen[secondOpen.find(')'):]
        tokens = secondClose.split(" ")
        product = tokens[1]
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = product.split("/")[1]
    except IndexError:
        # header does not have the expected token layout
        pass

    # IE identification: second "; "-separated entry of the first parenthesis
    try:
        ieToken = fromOpen.split("; ")[1]
        if "MSIE" in ieToken:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third space-separated token after the first ')'
    try:
        ffToken = fromClose.split(" ")[2]
        if "Firefox" in ffToken:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            # only the first three characters are kept (e.g. "3.6")
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            afterParen = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed.

    Trailing slashes are stripped first. A cnt of zero or less removes
    nothing and returns the stripped path (the plain slice ``[0:-0]`` would
    have produced an empty string). If cnt is at least the number of
    segments the result is ''.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
110
111	##
112	## documentViewer class
113	##
114	class documentViewer(Folder):
115	"""document viewer"""
116	meta_type="Document viewer"
117
118	security=ClassSecurityInfo()
119	manage_options=Folder.manage_options+(
120	{'label':'Configuration','action':'changeDocumentViewerForm'},
121	)
122
123	metadataService = None
124	"""MetaDataFolder instance"""
125
126
127	#
128	# templates and forms
129	#
130	# viewMode templates
131	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
132	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
133	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
134	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
135	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
136	# available layer types (annotator not default)
137	builtinLayers = {'text': ['dict','search','gis'],
138	'xml': None, 'images': None, 'index': ['extended']}
139	availableLayers = builtinLayers;
140	# layer templates
141	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
142	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
143	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
144	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
145	layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
146	layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
147	# toc templates
148	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
149	toc_text = PageTemplateFile('zpt/toc_text', globals())
150	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
151	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
152	toc_none = PageTemplateFile('zpt/toc_none', globals())
153	# other templates
154	common_template = PageTemplateFile('zpt/common_template', globals())
155	info_xml = PageTemplateFile('zpt/info_xml', globals())
156	docuviewer_css = ImageFile('css/docuviewer.css',globals())
157	# make docuviewer_css refreshable for development
158	docuviewer_css.index_html = refreshingImageFileIndexHtml
159	docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
160	# make docuviewer_ie_css refreshable for development
161	docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
162	jquery_js = ImageFile('js/jquery.js',globals())
163
164
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer"""
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups arrives as one comma-delimited string; it is stored as a
    # list of trimmed, lower-cased group names
    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())
    self.authgroups = groupNames

    # create template folder so we can always use template.something
    tplFolder = Folder('template')
    self['template'] = tplFolder # Zope-2.12 style

    # full text client (optional: failure is only logged)
    try:
        import MpdlXmlTextServer
        tplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # zogilib instance (optional: failure is only logged)
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = self.metadata
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    # the digilib URLs are only stored when a base URL was given
    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
204
205
206	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (delegates to template.fulltextclient.getTextPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
210
def getSearchResults(self, **args):
    """loads list of search results (delegates to template.fulltextclient.getSearchResults)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
214
def getResultsPage(self, **args):
    """returns one page of the search results (delegates to template.fulltextclient.getResultsPage)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
218
def getTextInfo(self, **args):
    """returns document info from the text server (delegates to template.fulltextclient.getTextInfo)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
222
def getToc(self, **args):
    """loads table of contents (delegates to template.fulltextclient.getToc)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
226
def getTocPage(self, **args):
    """returns one page of the table of contents (delegates to template.fulltextclient.getTocPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
230
def getRepositoryType(self, **args):
    """get repository type (delegates to template.fulltextclient.getRepositoryType)"""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
234
def getTextDownloadUrl(self, **args):
    """get the text download URL (delegates to template.fulltextclient.getTextDownloadUrl)"""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
238
def getPlacesOnPage(self, **args):
    """get list of gis places on one page (delegates to template.fulltextclient.getPlacesOnPage)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
242
243	# Thumb list for CoolIris Plugin
244	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
245	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the document through the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when a base URL was passed, so
    # read it defensively (same pattern as index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode selected: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
275
276
277	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        fallbackUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = self.findDigilibUrl() or fallbackUrl

    # default tocMode: mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text view with dict layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text else images
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode and run it
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
342
def getAvailableLayers(self):
    """returns the availableLayers dict (list of available layers per viewMode)"""
    layers = self.availableLayers
    return layers
346
def getBrowser(self):
    """returns the browser info dict that browserCheck computes for this request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
352
def findDigilibUrl(self):
    """returns the digilib base URL reported by template.zogilib.getDLBaseUrl()"""
    return self.template.zogilib.getDLBaseUrl()
357
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    If docinfo carries an 'imageURL' it is used as the start of the URL.
    Otherwise the URL is built from self.digilibScalerUrl and fn (fn
    defaults to docinfo['imagePath'] when docinfo is given). pn is appended
    when truthy; dw and dh are always appended.

    The query separator is chosen from the configured Scaler URL: '?' when
    it has no query part yet, '&' when it has one, and nothing when it
    already ends in '?' or '&'. Appending "fn=" directly would produce
    ".../Scalerfn=..." for the URL that __init__ stores.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = self.digilibScalerUrl
        if base.endswith('?') or base.endswith('&'):
            sep = ''
        elif '?' in base:
            sep = '&'
        else:
            sep = '?'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
376
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
380
def getStyle(self, idx, selected, style=""):
    """returns style with 'sel' appended when idx equals selected, otherwise style unchanged."""
    return style + 'sel' if idx == selected else style
388
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Starts from a copy of self.REQUEST.form, then applies param=val and the
    entries of dict params. A value of None deletes the key. val is stored
    as str(val); values from params are stored unchanged.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry ('' when every entry is empty, instead of raising
    IndexError). Any other value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys); iterate a snapshot
        # because values are replaced inside the loop
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry
                newParams[k] = entries[0] if entries else ''

    return newParams
427
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params"""
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote each value (not escaping '/') and collect the key=value pairs
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    queryString = paramSep.join(pairs)
    # default base is the URL of this viewer instance
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, queryString)
438
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val (getLink with paramSep fixed)"""
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
442
443
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    newLayerString is parsed as JSON and accepted when the result contains
    both 'text' and 'images'. Otherwise (or when no string is given) the
    layers are autodetected: a copy of builtinLayers is extended with every
    entry of self.template whose name has the form layer_{m}_{l} (exactly
    three '_'-separated parts) for layer l in mode m.

    The per-mode lists are copied, so autodetection never modifies the
    class-level builtinLayers; a mode whose builtin value is None gets a
    fresh list when a template adds a layer to it.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

        except (ValueError, TypeError):
            # not valid JSON, or not a container -> autodetect below
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers (copy the lists so they can be extended safely)
    layers = {}
    for (m, builtin) in self.builtinLayers.items():
        if builtin is None:
            layers[m] = None
        else:
            layers[m] = list(builtin)

    self.availableLayers = layers

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        known = layers.get(m)
        if known is None:
            # mode m doesn't exist yet (or has no layers) -> new list
            layers[m] = [l]
        elif l not in known:
            # m exists -> append
            known.append(l)
477
def getAvailableLayersJson(self):
    """returns self.availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
481
482
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Fetches docinfo for mode/url and renders it with the 'info_xml'
    template found in self.template.
    """
    # digilibBaseUrl is only set by __init__ when a base URL was passed, so
    # read it defensively (same pattern as index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
491
def getAuthenticatedUser(self, anon=None):
    """returns the authenticated user object, or anon for Zope's anonymous user (or no user)."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
499
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' access is always granted; a missing access type or one listed
    in self.authgroups requires an authenticated user; anything else is
    refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
516
517
def getUserinfo(self):
    """returns the userinfo dict kept in the session, creating an empty one if needed"""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # nothing cached yet: store a fresh dict in the session and return it
    fresh = {}
    session['userinfo'] = fresh
    return fresh
531
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    A docinfo dict cached in the session is returned as-is when its 'mode'
    and 'url' match the arguments. Otherwise a new dict is built from the
    index.meta DOM (resource, texttool, bib, access, attribution,
    copyright and DRI sections), completed with page counts from digilib
    or the text server, stored in the session and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to an image file)
    @param url: location of the document, interpreted according to mode
    @param tocMode: accepted but not read in this method
    @raise IOError: mode is 'texttool' and no index.meta DOM was found
    @raise ValueError: mode is not one of the three known modes
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current (same mode and url as requested)
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents (only when the root element is 'resource')
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    # NOTE(review): the nesting level of this block (method level vs. inside
    # the index.meta block above) was reconstructed -- confirm.
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages: still unknown -> ask digilib for the number of images
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages again: fall back to the text page count
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
667
668
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute
    and ending in the document name). Without an archive-path the document
    URL stored in docinfo is used as path, unless it is an http(s) URL.
    A leading '/mpiwg/online' is removed from the path.

    The document URL is read from 'documentUrl' (the key getDocinfo
    writes); the previously used spelling 'documentURL' is still honoured.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath (a missing URL leaves docPath empty
        # instead of raising KeyError)
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
693
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Fills 'imagePath', 'textURL', 'textURLPath' (plus 'textURLRepository'
    when chosen by repository), 'pageFlow', 'oddPage', 'titlePage' and
    'presentationUrl' as far as the corresponding texttool entries exist.
    texttool may be a dict or a list of dicts (only the first is used).

    text-url-path nodes may be dicts or plain strings; attributes are only
    looked up on dict nodes, so a plain-string node no longer raises
    AttributeError.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir (relative to the document path)
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme -> treat as path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # plain-string nodes carry no attributes
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
756
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """reads contents of bib element into docinfo (bibx is accepted but not used here)"""
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # keep all raw bib fields and the bib type
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience ('' when a field is missing)
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
770
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Stores acc['@attr']['type'] as docinfo['accessType']; for the types
    'group' and 'institution' the lower-cased acc['name'] is stored
    instead. Missing or malformed access data leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError, IndexError) as e:
        # incomplete access element: best effort, keep docinfo as it is
        logging.debug("getDocinfoFromAccess: unusable access data: %s"%repr(e))

    return docinfo
788
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp for the directory `path` and stores the
    # content of its <size> element as docinfo['numPages'] (0 when empty).
    # docinfo is returned unchanged when no data could be fetched.
    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    # fetch data
    txt = getHttpData(infoUrl)
    if txt:
        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        docinfo['numPages'] = int(size) if size else 0
    else:
        logging.error("Unable to get dir-info from %s"%(infoUrl))

    # TODO: produce and keep list of image names and numbers
    return docinfo
807
808
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools"""
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through the digilib Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    # copy the first author/title/date element found anywhere in the document
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
837
838
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a new dict describing the current page and thumbnail batch:
    view mode and layers, current page number, rows/cols/group size, start
    of the batch, number of groups, page batch, and request-dependent
    settings (characterNormalization, search query, highlighting).

    @param current: current page number (converted with getInt)
    @param start: first page of the thumbnail batch; defaults to the first
        page of the group that contains current
    @param rows: thumbnail rows, defaults to self.thumbrows
    @param cols: thumbnail columns, defaults to self.thumbcols
    @param docinfo: document info dict; read, and its 'numPages' is set from
        'numTextPages' when it was 0
    @param userinfo: accepted but not read in this method
    @param viewLayer: layer name, comma-separated string of names, or list
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers; seen.add() returns None, so the
        # last condition is always true and only records the entry
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list (a viewLayer of None yields [None])
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns (when page flow and odd-scan side disagree)
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # return value is not used here
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
924
925
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    Parameters:
      start       -- index of the first page of the current screen.
      rows, cols  -- size of the thumbnail grid; one batch has rows*cols slots.
      pageFlowLtr -- if false, every row is filled right-to-left.
      pageZero    -- if true, batching starts at slot 0 instead of slot 1
                     (slot 0 is below the default minIdx, so it gets idx None).
      minIdx      -- smallest valid page index.
      maxIdx      -- largest valid page index; 0 means start + rows*cols.

    Returns a dict with the keys:
      batches   -- list of {'start':, 'end':} for every batch up to maxIdx
      pages     -- list of rows, each a list of {'idx': index or None}
      prevStart -- start index of the previous batch or None
      nextStart -- start index of the next batch or None
      first     -- minIdx
      last      -- maxIdx
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # index of the very first slot
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # Number of slots between ofs and maxIdx (inclusive). With pageZero there
    # is one slot more than pages; counting only maxIdx slots used to drop the
    # last page from the batch list when maxIdx was a multiple of grpsize.
    numSlots = maxIdx - ofs + 1
    nb = int(math.ceil(numSlots / float(grpsize)))

    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        batches.append({'start': s, 'end': min(s + grpsize - 1, maxIdx)})

    # first slot shown on this screen
    if pageZero and start == 1:
        # correct beginning
        firstIdx = 0
    else:
        firstIdx = start

    # grid of pages for the current screen
    pages = []
    idx = firstIdx
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx': idx}
            else:
                # slot outside the valid range stays empty
                page = {'idx': None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    batch = {'batches': batches, 'pages': pages}

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # The next screen begins right after the last slot shown here. Using
    # firstIdx instead of start keeps page `grpsize` reachable when the first
    # screen began at slot 0.
    if firstIdx + grpsize <= maxIdx:
        batch['nextStart'] = firstIdx + grpsize
    else:
        batch['nextStart'] = None

    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
985
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    Parameters:
      start    -- index of the first element of this batch.
      size     -- number of elements per batch.
      end      -- index of the last element (inclusive); 0 means start + size.
      data     -- optional mapping with a get() method holding the elements.
      fullData -- if true, data is keyed by absolute index (start, start+1, ...);
                  otherwise data only holds this batch and is keyed 0, 1, ...

    Returns a dict with the keys:
      batches   -- list of {'start':, 'end':} for every batch up to end
      this      -- list of the elements in this batch
      prevStart -- start index of the previous batch or None
      nextStart -- start index of the next batch or None
      first     -- start
      last      -- end
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        batches.append({'start': i * size + 1, 'end': min((i + 1) * size, end)})

    # list of elements in this batch
    this = []
    for j, i in enumerate(range(start, min(start + size, end + 1))):
        if not data:
            # no data given: a number derived from the index is used instead
            d = i + 1
        elif fullData:
            d = data.get(i, None)
        else:
            d = data.get(j, None)

        this.append(d)

    batch = {'batches': batches, 'this': this}

    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so there is a next batch as soon as start + size
    # does not exceed end (a strict "<" would hide a final single element).
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1031
1032
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups the user has on the annotation server.

    Requests <annotationServerUrl>/annotator/groups?user=<user> and parses
    the answer as JSON. Every entry of the answer's 'rows' list becomes a
    dict with the keys 'id', 'name' and 'uri'. Returns an empty list if no
    data was received or the answer has no 'rows'.
    """
    # NOTE(review): user is put into the query string without URL-escaping.
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    return [{'id': row.get('id', None),
             'name': row.get('name', None),
             'uri': row.get('uri', None)} for row in rows]
1047
1048
# Protect the configuration form with the 'View management screens' permission
# (security is presumably this class's ClassSecurityInfo -- confirm at the class head).
security.declareProtected('View management screens','changeDocumentViewerForm')
# Page template for the configuration form, loaded from zpt/changeDocumentViewer.
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1051
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Store new configuration values on the document viewer.

    Sets title, the digilib base URL and the scaler and viewer URLs derived
    from it, the thumbnail grid size and the list of authorized groups
    (comma separated, stored stripped and lower-cased). Looks up the
    'metadata' attribute as metadata service, passes availableLayers to
    setAvailableLayers() and redirects RESPONSE to 'manage_main' if given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    # NOTE(review): with the default digilibBaseUrl=None the concatenations
    # below raise TypeError -- callers have to pass a string.
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())

    self.authgroups = groupNames

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        # best effort: a missing metadata folder is only logged
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1071
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    # load the template, wrap it in the context of self and render it
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    return template.__of__(self)()
1076
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects RESPONSE to 'manage_main' if a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: