Context Navigation

source: documentViewer/documentViewer.py @ 581:514902fb6b8a

Last change on this file since 581:514902fb6b8a was 581:514902fb6b8a, checked in by casties, 11 years ago
use dri repository=mpiwg as default
File size: 42.6 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of an ElementTree node.

    The ``encoding`` argument is accepted for interface compatibility but is
    not passed on; the node is serialized with the defaults of
    ``ET.tostring``.
    """
    # The older 4Suite variant (Ft.Xml.Domlette.Print into a cStringIO
    # stream) has been superseded by ElementTree.
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata node.

    Dict nodes yield their '@text' value (None when the key is absent);
    any other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads the ``HTTP_USER_AGENT`` header from ``self.REQUEST`` and returns a
    dict with these keys:

    * ``ua`` -- the raw header value (may be None)
    * ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac`` -- booleans
    * ``versIE``, ``versFirefox``, ``versSafariChrome``, ``versOpera`` --
      version strings, empty when the browser was not recognized
    * ``staticHTML`` -- always False

    Changes over the previous implementation: a missing header no longer
    raises, and the Firefox check computes its own token instead of relying
    on a variable that only existed when the Safari/Chrome parsing got far
    enough. Explicit length checks replace the bare ``except`` clauses.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # parse an empty string when the client sent no User-Agent at all
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if agent.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = agent.find('Mozilla/4.') > -1

    # Tail of the agent string starting at the first '(' resp. first ')'.
    # When the character is missing find() yields -1 and the slice is just
    # the last character, exactly as in the previous implementation.
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]
    navFields = nav.split("; ")
    nav1Tokens = nav1.split(" ")

    # Safari or Chrome identification
    # (the two length checks reproduce the lookups that used to guard this
    # section: a second "; "-separated field and a third token after ')')
    if len(navFields) > 1 and len(nav1Tokens) > 2:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        nav3Tokens = nav3.split(" ")
        if len(nav3Tokens) > 2 and nav3Tokens[2].find("Safari") > -1:
            product = nav3Tokens[1].split("/")
            if len(product) > 1:
                bt['versSafariChrome'] = product[1]

    # IE identification
    if len(navFields) > 1 and navFields[1].find("MSIE") > -1:
        ieTokens = navFields[1].split(" ")
        if len(ieTokens) > 1:
            bt['versIE'] = ieTokens[1]

    # Firefox identification (third token after the first ')')
    if len(nav1Tokens) > 2 and nav1Tokens[2].find("Firefox") > -1:
        product = nav1Tokens[2].split("/")
        if len(product) > 1:
            logging.debug("FIREFOX: %s"%(product[1]))
            bt['versFirefox'] = product[1][0:3]

    # Opera identification
    if agent.find("Opera") > -1:
        operaFields = nav[nav.find(')'):].split("/")
        if len(operaFields) > 2:
            bt['versOpera'] = operaFields[2]

    bt['isMac'] = agent.find('Macintosh') > -1
    bt['isWin'] = agent.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` segments removed.

    Trailing slashes are dropped before the path is split on '/'.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
110
111	##
112	## documentViewer class
113	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # extend the Folder management tabs with a "Configuration" tab
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # class-level default; __init__ tries to replace it with the object named 'metadata'
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types (annotator not default)
    # maps viewMode -> list of layer names (None: no layers for that mode)
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers;
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses ``authgroups`` (a
    comma-separated string) into a list of lower-cased group names and
    creates the 'template' sub-folder. It then tries to place a
    'fulltextclient' (MpdlXmlTextServer) and a 'zogilib' (zogiLib) object in
    that folder and to look up an attribute named 'metadata' as the metadata
    service. Each of these three steps is best effort: a failure is logged
    and otherwise ignored.

    The digilib URL attributes are only assigned when ``digilibBaseUrl`` is
    given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # list of authorized groups, normalized to lower case
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # template folder, so that template.something is always available
    templateFolder = Folder('template')
    self['template'] = templateFolder  # Zope-2.12 style item assignment

    try:
        import MpdlXmlTextServer
        templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(
            id='fulltextclient', serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        templateFolder['zogilib'] = zogiLib(
            id="zogilib", title="zogilib for docuviewer",
            dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    try:
        # the MetaDataFolder instance is expected under the name 'metadata'
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
205
206
207	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page

    Passes all keyword arguments on to template.fulltextclient.getTextPage."""
    return self.template.fulltextclient.getTextPage(**args)

def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo

    Passes all keyword arguments on to template.fulltextclient.getSearchResults."""
    return self.template.fulltextclient.getSearchResults(**args)

def getResultsPage(self, **args):
    """returns one page of the search results

    Passes all keyword arguments on to template.fulltextclient.getResultsPage."""
    return self.template.fulltextclient.getResultsPage(**args)

def getTextInfo(self, **args):
    """returns document info from the text server

    Passes all keyword arguments on to template.fulltextclient.getTextInfo."""
    return self.template.fulltextclient.getTextInfo(**args)

def getToc(self, **args):
    """loads table of contents and stores XML in docinfo

    Passes all keyword arguments on to template.fulltextclient.getToc."""
    return self.template.fulltextclient.getToc(**args)

def getTocPage(self, **args):
    """returns one page of the table of contents

    Passes all keyword arguments on to template.fulltextclient.getTocPage."""
    return self.template.fulltextclient.getTocPage(**args)

def getRepositoryType(self, **args):
    """get repository type

    Passes all keyword arguments on to template.fulltextclient.getRepositoryType."""
    return self.template.fulltextclient.getRepositoryType(**args)

def getTextDownloadUrl(self, **args):
    """returns whatever template.fulltextclient.getTextDownloadUrl returns
    (presumably the download URL of the text -- confirm against the text server class)

    Passes all keyword arguments on unchanged."""
    return self.template.fulltextclient.getTextDownloadUrl(**args)

def getPlacesOnPage(self, **args):
    """get list of gis places on one page

    Passes all keyword arguments on to template.fulltextclient.getPlacesOnPage."""
    return self.template.fulltextclient.getPlacesOnPage(**args)
243
244	# Thumb list for CoolIris Plugin
245	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
246	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list through the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: result of calling the template with docinfo, pageinfo and viewMode
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr() as in index_html: the attribute is only assigned by __init__
    # when a base URL was passed, so a plain attribute read could raise
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode selected: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
276
277
278	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """Render one page of the document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images', 'text' or 'xml'; the default 'auto' picks 'text'
        when docinfo has a 'textURLPath', otherwise 'images'. The legacy value
        'text_dict' means viewMode 'text' with viewLayer 'dict'.
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text,
        figures, none); defaults to 'none' for mode 'filepath', else 'thumbs'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: output of template 'viewer_<viewMode>' or an error string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    if tocMode is None:
        # mode=filepath should not have toc-thumbs
        tocMode = "none" if mode == "filepath" else "thumbs"

    # information about the document (cached) and the user settings (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy spelling
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            # docinfo 'textURL' is not considered here (not implemented yet)
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # layer dict is the default
                viewLayer = "dict"

    # remember viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # information about the page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    # TODO: error page?
    message = "No template for viewMode=%s!"%viewMode
    logging.error(message)
    return message
343
def getAvailableLayers(self):
    """returns dict with list of available layers per viewMode

    The dict is returned as stored in self.availableLayers (no copy is made)."""
    return self.availableLayers
347
def getBrowser(self):
    """returns the browser info dict produced by browserCheck for the current request

    The dict is also written to the debug log."""
    bt = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(bt))
    return bt
353
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL.

    Returns whatever ``template.zogilib.getDLBaseUrl()`` returns.
    """
    return self.template.zogilib.getDLBaseUrl()
358
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with the given parameters.

    When ``docinfo`` carries an 'imageURL' that URL (which already contains
    the fn parameter) is used as the base. Otherwise the URL is built from
    ``self.digilibScalerUrl`` and ``fn``; a missing ``fn`` is taken from
    docinfo['imagePath'] when docinfo is given.

    @param fn: image path for the fn parameter
    @param pn: page number; only added when true
    @param dw: requested width
    @param dh: requested height
    @param docinfo: optional docinfo dict
    @return: URL string

    Fix: the query string is now introduced with '?'. Before, 'fn=...' was
    glued directly onto '.../servlet/Scaler', unlike the other places that
    build Scaler URLs as "%s?fn=%s".
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = self.digilibScalerUrl
        # pick the separator that keeps the query string well-formed
        if '?' not in base:
            sep = '?'
        elif base.endswith('?') or base.endswith('&'):
            sep = ''
        else:
            sep = '&'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
377
def getDocumentViewerURL(self):
    """returns the URL of this instance (the value of self.absolute_url())"""
    return self.absolute_url()
381
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    return style + 'sel' if idx == selected else style
389
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters based on the current request.

    Starts from a copy of ``self.REQUEST.form`` and applies first the single
    ``param``/``val`` pair (the value is stored as ``str(val)``), then every
    entry of the dict ``params`` (values are stored as given). A value of
    None removes the key.

    When ``duplicates`` is set, list values (coming from duplicate keys) are
    collapsed: 'comma' joins the non-empty entries with ',', 'first' takes
    the first non-empty entry.
    """
    newParams = self.REQUEST.form.copy()

    # collect all requested changes in application order
    changes = []
    if param is not None:
        changes.append((param, None if val is None else str(val)))

    if params is not None:
        changes.extend(params.items())

    for (key, value) in changes:
        if value is None:
            # None removes the parameter
            newParams.pop(key, None)
        else:
            newParams[key] = value

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (key, value) in list(newParams.items()):
            if not isinstance(value, list):
                continue

            if duplicates == 'comma':
                # comma-separated list of non-empty entries
                newParams[key] = ','.join([t for t in value if t])
            elif duplicates == 'first':
                # first non-empty entry
                newParams[key] = [t for t in value if t][0]

    return newParams
428
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with changed parameters.

    The parameters come from getParams() (current request plus ``param``/
    ``val`` and ``params``). Each value is passed through utf8ify and
    quote_plus with '/' left unescaped, and the pairs are joined with
    ``paramSep``. ``baseUrl`` defaults to the URL of this instance.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
439
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val

    Calls getLink with a fixed paramSep.
    NOTE(review): paramSep is '&' here, the same as getLink's default; the method
    name suggests an HTML-escaped separator was intended -- confirm against the repository."""
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
443
444
def setAvailableLayers(self, newLayerString=None):
    """Set ``self.availableLayers`` from a JSON string or by autodetection.

    @param newLayerString: JSON object mapping viewMode to a list of layer
        names. It is accepted only when it parses to a dict that contains
        the keys 'text' and 'images'; otherwise an error is logged and the
        layers are autodetected.

    Autodetection starts with a copy of ``self.builtinLayers`` and adds a
    layer ``l`` for mode ``m`` for every name in ``self.template`` of the
    form ``layer_{m}_{l}``. The layer part may itself contain underscores.

    Fixes: the detected layer is now actually appended (``append()`` was
    called without an argument), modes whose builtin entry is None get a
    fresh list, and the per-mode lists are copied so that the shared
    class-level ``builtinLayers`` is never modified.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not parseable as JSON -> fall back to autodetection
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists as well as the dict
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        layers[mode] = None if names is None else list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # does not follow layer_{m}_{l}
            continue

        (m, l) = (parts[1], parts[2])
        if layers.get(m) is None:
            # mode m unknown or without layers so far -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            layers[m].append(l)

    self.availableLayers = layers
478
def getAvailableLayersJson(self):
    """returns available layers as JSON string (json.dumps of self.availableLayers)."""
    return json.dumps(self.availableLayers)
482
483
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of the template 'info_xml' called with the docinfo
    """
    # getattr() as in index_html: the attribute is only assigned by __init__
    # when a base URL was passed, so a plain attribute read could raise
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
492
def getAuthenticatedUser(self, anon=None):
    """Return the user object of the current security manager.

    ``anon`` is returned instead when there is no user or when the user's
    name is "Anonymous User" (Zope's anonymous user).
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
500
def isAccessible(self, docinfo):
    """Tell whether access to the resource described by ``docinfo`` is granted.

    Access type 'free' is always granted. A missing access type or one that
    is listed in ``self.authgroups`` is granted to authenticated users only.
    Any other access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
517
518
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    An empty dict is stored in the session under 'userinfo' (and returned)
    when the session has none yet.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (not checked for being current)
        return session['userinfo']

    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
532
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to a single image file)
    @param url: path or URL of the document
    @param tocMode: accepted but not used in this method
    @return: docinfo dict; it is also stored in the session under 'docinfo'
    @raise IOError: no index.meta could be read for mode 'texttool'
    @raise ValueError: unknown mode

    A docinfo found in the session is returned as is when its 'mode' and 'url' match."""
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        # NOTE(review): the next assignment starts again from url, so the
        # '/index.meta' removal above is dropped for such urls -- confirm this is intended
        if url.startswith('/mpiwg/online/'):
            docUrl = url.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
677
678
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the document name when it does not already end with it).
    Without an archive-path the document URL stored in docinfo is used,
    unless it starts with 'http:'. A leading '/mpiwg/online' is removed from
    the resulting path.

    @param docinfo: docinfo dict (modified in place)
    @param resource: dict with the resource metadata
    @return: docinfo

    Fix: the document URL is read from the key 'documentUrl', which is the
    key getDocinfo stores it under. The former lookup of 'documentURL'
    raised KeyError whenever the archive-path was missing. The old spelling
    is still honoured as a fallback, and a missing URL no longer raises.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
703
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    @param docinfo: docinfo dict (modified in place)
    @param texttool: texttool metadata dict, or a list of which only the first entry is used
    @return: docinfo, with 'pageFlow', 'oddPage' and 'titlePage' always set and
        'imagePath', 'textURL', 'textURLPath', 'textURLRepository' and
        'presentationUrl' set when the corresponding metadata is present"""
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "": # not a URL (no scheme)
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # NOTE(review): tun.get requires a dict here; a plain string node
            # would raise AttributeError -- confirm the metadata service always delivers dicts
            textUrlAtts = tun.get('@attr')
            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    # a matching repository wins; stop looking
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
766
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict under 'bib', its '@type' under 'bibType' and,
    for convenience, the DC-mapped 'creator', 'title' and 'date' (empty
    string when the mapping has no such field). ``bibx`` is accepted but not
    used here.

    @return: docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # all raw bib fields go into "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # DC metadata as mapped by the metadata service
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
780
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    The access type is taken from acc['@attr']['type']. For the types
    'group' and 'institution' the lower-cased acc['name'] is used instead.
    docinfo['accessType'] is only set when a non-empty type was found;
    incomplete or malformed access data leaves docinfo unchanged.

    @param docinfo: docinfo dict (modified in place)
    @param acc: access metadata dict
    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing keys or unexpected value types mean "no access info"
        logging.debug("getDocinfoFromAccess: unusable access data: %s"%repr(e))

    return docinfo
798
def getDocinfoFromDigilib(self, docinfo, path):
    """Ask digilib for the number of images in ``path``.

    Fetches ``dirInfo-xml.jsp`` below ``self.digilibBaseUrl`` and stores the
    content of its ``size`` element as integer in docinfo['numPages'] (0
    when the element is empty). When nothing could be fetched an error is
    logged and docinfo is returned unchanged.

    @return: docinfo
    """
    infoUrl = ''.join((self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path))
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
817
818
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation info.xml.

    Uses docinfo['presentationUrl']: a value starting with "http://" is
    fetched directly, anything else is read through the Texter servlet below
    ``self.digilibBaseUrl``. The text of the first author, title and date
    elements is stored as docinfo 'creator', 'title' and 'date'. Without a
    URL, or when nothing could be read, an error is logged and docinfo is
    returned unchanged.

    @return: docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
847
848
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number (stored as 'current' and 'pn')
    @param start: first page of the thumbnail group; when not given the start of
        the group containing current is used as default
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo dict; read with .get(), so it is required in
        practice despite the None default. 'numPages' is updated from
        'numTextPages' when it was 0.
    @param userinfo: accepted but not used in this method
    @param viewMode: stored as 'viewMode'
    @param viewLayer: layer name, comma-separated string of names, or list of names
    @param tocMode: stored as 'tocMode'
    @return: pageinfo dict

    Also reads tocPageSize, characterNormalization, resultPageSize, query, queryType,
    resultStart and the highlight parameters from self.REQUEST, and runs
    getSearchResults when a query is present."""
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        seen = set()
        # keeps the first occurrence of every non-empty name; seen.add() returns
        # None, so "not seen.add(l)" is always true and only records the name
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use one around current
    # (default: first page of the group of grpsize pages that contains current)
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
934
935
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with page information for one screenful of thumbnails.

    start -- index of the first page on this screen.
    rows, cols -- thumbnail grid size; rows*cols pages form one group.
    pageFlowLtr -- if False the pages of every row are put in reverse order.
    pageZero -- if True an empty slot with index 0 precedes the first page,
        so the groups start at 0 instead of 1.
    minIdx, maxIdx -- smallest and largest valid page index (inclusive).
        maxIdx == 0 means unknown and is replaced by start + group size.

    The returned dict contains:
    'batches' -- list of {'start':, 'end':} for all groups,
    'pages' -- list of rows, each a list of {'idx': index or None},
    'prevStart', 'nextStart' -- start of the neighbouring screens or None,
    'first', 'last' -- minIdx and maxIdx.
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # index of the first slot of the first group
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # The groups cover the slots ofs..maxIdx. Slot 0 has to be counted as
    # well, otherwise the last page is missing from the batch list when
    # maxIdx is a multiple of the group size.
    numSlots = maxIdx - ofs + 1
    nb = int(math.ceil(numSlots / float(grpsize)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    # index of the first slot shown on this screen
    if pageZero and start == 1:
        # correct beginning
        firstIdx = 0
    else:
        firstIdx = start

    pages = []
    idx = firstIdx
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # slot outside the document stays empty
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # The next screen continues after the last slot shown here. Starting
    # from firstIdx (not start) keeps page grpsize reachable and keeps the
    # pairing of the pages when this screen began with slot 0.
    nextStart = firstIdx + grpsize
    if nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    batch['first'] = minIdx
    batch['last'] = maxIdx
    return batch
995
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    start -- index of the first element of this batch.
    size -- number of elements per batch.
    end -- index of the last element (inclusive). end == 0 means unknown
        and is replaced by start + size.
    data -- object with a get(key, default) method holding the elements.
        If empty or None the batch is filled with the numbers i+1 instead.
    fullData -- if True data is looked up by the absolute index, otherwise
        by the position inside this batch counting from 0.

    The returned dict contains:
    'batches' -- list of {'start':, 'end':} for all batches,
    'this' -- list of the elements of this batch,
    'prevStart', 'nextStart' -- start of the neighbouring batches or None,
    'first', 'last' -- start and end.
    """
    batch = {}
    # remember whether the caller knew the last index
    endKnown = (end != 0)
    if not endKnown:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start+size, end+1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
                j += 1

        else:
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so there is a next batch even if it holds only the
    # element at index end. An unknown end never yields a next batch.
    if endKnown and start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1041
1042
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups on the annotation server for the user.

    Requests <annotationServerUrl>/annotator/groups?user=<user> and reads
    the 'rows' entry of the JSON answer. Every group is returned as a dict
    with the keys 'id', 'name' and 'uri' (None where the server sent no
    value). The list is empty when no data came back or 'rows' is missing.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    response = getHttpData(url=groupsUrl, noExceptions=True)
    if not response:
        # nothing received
        return []

    rows = json.loads(response).get('rows', None)
    if rows is None:
        return []

    return [{'id': row.get('id', None), 'name': row.get('name', None), 'uri': row.get('uri', None)} for row in rows]
1057
1058
# Management form for the viewer settings: access to the attribute
# 'changeDocumentViewerForm' is declared protected by the permission
# 'View management screens'.
1059	security.declareProtected('View management screens','changeDocumentViewerForm')
# The form itself is the page template file 'zpt/changeDocumentViewer'.
1060	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1061
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of the document viewer.

    title -- new title.
    digilibBaseUrl -- base URL of the digilib server. The scaler and viewer
        URLs are derived from it by string concatenation, so it has to be
        a string; the default None raises a TypeError.
    thumbrows, thumbcols -- thumbnail grid size, stored as given.
    authgroups -- comma separated group names, stored as a list of
        stripped, lower-cased names.
    availableLayers -- handed to self.setAvailableLayers().
    RESPONSE -- if given, redirects to 'manage_main' at the end.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is only logged; the other settings
        # are still applied
        logging.error("Unable to find MetaDataFolder 'metadata': %s", e)

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1081
def manage_AddDocumentViewerForm(self):
    """Render and return the form for adding a document viewer.

    The form is the page template 'zpt/addDocumentViewer', wrapped in the
    context of self before it is called.
    """
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = template.__of__(self)
    return boundTemplate()
1086
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    imageScalerUrl, textServerName and title are passed on to the
    documentViewer constructor. If RESPONSE is given it is redirected to
    'manage_main' afterwards.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: