Context Navigation

source: documentViewer/documentViewer.py @ 584:011905457a5f

Last change on this file since 584:011905457a5f was 584:011905457a5f, checked in by casties, 11 years ago
new viewmode=indexonly as coverpage for pdf-generation.
File size: 42.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The encoding argument is accepted for compatibility but is not used:
    the node is always serialized with ElementTree's default settings.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata dict node.

    Anything that is not a dict is handed back unchanged; a dict without
    an '@text' key yields None.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def _tailFrom(text, marker):
    # Slice text from the first occurrence of marker. When marker is absent
    # find() returns -1, so only the last character is kept (this mirrors the
    # slicing the browser sniffing has always relied on).
    return text[text.find(marker):]


def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict with
    the raw agent string ('ua'), the flags 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' and the version strings 'versIE',
    'versFirefox', 'versSafariChrome' and 'versOpera' (empty when unknown).
    A missing header is treated as an empty agent string.
    """
    # a request without User-Agent must not break the page
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    isIE = 'MSIE' in ua
    bt = {
        'ua': ua,
        'isIE': isIE,
        'isN4': (not isIE) and ('Mozilla/4.' in ua),
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    # Safari or Chrome identification: look at the tokens after the second
    # parenthesised group, e.g. ") Chrome/30.0 Safari/537.36"
    try:
        tokens = _tailFrom(_tailFrom(_tailFrom(ua, ')'), '('), ')').split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated field of the first group
    try:
        field = _tailFrom(ua, '(').split("; ")[1]
        if "MSIE" in field:
            bt['versIE'] = field.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first group.
    # Computed here instead of reusing a variable left over from the
    # Safari check, which was undefined whenever that check bailed out early.
    try:
        product = _tailFrom(ua, ')').split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            bt['versOpera'] = _tailFrom(_tailFrom(ua, '('), ')').split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are dropped before the path is split on '/'.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
110
111	##
112	## documentViewer class
113	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (None until one has been found)
    metadataService = None

    #
    # templates and forms
    #
    # one template per viewMode
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    viewer_indexonly = PageTemplateFile('zpt/viewer_indexonly', globals())

    # layer types available per viewMode (annotator is not a default layer)
    builtinLayers = {
        'text': ['dict','search','gis'],
        'xml': None,
        'images': None,
        'index': ['extended'],
    }
    # starts out as the very same dict object as builtinLayers
    availableLayers = builtinLayers

    # one template per layer
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())

    # one template per table-of-contents mode
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())

    # remaining templates and static resources
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # serve the stylesheet through the refreshing index_html (for development)
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # same for the IE stylesheet
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
165
166
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores id, title and thumbnail grid size, splits the comma-separated
    authgroups string into a lower-cased list and creates the 'template'
    folder with the optional 'fulltextclient' and 'zogilib' helpers. Failure
    to create a helper or to find the metadata service is logged, not raised.
    The digilib URLs are only set when digilibBaseUrl is given.
    """
    self.id=id
    self.title=title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
206
207
208	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """returns full text content of page (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**kwargs)
212
def getSearchResults(self, **kwargs):
    """loads list of search results and stores XML in docinfo (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**kwargs)
216
def getResultsPage(self, **kwargs):
    """returns one page of the search results (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**kwargs)
220
def getTextInfo(self, **kwargs):
    """returns document info from the text server (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**kwargs)
224
def getToc(self, **kwargs):
    """loads table of contents and stores XML in docinfo (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**kwargs)
228
def getTocPage(self, **kwargs):
    """returns one page of the table of contents (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**kwargs)
232
def getRepositoryType(self, **kwargs):
    """get repository type (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**kwargs)
236
def getTextDownloadUrl(self, **kwargs):
    """returns whatever getTextDownloadUrl of template.fulltextclient returns"""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**kwargs)
240
def getPlacesOnPage(self, **kwargs):
    """get list of gis places on one page (delegates to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**kwargs)
244
# Thumb list for CoolIris Plugin
thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when docinfo has a text URL and 'images' otherwise
    @param start: first page of the thumbnail group (passed on to getPageinfo)
    @param pn: current page number (passed on to getPageinfo)
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute is absent when the viewer was
    # created without digilibBaseUrl (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text URL set and text viewer configured
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
277
278
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed on to getPageinfo)
    @param pn: current page number (passed on to getPageinfo)
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default toc: none for mode=filepath, thumbs for everything else
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy spelling of text view with dict layer
        viewMode = "text"
        viewLayer = "dict"
    elif viewMode=="auto":
        # text if the document has a text URL path, images otherwise
        # (docinfo 'textURL' is not evaluated here)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # the page template is /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        # execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
344
def getAvailableLayers(self):
    """returns the availableLayers dict (list of layer names per viewMode)"""
    layers = self.availableLayers
    return layers
348
def getBrowser(self):
    """returns the browser information dict computed by browserCheck"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
354
def findDigilibUrl(self):
    """returns the result of getDLBaseUrl() of the zogilib object in the template folder"""
    return self.template.zogilib.getDLBaseUrl()
359
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    When docinfo carries an 'imageURL' it is used as the base (and fn is
    ignored). Otherwise the URL is built from self.digilibScalerUrl and fn,
    with fn falling back to docinfo['imagePath']. pn is appended only when
    set; dw and dh are always appended.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        url = self.digilibScalerUrl
        # start the query string properly: '?' for a bare servlet URL,
        # '&' when the configured URL already has parameters
        if '?' not in url:
            url += '?'
        elif not url.endswith(('?', '&')):
            url += '&'

        url += "fn=%s"%fn

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
378
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
382
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
390
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set or delete
    @param val: new value for param (stored as str); None deletes param
    @param params: dict of further parameters; a None value deletes the key
    @param duplicates: how to collapse list values (from repeated keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' when there is none); anything else
        leaves lists untouched
    """
    # work on a copy of the request params
    newParams=self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k,v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # a list of only empty entries collapses to '' instead of failing
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
429
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    The parameters come from getParams(); values are utf8ified and quoted
    with quote_plus (leaving '/' unescaped) and joined with paramSep.
    baseUrl defaults to the URL of this viewer.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (key, value) in urlParams.items():
        quoted = urllib.quote_plus(utf8ify(value), '/')
        pairs.append("%s=%s"%(key, quoted))

    queryString = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, queryString)
440
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val

    Same as getLink() but the parameters are separated by the escaped
    ampersand '&amp;' so the link can be embedded in HTML/XML markup.
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
444
445
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.
    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON.

    A newLayerString is only accepted when it decodes to something that
    contains both 'text' and 'images'; otherwise an error is logged and the
    layers are autodetected from the builtin layers plus the layer_*
    entries found in the template folder."""
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not JSON, or JSON that is not a container
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with a copy of the builtin layers; the lists are copied too so
    # that appending below never changes the shared class-level defaults
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # at most two splits: the layer name itself may contain '_'
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = (parts[1], parts[2])
        known = layers.get(m)
        if known is None:
            # mode m is new or has no layers yet -> new list
            layers[m] = [l]
        elif l not in known:
            known.append(l)

    self.availableLayers = layers
479
def getAvailableLayersJson(self):
    """returns the availableLayers dict serialized as JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
483
484
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Renders the 'info_xml' template from the template folder with the
    docinfo for (mode, url).
    """
    # getattr with default: the attribute is absent when the viewer was
    # created without digilibBaseUrl (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
493
def getAuthenticatedUser(self, anon=None):
    """returns the authenticated user object, or anon when there is no user
    or the user name is "Anonymous User"."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
501
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' documents are always accessible. Documents without access type
    or with an access type listed in self.authgroups need an authenticated
    user. Any other access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
518
519
def getUserinfo(self):
    """returns the userinfo object kept in the session under 'userinfo'
    (a new empty dict is stored and returned when there is none yet)"""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo; no check whether it is still current
        return session['userinfo']

    fresh = {}
    session['userinfo'] = fresh
    return fresh
533
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    The docinfo dict is cached in the session and reused as long as mode
    and url are unchanged (tocMode is not part of that check). Otherwise it
    is rebuilt from index.meta (via self.metadataService), the text server
    and digilib, stored in the session and returned.

    @param mode: 'texttool' (url is document dir or index.meta), 'imagepath'
        (url is folder with images) or 'filepath' (url is image file)
    @param url: path or URL of the document
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

        docUrl = url.replace('/index.meta', '')
        if url.startswith('/mpiwg/online/'):
            # strip the prefix from docUrl (not url) so that the
            # '/index.meta' suffix removed above stays removed
            docUrl = docUrl.replace('/mpiwg/online/', '', 1)

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text server
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
                # include list of pages TODO: do we need this always?
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightInfo = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightInfo:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightInfo))
            docinfo['copyright'] = copyrightInfo

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='mpiwg')
        if dri:
            docinfo['DRI'] = dri

        # (presentation) context
        ctx = self.metadataService.getContextData(dom=metaDom, all=True)
        if ctx:
            logging.debug("getcontext: ctx=%s"%repr(ctx))
            docinfo['presentationContext'] = ctx

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            logging.debug("imgpath=%s"%imgpath)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # check numPages again
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
678
679
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the name if it does not end with it). Without archive-path
    the document URL from docinfo is used unless it starts with 'http:'.
    A leading '/mpiwg/online' is removed from the path.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it as 'documentUrl';
        # the 'documentURL' spelling is still accepted as fallback
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
704
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Fills 'imagePath', 'textURL', 'textURLPath' (plus 'textURLRepository'
    when chosen by repository), 'pageFlow', 'oddPage', 'titlePage' and
    'presentationUrl' as far as the texttool data provides them.
    texttool may be a list; then only its first element is used.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme -> not a URL, relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes can carry attributes; a plain text node has none
        textUrlAtts = None
        if isinstance(tun, dict):
            textUrlAtts = tun.get('@attr')

        if (textUrlAtts and 'repository' in textUrlAtts):
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
767
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """reads contents of bib element into docinfo

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    'creator', 'title' and 'date' fields of the DC mapping provided by
    the metadata service (empty string when missing). bibx is not used.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # all raw bib fields go into "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,'')

    return docinfo
781
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the 'type' attribute of acc; for the
    types 'group' and 'institution' the lower-cased 'name' is used
    instead. Incomplete or oddly shaped access data leaves docinfo
    unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access=acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError):
        # missing keys or unexpected structure: no access type
        pass

    return docinfo
799
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp about directory `path` and stores the
    # reported <size> as docinfo['numPages'] (0 when there is no size).
    # docinfo is returned unchanged when no data could be fetched.
    # NOTE(review): the original has no docstring here; Zope publishes only
    # methods with a docstring, so confirm before adding one.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    sizeNode = ET.fromstring(txt).find("size")
    size = getText(sizeNode)
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
818
819
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml named by docinfo['presentationUrl'] (a full http(s)
    URL, or a path fetched through digilib's Texter servlet) and stores
    its author, title and date as 'creator', 'title' and 'date'.
    docinfo is returned unchanged when there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt=getHttpData(metaUrl)
    # empty replies are treated like missing ones (as in getDocinfoFromDigilib)
    # instead of being handed to the XML parser
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['date']=getText(dom.find(".//date"))
    return docinfo
848
849
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a fresh dict describing the current page: view mode and layers,
    page number, thumbnail grid (rows, cols, group size, start, page batch),
    table-of-contents and search settings taken from the request. May also
    set docinfo['numPages'] from 'numTextPages' and triggers
    getSearchResults() when the request has a query. userinfo is not used.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # a string may hold several comma-separated layers
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # single (non-string) value: wrap it in a list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # keep non-empty layers once each, in order of first appearance
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current

    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=(groupStart))
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    request = self.REQUEST
    # table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    # number of thumbnail groups, counting a partial last group
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
935
936
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful (rows x cols) of thumbnails.

    Keys of the result:
      batches   -- list of {'start', 'end'} for every group up to maxIdx
      pages     -- list of rows, each a list of {'idx': n}; idx is None for
                   positions outside minIdx..maxIdx
      prevStart -- start of the previous group, or None on the first one
      nextStart -- start of the next group, or None if it would exceed maxIdx
      first     -- minIdx
      last      -- maxIdx

    A maxIdx of 0 is replaced by start + rows*cols. With pageZero the
    batches begin at 0 instead of 1, and a grid starting at 1 begins at 0.
    When pageFlowLtr is false each row is filled right-to-left.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    # batches are 1-based unless there is a zeroth page
    offset = 0 if pageZero else 1
    batches = [
        {'start': n * grpsize + offset,
         'end': min((n + 1) * grpsize + offset - 1, maxIdx)}
        for n in range(numBatches)
    ]

    # with a zeroth page the very first grid begins one position earlier
    idx = 0 if (pageZero and start == 1) else start

    pages = []
    for _row in range(rows):
        cells = []
        for _col in range(cols):
            if minIdx <= idx <= maxIdx:
                cells.append({'idx': idx})
            else:
                cells.append({'idx': None})

            idx += 1

        if not pageFlowLtr:
            # right-to-left flow: first page of the row goes to the right
            cells.reverse()

        pages.append(cells)

    prevStart = max(start - grpsize, 1) if start > 1 else None
    following = start + grpsize
    nextStart = following if following <= maxIdx else None

    return {
        'batches': batches,
        'pages': pages,
        'prevStart': prevStart,
        'nextStart': nextStart,
        'first': minIdx,
        'last': maxIdx,
    }
996
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    start    -- index of the first element of this batch
    size     -- number of elements per batch
    end      -- index of the last element (inclusive); 0 means start + size
    data     -- optional mapping to take the elements from
    fullData -- if true, data is looked up by absolute index (start, start+1, ...);
                otherwise by position inside the batch (0, 1, ...)

    Keys of the result:
      batches   -- list of {'start', 'end'} for every batch up to end
      this      -- elements of this batch; data.get(key) when data is
                   non-empty, otherwise the value index + 1
      prevStart -- start of the previous batch, or None on the first one
      nextStart -- start of the next batch, or None if there is none
      first     -- start
      last      -- end
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start+size, end+1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
                j += 1

        else:
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next batch exists as soon as its first index
    # does not exceed end (a strict '<' would hide a final one-element batch)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1042
1043
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups on the annotation server for the user.

    Each group is a dict {'id':, 'name':, 'uri':} taken from the 'rows'
    member of the JSON answer of <annotationServerUrl>/annotator/groups.
    The lookup is best-effort: an empty list is returned when the server
    gives no data, the answer is not a JSON object, or it has no 'rows'.
    """
    groups = []
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the fetch itself is non-failing, so a broken answer should not fail either
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        logging.error("getAnnotatorGroupsForUser: unexpected answer from %s"%groupsUrl)
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1058
1059
# access to the settings form requires the 'View management screens' permission
security.declareProtected('View management screens','changeDocumentViewerForm')
# settings form, rendered from the page template zpt/changeDocumentViewer
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1062
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Store the document viewer settings on this object.

    title           -- title of the viewer
    digilibBaseUrl  -- base URL of the digilib server; the scaler and viewer
                       URLs are derived from it. If None, the derived URLs
                       are set to None as well.
    thumbrows       -- number of thumbnail rows
    thumbcols       -- number of thumbnail columns
    authgroups      -- comma-separated group names; stored stripped and
                       lower-cased as a list
    availableLayers -- passed on to self.setAvailableLayers()
    RESPONSE        -- if given, redirected to 'manage_main' afterwards
    """
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # the default must not fail on None + str; leave the derived URLs unset
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # best effort: without the folder metadataService is simply not set
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1082
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before calling it
    boundTemplate = template.__of__(self)
    return boundTemplate()
1087
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects RESPONSE to 'manage_main' when RESPONSE is given.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: