Context Navigation

source: documentViewer/documentViewer.py @ 577:9251719154a3

Last change on this file since 577:9251719154a3 was 577:9251719154a3, checked in by casties, 12 years ago
toc with list of handwritten notes.
File size: 42.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to an XML string.

    The encoding argument is kept in the signature for callers of the
    former 4Suite-based implementation; it is not handed to ElementTree,
    so the output is whatever ET.tostring() produces by default.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata node.

    Dict nodes yield node['@text'] (None when the key is missing); any
    other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def browserCheck(self):
    """Check the browser's request to find out the browser type.

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict
    with the raw string ('ua'), boolean flags ('isIE', 'isN4', 'isMac',
    'isWin', 'isIEWin', 'isIEMac', 'staticHTML') and version strings
    ('versIE', 'versFirefox', 'versSafariChrome', 'versOpera'; empty when
    not detected).

    A missing header is treated as an empty user agent instead of raising.
    """
    bt = {}
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt['ua'] = ua
    # analyse an empty string when the client sent no User-Agent header
    uaStr = ua or ""
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in uaStr:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in uaStr

    # NOTE: str.find() returns -1 when nothing is found, so the slices
    # below then start at the last character; that quirk is kept on purpose.
    firstParen = uaStr[uaStr.find('('):]
    afterParen = uaStr[uaStr.find(')'):]

    # Safari or Chrome identification
    try:
        secondParen = afterParen[afterParen.find('('):]
        afterSecond = secondParen[secondParen.find(')'):]
        # the next two lookups are not used for the result but abort the
        # detection (IndexError) for strings that lack these parts
        firstParen.split("; ")[1]
        afterParen.split(" ")[2]
        versionToken = afterSecond.split(" ")[1]
        browserToken = afterSecond.split(" ")[2]
        if "Safari" in browserToken:
            bt['versSafariChrome'] = versionToken.split("/")[1]
    except IndexError:
        pass

    # IE identification
    try:
        ieToken = firstParen.split("; ")[1]
        if "MSIE" in ieToken:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification (computes its own token instead of relying on
    # a variable left over from the Safari block)
    try:
        ffToken = afterParen.split(" ")[2]
        if "Firefox" in ffToken:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in uaStr:
            operaTail = firstParen[firstParen.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in uaStr
    bt['isWin'] = 'Windows' in uaStr
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed.

    Trailing slashes are dropped before the path is split.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
110
111	##
112	## documentViewer class
113	##
114	class documentViewer(Folder):
115	"""document viewer"""
	# Folder subclass; everything from here to the first method definition
	# is a class-level attribute shared by all instances.
116	meta_type="Document viewer"
117
118	security=ClassSecurityInfo()
	# extends the management options inherited from Folder with one entry
	# labelled 'Configuration' whose action is 'changeDocumentViewerForm'
119	manage_options=Folder.manage_options+(
120	{'label':'Configuration','action':'changeDocumentViewerForm'},
121	)
122
	# class-level default; __init__ tries to replace it with the object
	# found under the attribute name 'metadata'
123	metadataService = None
124	"""MetaDataFolder instance"""
125
126
127	#
128	# templates and forms
129	#
130	# viewMode templates (index_html looks up 'viewer_<viewMode>' on self.template)
131	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
132	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
133	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
134	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
135	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
136	# available layer types per viewMode (annotator not default); None means no layers
137	builtinLayers = {'text': ['dict','search','gis'],
138	'xml': None, 'images': None, 'index': ['extended']}
	# NOTE: availableLayers starts out as the very same dict object as
	# builtinLayers (no copy is made here)
139	availableLayers = builtinLayers;
140	# layer templates, named layer_<mode>_<layer>
141	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
142	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
143	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
144	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
145	layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
146	layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
147	# toc (table of contents) templates
148	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
149	toc_text = PageTemplateFile('zpt/toc_text', globals())
150	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
151	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
152	toc_handwritten = PageTemplateFile('zpt/toc_handwritten', globals())
153	toc_none = PageTemplateFile('zpt/toc_none', globals())
154	# other templates and static files
155	common_template = PageTemplateFile('zpt/common_template', globals())
156	info_xml = PageTemplateFile('zpt/info_xml', globals())
157	docuviewer_css = ImageFile('css/docuviewer.css',globals())
158	# make docuviewer_css refreshable for development
159	docuviewer_css.index_html = refreshingImageFileIndexHtml
160	docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
161	# make docuviewer_ie_css refreshable for development
162	docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
163	jquery_js = ImageFile('js/jquery.js',globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """set up a new document viewer together with its template folder"""
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups arrives as one comma-delimited string; store it as a
    # list of stripped, lower-cased group names
    groups = []
    for groupName in authgroups.split(','):
        groups.append(groupName.strip().lower())
    self.authgroups = groups

    # the template folder is created up front so that template.something
    # can always be used
    tplFolder = Folder('template')
    self['template'] = tplFolder  # Zope-2.12 style item assignment

    # full text client; failure to create it is logged, not raised
    try:
        import MpdlXmlTextServer
        textClient = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        tplFolder['fulltextclient'] = textClient
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # zogilib instance; failure to create it is logged, not raised
    try:
        from Products.zogiLib.zogiLib import zogiLib
        scaler = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tplFolder['zogilib'] = scaler
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    # the MetaDataFolder instance is assumed to be reachable as 'metadata'
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    # the digilib URLs are only set when a base URL was given
    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
205
206
207	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (proxy for template.fulltextclient.getTextPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
211
def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo (proxy for template.fulltextclient.getSearchResults)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
215
def getResultsPage(self, **args):
    """returns one page of the search results (proxy for template.fulltextclient.getResultsPage)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
219
def getTextInfo(self, **args):
    """returns document info from the text server (proxy for template.fulltextclient.getTextInfo)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
223
def getToc(self, **args):
    """loads table of contents and stores XML in docinfo (proxy for template.fulltextclient.getToc)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
227
def getTocPage(self, **args):
    """returns one page of the table of contents (proxy for template.fulltextclient.getTocPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
231
def getRepositoryType(self, **args):
    """get repository type (proxy for template.fulltextclient.getRepositoryType)"""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
235
def getTextDownloadUrl(self, **args):
    """get text download URL (proxy for template.fulltextclient.getTextDownloadUrl)"""
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
239
def getPlacesOnPage(self, **args):
    """get list of gis places on one page (proxy for template.fulltextclient.getPlacesOnPage)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
243
244	# Thumb list for CoolIris Plugin
245	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
246	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list through the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images', 'text' or 'auto' (default); 'auto' becomes
        'text' when docinfo has a 'textURL' key or a non-empty
        'textURLPath', otherwise 'images'
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current
    @return: result of calling the template with docinfo, pageinfo, viewMode
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute does not exist on instances that
    # were created without digilibBaseUrl (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # text URL set -> show text, otherwise fall back to images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
276
277
278	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: output of the template 'viewer_<viewMode>' or an error string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default toc: none for mode=filepath, thumbs for everything else
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # document information (cached) and user settings (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text with dict layer
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == "auto":
        # text if the document has a textURLPath, images otherwise
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # layer dict is the default
                viewLayer = "dict"

    # remember viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # page information (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    # TODO: error page?
    logging.error("No template for viewMode=%s!"%viewMode)
    return "No template for viewMode=%s!"%viewMode
343
def getAvailableLayers(self):
    """returns dict with list of available layers per viewMode"""
    layers = self.availableLayers
    return layers
347
def getBrowser(self):
    """returns the browser information dict produced by browserCheck"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
353
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib"""
    return self.template.zogilib.getDLBaseUrl()
358
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    @param fn: image path; taken from docinfo['imagePath'] when None and
        docinfo is given (only used when docinfo has no 'imageURL')
    @param pn: page number, appended as pn= when truthy
    @param dw: requested width, appended as dw=
    @param dh: requested height, appended as dh=
    @param docinfo: optional docinfo dict; its 'imageURL' (which already
        contains the fn parameter) is used as base when present
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        url = self.digilibScalerUrl
        # start the query string properly: the Scaler URL is stored
        # without a trailing '?', so "fn=" cannot simply be appended
        if '?' not in url:
            sep = '?'
        elif url.endswith('?') or url.endswith('&'):
            sep = ''
        else:
            sep = '&'

        url = "%s%sfn=%s"%(url, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
377
def getDocumentViewerURL(self):
    """returns the URL of this instance"""
    viewerUrl = self.absolute_url()
    return viewerUrl
381
def getStyle(self, idx, selected, style=""):
    """returns style with 'sel' appended when idx equals selected, else style unchanged."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
389
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set (value stored as str)
        or, with val=None, to remove
    @param val: value for param
    @param params: dict of further parameters; None values remove the key
    @param duplicates: how list values (from duplicate keys) are flattened:
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' when there is none); any other truthy
        value leaves lists untouched
    """
    # work on a copy of the request parameters
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for k in list(newParams.keys()):
            v = newParams[k]
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty
                # entries collapses to '' instead of raising IndexError
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
428
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params"""
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote each value (leaving '/' unescaped) and collect key=value pairs
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    # default base is the URL of this viewer instance
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
439
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val"""
    # NOTE(review): the separator passed here is a plain '&', i.e. the same
    # as getLink's default -- confirm whether an escaped form was intended.
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
443
444
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
    newLayerString is parsed as JSON and accepted when the result contains
    both 'text' and 'images'; otherwise an error is logged and the layers
    are autodetected from builtinLayers plus the names in self.template.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect below
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; the per-mode lists are copied too so that
    # appending below never modifies the class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # split into prefix, mode and layer name (the layer name keeps
        # any further underscores)
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            continue

        (x, m, l) = parts
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
478
def getAvailableLayersJson(self):
    """returns available layers as JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
482
483
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of calling the 'info_xml' template with docinfo
    """
    # getattr with default: the attribute does not exist on instances that
    # were created without digilibBaseUrl (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
492
def getAuthenticatedUser(self, anon=None):
    """returns the authenticated user object or anon (default None) for no user or Zope's anonymous user"""
    user = getSecurityManager().getUser()
    # a missing user and the user named "Anonymous User" both count as
    # not authenticated
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
500
def isAccessible(self, docinfo):
    """returns if access to the resource is granted"""
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    # no access type or one of the configured groups: logged-in users only
    needsLogin = access is None or access in self.authgroups
    if not needsLogin:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
517
518
def getUserinfo(self):
    """returns the userinfo dict kept in the session, creating an empty one on first use"""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (no check whether it is still current)
        return session['userinfo']

    # nothing cached yet: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
532
533	def getDocinfo(self, mode, url, tocMode=None):
534	"""returns docinfo depending on mode"""
	# Returns the docinfo dict for the document behind url. A docinfo stored
	# in the session is reused when its 'mode' and 'url' match; otherwise a
	# new one is built from index.meta (via self.metadataService) and
	# digilib and stored in the session.
	# mode is one of 'texttool', 'imagepath', 'filepath'. Raises IOError in
	# texttool mode when no index.meta DOM is found and ValueError for any
	# other mode value. tocMode is accepted but not read in this method.
535	logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
536	# look for cached docinfo in session
537	if self.REQUEST.SESSION.has_key('docinfo'):
538	docinfo = self.REQUEST.SESSION['docinfo']
539	# check if its still current
540	if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
541	logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
542	return docinfo
543
544	# new docinfo
545	docinfo = {'mode': mode, 'url': url}
546	# add self url
547	docinfo['viewerUrl'] = self.getDocumentViewerURL()
	# NOTE(review): the three digilib attributes are only assigned in
	# __init__ when a digilibBaseUrl was passed -- confirm they always exist here
548	docinfo['digilibBaseUrl'] = self.digilibBaseUrl
549	docinfo['digilibScalerUrl'] = self.digilibScalerUrl
550	docinfo['digilibViewerUrl'] = self.digilibViewerUrl
551	# get index.meta DOM
552	docUrl = None
553	metaDom = None
554	if mode=="texttool":
555	# url points to document dir or index.meta
556	metaDom = self.metadataService.getDomFromPathOrUrl(url)
557	docUrl = url.replace('/index.meta', '')
558	if metaDom is None:
559	raise IOError("Unable to find index.meta for mode=texttool!")
560
561	elif mode=="imagepath":
562	# url points to folder with images, index.meta optional
563	# assume index.meta in parent dir
564	docUrl = getParentPath(url)
565	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
566	docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
567
568	elif mode=="filepath":
569	# url points to image file, index.meta optional
570	docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
571	docinfo['numPages'] = 1
572	# assume index.meta is two path segments up
573	docUrl = getParentPath(url, 2)
574	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
575
576	else:
577	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
578	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
579
580	docinfo['documentUrl'] = docUrl
581	# process index.meta contents (only when the root element is 'resource')
582	if metaDom is not None and metaDom.tag == 'resource':
583	# document directory name and path
584	resource = self.metadataService.getResourceData(dom=metaDom)
585	if resource:
586	docinfo = self.getDocinfoFromResource(docinfo, resource)
587
588	# texttool info
589	texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
590	if texttool:
591	docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
592	# document info from full text server
593	if docinfo.get('textURLPath', None):
594	docinfo = self.getTextInfo(mode=None, docinfo=docinfo)
595	# include list of pages TODO: do we need this always?
596	docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)
597
598	# bib info
599	bib = self.metadataService.getBibData(dom=metaDom)
600	if bib:
601	# save extended version as 'bibx' TODO: ugly
602	bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
603	if len(bibx) == 1:
604	# unwrap list if possible
605	bibx = bibx[0]
606
607	docinfo['bibx'] = bibx
608	docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
609	else:
610	# no bib - try info.xml
611	docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
612
613	# auth info
614	access = self.metadataService.getAccessData(dom=metaDom)
615	if access:
616	docinfo = self.getDocinfoFromAccess(docinfo, access)
617
618	# attribution info
619	attribution = self.metadataService.getAttributionData(dom=metaDom)
620	if attribution:
621	logging.debug("getDocinfo: attribution=%s"%repr(attribution))
622	docinfo['attribution'] = attribution
623
624	# copyright info (the local name shadows the builtin 'copyright')
625	copyright = self.metadataService.getCopyrightData(dom=metaDom)
626	if copyright:
627	logging.debug("getDocinfo: copyright=%s"%repr(copyright))
628	docinfo['copyright'] = copyright
629
630	# DRI (permanent ID)
631	dri = self.metadataService.getDRI(dom=metaDom, type='escidoc')
632	if dri:
633	logging.debug("getDRI: dri=%s"%repr(dri))
634	docinfo['DRI'] = dri
635
636	# image path
637	if mode != 'texttool':
638	# override image path from texttool with url parameter TODO: how about mode=auto?
639	docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
640
641	# check numPages: when still unknown (missing or 0) ask digilib
642	if docinfo.get('numPages', 0) == 0:
643	# number of images from digilib
644	if docinfo.get('imagePath', None):
645	imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
646	docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
647	docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
648	else:
649	# imagePath still missing? try "./pageimg"
650	imgPath = os.path.join(docUrl, 'pageimg')
651	docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
652	if docinfo.get('numPages', 0) > 0:
653	# there are pages
654	docinfo['imagePath'] = imgPath
655	docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])
656
657	# check numPages again: fall back to the number of text pages
658	if docinfo.get('numPages', 0) == 0:
659	if docinfo.get('numTextPages', 0) > 0:
660	# replace with numTextPages (text-only?)
661	docinfo['numPages'] = docinfo['numTextPages']
662
663	logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
664	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
665	# store in session
666	self.REQUEST.SESSION['docinfo'] = docinfo
667	return docinfo
668
669
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute,
    with the name appended when missing). Without an archive-path the
    document URL stored in docinfo is used, unless it is an http(s) URL.
    A leading '/mpiwg/online' is removed from the resulting path.

    @return: the updated docinfo dict
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the older spelling 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
694
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    @param docinfo: docinfo dict; its 'documentPath' is used to resolve
        relative image, text and presentation paths
    @param texttool: texttool metadata dict, or a list of which only the
        first entry is used
    @return: the updated docinfo dict
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, i.e. not a URL: relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # only dict nodes carry attributes; getMDText also accepts
            # plain values, which have none
            textUrlAtts = None
            if isinstance(tun, dict):
                textUrlAtts = tun.get('@attr')

            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
757
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """reads contents of bib element into docinfo (bibx is accepted but not used here)"""
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # keep all raw bib fields and the bib type
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # copy the DC-mapped fields for convenience, '' when missing
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
771
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the 'type' attribute of acc or, for the
    types 'group' and 'institution', to the lower-cased acc['name'].
    Access data that does not have the expected structure is ignored.

    @return: the (possibly updated) docinfo dict
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: missing keys or unexpected value types leave
        # docinfo untouched, but are no longer swallowed silently
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
789
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp for the directory 'path' and stores the
    # content of the <size> element as docinfo['numPages'] (0 when empty).
    # docinfo is returned unchanged when no data could be fetched.
    # (documented with comments only; the method has no docstring)
    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    xmlText = getHttpData(infoUrl)
    if xmlText:
        size = getText(ET.fromstring(xmlText).find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        docinfo['numPages'] = int(size) if size else 0
    else:
        logging.error("Unable to get dir-info from %s"%(infoUrl))

    # TODO: produce and keep list of image names and numbers
    return docinfo
808
809
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools"""
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    # real URLs are fetched directly, online paths through digilib's Texter
    if url.startswith("http://"):
        metaUrl = url
    else:
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    # copy author, title and date from the info.xml document
    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
838
839
840	def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
841	"""returns pageinfo with the given parameters"""
	# Builds a new pageinfo dict for one request from the arguments, docinfo
	# and parameters read from self.REQUEST. rows and cols default to
	# self.thumbrows and self.thumbcols. docinfo may be updated
	# ('numPages'); userinfo is accepted but not read in this method.
842	logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
843	pageinfo = {}
844	pageinfo['viewMode'] = viewMode
845	# split viewLayer if necessary (comma-separated string -> list)
846	if isinstance(viewLayer,basestring):
847	viewLayer = viewLayer.split(',')
848
849	if isinstance(viewLayer, list):
850	logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
851	# save (unique) list in viewLayers
852	seen = set()
	# keeps the first occurrence of every non-empty entry, in order;
	# seen.add() returns None, so 'not seen.add(l)' is always true and only records l
853	viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
854	pageinfo['viewLayers'] = viewLayers
855	# stringify viewLayer
856	viewLayer = ','.join(viewLayers)
857	else:
858	# create list
859	pageinfo['viewLayers'] = [viewLayer]
860
861	pageinfo['viewLayer'] = viewLayer
862	pageinfo['tocMode'] = tocMode
863
864	# TODO: unify current and pn!
865	current = getInt(current)
866	pageinfo['current'] = current
867	pageinfo['pn'] = current
868	rows = int(rows or self.thumbrows)
869	pageinfo['rows'] = rows
870	cols = int(cols or self.thumbcols)
871	pageinfo['cols'] = cols
872	grpsize = cols * rows
873	pageinfo['groupsize'] = grpsize
874	# if start is empty use the first page of the group that contains current
875	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
876	# int(current / grpsize) * grpsize +1))
877	pageinfo['start'] = start
878	# get number of pages
879	np = int(docinfo.get('numPages', 0))
880	if np == 0:
881	# try numTextPages
	# NOTE(review): unlike numPages above this value is not passed through int() -- confirm it is always a number
882	np = docinfo.get('numTextPages', 0)
883	if np != 0:
884	docinfo['numPages'] = np
885
886	# cache table of contents
887	pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
	# number of thumbnail groups: np / grpsize, plus one for a partial group
888	pageinfo['numgroups'] = int(np / grpsize)
889	if np % grpsize > 0:
890	pageinfo['numgroups'] += 1
891
892	pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
893	oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
894	# add zeroth page for two columns
895	pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
896	pageinfo['pageZero'] = pageZero
897	pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
898	# more page parameters
899	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
900	if docinfo.get('pageNumbers'):
901	# get original page numbers (entries are read with the keys 'no' and 'non')
902	pageNumber = docinfo['pageNumbers'].get(current, None)
903	if pageNumber is not None:
904	pageinfo['pageNumberOrig'] = pageNumber['no']
905	pageinfo['pageNumberOrigNorm'] = pageNumber['non']
906
907	# cache search results
908	pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
909	query = self.REQUEST.get('query',None)
910	pageinfo['query'] = query
911	if query:
912	queryType = self.REQUEST.get('queryType', 'fulltextMorph')
913	pageinfo['queryType'] = queryType
914	pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
	# called with the pageinfo built so far; the return value is not used here
915	self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
916
917	# highlighting
918	highlightQuery = self.REQUEST.get('highlightQuery', None)
919	if highlightQuery:
920	pageinfo['highlightQuery'] = highlightQuery
921	pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
922	pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
923
924	return pageinfo
925
926
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    start       -- index of the first page on this screen
    rows, cols  -- layout of the thumbnail grid (rows*cols pages per screen)
    pageFlowLtr -- if False every row is filled right-to-left
    pageZero    -- if True the batches start at index 0 instead of 1 and a
                   screen starting at 1 gets an (empty) zeroth slot in front
    minIdx      -- smallest valid page index
    maxIdx      -- largest valid page index; 0 means "unknown" and is
                   replaced by start + rows*cols

    The returned dict has the keys:
    batches   -- list of {'start':, 'end':} for every screenfull up to maxIdx
    pages     -- list of rows, each a list of {'idx': n}; slots outside
                 minIdx..maxIdx have 'idx' None
    prevStart -- start of the previous screen or None
    nextStart -- start of the next screen or None
    first     -- minIdx
    last      -- maxIdx
    """
    groupSize = rows * cols
    if maxIdx == 0:
        # no upper limit given: assume there is one more page after this screen
        maxIdx = start + groupSize

    # start and end points of all screens
    offset = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(groupSize)))
    allBatches = [
        {'start': n * groupSize + offset,
         'end': min((n + 1) * groupSize + offset - 1, maxIdx)}
        for n in range(numBatches)
    ]

    # with a zeroth page the very first screen begins at index 0
    firstIdx = 0 if (pageZero and start == 1) else start

    grid = []
    for rowNo in range(rows):
        cells = []
        for colNo in range(cols):
            # plain arithmetic (no range over start) so that a non-integer
            # start is handled like a running counter would handle it
            pageIdx = firstIdx + rowNo * cols + colNo
            if minIdx <= pageIdx <= maxIdx:
                cells.append({'idx': pageIdx})
            else:
                cells.append({'idx': None})

        if not pageFlowLtr:
            # right-to-left flow: lowest index goes to the rightmost column
            cells.reverse()

        grid.append(cells)

    prevStart = max(start - groupSize, 1) if start > 1 else None
    nextStart = start + groupSize if start + groupSize <= maxIdx else None

    return {
        'batches': allBatches,
        'pages': grid,
        'prevStart': prevStart,
        'nextStart': nextStart,
        'first': minIdx,
        'last': maxIdx,
    }
986
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start    -- index of the first element of this batch
    size     -- number of elements per batch
    end      -- index of the last element (inclusive); 0 means "unknown"
                and is replaced by start + size
    data     -- optional mapping the elements are read from with .get()
    fullData -- if True data is indexed by the absolute element index,
                otherwise by the 0-based position inside this batch

    The returned dict has the keys:
    batches   -- list of {'start':, 'end':} for every batch up to end
    this      -- list of the elements in this batch
    prevStart -- start of the previous batch or None
    nextStart -- start of the next batch or None
    first     -- start
    last      -- end
    """
    batch = {}
    if end == 0:
        end = start + size

    numBatches = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [
        {'start': n * size + 1, 'end': min((n + 1) * size, end)}
        for n in range(numBatches)
    ]

    # list of elements in this batch (end is inclusive)
    this = []
    pos = 0
    for i in range(start, min(start + size, end + 1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(pos, None)
                pos += 1

        else:
            # NOTE(review): without data the element is i+1, not i --
            # kept as is, confirm against the templates using 'this'
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next batch exists as soon as its first element
    # (start + size) is not beyond end; a strict '<' would hide a final
    # batch that holds exactly one element
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    batch['first'] = start
    batch['last'] = end
    return batch
1032
1033
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:} on the annotation server for the user

    user                -- user name sent as the 'user' query parameter
    annotationServerUrl -- base URL of the annotation server

    Returns a list of dicts with the keys 'id', 'name' and 'uri' taken from
    the 'rows' of the JSON answer. The list is empty if the server returns
    no data, no usable JSON or no 'rows'.
    """
    groups = []
    # percent-encode the user name so that spaces, '&', '#' or non-ASCII
    # characters cannot break or extend the query string
    if isinstance(user, unicode):
        user = user.encode('utf-8')

    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl, urllib.quote(str(user), safe=''))
    # NOTE(review): noExceptions=True presumably makes getHttpData return an
    # empty value instead of raising -- confirm in SrvTxtUtils
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # the request itself is best-effort, so is parsing the answer
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1048
1049
# management form for the viewer configuration, only for users with the
# 'View management screens' permission
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title           -- title of the viewer
    digilibBaseUrl  -- base URL of digilib; the scaler and viewer URLs are
                       derived from it. If None the derived URLs are set to
                       None as well.
    thumbrows       -- default number of thumbnail rows
    thumbcols       -- default number of thumbnail columns
    authgroups      -- comma separated list of group names, stored stripped
                       and lower-cased
    availableLayers -- passed on to setAvailableLayers
    RESPONSE        -- if given, redirects to 'manage_main' afterwards
    """
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    if digilibBaseUrl is None:
        # the default must not fail on None + str
        self.digilibScalerUrl = None
        self.digilibViewerUrl = None
    else:
        self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
        self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is logged but does not abort the change
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1072
def manage_AddDocumentViewerForm(self):
    """add the viewer form: renders the 'zpt/addDocumentViewer' template"""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to self with __of__ and render it
    return formTemplate.__of__(self)()
1077
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer: creates a documentViewer and stores it under id

    If RESPONSE is given the request is redirected to 'manage_main'.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: