Context Navigation

source: documentViewer/documentViewer.py @ 546:2928037f9a75

Last change on this file since 546:2928037f9a75 was 546:2928037f9a75, checked in by casties, 12 years ago
ASSIGNED - # 249: Annotations shared in groups https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/ticket/249
File size: 39.6 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the XML text.

    The encoding argument is accepted for compatibility with callers but is
    not passed on; ElementTree's default serialization is used.
    """
    # (an earlier 4Suite/Domlette based implementation honoured the encoding)
    return ET.tostring(node)
35
def browserCheck(self):
    """Inspect the request's User-Agent header and guess the browser type.

    Returns a dict with the raw header ('ua'), the boolean flags 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML', and the
    version strings 'versIE', 'versFirefox', 'versSafariChrome' and
    'versOpera' (empty string when the browser was not recognised).
    A missing User-Agent header yields the defaults instead of an error.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # requests without User-Agent header deliver None; parse an empty string then
    ua = rawUa or ""
    bt = {
        'ua': rawUa,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # NOTE: find() returns -1 for a missing character, so these slices then
    # start at the last character; this quirk of the heuristic is kept as is.
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        # as before, only consider UAs whose platform part contains "; "
        nav.split("; ")[1]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # UA does not have the expected shape
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification (computes its own token instead of relying on
    # a variable left over from the Safari block)
    try:
        ff = nav1.split(" ")[2]
        if "Firefox" in ff:
            version = ff.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            bt['versOpera'] = nav[nav.find(')'):].split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    Trailing slashes are ignored. A cnt of zero (or less) returns the path
    itself (without trailing slash); a cnt larger than the number of
    segments returns the empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be an empty slice and wrongly produce ''
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
103
104	##
105	## documentViewer class
106	##
107	class documentViewer(Folder):
108	"""document viewer"""
meta_type = "Document viewer"

security = ClassSecurityInfo()
manage_options = Folder.manage_options + (
    {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
)

# MetaDataFolder instance
metadataService = None

#
# templates and forms
#
# viewMode templates
viewer_text = PageTemplateFile('zpt/viewer_text', globals())
viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
viewer_images = PageTemplateFile('zpt/viewer_images', globals())
viewer_index = PageTemplateFile('zpt/viewer_index', globals())
viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())

# available layer types per viewMode (annotator is not a default layer)
builtinLayers = {
    'text': ['dict', 'search', 'gis'],
    'xml': None,
    'images': None,
    'index': None,
}
# NOTE: same dict object as builtinLayers until setAvailableLayers replaces it
availableLayers = builtinLayers

# layer templates
layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())

# toc templates
toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
toc_text = PageTemplateFile('zpt/toc_text', globals())
toc_figures = PageTemplateFile('zpt/toc_figures', globals())
toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
toc_none = PageTemplateFile('zpt/toc_none', globals())

# other templates
common_template = PageTemplateFile('zpt/common_template', globals())
info_xml = PageTemplateFile('zpt/info_xml', globals())

# style sheets; index_html is replaced so they can be refreshed during development
docuviewer_css = ImageFile('css/docuviewer.css', globals())
docuviewer_css.index_html = refreshingImageFileIndexHtml
docuviewer_ie_css = ImageFile('css/docuviewer_ie.css', globals())
docuviewer_ie_css.index_html = refreshingImageFileIndexHtml

jquery_js = ImageFile('js/jquery.js', globals())
154
155
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the viewer and its 'template' sub-folder.

    Tries to create a 'fulltextclient' (MpdlXmlTextServer) and a 'zogilib'
    (zogiLib) object inside the template folder and to look up the
    MetaDataFolder called 'metadata'; each failure is only logged.
    authgroups is a comma separated list of authorized group names.
    digilibBaseUrl is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # normalised list of authorized groups
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    templates = Folder('template')
    self['template'] = templates # Zope-2.12 style (old style: _setObject)

    try:
        import MpdlXmlTextServer
        templates['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        templates['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy to template.fulltextclient: returns full text content of page."""
    textClient = self.template.fulltextclient
    return textClient.getTextPage(**args)
199
def getSearchResults(self, **args):
    """Proxy to template.fulltextclient: loads list of search results and stores XML in docinfo."""
    textClient = self.template.fulltextclient
    return textClient.getSearchResults(**args)
203
def getResultsPage(self, **args):
    """Proxy to template.fulltextclient: returns one page of the search results."""
    textClient = self.template.fulltextclient
    return textClient.getResultsPage(**args)
207
def getTextInfo(self, **args):
    """Proxy to template.fulltextclient: returns document info from the text server."""
    textClient = self.template.fulltextclient
    return textClient.getTextInfo(**args)
211
def getToc(self, **args):
    """Proxy to template.fulltextclient: loads table of contents and stores XML in docinfo."""
    textClient = self.template.fulltextclient
    return textClient.getToc(**args)
215
def getTocPage(self, **args):
    """Proxy to template.fulltextclient: returns one page of the table of contents."""
    textClient = self.template.fulltextclient
    return textClient.getTocPage(**args)
219
def getPlacesOnPage(self, **args):
    """Proxy to template.fulltextclient: get list of gis places on one page."""
    textClient = self.template.fulltextclient
    return textClient.getPlacesOnPage(**args)
223
224	#WTF?
225	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
226	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto' (default); 'auto' selects 'text'
        when docinfo has a textURL or textURLPath, otherwise 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl may never have been set (__init__ only stores non-None
    # values), so read it defensively like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
258
259
260	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    Returns the rendered viewer_$viewMode template or an error string.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text with dict layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto: text if there is a text else images
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template /template/viewer_$viewMode and run it
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
318
def getAvailableLayers(self):
    """Return the availableLayers dict (list of available layers per viewMode)."""
    layers = self.availableLayers
    return layers
322
def getBrowser(self):
    """Return the browser information dict produced by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
328
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
333
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with fn, optional pn, and dw/dh parameters.

    docinfo['imageURL'] is used as the base when present (it is expected to
    contain the fn parameter already); otherwise the URL is built from
    self.digilibBaseUrl and fn, with fn defaulting to docinfo['imagePath'].
    """
    haveDocinfo = docinfo is not None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)
    else:
        url = None

    if url is None:
        # no prepared image URL: build Scaler URL with file name
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    pieces = [url]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
352
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
356
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
364
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request form parameters and then applies
    param=val (val is stringified) and the entries of dict params.
    A value of None removes the key.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry (or '' when all entries are empty); any other true value
    leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # collapses to '' instead of raising IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
403
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the viewer with parameter param set to val and/or the values of dict params.

    The query string is built from getParams(); values are utf8ified and
    quoted (without escaping '/') and joined with paramSep.
    baseUrl defaults to the URL of this viewer.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (key, value) in urlParams.items():
        # quote value, keeping '/' readable
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, paramSep.join(pairs))
414
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link like getLink() but with parameters separated by '&amp;'.

    Intended for places where the URL is inserted into markup unescaped.
    NOTE(review): the separator was a plain '&' here, which made this method
    identical to getLink(); restored to the HTML-escaped form - confirm
    against the templates that call it.
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
418
419
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the available layers.

    newLayerString is parsed as JSON and accepted when the result contains
    the keys 'text' and 'images'; otherwise an error is logged and the
    layers are autodetected.
    Autodetection starts with a copy of builtinLayers and adds a layer l to
    mode m for every object in the template folder named layer_{m}_{l}.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect below
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; the lists are copied as well so that adding
    # layers never modifies the shared class-level builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_', 2)
        if len(parts) != 3:
            # no layer name part
            continue

        (x, m, l) = parts
        if not layers.get(m):
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
453
def getAvailableLayersJson(self):
    """Return the availableLayers dict serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
457
458
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered by the info_xml template).

    url and mode are passed on to getDocinfo().
    """
    # digilibBaseUrl may never have been set; read it defensively like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
467
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon for no user or Zope's "Anonymous User"."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
475
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    accessType 'free' is always accessible. A missing accessType or one
    listed in self.authgroups requires an authenticated user. Any other
    accessType is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None
492
493
def getUserinfo(self):
    """Return the userinfo dict cached in the session, creating and storing an empty one if needed."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if session.has_key('userinfo'):
        # cached userinfo (TODO: check if its still current?)
        return session['userinfo']

    # nothing cached yet: store a new one in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
507
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url, depending on mode.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match. Otherwise a new docinfo is assembled from the index.meta
    (resource, texttool, bib, access, attribution, copyright, DRI data via
    self.metadataService), the text server and digilib, and stored in the
    session.

    mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to an image file).
    tocMode: passed as mode to getTextInfo().
    Raises IOError when no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        # NOTE(review): tocMode is not part of this check - confirm that a
        # cached docinfo is valid for every tocMode
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # note the key spelling: 'documentUrl'
    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx'
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc-test')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    if docinfo.get('imagePath', None):
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
    else:
        # imagePath still missing? try "./pageimg"
        imgPath = os.path.join(docUrl, 'pageimg')
        docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
        if docinfo.get('numPages', 0) > 0:
            # there are pages
            docinfo['imagePath'] = imgPath
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
633
634
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the name if necessary). Without archive-path the document
    URL stored in docinfo is used unless it is an http: URL.
    A leading '/mpiwg/online' is removed from the path.
    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # (the old spelling 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
659
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' when the
    corresponding texttool entries exist (paths are joined with
    docinfo['documentPath']), and always sets 'pageFlow' (default 'ltr'),
    'oddPage' (default 'left') and 'titlePage' (default 1).
    Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL -> path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (default 1)
    docinfo['titlePage'] = texttool.get('title-scan-no', 1)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
702
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the DC
    mapped 'creator', 'title' and 'date' (empty string when missing).
    bibx is accepted but not used here. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,'')

    return docinfo
716
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'];
    for the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead. Incomplete or unexpected access data leaves docinfo
    unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: missing or malformed access data is not an error,
        # but leave a trace instead of swallowing it silently
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
734
def getDocinfoFromDigilib(self, docinfo, path):
    # Reads the number of images in directory path from digilib's
    # dirInfo-xml.jsp and stores it as docinfo['numPages'] (0 when the
    # response has no size). Returns docinfo; when no data could be
    # fetched, docinfo is returned unchanged.
    # NOTE(review): deliberately documented with comments only - in Zope 2 a
    # docstring presumably makes a method publishable via URL; confirm
    # before adding one.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
753
754
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Read DC-like bibliographical information from the presentation info.xml.

    The file is located through docinfo['presentationUrl'] (from the
    presentation entry in texttools); a value not starting with http:// is
    fetched through digilib's Texter servlet. Sets 'creator', 'title' and
    'date' in docinfo. Returns docinfo (unchanged when there is no URL or
    no data could be read).
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
783
784
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    pageinfo holds the view settings (viewMode, viewLayer(s), tocMode), the
    thumbnail grid (rows, cols, groupsize, start, numgroups, pageBatch), the
    current page number ('current' and 'pn') and search/highlight parameters
    read from the request.

    rows and cols default to self.thumbrows and self.thumbcols.
    docinfo must be a dict; its 'numPages' is set from 'numTextPages' when it
    was 0. When the request has a 'query', getSearchResults() is called.
    userinfo is accepted but not used here.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        seen = set()
        # seen.add() returns None, so "not seen.add(l)" is always true and
        # only records l; order of first occurrence is kept
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
870
871
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with the page information for one screenful of thumbnails.

    Keys: 'batches' (start and end of all batches), 'pages' (rows x cols
    grid of {'idx': n}, with idx None outside minIdx..maxIdx), 'prevStart'
    and 'nextStart' (start of the neighbouring batches or None).
    pageZero adds a zeroth page so batches start at 0; rows are reversed
    when pageFlowLtr is false. maxIdx 0 means start + rows*cols.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    ofs = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    batches = [{'start': n * grpsize + ofs,
                'end': min((n + 1) * grpsize + ofs - 1, maxIdx)}
               for n in range(numBatches)]

    # with a zeroth page the first batch begins at index 0
    first = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        rowStart = first + r * cols
        row = []
        for idx in range(rowStart, rowStart + cols):
            if minIdx <= idx <= maxIdx:
                row.append({'idx': idx})
            else:
                row.append({'idx': None})

        if not pageFlowLtr:
            # right-to-left: mirror the row
            row.reverse()

        pages.append(row)

    prevStart = max(start - grpsize, 1) if start > 1 else None
    nextStart = start + grpsize if start + grpsize <= maxIdx else None
    return {'batches': batches, 'pages': pages, 'prevStart': prevStart, 'nextStart': nextStart}
929
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with the information for one screenful of data.

    Keys: 'batches' (start and end of all batches up to end), 'this' (the
    elements of the batch beginning at start), 'prevStart' and 'nextStart'
    (start of the neighbouring batches or None).

    end is the last valid index (0 means start + size). With data, elements
    are looked up by absolute index (fullData) or by position within the
    batch (not fullData); missing entries are None.
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start+size, end+1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(j, None)
                j += 1

        else:
            # NOTE(review): without data the entries are index+1 - confirm
            # this is what callers expect
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next batch exists as soon as its first index
    # does not exceed end (same condition as in getPageBatch)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
974
975
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups {id:, name:, uri:} on the annotation server for the user.

    The groups are read from the 'rows' of the JSON answer of
    <annotationServerUrl>/annotator/groups?user=<user>; an empty list is
    returned when there is no answer or no 'rows'.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    return [{'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)} for r in rows]
990
991
992	security.declareProtected('View management screens','changeDocumentViewerForm')
993	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
994
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbrows, thumbcols and the normalised
    list of authgroups (comma separated), looks up the MetaDataFolder called
    'metadata' again and sets the available layers from the availableLayers
    string. Redirects to manage_main when RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1012
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
1017
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: