Context Navigation

source: documentViewer/documentViewer.py @ 558:6ab436383fca

Last change on this file since 558:6ab436383fca was 558:6ab436383fca, checked in by casties, 12 years ago
first step to layers for index view.
File size: 40.1 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element `node` serialized as XML.

    NOTE(review): `encoding` is accepted but not handed to ElementTree, so
    the serializer's default encoding applies. It was only used by the
    former 4Suite based implementation.
    """
    return ET.tostring(node)
35
def browserCheck(self):
    """Check the browser's request to find out the browser type.

    Reads the ``HTTP_USER_AGENT`` header from ``self.REQUEST`` and returns a
    dict with the keys:

    - 'ua': the raw header value (None if the header is missing)
    - 'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac': booleans
    - 'versIE', 'versFirefox', 'versSafariChrome', 'versOpera': version
      strings, '' when not detected
    - 'staticHTML': always False

    A missing header no longer raises; every detection simply fails.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # work on a string so that a missing header does not break the tests below
    ua = rawUa or ""
    bt = {
        'ua': rawUa,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Text from the first '(' on and from the first ')' on.
    # str.find() returns -1 when nothing is found; the slice then yields
    # only the last character, which makes the token lookups below fail
    # with IndexError (handled as "not detected").
    fromParen = ua[ua.find('('):]
    afterParen = ua[ua.find(')'):]

    # Safari or Chrome identification: look at what follows the second
    # parenthesised group, e.g. ") Chrome/30.0 Safari/537.36"
    try:
        second = afterParen[afterParen.find('('):]
        tokens = second[second.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry, e.g. "MSIE 8.0"
    try:
        entry = fromParen.split("; ")[1]
        if "MSIE" in entry:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'.
    # The token is computed here explicitly instead of relying on a
    # variable left over from the Safari/Chrome detection.
    try:
        token = afterParen.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "10.")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            bt['versOpera'] = fromParen[fromParen.find(')'):].split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """Return `path` with its last `cnt` '/'-separated segments removed.

    A trailing '/' on `path` is ignored.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
103
104	##
105	## documentViewer class
106	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a 'Configuration' tab that opens changeDocumentViewerForm
    manage_options = Folder.manage_options + (
        {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())

    # available layer types per viewMode (annotator not default)
    builtinLayers = {
        'text': ['dict','search','gis'],
        'xml': None,
        'images': None,
        'index': ['extended'],
    }
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers

    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())

    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())

    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    # stylesheets; index_html is replaced so they are refreshable for development
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml

    jquery_js = ImageFile('js/jquery.js',globals())
155
156
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialize the document viewer.

    Stores the basic settings, creates the 'template' sub-folder and tries
    to populate it with a 'fulltextclient' (MpdlXmlTextServer) and a
    'zogilib' (zogiLib) object. Failures to create either helper or to find
    the 'metadata' object are logged and otherwise ignored.

    authgroups is a comma-separated string of authorized groups; it is
    stored as a list of stripped, lower-cased names.
    digilibBaseUrl is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
194
195
196	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page.

    Proxy: all keyword arguments are passed unchanged to ``getTextPage``
    of the ``fulltextclient`` object in the template folder.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
200
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo.

    Proxy: all keyword arguments are passed unchanged to
    ``getSearchResults`` of the ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
204
def getResultsPage(self, **args):
    """Return one page of the search results.

    Proxy: all keyword arguments are passed unchanged to
    ``getResultsPage`` of the ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
208
def getTextInfo(self, **args):
    """Return document info from the text server.

    Proxy: all keyword arguments are passed unchanged to ``getTextInfo``
    of the ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
212
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo.

    Proxy: all keyword arguments are passed unchanged to ``getToc`` of the
    ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
216
def getTocPage(self, **args):
    """Return one page of the table of contents.

    Proxy: all keyword arguments are passed unchanged to ``getTocPage`` of
    the ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
220
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page.

    Proxy: all keyword arguments are passed unchanged to
    ``getPlacesOnPage`` of the ``fulltextclient`` object.
    """
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
224
225	# Thumb list for CoolIris Plugin
thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbnail list (template 'thumbs_main_rss').

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images', 'text' or 'auto'. With 'auto' the mode
        becomes 'text' if docinfo has a text URL, else 'images'.
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when a value was given, so
    # look it up defensively (same as index_html) instead of risking an
    # AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode selected: text if a text URL is set, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
257
258
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    show page

    Collects docinfo, userinfo and pageinfo and renders the template
    'viewer_<viewMode>' from the template folder. Returns an error string
    if the template folder or the viewMode template is missing.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    """

    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # digilibBaseUrl may be unset; fall back to zogilib's URL or a fixed default
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # mode=filepath should not have toc-thumbs
    if tocMode is None:
        if mode == "filepath":
            tocMode = "none"
        else:
            tocMode = "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    # auto viewMode: text if there is a text else images
    if viewMode=="auto":
        if docinfo.get('textURLPath', None):
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"
        else:
            viewMode = "images"

    elif viewMode == "text_dict":
        # legacy fix
        viewMode = "text"
        viewLayer = "dict"

    # save viewLayer in userinfo
    # NOTE(review): this assignment is unconditional, so a viewLayer of
    # None replaces any layer stored earlier -- confirm this is intended
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
324
def getAvailableLayers(self):
    """Return the dict with the list of available layers per viewMode.

    The stored object itself is returned, not a copy.
    """
    layers = self.availableLayers
    return layers
328
def getBrowser(self):
    """Return the browser information dict produced by browserCheck().

    The result is also written to the debug log.
    """
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
334
def findDigilibUrl(self):
    """Try to get the digilib URL from the zogilib object.

    Returns whatever ``getDLBaseUrl()`` of ``self.template.zogilib`` returns.
    """
    return self.template.zogilib.getDLBaseUrl()
339
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return the URL to the digilib Scaler with parameters.

    If docinfo carries an 'imageURL' that URL is the base and fn is
    ignored. Otherwise the base is built from self.digilibBaseUrl and fn
    (falling back to docinfo['imagePath'] when fn is None). pn is only
    appended when it is truthy; dw and dh are always appended.
    """
    haveDocinfo = docinfo is not None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)
    else:
        url = None

    if url is None:
        # no ready-made image URL: start a fresh Scaler URL with fn
        url = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        url += "fn=%s"%fn

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
358
def getDocumentViewerURL(self):
    """Return the URL of this instance (the result of ``absolute_url()``)."""
    viewerUrl = self.absolute_url()
    return viewerUrl
362
def getStyle(self, idx, selected, style=""):
    """Return `style`, with 'sel' appended when idx equals selected."""
    return style + 'sel' if idx == selected else style
370
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the current request's form parameters and then
    applies param=val and/or the entries of the dict params. A value of
    None deletes the key.

    duplicates controls what happens to list values (coming from duplicate
    keys): 'comma' joins the non-empty entries with ',', 'first' takes the
    first non-empty entry ('' if there is none). Any other truthy value
    leaves lists untouched, as does a false value.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param (stored as string)
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params (values stored unchanged)
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list with only empty
                # entries yields '' instead of raising IndexError
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
409
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val
    or with the parameters from dict params.

    The parameters come from getParams(). Values are passed through
    utf8ify and quoted with urllib.quote_plus (not escaping '/'), then
    joined with paramSep. baseUrl defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (k, v) in urlParams.items():
        # quote values (not escaping '/')
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
420
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val.

    Delegates to getLink() with paramSep='&'.
    NOTE(review): this is the same separator as getLink's default --
    confirm whether an HTML-escaped separator was intended here.
    """
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
424
425
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or autodetect the layers.

    newLayerString is parsed as JSON. It is accepted only if it is an
    object (dict) containing the keys 'text' and 'images'; anything else
    is logged as an error and autodetection is used instead.

    Autodetection starts with a copy of builtinLayers and adds one layer
    for every name in the template folder of the form layer_{m}_{l}
    (layer l in mode m). The class-level builtinLayers is never modified.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            # not a string or not valid JSON
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that appending
    # below does not change builtinLayers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # maxsplit=2 keeps '_' inside the layer name
        parts = t.split('_', 2)
        if len(parts) != 3:
            # no layer part, e.g. 'layer_text'
            continue

        (x, m, l) = parts
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
459
def getAvailableLayersJson(self):
    """Return the available layers (self.availableLayers) as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
463
464
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    url and mode are passed to getDocinfo(); the resulting docinfo is
    handed to the template.
    """
    # digilibBaseUrl is only set by __init__ when a value was given, so
    # look it up defensively (same as index_html) instead of risking an
    # AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
473
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or `anon` if there is none.

    Zope's anonymous user (user name "Anonymous User") counts as not
    authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
481
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    Decided by docinfo['accessType']: 'free' is always accessible; a
    missing type or one listed in self.authgroups requires an
    authenticated user; any other type is denied and logged as an error.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
498
499
def getUserinfo(self):
    """Return the userinfo dict cached in the session.

    If the session has no 'userinfo' entry yet, a new empty dict is
    stored there and returned.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # nothing cached: store a fresh dict in session
    fresh = {}
    session['userinfo'] = fresh
    return fresh
513
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document, depending on mode.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match. Otherwise a new docinfo is built from the index.meta data
    (resource, texttool, bib, access, attribution, copyright, DRI), the
    image directory information from digilib and the text info, and is
    stored in the session.

    mode is one of 'texttool' (url points to document dir or index.meta),
    'imagepath' (url points to folder with images) or 'filepath' (url
    points to an image file).

    Raises IOError if no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imagePath'] = url
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + url
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resourceData = self.metadataService.getResourceData(dom=metaDom)
        if resourceData:
            docinfo = self.getDocinfoFromResource(docinfo, resourceData)

        # texttool info
        texttoolData = self.metadataService.getTexttoolData(dom=metaDom)
        if texttoolData:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bibData = self.metadataService.getBibData(dom=metaDom)
        if bibData:
            # save extended version as 'bibx' TODO: ugly
            bibExtended = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibExtended) == 1:
                # unwrap list if possible
                bibExtended = bibExtended[0]

            docinfo['bibx'] = bibExtended
            docinfo = self.getDocinfoFromBib(docinfo, bibData, bibExtended)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        accessData = self.metadataService.getAccessData(dom=metaDom)
        if accessData:
            docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # attribution info
        attributionData = self.metadataService.getAttributionData(dom=metaDom)
        if attributionData:
            logging.debug("getDocinfo: attribution=%s"%repr(attributionData))
            docinfo['attribution'] = attributionData

        # copyright info
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

        # DRI (permanent ID)
        driData = self.metadataService.getDRI(dom=metaDom, type='escidoc-test')
        if driData:
            logging.debug("getDRI: dri=%s"%repr(driData))
            docinfo['DRI'] = driData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages again
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
644
645
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute
    and extended by the document name if it does not end with it).
    Without an archive-path the document URL stored in
    docinfo['documentUrl'] is used, unless it starts with 'http:'; a
    leading '/mpiwg/online' is removed from it. documentPath is None when
    no path can be determined. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under the key
        # 'documentUrl' (the former lookup of 'documentURL' raised KeyError)
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith('http:'):
            # fix URLs starting with /mpiwg/online
            docPath = docUrl.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
670
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' (old style), 'textURLPath' (new style)
    and 'presentationUrl' when the respective texttool entries exist
    (the path based ones also need docinfo['documentPath']), and always
    sets 'pageFlow', 'oddPage' and 'titlePage' with their defaults.
    Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme: treat as path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (default 1)
    docinfo['titlePage'] = texttool.get('title-scan-no', 1)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
713
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' when missing). bibx is
    accepted but not used here. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
727
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to acc['@attr']['type'], or for the types
    'group' and 'institution' to the lower-cased acc['name']. Incomplete
    or malformed access data leaves docinfo unchanged (logged at debug
    level). Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing keys or unexpected value types mean that
        # no access type can be determined
        logging.debug("getDocinfoFromAccess: ignoring incomplete access data (%s)"%repr(e))

    return docinfo
745
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of images in directory `path` from digilib.

    Fetches dirInfo-xml.jsp from the digilib server and stores the
    content of its 'size' element as integer in docinfo['numPages'] (0 if
    the element is empty). If nothing can be fetched an error is logged
    and docinfo is returned unchanged. Returns docinfo.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
764
765
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation
    entry in texttools.

    Reads the XML behind docinfo['presentationUrl'] (directly for
    'http://' URLs, else through the digilib Texter servlet) and stores
    the text of its author, title and date elements as 'creator', 'title'
    and 'date'. Without a URL or without data an error is logged and
    docinfo is returned unchanged. Returns docinfo.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
794
795
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a new dict describing the current page: view mode and layers,
    current page number, thumbnail grid (rows, cols, groupsize, start,
    numgroups, pageBatch), table of contents and search settings taken
    from the request, and highlighting parameters.

    viewLayer may be a comma-separated string or a list; it is stored as
    list ('viewLayers', duplicates and empty entries removed) and as
    string ('viewLayer').
    docinfo is read and may be updated ('numPages'); if the request has a
    'query', getSearchResults() is called with pageinfo and docinfo.
    userinfo is accepted but not used in this method.
    NOTE(review): docinfo defaults to None but is used without a check --
    callers apparently always pass it; confirm.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so "not seen.add(l)" is always true
        # and only serves to record l while keeping the original order)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    # rows and cols default to the configured thumbnail grid
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the start of the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        # NOTE(review): unlike numPages this value is not converted with int() -- confirm its type
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # called for its side effect; the return value is not used here
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
881
882
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with the page information for one screenful of thumbnails.

    Keys of the result:
    - 'batches': list of {'start', 'end'} for all batches up to maxIdx
    - 'pages': rows x cols grid of {'idx': n}, with idx None for cells
      outside minIdx..maxIdx; rows are reversed when pageFlowLtr is false
    - 'prevStart' / 'nextStart': start of the neighbouring batch or None

    maxIdx=0 means unknown and is replaced by start + rows*cols.
    With pageZero the batches start at 0 and a first batch (start == 1)
    begins with an empty cell for page 0.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    offset = 0 if pageZero else 1
    batches = [
        {'start': n * grpsize + offset,
         'end': min((n + 1) * grpsize + offset - 1, maxIdx)}
        for n in range(numBatches)
    ]

    # first index shown; correct the beginning for the zeroth page
    first = 0 if (pageZero and start == 1) else start

    pages = []
    for r in range(rows):
        row = []
        for idx in range(first + r * cols, first + (r + 1) * cols):
            if minIdx <= idx <= maxIdx:
                row.append({'idx': idx})
            else:
                row.append({'idx': None})

        if not pageFlowLtr:
            # right-to-left: first page of the row goes to the right
            row.reverse()

        pages.append(row)

    batch = {'batches': batches}
    batch['prevStart'] = max(start - grpsize, 1) if start > 1 else None
    batch['nextStart'] = start + grpsize if start + grpsize <= maxIdx else None
    batch['pages'] = pages
    return batch
940
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with the information for one screenful of data.

    Keys of the result:
    - 'batches': list of {'start', 'end'} for all batches up to end
    - 'this': the elements of the batch beginning at start. With data
      they are looked up with data.get(), by absolute index when fullData
      is true, else by position in the batch (from 0). Without data the
      value is index + 1.
    - 'prevStart' / 'nextStart': start of the neighbouring batch or None

    end=0 means unknown: it is replaced by start + size and nextStart is
    None. With a known end, nextStart is set whenever another batch
    exists, including a last batch that holds only the element `end`.
    """
    batch = {}
    endKnown = (end != 0)
    if not endKnown:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batch['batches'] = [
        {'start': n * size + 1, 'end': min((n + 1) * size, end)}
        for n in range(numBatches)
    ]

    # list of elements in this batch
    this = []
    for (pos, idx) in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(idx, None)
            else:
                d = data.get(pos, None)
        else:
            d = idx + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # "<=" (as in getPageBatch): element `end` itself still belongs to a
    # following batch when start + size == end
    if endKnown and start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
985
986
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups on the annotation server for the user.

    Each group is a dict with the keys 'id', 'name' and 'uri' taken from
    the 'rows' of the server's JSON answer. Returns an empty list when
    the server gives no data or the answer has no 'rows'.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return []

    rows = json.loads(data).get('rows', None)
    if rows is None:
        return []

    return [
        {'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)}
        for r in rows
    ]
1001
1002
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbrows, thumbcols and the list of
    authgroups (comma-separated string, stored stripped and lower-cased),
    looks up the 'metadata' object again and updates the available layers
    through setAvailableLayers(). Redirects to 'manage_main' if RESPONSE
    is given.
    NOTE(review): the defaults for thumbrows/thumbcols (2/5) are the
    reverse of those in __init__ (5/2) -- confirm which is intended.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1023
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering
    return formTemplate.__of__(self)()
1028
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Add a new documentViewer with the given id to this container.

    Redirects to 'manage_main' if RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: