Context Navigation

source: documentViewer/documentViewer.py @ 538:dbf25bd05fc6

Last change on this file since 538:dbf25bd05fc6 was 538:dbf25bd05fc6, checked in by casties, 12 years ago
digilib buttons get icons. pid on index page.
File size: 37.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
23	def serializeNode(node, encoding="utf-8"):
24	"""returns a string containing node as XML"""
25	s = ET.tostring(node)
26
27	# 4Suite:
28	# stream = cStringIO.StringIO()
29	# Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
30	# s = stream.getvalue()
31	# stream.close()
32	return s
33
34	def browserCheck(self):
35	"""check the browsers request to find out the browser type"""
36	bt = {}
37	ua = self.REQUEST.get_header("HTTP_USER_AGENT")
38	bt['ua'] = ua
39	bt['isIE'] = False
40	bt['isN4'] = False
41	bt['versFirefox']=""
42	bt['versIE']=""
43	bt['versSafariChrome']=""
44	bt['versOpera']=""
45
46	if string.find(ua, 'MSIE') > -1:
47	bt['isIE'] = True
48	else:
49	bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
50	# Safari oder Chrome identification
51	try:
52	nav = ua[string.find(ua, '('):]
53	nav1=ua[string.find(ua,')'):]
54	nav2=nav1[string.find(nav1,'('):]
55	nav3=nav2[string.find(nav2,')'):]
56	ie = string.split(nav, "; ")[1]
57	ie1 =string.split(nav1, " ")[2]
58	ie2 =string.split(nav3, " ")[1]
59	ie3 =string.split(nav3, " ")[2]
60	if string.find(ie3, "Safari") >-1:
61	bt['versSafariChrome']=string.split(ie2, "/")[1]
62	except: pass
63	# IE identification
64	try:
65	nav = ua[string.find(ua, '('):]
66	ie = string.split(nav, "; ")[1]
67	if string.find(ie, "MSIE") > -1:
68	bt['versIE'] = string.split(ie, " ")[1]
69	except:pass
70	# Firefox identification
71	try:
72	nav = ua[string.find(ua, '('):]
73	nav1=ua[string.find(ua,')'):]
74	if string.find(ie1, "Firefox") >-1:
75	nav5= string.split(ie1, "/")[1]
76	logging.debug("FIREFOX: %s"%(nav5))
77	bt['versFirefox']=nav5[0:3]
78	except:pass
79	#Opera identification
80	try:
81	if string.find(ua,"Opera") >-1:
82	nav = ua[string.find(ua, '('):]
83	nav1=nav[string.find(nav,')'):]
84	bt['versOpera']=string.split(nav1,"/")[2]
85	except:pass
86
87	bt['isMac'] = string.find(ua, 'Macintosh') > -1
88	bt['isWin'] = string.find(ua, 'Windows') > -1
89	bt['isIEWin'] = bt['isIE'] and bt['isWin']
90	bt['isIEMac'] = bt['isIE'] and bt['isMac']
91	bt['staticHTML'] = False
92
93	return bt
94
95	def getParentPath(path, cnt=1):
96	"""returns pathname shortened by cnt"""
97	# make sure path doesn't end with /
98	path = path.rstrip('/')
99	# split by /, shorten, and reassemble
100	return '/'.join(path.split('/')[0:-cnt])
101
102	##
103	## documentViewer class
104	##
105	class documentViewer(Folder):
106	"""document viewer"""
107	meta_type="Document viewer"
108
109	security=ClassSecurityInfo()
110	manage_options=Folder.manage_options+(
111	{'label':'Configuration','action':'changeDocumentViewerForm'},
112	)
113
114	metadataService = None
115	"""MetaDataFolder instance"""
116
117
118	#
119	# templates and forms
120	#
121	# viewMode templates
122	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
123	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
124	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
125	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
126	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
127	# available layer types
128	builtinLayers = {'text': ['dict','search','gis','annotator'],
129	'xml': None, 'images': None, 'index': None}
130	availableLayers = builtinLayers;
131	# layer templates
132	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
133	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
134	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
135	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
136	# toc templates
137	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
138	toc_text = PageTemplateFile('zpt/toc_text', globals())
139	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
140	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
141	toc_none = PageTemplateFile('zpt/toc_none', globals())
142	# other templates
143	common_template = PageTemplateFile('zpt/common_template', globals())
144	info_xml = PageTemplateFile('zpt/info_xml', globals())
145	docuviewer_css = ImageFile('css/docuviewer.css',globals())
146	# make docuviewer_css refreshable for development
147	docuviewer_css.index_html = refreshingImageFileIndexHtml
148	docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
149	# make docuviewer_ie_css refreshable for development
150	docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
151	jquery_js = ImageFile('js/jquery.js',globals())
152
153
154	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
155	"""init document viewer"""
156	self.id=id
157	self.title=title
158	self.thumbcols = thumbcols
159	self.thumbrows = thumbrows
160	# authgroups is list of authorized groups (delimited by ,)
161	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
162	# create template folder so we can always use template.something
163
164	templateFolder = Folder('template')
165	self['template'] = templateFolder # Zope-2.12 style
166	#self._setObject('template',templateFolder) # old style
167	try:
168	import MpdlXmlTextServer
169	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
170	templateFolder['fulltextclient'] = textServer
171	#templateFolder._setObject('fulltextclient',textServer)
172	except Exception, e:
173	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
174
175	try:
176	from Products.zogiLib.zogiLib import zogiLib
177	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
178	templateFolder['zogilib'] = zogilib
179	#templateFolder._setObject('zogilib',zogilib)
180	except Exception, e:
181	logging.error("Unable to create zogiLib for zogilib: "+str(e))
182
183	try:
184	# assume MetaDataFolder instance is called metadata
185	self.metadataService = getattr(self, 'metadata')
186	except Exception, e:
187	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
188
189	if digilibBaseUrl is not None:
190	self.digilibBaseUrl = digilibBaseUrl
191
192
193	# proxy text server methods to fulltextclient
194	def getTextPage(self, **args):
195	"""returns full text content of page"""
196	return self.template.fulltextclient.getTextPage(**args)
197
198	def getSearchResults(self, **args):
199	"""loads list of search results and stores XML in docinfo"""
200	return self.template.fulltextclient.getSearchResults(**args)
201
202	def getResultsPage(self, **args):
203	"""returns one page of the search results"""
204	return self.template.fulltextclient.getResultsPage(**args)
205
206	def getTextInfo(self, **args):
207	"""returns document info from the text server"""
208	return self.template.fulltextclient.getTextInfo(**args)
209
210	def getToc(self, **args):
211	"""loads table of contents and stores XML in docinfo"""
212	return self.template.fulltextclient.getToc(**args)
213
214	def getTocPage(self, **args):
215	"""returns one page of the table of contents"""
216	return self.template.fulltextclient.getTocPage(**args)
217
218	def getPlacesOnPage(self, **args):
219	"""get list of gis places on one page"""
220	return self.template.fulltextclient.getPlacesOnPage(**args)
221
222	#WTF?
223	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
224	security.declareProtected('View','thumbs_rss')
225	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
226	'''
227	view it
228	@param mode: defines how to access the document behind url
229	@param url: url which contains display information
230	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
231
232	'''
233	logging.debug("HHHHHHHHHHHHHH:load the rss")
234	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
235
236	if not hasattr(self, 'template'):
237	# create template folder if it doesn't exist
238	self.manage_addFolder('template')
239
240	if not self.digilibBaseUrl:
241	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
242
243	docinfo = self.getDocinfo(mode=mode,url=url)
244	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
245	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
246	''' ZDES '''
247	pt = getattr(self.template, 'thumbs_main_rss')
248
249	if viewMode=="auto": # automodus gewaehlt
250	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
251	viewMode="text"
252	else:
253	viewMode="images"
254
255	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
256
257
258	security.declareProtected('View','index_html')
259	def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
260	"""
261	view page
262	@param url: url which contains display information
263	@param mode: defines how to access the document behind url
264	@param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
265	@param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
266	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
267	"""
268
269	logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))
270
271	if not hasattr(self, 'template'):
272	# this won't work
273	logging.error("template folder missing!")
274	return "ERROR: template folder missing!"
275
276	if not getattr(self, 'digilibBaseUrl', None):
277	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
278
279	docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
280
281	# auto viewMode: text if there is a text else images
282	if viewMode=="auto":
283	if docinfo.get('textURLPath', None):
284	# docinfo.get('textURL', None) not implemented yet
285	viewMode = "text"
286	if viewLayer is None:
287	viewLayer = "dict"
288	else:
289	viewMode = "images"
290
291	elif viewMode == "text_dict":
292	# legacy fix
293	viewMode = "text"
294	viewLayer = "dict"
295
296	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)
297
298	# get template /template/viewer_$viewMode
299	pt = getattr(self.template, 'viewer_%s'%viewMode, None)
300	if pt is None:
301	logging.error("No template for viewMode=%s!"%viewMode)
302	# TODO: error page?
303	return "No template for viewMode=%s!"%viewMode
304
305	# and execute with parameters
306	return pt(docinfo=docinfo, pageinfo=pageinfo)
307
308	#WTF?
309	def generateMarks(self,mk):
310	ret=""
311	if mk is None:
312	return ""
313	if not isinstance(mk, list):
314	mk=[mk]
315	for m in mk:
316	ret+="mk=%s"%m
317	return ret
318
319
320	def getAvailableLayers(self):
321	"""returns dict with list of available layers per viewMode"""
322	return self.availableLayers
323
324	def getBrowser(self):
325	"""getBrowser the version of browser """
326	bt = browserCheck(self)
327	logging.debug("BROWSER VERSION: %s"%(bt))
328	return bt
329
330	def findDigilibUrl(self):
331	"""try to get the digilib URL from zogilib"""
332	url = self.template.zogilib.getDLBaseUrl()
333	return url
334
335	def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
336	"""returns URL to digilib Scaler with params"""
337	url = None
338	if docinfo is not None:
339	url = docinfo.get('imageURL', None)
340
341	if url is None:
342	url = "%s/servlet/Scaler?"%self.digilibBaseUrl
343	if fn is None and docinfo is not None:
344	fn = docinfo.get('imagePath','')
345
346	url += "fn=%s"%fn
347
348	if pn:
349	url += "&pn=%s"%pn
350
351	url += "&dw=%s&dh=%s"%(dw,dh)
352	return url
353
354	def getDocumentViewerURL(self):
355	"""returns the URL of this instance"""
356	return self.absolute_url()
357
358	def getStyle(self, idx, selected, style=""):
359	"""returns a string with the given style and append 'sel' if idx == selected."""
360	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
361	if idx == selected:
362	return style + 'sel'
363	else:
364	return style
365
366	def getParams(self, param=None, val=None, params=None, duplicates=None):
367	"""returns dict with URL parameters.
368
369	Takes URL parameters and additionally param=val or dict params.
370	Deletes key if value is None."""
371	# copy existing request params
372	newParams=self.REQUEST.form.copy()
373	# change single param
374	if param is not None:
375	if val is None:
376	if newParams.has_key(param):
377	del newParams[param]
378	else:
379	newParams[param] = str(val)
380
381	# change more params
382	if params is not None:
383	for (k, v) in params.items():
384	if v is None:
385	# val=None removes param
386	if newParams.has_key(k):
387	del newParams[k]
388
389	else:
390	newParams[k] = v
391
392	if duplicates:
393	# eliminate lists (coming from duplicate keys)
394	for (k,v) in newParams.items():
395	if isinstance(v, list):
396	if duplicates == 'comma':
397	# make comma-separated list of non-empty entries
398	newParams[k] = ','.join([t for t in v if t])
399	elif duplicates == 'first':
400	# take first non-empty entry
401	newParams[k] = [t for t in v if t][0]
402
403	return newParams
404
405	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
406	"""returns URL to documentviewer with parameter param set to val or from dict params"""
407	urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
408	# quote values and assemble into query string (not escaping '/')
409	ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
410	if baseUrl is None:
411	baseUrl = self.getDocumentViewerURL()
412
413	url = "%s?%s"%(baseUrl, ps)
414	return url
415
416	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
417	"""link to documentviewer with parameter param set to val"""
418	return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
419
420
421	def setAvailableLayers(self, newLayerString=None):
422	"""sets availableLayers to newLayerString or tries to autodetect available layers.
423	assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
424	newLayerString is parsed as JSON."""
425	if newLayerString is not None:
426	try:
427	layers = json.loads(newLayerString)
428	if 'text' in layers and 'images' in layers:
429	self.availableLayers = layers
430	return
431	except:
432	pass
433
434	logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))
435
436	# start with builtin layers
437	self.availableLayers = self.builtinLayers.copy()
438	# add layers from templates
439	for t in self.template:
440	if t.startswith('layer_'):
441	try:
442	(x, m, l) = t.split('_', 3)
443	if m not in self.availableLayers:
444	# mode m doesn't exist -> new list
445	self.availableLayers[m] = [l]
446
447	else:
448	# m exists -> append
449	if l not in self.availableLayers[m]:
450	self.availableLayers[m].append()
451
452	except:
453	pass
454
455	def getAvailableLayersJson(self):
456	"""returns available layers as JSON string."""
457	return json.dumps(self.availableLayers)
458
459
460	def getInfo_xml(self,url,mode):
461	"""returns info about the document as XML"""
462	if not self.digilibBaseUrl:
463	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
464
465	docinfo = self.getDocinfo(mode=mode,url=url)
466	pt = getattr(self.template, 'info_xml')
467	return pt(docinfo=docinfo)
468
469	def getAuthenticatedUser(self, anon=None):
470	"""returns the authenticated user object or None. (ignores Zopes anonymous user)"""
471	user = getSecurityManager().getUser()
472	if user is not None and user.getUserName() != "Anonymous User":
473	return user
474	else:
475	return anon
476
477	def isAccessible(self, docinfo):
478	"""returns if access to the resource is granted"""
479	access = docinfo.get('accessType', None)
480	logging.debug("documentViewer (accessOK) access type %s"%access)
481	if access == 'free':
482	logging.debug("documentViewer (accessOK) access is free")
483	return True
484
485	elif access is None or access in self.authgroups:
486	# only local access -- only logged in users
487	user = self.getAuthenticatedUser()
488	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
489	return (user is not None)
490
491	logging.error("documentViewer (accessOK) unknown access type %s"%access)
492	return False
493
494
495	def getDocinfo(self, mode, url, tocMode=None):
496	"""returns docinfo depending on mode"""
497	logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
498	# look for cached docinfo in session
499	if self.REQUEST.SESSION.has_key('docinfo'):
500	docinfo = self.REQUEST.SESSION['docinfo']
501	# check if its still current
502	if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
503	logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
504	return docinfo
505
506	# new docinfo
507	docinfo = {'mode': mode, 'url': url}
508	# add self url
509	docinfo['viewerUrl'] = self.getDocumentViewerURL()
510	docinfo['digilibBaseUrl'] = self.digilibBaseUrl
511	# get index.meta DOM
512	docUrl = None
513	metaDom = None
514	if mode=="texttool":
515	# url points to document dir or index.meta
516	metaDom = self.metadataService.getDomFromPathOrUrl(url)
517	docUrl = url.replace('/index.meta', '')
518	if metaDom is None:
519	raise IOError("Unable to find index.meta for mode=texttool!")
520
521	elif mode=="imagepath":
522	# url points to folder with images, index.meta optional
523	# asssume index.meta in parent dir
524	docUrl = getParentPath(url)
525	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
526
527	elif mode=="filepath":
528	# url points to image file, index.meta optional
529	# asssume index.meta is two path segments up
530	docUrl = getParentPath(url, 2)
531	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
532
533	else:
534	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
535	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
536
537	docinfo['documentUrl'] = docUrl
538	# process index.meta contents
539	if metaDom is not None and metaDom.tag == 'resource':
540	# document directory name and path
541	resource = self.metadataService.getResourceData(dom=metaDom)
542	if resource:
543	docinfo = self.getDocinfoFromResource(docinfo, resource)
544
545	# texttool info
546	texttool = self.metadataService.getTexttoolData(dom=metaDom)
547	if texttool:
548	docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
549	# document info (including toc) from full text
550	if docinfo.get('textURLPath', None):
551	docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)
552
553	# bib info
554	bib = self.metadataService.getBibData(dom=metaDom)
555	if bib:
556	docinfo = self.getDocinfoFromBib(docinfo, bib)
557	else:
558	# no bib - try info.xml
559	docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
560
561	# auth info
562	access = self.metadataService.getAccessData(dom=metaDom)
563	if access:
564	docinfo = self.getDocinfoFromAccess(docinfo, access)
565
566	# attribution info
567	attribution = self.metadataService.getAttributionData(dom=metaDom)
568	if attribution:
569	logging.debug("getDocinfo: attribution=%s"%repr(attribution))
570	docinfo['attribution'] = attribution
571
572	# copyright info
573	copyright = self.metadataService.getCopyrightData(dom=metaDom)
574	if copyright:
575	logging.debug("getDocinfo: copyright=%s"%repr(copyright))
576	docinfo['copyright'] = copyright
577
578	# DRI (permanent ID)
579	dri = self.metadataService.getDRI(dom=metaDom, type='escidoc-test')
580	if dri:
581	logging.debug("getDRI: dri=%s"%repr(dri))
582	docinfo['DRI'] = dri
583
584	# image path
585	if mode != 'texttool':
586	# override image path from texttool with url parameter TODO: how about mode=auto?
587	docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
588
589	# number of images from digilib
590	if docinfo.get('imagePath', None):
591	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
592	docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
593	else:
594	# imagePath still missing? try "./pageimg"
595	imgPath = os.path.join(docUrl, 'pageimg')
596	docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
597	if docinfo.get('numPages', 0) > 0:
598	# there are pages
599	docinfo['imagePath'] = imgPath
600	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
601
602	# check numPages
603	if docinfo.get('numPages', 0) == 0:
604	if docinfo.get('numTextPages', 0) > 0:
605	# replace with numTextPages (text-only?)
606	docinfo['numPages'] = docinfo['numTextPages']
607
608	logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
609	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
610	# store in session
611	self.REQUEST.SESSION['docinfo'] = docinfo
612	return docinfo
613
614
615	def getDocinfoFromResource(self, docinfo, resource):
616	"""reads contents of resource element into docinfo"""
617	docName = resource.get('name', None)
618	docinfo['documentName'] = docName
619	docPath = resource.get('archive-path', None)
620	if docPath:
621	# clean up document path
622	if docPath[0] != '/':
623	docPath = '/' + docPath
624
625	if docName and (not docPath.endswith(docName)):
626	docPath += "/" + docName
627
628	else:
629	# use docUrl as docPath
630	docUrl = docinfo['documentURL']
631	if not docUrl.startswith('http:'):
632	docPath = docUrl
633	if docPath:
634	# fix URLs starting with /mpiwg/online
635	docPath = docPath.replace('/mpiwg/online', '', 1)
636
637	docinfo['documentPath'] = docPath
638	return docinfo
639
640	def getDocinfoFromTexttool(self, docinfo, texttool):
641	"""reads contents of texttool element into docinfo"""
642	# image dir
643	imageDir = texttool.get('image', None)
644	docPath = docinfo.get('documentPath', None)
645	if imageDir and docPath:
646	#print "image: ", imageDir, " archivepath: ", archivePath
647	imageDir = os.path.join(docPath, imageDir)
648	imageDir = imageDir.replace('/mpiwg/online', '', 1)
649	docinfo['imagePath'] = imageDir
650
651	# old style text URL
652	textUrl = texttool.get('text', None)
653	if textUrl and docPath:
654	if urlparse.urlparse(textUrl)[0] == "": #keine url
655	textUrl = os.path.join(docPath, textUrl)
656
657	docinfo['textURL'] = textUrl
658
659	# new style text-url-path
660	textUrl = texttool.get('text-url-path', None)
661	if textUrl:
662	docinfo['textURLPath'] = textUrl
663
664	# page flow
665	docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
666
667	# odd pages are left
668	docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
669
670	# number of title page (0: not defined)
671	docinfo['titlePage'] = texttool.get('title-scan-no', 0)
672
673	# old presentation stuff
674	presentation = texttool.get('presentation', None)
675	if presentation and docPath:
676	if presentation.startswith('http:'):
677	docinfo['presentationUrl'] = presentation
678	else:
679	docinfo['presentationUrl'] = os.path.join(docPath, presentation)
680
681	return docinfo
682
683	def getDocinfoFromBib(self, docinfo, bib):
684	"""reads contents of bib element into docinfo"""
685	logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
686	# put all raw bib fields in dict "bib"
687	docinfo['bib'] = bib
688	bibtype = bib.get('@type', None)
689	docinfo['bibType'] = bibtype
690	# also store DC metadata for convenience
691	dc = self.metadataService.getDCMappedData(bib)
692	docinfo['creator'] = dc.get('creator','')
693	docinfo['title'] = dc.get('title','')
694	docinfo['date'] = dc.get('date','')
695	return docinfo
696
697	def getDocinfoFromAccess(self, docinfo, acc):
698	"""reads contents of access element into docinfo"""
699	#TODO: also read resource type
700	logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
701	try:
702	acctype = acc['@attr']['type']
703	if acctype:
704	access=acctype
705	if access in ['group', 'institution']:
706	access = acc['name'].lower()
707
708	docinfo['accessType'] = access
709
710	except:
711	pass
712
713	return docinfo
714
715	def getDocinfoFromDigilib(self, docinfo, path):
716	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
717	# fetch data
718	txt = getHttpData(infoUrl)
719	if not txt:
720	logging.error("Unable to get dir-info from %s"%(infoUrl))
721	return docinfo
722
723	dom = ET.fromstring(txt)
724	size = getText(dom.find("size"))
725	logging.debug("getDocinfoFromDigilib: size=%s"%size)
726	if size:
727	docinfo['numPages'] = int(size)
728	else:
729	docinfo['numPages'] = 0
730
731	# TODO: produce and keep list of image names and numbers
732	return docinfo
733
734
735	def getDocinfoFromPresentationInfoXml(self,docinfo):
736	"""gets DC-like bibliographical information from the presentation entry in texttools"""
737	url = docinfo.get('presentationUrl', None)
738	if not url:
739	logging.error("getDocinfoFromPresentation: no URL!")
740	return docinfo
741
742	dom = None
743	metaUrl = None
744	if url.startswith("http://"):
745	# real URL
746	metaUrl = url
747	else:
748	# online path
749
750	server=self.digilibBaseUrl+"/servlet/Texter?fn="
751	metaUrl=server+url
752
753	txt=getHttpData(metaUrl)
754	if txt is None:
755	logging.error("Unable to read info.xml from %s"%(url))
756	return docinfo
757
758	dom = ET.fromstring(txt)
759	docinfo['creator']=getText(dom.find(".//author"))
760	docinfo['title']=getText(dom.find(".//title"))
761	docinfo['date']=getText(dom.find(".//date"))
762	return docinfo
763
764
765	def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
766	"""returns pageinfo with the given parameters"""
767	logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
768	pageinfo = {}
769	pageinfo['viewMode'] = viewMode
770	# split viewLayer if necessary
771	if isinstance(viewLayer,basestring):
772	viewLayer = viewLayer.split(',')
773
774	if isinstance(viewLayer, list):
775	logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
776	# save (unique) list in viewLayers
777	seen = set()
778	viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
779	pageinfo['viewLayers'] = viewLayers
780	# stringify viewLayer
781	viewLayer = ','.join(viewLayers)
782	else:
783	#create list
784	pageinfo['viewLayers'] = [viewLayer]
785
786	pageinfo['viewLayer'] = viewLayer
787	pageinfo['tocMode'] = tocMode
788
789	# TODO: unify current and pn!
790	current = getInt(current)
791	pageinfo['current'] = current
792	pageinfo['pn'] = current
793	rows = int(rows or self.thumbrows)
794	pageinfo['rows'] = rows
795	cols = int(cols or self.thumbcols)
796	pageinfo['cols'] = cols
797	grpsize = cols * rows
798	pageinfo['groupsize'] = grpsize
799	# is start is empty use one around current
800	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
801	# int(current / grpsize) * grpsize +1))
802	pageinfo['start'] = start
803	# get number of pages
804	np = int(docinfo.get('numPages', 0))
805	if np == 0:
806	# try numTextPages
807	np = docinfo.get('numTextPages', 0)
808	if np != 0:
809	docinfo['numPages'] = np
810
811	# cache table of contents
812	pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
813	pageinfo['numgroups'] = int(np / grpsize)
814	if np % grpsize > 0:
815	pageinfo['numgroups'] += 1
816
817	pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
818	oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
819	# add zeroth page for two columns
820	pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
821	pageinfo['pageZero'] = pageZero
822	pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
823	# more page parameters
824	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
825	if docinfo.get('pageNumbers'):
826	# get original page numbers
827	pageNumber = docinfo['pageNumbers'].get(current, None)
828	if pageNumber is not None:
829	pageinfo['pageNumberOrig'] = pageNumber['no']
830	pageinfo['pageNumberOrigNorm'] = pageNumber['non']
831
832	# cache search results
833	pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
834	query = self.REQUEST.get('query',None)
835	pageinfo['query'] = query
836	if query:
837	queryType = self.REQUEST.get('queryType', 'fulltextMorph')
838	pageinfo['queryType'] = queryType
839	pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
840	self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
841
842	# highlighting
843	highlightQuery = self.REQUEST.get('highlightQuery', None)
844	if highlightQuery:
845	pageinfo['highlightQuery'] = highlightQuery
846	pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
847	pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
848
849	return pageinfo
850
851
852	def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
853	"""returns dict with array of page informations for one screenfull of thumbnails"""
854	batch = {}
855	grpsize = rows * cols
856	if maxIdx == 0:
857	maxIdx = start + grpsize
858
859	nb = int(math.ceil(maxIdx / float(grpsize)))
860	# list of all batch start and end points
861	batches = []
862	if pageZero:
863	ofs = 0
864	else:
865	ofs = 1
866
867	for i in range(nb):
868	s = i * grpsize + ofs
869	e = min((i + 1) * grpsize + ofs - 1, maxIdx)
870	batches.append({'start':s, 'end':e})
871
872	batch['batches'] = batches
873
874	pages = []
875	if pageZero and start == 1:
876	# correct beginning
877	idx = 0
878	else:
879	idx = start
880
881	for r in range(rows):
882	row = []
883	for c in range(cols):
884	if idx < minIdx or idx > maxIdx:
885	page = {'idx':None}
886	else:
887	page = {'idx':idx}
888
889	idx += 1
890	if pageFlowLtr:
891	row.append(page)
892	else:
893	row.insert(0, page)
894
895	pages.append(row)
896
897	if start > 1:
898	batch['prevStart'] = max(start - grpsize, 1)
899	else:
900	batch['prevStart'] = None
901
902	if start + grpsize <= maxIdx:
903	batch['nextStart'] = start + grpsize
904	else:
905	batch['nextStart'] = None
906
907	batch['pages'] = pages
908	return batch
909
910	def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
911	"""returns dict with information for one screenfull of data."""
912	batch = {}
913	if end == 0:
914	end = start + size
915
916	nb = int(math.ceil(end / float(size)))
917	# list of all batch start and end points
918	batches = []
919	for i in range(nb):
920	s = i * size + 1
921	e = min((i + 1) * size, end)
922	batches.append({'start':s, 'end':e})
923
924	batch['batches'] = batches
925	# list of elements in this batch
926	this = []
927	j = 0
928	for i in range(start, min(start+size, end+1)):
929	if data:
930	if fullData:
931	d = data.get(i, None)
932	else:
933	d = data.get(j, None)
934	j += 1
935
936	else:
937	d = i+1
938
939	this.append(d)
940
941	batch['this'] = this
942	if start > 1:
943	batch['prevStart'] = max(start - size, 1)
944	else:
945	batch['prevStart'] = None
946
947	if start + size < end:
948	batch['nextStart'] = start + size
949	else:
950	batch['nextStart'] = None
951
952	logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
953	return batch
954
955
956	security.declareProtected('View management screens','changeDocumentViewerForm')
957	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
958
959	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
960	"""init document viewer"""
961	self.title=title
962	self.digilibBaseUrl = digilibBaseUrl
963	self.thumbrows = thumbrows
964	self.thumbcols = thumbcols
965	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
966	try:
967	# assume MetaDataFolder instance is called metadata
968	self.metadataService = getattr(self, 'metadata')
969	except Exception, e:
970	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
971
972	self.setAvailableLayers(availableLayers)
973
974	if RESPONSE is not None:
975	RESPONSE.redirect('manage_main')
976
977	def manage_AddDocumentViewerForm(self):
978	"""add the viewer form"""
979	pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
980	return pt()
981
982	def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
983	"""add the viewer"""
984	newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
985	self._setObject(id,newObj)
986
987	if RESPONSE is not None:
988	RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: