Context Navigation

source: documentViewer/documentViewer.py @ 536:abd36d4d97b8

Last change on this file since 536:abd36d4d97b8 was 536:abd36d4d97b8, checked in by casties, 12 years ago
new version of index page. improvements for digilib page and thumbnail overview.
File size: 37.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the resulting XML.

    The encoding argument is accepted but not handed on to ElementTree;
    ET.tostring() is called with its own default.
    """
    # The former 4Suite variant (Ft.Xml.Domlette.Print into a StringIO)
    # did pass the encoding on; it has been replaced by ElementTree.
    return ET.tostring(node)
33
def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    Returns a dict with the header value ('ua', empty string if the header
    is missing), the boolean flags 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac' and 'staticHTML', and the version strings
    'versIE', 'versFirefox', 'versSafariChrome' and 'versOpera'.  A version
    string stays empty when the corresponding browser is not recognized.
    """
    # a request without User-Agent header must not crash the check
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""

    def tail(text, marker):
        # rest of text from the first marker on; str.find() returns -1 for a
        # missing marker, which leaves just the last character (or '')
        return text[text.find(marker):]

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    fromParen = tail(ua, '(')     # "(platform; details) rest"
    afterParen = tail(ua, ')')    # ") rest"

    # Safari or Chrome identification:
    # ") <Name>/<version> Safari/<build>" after the second parenthesis
    try:
        words = tail(tail(afterParen, '('), ')').split(" ")
        if "Safari" in words[2]:
            bt['versSafariChrome'] = words[1].split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification: second "; "-separated token is "MSIE <version>"
    try:
        token = fromParen.split("; ")[1]
        if "MSIE" in token:
            bt['versIE'] = token.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third word after the first parenthesis.
    # The token is computed here so that detection no longer depends on
    # the Safari/Chrome parsing above having succeeded.
    try:
        token = afterParen.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    if "Opera" in ua:
        try:
            bt['versOpera'] = tail(fromParen, ')').split("/")[2]
        except IndexError:
            pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
94
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are stripped before the path is split at '/'.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
101
102	##
103	## documentViewer class
104	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (looked up as attribute 'metadata' by
    # __init__ and changeDocumentViewer)
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())

    # available layer types: viewMode -> list of layer names (or None)
    builtinLayers = {
        'text': ['dict', 'search', 'gis', 'annotator'],
        'xml': None,
        'images': None,
        'index': None,
    }
    # same dict object as builtinLayers until setAvailableLayers() replaces it
    availableLayers = builtinLayers

    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())

    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())

    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    # style sheets; index_html is swapped so they are refreshable for development
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css', globals())
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml

    jquery_js = ImageFile('js/jquery.js', globals())
152
153
154	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
155	"""init document viewer"""
156	self.id=id
157	self.title=title
158	self.thumbcols = thumbcols
159	self.thumbrows = thumbrows
160	# authgroups is list of authorized groups (delimited by ,)
161	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
162	# create template folder so we can always use template.something
163
164	templateFolder = Folder('template')
165	self['template'] = templateFolder # Zope-2.12 style
166	#self._setObject('template',templateFolder) # old style
167	try:
168	import MpdlXmlTextServer
169	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
170	templateFolder['fulltextclient'] = textServer
171	#templateFolder._setObject('fulltextclient',textServer)
172	except Exception, e:
173	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
174
175	try:
176	from Products.zogiLib.zogiLib import zogiLib
177	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
178	templateFolder['zogilib'] = zogilib
179	#templateFolder._setObject('zogilib',zogilib)
180	except Exception, e:
181	logging.error("Unable to create zogiLib for zogilib: "+str(e))
182
183	try:
184	# assume MetaDataFolder instance is called metadata
185	self.metadataService = getattr(self, 'metadata')
186	except Exception, e:
187	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
188
189	if digilibBaseUrl is not None:
190	self.digilibBaseUrl = digilibBaseUrl
191
192
193	# proxy text server methods to fulltextclient
194	def getTextPage(self, **args):
195	"""returns full text content of page"""
196	return self.template.fulltextclient.getTextPage(**args)
197
198	def getSearchResults(self, **args):
199	"""loads list of search results and stores XML in docinfo"""
200	return self.template.fulltextclient.getSearchResults(**args)
201
202	def getResultsPage(self, **args):
203	"""returns one page of the search results"""
204	return self.template.fulltextclient.getResultsPage(**args)
205
206	def getTextInfo(self, **args):
207	"""returns document info from the text server"""
208	return self.template.fulltextclient.getTextInfo(**args)
209
210	def getToc(self, **args):
211	"""loads table of contents and stores XML in docinfo"""
212	return self.template.fulltextclient.getToc(**args)
213
214	def getTocPage(self, **args):
215	"""returns one page of the table of contents"""
216	return self.template.fulltextclient.getTocPage(**args)
217
218	def getPlacesOnPage(self, **args):
219	"""get list of gis places on one page"""
220	return self.template.fulltextclient.getPlacesOnPage(**args)
221
222	#WTF?
223	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
224	security.declareProtected('View','thumbs_rss')
225	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
226	'''
227	view it
228	@param mode: defines how to access the document behind url
229	@param url: url which contains display information
230	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
231
232	'''
233	logging.debug("HHHHHHHHHHHHHH:load the rss")
234	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
235
236	if not hasattr(self, 'template'):
237	# create template folder if it doesn't exist
238	self.manage_addFolder('template')
239
240	if not self.digilibBaseUrl:
241	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
242
243	docinfo = self.getDocinfo(mode=mode,url=url)
244	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
245	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
246	''' ZDES '''
247	pt = getattr(self.template, 'thumbs_main_rss')
248
249	if viewMode=="auto": # automodus gewaehlt
250	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
251	viewMode="text"
252	else:
253	viewMode="images"
254
255	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
256
257
258	security.declareProtected('View','index_html')
259	def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
260	"""
261	view page
262	@param url: url which contains display information
263	@param mode: defines how to access the document behind url
264	@param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
265	@param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
266	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
267	"""
268
269	logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))
270
271	if not hasattr(self, 'template'):
272	# this won't work
273	logging.error("template folder missing!")
274	return "ERROR: template folder missing!"
275
276	if not getattr(self, 'digilibBaseUrl', None):
277	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
278
279	docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
280
281	# auto viewMode: text if there is a text else images
282	if viewMode=="auto":
283	if docinfo.get('textURLPath', None):
284	# docinfo.get('textURL', None) not implemented yet
285	viewMode = "text"
286	if viewLayer is None:
287	viewLayer = "dict"
288	else:
289	viewMode = "images"
290
291	elif viewMode == "text_dict":
292	# legacy fix
293	viewMode = "text"
294	viewLayer = "dict"
295
296	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)
297
298	# get template /template/viewer_$viewMode
299	pt = getattr(self.template, 'viewer_%s'%viewMode, None)
300	if pt is None:
301	logging.error("No template for viewMode=%s!"%viewMode)
302	# TODO: error page?
303	return "No template for viewMode=%s!"%viewMode
304
305	# and execute with parameters
306	return pt(docinfo=docinfo, pageinfo=pageinfo)
307
308	#WTF?
309	def generateMarks(self,mk):
310	ret=""
311	if mk is None:
312	return ""
313	if not isinstance(mk, list):
314	mk=[mk]
315	for m in mk:
316	ret+="mk=%s"%m
317	return ret
318
319
320	def getAvailableLayers(self):
321	"""returns dict with list of available layers per viewMode"""
322	return self.availableLayers
323
324	def getBrowser(self):
325	"""getBrowser the version of browser """
326	bt = browserCheck(self)
327	logging.debug("BROWSER VERSION: %s"%(bt))
328	return bt
329
330	def findDigilibUrl(self):
331	"""try to get the digilib URL from zogilib"""
332	url = self.template.zogilib.getDLBaseUrl()
333	return url
334
335	def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
336	"""returns URL to digilib Scaler with params"""
337	url = None
338	if docinfo is not None:
339	url = docinfo.get('imageURL', None)
340
341	if url is None:
342	url = "%s/servlet/Scaler?"%self.digilibBaseUrl
343	if fn is None and docinfo is not None:
344	fn = docinfo.get('imagePath','')
345
346	url += "fn=%s"%fn
347
348	if pn:
349	url += "&pn=%s"%pn
350
351	url += "&dw=%s&dh=%s"%(dw,dh)
352	return url
353
354	def getDocumentViewerURL(self):
355	"""returns the URL of this instance"""
356	return self.absolute_url()
357
358	def getStyle(self, idx, selected, style=""):
359	"""returns a string with the given style and append 'sel' if idx == selected."""
360	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
361	if idx == selected:
362	return style + 'sel'
363	else:
364	return style
365
366	def getParams(self, param=None, val=None, params=None, duplicates=None):
367	"""returns dict with URL parameters.
368
369	Takes URL parameters and additionally param=val or dict params.
370	Deletes key if value is None."""
371	# copy existing request params
372	newParams=self.REQUEST.form.copy()
373	# change single param
374	if param is not None:
375	if val is None:
376	if newParams.has_key(param):
377	del newParams[param]
378	else:
379	newParams[param] = str(val)
380
381	# change more params
382	if params is not None:
383	for (k, v) in params.items():
384	if v is None:
385	# val=None removes param
386	if newParams.has_key(k):
387	del newParams[k]
388
389	else:
390	newParams[k] = v
391
392	if duplicates:
393	# eliminate lists (coming from duplicate keys)
394	for (k,v) in newParams.items():
395	if isinstance(v, list):
396	if duplicates == 'comma':
397	# make comma-separated list of non-empty entries
398	newParams[k] = ','.join([t for t in v if t])
399	elif duplicates == 'first':
400	# take first non-empty entry
401	newParams[k] = [t for t in v if t][0]
402
403	return newParams
404
405	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
406	"""returns URL to documentviewer with parameter param set to val or from dict params"""
407	urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
408	# quote values and assemble into query string (not escaping '/')
409	ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
410	if baseUrl is None:
411	baseUrl = self.getDocumentViewerURL()
412
413	url = "%s?%s"%(baseUrl, ps)
414	return url
415
416	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
417	"""link to documentviewer with parameter param set to val"""
418	return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
419
420
421	def setAvailableLayers(self, newLayerString=None):
422	"""sets availableLayers to newLayerString or tries to autodetect available layers.
423	assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
424	newLayerString is parsed as JSON."""
425	if newLayerString is not None:
426	try:
427	layers = json.loads(newLayerString)
428	if 'text' in layers and 'images' in layers:
429	self.availableLayers = layers
430	return
431	except:
432	pass
433
434	logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))
435
436	# start with builtin layers
437	self.availableLayers = self.builtinLayers.copy()
438	# add layers from templates
439	for t in self.template:
440	if t.startswith('layer_'):
441	try:
442	(x, m, l) = t.split('_', 3)
443	if m not in self.availableLayers:
444	# mode m doesn't exist -> new list
445	self.availableLayers[m] = [l]
446
447	else:
448	# m exists -> append
449	if l not in self.availableLayers[m]:
450	self.availableLayers[m].append()
451
452	except:
453	pass
454
455	def getAvailableLayersJson(self):
456	"""returns available layers as JSON string."""
457	return json.dumps(self.availableLayers)
458
459
460	def getInfo_xml(self,url,mode):
461	"""returns info about the document as XML"""
462	if not self.digilibBaseUrl:
463	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
464
465	docinfo = self.getDocinfo(mode=mode,url=url)
466	pt = getattr(self.template, 'info_xml')
467	return pt(docinfo=docinfo)
468
469	def getAuthenticatedUser(self, anon=None):
470	"""returns the authenticated user object or None. (ignores Zopes anonymous user)"""
471	user = getSecurityManager().getUser()
472	if user is not None and user.getUserName() != "Anonymous User":
473	return user
474	else:
475	return anon
476
477	def isAccessible(self, docinfo):
478	"""returns if access to the resource is granted"""
479	access = docinfo.get('accessType', None)
480	logging.debug("documentViewer (accessOK) access type %s"%access)
481	if access == 'free':
482	logging.debug("documentViewer (accessOK) access is free")
483	return True
484
485	elif access is None or access in self.authgroups:
486	# only local access -- only logged in users
487	user = self.getAuthenticatedUser()
488	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
489	return (user is not None)
490
491	logging.error("documentViewer (accessOK) unknown access type %s"%access)
492	return False
493
494
495	def getDocinfo(self, mode, url, tocMode=None):
496	"""returns docinfo depending on mode"""
497	logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
498	# look for cached docinfo in session
499	if self.REQUEST.SESSION.has_key('docinfo'):
500	docinfo = self.REQUEST.SESSION['docinfo']
501	# check if its still current
502	if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
503	logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
504	return docinfo
505
506	# new docinfo
507	docinfo = {'mode': mode, 'url': url}
508	# add self url
509	docinfo['viewerUrl'] = self.getDocumentViewerURL()
510	docinfo['digilibBaseUrl'] = self.digilibBaseUrl
511	# get index.meta DOM
512	docUrl = None
513	metaDom = None
514	if mode=="texttool":
515	# url points to document dir or index.meta
516	metaDom = self.metadataService.getDomFromPathOrUrl(url)
517	docUrl = url.replace('/index.meta', '')
518	if metaDom is None:
519	raise IOError("Unable to find index.meta for mode=texttool!")
520
521	elif mode=="imagepath":
522	# url points to folder with images, index.meta optional
523	# asssume index.meta in parent dir
524	docUrl = getParentPath(url)
525	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
526
527	elif mode=="filepath":
528	# url points to image file, index.meta optional
529	# asssume index.meta is two path segments up
530	docUrl = getParentPath(url, 2)
531	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
532
533	else:
534	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
535	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
536
537	docinfo['documentUrl'] = docUrl
538	# process index.meta contents
539	if metaDom is not None and metaDom.tag == 'resource':
540	# document directory name and path
541	resource = self.metadataService.getResourceData(dom=metaDom)
542	if resource:
543	docinfo = self.getDocinfoFromResource(docinfo, resource)
544
545	# texttool info
546	texttool = self.metadataService.getTexttoolData(dom=metaDom)
547	if texttool:
548	docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
549	# document info (including toc) from full text
550	if docinfo.get('textURLPath', None):
551	docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)
552
553	# bib info
554	bib = self.metadataService.getBibData(dom=metaDom)
555	if bib:
556	docinfo = self.getDocinfoFromBib(docinfo, bib)
557	else:
558	# no bib - try info.xml
559	docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
560
561	# auth info
562	access = self.metadataService.getAccessData(dom=metaDom)
563	if access:
564	docinfo = self.getDocinfoFromAccess(docinfo, access)
565
566	# attribution info
567	attribution = self.metadataService.getAttributionData(dom=metaDom)
568	if attribution:
569	logging.debug("getDocinfo: attribution=%s"%repr(attribution))
570	docinfo['attribution'] = attribution
571	#docinfo = self.getDocinfoFromAccess(docinfo, access)
572
573	# copyright info
574	copyright = self.metadataService.getCopyrightData(dom=metaDom)
575	if copyright:
576	logging.debug("getDocinfo: copyright=%s"%repr(copyright))
577	docinfo['copyright'] = copyright
578	#docinfo = self.getDocinfoFromAccess(docinfo, access)
579
580	# image path
581	if mode != 'texttool':
582	# override image path from texttool with url parameter TODO: how about mode=auto?
583	docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
584
585	# number of images from digilib
586	if docinfo.get('imagePath', None):
587	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
588	docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
589	else:
590	# imagePath still missing? try "./pageimg"
591	imgPath = os.path.join(docUrl, 'pageimg')
592	docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
593	if docinfo.get('numPages', 0) > 0:
594	# there are pages
595	docinfo['imagePath'] = imgPath
596	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
597
598	# check numPages
599	if docinfo.get('numPages', 0) == 0:
600	if docinfo.get('numTextPages', 0) > 0:
601	# replace with numTextPages (text-only?)
602	docinfo['numPages'] = docinfo['numTextPages']
603
604	logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
605	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
606	# store in session
607	self.REQUEST.SESSION['docinfo'] = docinfo
608	return docinfo
609
610
611	def getDocinfoFromResource(self, docinfo, resource):
612	"""reads contents of resource element into docinfo"""
613	docName = resource.get('name', None)
614	docinfo['documentName'] = docName
615	docPath = resource.get('archive-path', None)
616	if docPath:
617	# clean up document path
618	if docPath[0] != '/':
619	docPath = '/' + docPath
620
621	if docName and (not docPath.endswith(docName)):
622	docPath += "/" + docName
623
624	else:
625	# use docUrl as docPath
626	docUrl = docinfo['documentURL']
627	if not docUrl.startswith('http:'):
628	docPath = docUrl
629	if docPath:
630	# fix URLs starting with /mpiwg/online
631	docPath = docPath.replace('/mpiwg/online', '', 1)
632
633	docinfo['documentPath'] = docPath
634	return docinfo
635
636	def getDocinfoFromTexttool(self, docinfo, texttool):
637	"""reads contents of texttool element into docinfo"""
638	# image dir
639	imageDir = texttool.get('image', None)
640	docPath = docinfo.get('documentPath', None)
641	if imageDir and docPath:
642	#print "image: ", imageDir, " archivepath: ", archivePath
643	imageDir = os.path.join(docPath, imageDir)
644	imageDir = imageDir.replace('/mpiwg/online', '', 1)
645	docinfo['imagePath'] = imageDir
646
647	# old style text URL
648	textUrl = texttool.get('text', None)
649	if textUrl and docPath:
650	if urlparse.urlparse(textUrl)[0] == "": #keine url
651	textUrl = os.path.join(docPath, textUrl)
652
653	docinfo['textURL'] = textUrl
654
655	# new style text-url-path
656	textUrl = texttool.get('text-url-path', None)
657	if textUrl:
658	docinfo['textURLPath'] = textUrl
659
660	# page flow
661	docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
662
663	# odd pages are left
664	docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
665
666	# number of title page (0: not defined)
667	docinfo['titlePage'] = texttool.get('title-scan-no', 0)
668
669	# old presentation stuff
670	presentation = texttool.get('presentation', None)
671	if presentation and docPath:
672	if presentation.startswith('http:'):
673	docinfo['presentationUrl'] = presentation
674	else:
675	docinfo['presentationUrl'] = os.path.join(docPath, presentation)
676
677	return docinfo
678
679	def getDocinfoFromBib(self, docinfo, bib):
680	"""reads contents of bib element into docinfo"""
681	logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
682	# put all raw bib fields in dict "bib"
683	docinfo['bib'] = bib
684	bibtype = bib.get('@type', None)
685	docinfo['bibType'] = bibtype
686	# also store DC metadata for convenience
687	dc = self.metadataService.getDCMappedData(bib)
688	docinfo['creator'] = dc.get('creator','')
689	docinfo['title'] = dc.get('title','')
690	docinfo['date'] = dc.get('date','')
691	return docinfo
692
693	def getDocinfoFromAccess(self, docinfo, acc):
694	"""reads contents of access element into docinfo"""
695	#TODO: also read resource type
696	logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
697	try:
698	acctype = acc['@attr']['type']
699	if acctype:
700	access=acctype
701	if access in ['group', 'institution']:
702	access = acc['name'].lower()
703
704	docinfo['accessType'] = access
705
706	except:
707	pass
708
709	return docinfo
710
711	def getDocinfoFromDigilib(self, docinfo, path):
712	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
713	# fetch data
714	txt = getHttpData(infoUrl)
715	if not txt:
716	logging.error("Unable to get dir-info from %s"%(infoUrl))
717	return docinfo
718
719	dom = ET.fromstring(txt)
720	size = getText(dom.find("size"))
721	logging.debug("getDocinfoFromDigilib: size=%s"%size)
722	if size:
723	docinfo['numPages'] = int(size)
724	else:
725	docinfo['numPages'] = 0
726
727	# TODO: produce and keep list of image names and numbers
728	return docinfo
729
730
731	def getDocinfoFromPresentationInfoXml(self,docinfo):
732	"""gets DC-like bibliographical information from the presentation entry in texttools"""
733	url = docinfo.get('presentationUrl', None)
734	if not url:
735	logging.error("getDocinfoFromPresentation: no URL!")
736	return docinfo
737
738	dom = None
739	metaUrl = None
740	if url.startswith("http://"):
741	# real URL
742	metaUrl = url
743	else:
744	# online path
745
746	server=self.digilibBaseUrl+"/servlet/Texter?fn="
747	metaUrl=server+url
748
749	txt=getHttpData(metaUrl)
750	if txt is None:
751	logging.error("Unable to read info.xml from %s"%(url))
752	return docinfo
753
754	dom = ET.fromstring(txt)
755	docinfo['creator']=getText(dom.find(".//author"))
756	docinfo['title']=getText(dom.find(".//title"))
757	docinfo['date']=getText(dom.find(".//date"))
758	return docinfo
759
760
761	def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
762	"""returns pageinfo with the given parameters"""
763	logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
764	pageinfo = {}
765	pageinfo['viewMode'] = viewMode
766	# split viewLayer if necessary
767	if isinstance(viewLayer,basestring):
768	viewLayer = viewLayer.split(',')
769
770	if isinstance(viewLayer, list):
771	logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
772	# save (unique) list in viewLayers
773	seen = set()
774	viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
775	pageinfo['viewLayers'] = viewLayers
776	# stringify viewLayer
777	viewLayer = ','.join(viewLayers)
778	else:
779	#create list
780	pageinfo['viewLayers'] = [viewLayer]
781
782	pageinfo['viewLayer'] = viewLayer
783	pageinfo['tocMode'] = tocMode
784
785	# TODO: unify current and pn!
786	current = getInt(current)
787	pageinfo['current'] = current
788	pageinfo['pn'] = current
789	rows = int(rows or self.thumbrows)
790	pageinfo['rows'] = rows
791	cols = int(cols or self.thumbcols)
792	pageinfo['cols'] = cols
793	grpsize = cols * rows
794	pageinfo['groupsize'] = grpsize
795	# is start is empty use one around current
796	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
797	# int(current / grpsize) * grpsize +1))
798	pageinfo['start'] = start
799	# get number of pages
800	np = int(docinfo.get('numPages', 0))
801	if np == 0:
802	# try numTextPages
803	np = docinfo.get('numTextPages', 0)
804	if np != 0:
805	docinfo['numPages'] = np
806
807	# cache table of contents
808	pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
809	pageinfo['numgroups'] = int(np / grpsize)
810	if np % grpsize > 0:
811	pageinfo['numgroups'] += 1
812
813	pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
814	oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
815	# add zeroth page for two columns
816	pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
817	pageinfo['pageZero'] = pageZero
818	pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
819	# more page parameters
820	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
821	if docinfo.get('pageNumbers'):
822	# get original page numbers
823	pageNumber = docinfo['pageNumbers'].get(current, None)
824	if pageNumber is not None:
825	pageinfo['pageNumberOrig'] = pageNumber['no']
826	pageinfo['pageNumberOrigNorm'] = pageNumber['non']
827
828	# cache search results
829	pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
830	query = self.REQUEST.get('query',None)
831	pageinfo['query'] = query
832	if query:
833	queryType = self.REQUEST.get('queryType', 'fulltextMorph')
834	pageinfo['queryType'] = queryType
835	pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
836	self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
837
838	# highlighting
839	highlightQuery = self.REQUEST.get('highlightQuery', None)
840	if highlightQuery:
841	pageinfo['highlightQuery'] = highlightQuery
842	pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
843	pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
844
845	return pageinfo
846
847
848	def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
849	"""returns dict with array of page informations for one screenfull of thumbnails"""
850	batch = {}
851	grpsize = rows * cols
852	if maxIdx == 0:
853	maxIdx = start + grpsize
854
855	nb = int(math.ceil(maxIdx / float(grpsize)))
856	# list of all batch start and end points
857	batches = []
858	if pageZero:
859	ofs = 0
860	else:
861	ofs = 1
862
863	for i in range(nb):
864	s = i * grpsize + ofs
865	e = min((i + 1) * grpsize + ofs - 1, maxIdx)
866	batches.append({'start':s, 'end':e})
867
868	batch['batches'] = batches
869
870	pages = []
871	if pageZero and start == 1:
872	# correct beginning
873	idx = 0
874	else:
875	idx = start
876
877	for r in range(rows):
878	row = []
879	for c in range(cols):
880	if idx < minIdx or idx > maxIdx:
881	page = {'idx':None}
882	else:
883	page = {'idx':idx}
884
885	idx += 1
886	if pageFlowLtr:
887	row.append(page)
888	else:
889	row.insert(0, page)
890
891	pages.append(row)
892
893	if start > 1:
894	batch['prevStart'] = max(start - grpsize, 1)
895	else:
896	batch['prevStart'] = None
897
898	if start + grpsize <= maxIdx:
899	batch['nextStart'] = start + grpsize
900	else:
901	batch['nextStart'] = None
902
903	batch['pages'] = pages
904	return batch
905
906	def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
907	"""returns dict with information for one screenfull of data."""
908	batch = {}
909	if end == 0:
910	end = start + size
911
912	nb = int(math.ceil(end / float(size)))
913	# list of all batch start and end points
914	batches = []
915	for i in range(nb):
916	s = i * size + 1
917	e = min((i + 1) * size, end)
918	batches.append({'start':s, 'end':e})
919
920	batch['batches'] = batches
921	# list of elements in this batch
922	this = []
923	j = 0
924	for i in range(start, min(start+size, end+1)):
925	if data:
926	if fullData:
927	d = data.get(i, None)
928	else:
929	d = data.get(j, None)
930	j += 1
931
932	else:
933	d = i+1
934
935	this.append(d)
936
937	batch['this'] = this
938	if start > 1:
939	batch['prevStart'] = max(start - size, 1)
940	else:
941	batch['prevStart'] = None
942
943	if start + size < end:
944	batch['nextStart'] = start + size
945	else:
946	batch['nextStart'] = None
947
948	logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
949	return batch
950
951
952	security.declareProtected('View management screens','changeDocumentViewerForm')
953	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
954
955	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
956	"""init document viewer"""
957	self.title=title
958	self.digilibBaseUrl = digilibBaseUrl
959	self.thumbrows = thumbrows
960	self.thumbcols = thumbcols
961	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
962	try:
963	# assume MetaDataFolder instance is called metadata
964	self.metadataService = getattr(self, 'metadata')
965	except Exception, e:
966	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
967
968	self.setAvailableLayers(availableLayers)
969
970	if RESPONSE is not None:
971	RESPONSE.redirect('manage_main')
972
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
977
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id, add it to self and redirect to 'manage_main' when RESPONSE is given."""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: