Context Navigation

source: documentViewer/documentViewer.py @ 541:c4cc01b104d7

Last change on this file since 541:c4cc01b104d7 was 541:c4cc01b104d7, checked in by casties, 12 years ago
better metadata display for index page.
File size: 38.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element *node* serialized as XML.

    The *encoding* argument is accepted for backward compatibility only; it
    is not forwarded to ElementTree, so the serializer's default encoding
    is used.
    """
    # Former 4Suite implementation, kept for reference:
    #   stream = cStringIO.StringIO()
    #   Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    #   s = stream.getvalue()
    #   stream.close()
    return ET.tostring(node)
35
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads ``HTTP_USER_AGENT`` from ``self.REQUEST`` and returns a dict with:

    * ``ua`` -- the raw header value (``None`` if the header is missing)
    * ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac``
    * ``versFirefox``, ``versIE``, ``versSafariChrome``, ``versOpera`` --
      version strings, empty when the browser was not recognized
    * ``staticHTML`` -- always ``False``

    A missing header is analysed like an empty string, so all flags keep
    their defaults instead of raising.  Each detection step is independent
    of the others; a user agent that does not have the expected token
    layout simply leaves the corresponding version empty.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # the header may be absent; analyse an empty string in that case
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # NOTE: str.find() returns -1 when nothing matches, so a slice then starts
    # at the last character.  The resulting short token lists are what the
    # IndexError handlers below deal with.
    nav = agent[agent.find('('):]    # from the first '(' onwards
    nav1 = agent[agent.find(')'):]   # from the first ')' onwards

    # Safari or Chrome identification: look behind the second (...) group
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry of the first (...) group
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'
    try:
        candidate = nav1.split(" ")[2]
        if "Firefox" in candidate:
            version = candidate.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (e.g. "10.")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            rest = nav[nav.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
96
def getParentPath(path, cnt=1):
    """Return *path* shortened by its last *cnt* segments.

    Trailing slashes are ignored.  ``cnt <= 0`` removes nothing; a *cnt*
    that is at least the number of segments yields an empty string.
    """
    # make sure path doesn't end with /
    segments = path.rstrip('/').split('/')
    # explicit length arithmetic: a plain [0:-cnt] slice is empty for cnt == 0
    keep = max(len(segments) - max(cnt, 0), 0)
    return '/'.join(segments[:keep])
103
104	##
105	## documentViewer class
106	##
class documentViewer(Folder):
    """Zope folder that displays documents described by index.meta resources.

    Depending on the view mode a document is shown as page images (via
    digilib), as full text or XML (via the text server in
    ``template/fulltextclient``), or as an index page with metadata.
    """
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance (looked up as attribute 'metadata')
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types: viewMode -> list of layer names (None: no layers)
    builtinLayers = {'text': ['dict', 'search', 'gis', 'annotator'],
                     'xml': None, 'images': None, 'index': None}
    # class-level default; setAvailableLayers() stores a per-instance copy
    availableLayers = builtinLayers
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css', globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        Creates the 'template' sub-folder and tries to add a text server
        ('fulltextclient') and a zogiLib instance ('zogilib') to it.  Failure
        to create either helper is logged and does not abort construction.
        """
        self.id = id
        self.title = title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        self['template'] = templateFolder # Zope-2.12 style
        #self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder['fulltextclient'] = textServer
            #templateFolder._setObject('fulltextclient',textServer)
        except Exception as e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder['zogilib'] = zogilib
            #templateFolder._setObject('zogilib',zogilib)
        except Exception as e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))

        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        # digilibBaseUrl is only stored when given; readers use getattr()
        if digilibBaseUrl is not None:
            self.digilibBaseUrl = digilibBaseUrl


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """returns full text content of page"""
        return self.template.fulltextclient.getTextPage(**args)

    def getSearchResults(self, **args):
        """loads list of search results and stores XML in docinfo"""
        return self.template.fulltextclient.getSearchResults(**args)

    def getResultsPage(self, **args):
        """returns one page of the search results"""
        return self.template.fulltextclient.getResultsPage(**args)

    def getTextInfo(self, **args):
        """returns document info from the text server"""
        return self.template.fulltextclient.getTextInfo(**args)

    def getToc(self, **args):
        """loads table of contents and stores XML in docinfo"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """returns one page of the table of contents"""
        return self.template.fulltextclient.getTocPage(**args)

    def getPlacesOnPage(self, **args):
        """get list of gis places on one page"""
        return self.template.fulltextclient.getPlacesOnPage(**args)

    #WTF?
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        # digilibBaseUrl may never have been set (see __init__)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode == "auto":
            # auto mode: text if a text URL is configured, images otherwise
            if "textURL" in docinfo or docinfo.get('textURLPath', None):
                viewMode = "text"
            else:
                viewMode = "images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
        """
        view page
        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
        @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        """

        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

        # auto viewMode: text if there is a text else images
        if viewMode == "auto":
            if docinfo.get('textURLPath', None):
                # docinfo.get('textURL', None) not implemented yet
                viewMode = "text"
                if viewLayer is None:
                    viewLayer = "dict"
            else:
                viewMode = "images"

        elif viewMode == "text_dict":
            # legacy fix
            viewMode = "text"
            viewLayer = "dict"

        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

        # get template /template/viewer_$viewMode
        pt = getattr(self.template, 'viewer_%s'%viewMode, None)
        if pt is None:
            logging.error("No template for viewMode=%s!"%viewMode)
            # TODO: error page?
            return "No template for viewMode=%s!"%viewMode

        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    #WTF?
    def generateMarks(self,mk):
        # Builds "mk=<value>" for each mark (a single value or a list) and
        # concatenates them without any separator; None yields "".
        # Documented in a comment rather than a docstring so that the method
        # does not become publishable through Zope.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk = [mk]
        return "".join(["mk=%s"%m for m in mk])


    def getAvailableLayers(self):
        """returns dict with list of available layers per viewMode"""
        return self.availableLayers

    def getBrowser(self):
        """returns the browser information dict computed by browserCheck"""
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        url = self.template.zogilib.getDLBaseUrl()
        return url

    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """returns URL to digilib Scaler with params

        Uses docinfo['imageURL'] as base if present; otherwise builds the
        Scaler URL from digilibBaseUrl and fn (or docinfo['imagePath']).
        pn, dw and dh are appended as query parameters.
        """
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            url = "%s/servlet/Scaler?"%self.digilibBaseUrl
            if fn is None and docinfo is not None:
                fn = docinfo.get('imagePath','')

            url += "fn=%s"%fn

        if pn:
            url += "&pn=%s"%pn

        url += "&dw=%s&dh=%s"%(dw,dh)
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """returns dict with URL parameters.

        Takes URL parameters and additionally param=val or dict params.
        Deletes key if value is None.

        duplicates controls values that are lists (from repeated keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry (or '' if there is none)."""
        # copy existing request params
        newParams = self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                newParams.pop(param, None)
            else:
                newParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    newParams.pop(k, None)
                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys)
            for (k, v) in list(newParams.items()):
                if not isinstance(v, list):
                    continue

                nonEmpty = [t for t in v if t]
                if duplicates == 'comma':
                    # make comma-separated list of non-empty entries
                    newParams[k] = ','.join(nonEmpty)
                elif duplicates == 'first':
                    # take first non-empty entry; all-empty lists give ''
                    newParams[k] = nonEmpty[0] if nonEmpty else ''

        return newParams

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
        """returns URL to documentviewer with parameter param set to val or from dict params"""
        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
        # quote values and assemble into query string (not escaping '/')
        ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
        if baseUrl is None:
            baseUrl = self.getDocumentViewerURL()

        url = "%s?%s"%(baseUrl, ps)
        return url

    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
        """link to documentviewer with parameter param set to val"""
        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)


    def setAvailableLayers(self, newLayerString=None):
        """sets availableLayers to newLayerString or tries to autodetect available layers.
        assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
        newLayerString is parsed as JSON and must be an object containing
        at least the keys 'text' and 'images'; anything else is logged and
        replaced by autodetection."""
        if newLayerString is not None:
            try:
                layers = json.loads(newLayerString)
            except (TypeError, ValueError):
                layers = None

            if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

            logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

        # start with a private copy of the builtin layers; the lists are
        # copied too so that the class-level defaults are never modified
        layers = {}
        for (m, names) in self.builtinLayers.items():
            if names is None:
                layers[m] = None
            else:
                layers[m] = list(names)

        # add layers from templates
        for t in self.template:
            if not t.startswith('layer_'):
                continue

            # layer_{m}_{l}; l may itself contain underscores
            parts = t.split('_', 2)
            if len(parts) != 3:
                continue

            (x, m, l) = parts
            if layers.get(m) is None:
                # mode m doesn't exist or has no layers yet -> new list
                layers[m] = [l]
            elif l not in layers[m]:
                # m exists -> append
                layers[m].append(l)

        self.availableLayers = layers

    def getAvailableLayersJson(self):
        """returns available layers as JSON string."""
        return json.dumps(self.availableLayers)


    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""
        # digilibBaseUrl may never have been set (see __init__)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getAuthenticatedUser(self, anon=None):
        """returns the authenticated user object or None. (ignores Zopes anonymous user)"""
        user = getSecurityManager().getUser()
        if user is not None and user.getUserName() != "Anonymous User":
            return user
        return anon

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' access is always granted; a missing access type or one of
        the configured authgroups requires an authenticated user; any other
        access type is denied."""
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        elif access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = self.getAuthenticatedUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            return (user is not None)

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDocinfo(self, mode, url, tocMode=None):
        """returns docinfo depending on mode

        mode is one of 'texttool' (url points to the document directory or
        its index.meta), 'imagepath' (url points to a folder with images)
        or 'filepath' (url points to an image file).  The result is cached
        in the session and reused while mode and url stay the same.

        Raises IOError if no index.meta is found for mode 'texttool' and
        ValueError for an unknown mode."""
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        # (has_key is kept on purpose: SESSION is not a plain dict)
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode == "texttool":
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode == "imagepath":
            # url points to folder with images, index.meta optional
            # assume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        elif mode == "filepath":
            # url points to image file, index.meta optional
            # assume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl
        # process index.meta contents
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info
            texttool = self.metadataService.getTexttoolData(dom=metaDom)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
                # document info (including toc) from full text
                if docinfo.get('textURLPath', None):
                    docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                # save extended version as 'bibx'
                bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
                if len(bibx) == 1:
                    # unwrap list if possible
                    bibx = bibx[0]

                docinfo['bibx'] = bibx
                docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
            else:
                # no bib - try info.xml
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution

            # copyright info
            copyrightInfo = self.metadataService.getCopyrightData(dom=metaDom)
            if copyrightInfo:
                logging.debug("getDocinfo: copyright=%s"%repr(copyrightInfo))
                docinfo['copyright'] = copyrightInfo

            # DRI (permanent ID)
            dri = self.metadataService.getDRI(dom=metaDom, type='escidoc-test')
            if dri:
                logging.debug("getDRI: dri=%s"%repr(dri))
                docinfo['DRI'] = dri

        # image path
        if mode != 'texttool':
            # override image path from texttool with url parameter TODO: how about mode=auto?
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

        # check numPages
        if docinfo.get('numPages', 0) == 0:
            if docinfo.get('numTextPages', 0) > 0:
                # replace with numTextPages (text-only?)
                docinfo['numPages'] = docinfo['numTextPages']

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getDocinfoFromResource(self, docinfo, resource):
        """reads contents of resource element into docinfo

        Sets 'documentName' and 'documentPath'.  The path is taken from
        'archive-path' (with the document name appended if missing) or,
        when there is no archive-path, from docinfo['documentUrl'] unless
        that is an http: URL."""
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath ('documentUrl' is the key set by getDocinfo)
            docUrl = docinfo.get('documentUrl', None)
            if docUrl and not docUrl.startswith('http:'):
                docPath = docUrl

        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo

    def getDocinfoFromTexttool(self, docinfo, texttool):
        """reads contents of texttool element into docinfo"""
        # image dir
        imageDir = texttool.get('image', None)
        docPath = docinfo.get('documentPath', None)
        if imageDir and docPath:
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        # old style text URL
        textUrl = texttool.get('text', None)
        if textUrl and docPath:
            if urlparse.urlparse(textUrl)[0] == "":
                # no URL scheme -> path relative to the document
                textUrl = os.path.join(docPath, textUrl)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrl = texttool.get('text-url-path', None)
        if textUrl:
            docinfo['textURLPath'] = textUrl

        # page flow
        docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')

        # odd pages are left
        docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')

        # number of title page (default 1)
        docinfo['titlePage'] = texttool.get('title-scan-no', 1)

        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if presentation and docPath:
            if presentation.startswith('http:'):
                docinfo['presentationUrl'] = presentation
            else:
                docinfo['presentationUrl'] = os.path.join(docPath, presentation)

        return docinfo

    def getDocinfoFromBib(self, docinfo, bib, bibx=None):
        """reads contents of bib element into docinfo

        bibx is accepted for compatibility with callers but not used here."""
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # put all raw bib fields in dict "bib"
        docinfo['bib'] = bib
        bibtype = bib.get('@type', None)
        docinfo['bibType'] = bibtype
        # also store DC metadata for convenience
        dc = self.metadataService.getDCMappedData(bib)
        docinfo['creator'] = dc.get('creator','')
        docinfo['title'] = dc.get('title','')
        docinfo['date'] = dc.get('date','')
        return docinfo

    def getDocinfoFromAccess(self, docinfo, acc):
        """reads contents of access element into docinfo

        Sets docinfo['accessType'] to the access type, or to the lower-cased
        name for types 'group' and 'institution'.  Incomplete access data
        leaves docinfo unchanged."""
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            acctype = acc['@attr']['type']
            if acctype:
                access = acctype
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (KeyError, TypeError, AttributeError) as e:
            # best effort: missing or malformed access info is ignored
            logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

        return docinfo

    def getDocinfoFromDigilib(self, docinfo, path):
        # Asks digilib (dirInfo-xml.jsp) for the directory at path and stores
        # the reported size as docinfo['numPages'] (0 if no size is given).
        # Returns docinfo unchanged when no data could be fetched.
        # Documented in a comment rather than a docstring so that the method
        # does not become publishable through Zope.
        infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
        # fetch data
        txt = getHttpData(infoUrl)
        if not txt:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        if size:
            docinfo['numPages'] = int(size)
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers
        return docinfo


    def getDocinfoFromPresentationInfoXml(self,docinfo):
        """gets DC-like bibliographical information from the presentation entry in texttools"""
        url = docinfo.get('presentationUrl', None)
        if not url:
            logging.error("getDocinfoFromPresentation: no URL!")
            return docinfo

        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server = self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl = server+url

        txt = getHttpData(metaUrl)
        # an empty response cannot be parsed either
        if not txt:
            logging.error("Unable to read info.xml from %s"%(url))
            return docinfo

        dom = ET.fromstring(txt)
        docinfo['creator'] = getText(dom.find(".//author"))
        docinfo['title'] = getText(dom.find(".//title"))
        docinfo['date'] = getText(dom.find(".//date"))
        return docinfo


    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
        """returns pageinfo with the given parameters

        pageinfo holds the current page, thumbnail grid geometry and batch,
        view mode/layers and the search and highlighting parameters read
        from the request.  If a query is present the search results are
        loaded via getSearchResults.  docinfo['numPages'] is filled from
        'numTextPages' when it is 0."""
        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
        pageinfo = {}
        pageinfo['viewMode'] = viewMode
        # split viewLayer if necessary
        if isinstance(viewLayer,basestring):
            viewLayer = viewLayer.split(',')

        if isinstance(viewLayer, list):
            logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
            # save (unique) list in viewLayers, keeping the original order
            seen = set()
            viewLayers = []
            for l in viewLayer:
                if l and l not in seen:
                    seen.add(l)
                    viewLayers.append(l)

            pageinfo['viewLayers'] = viewLayers
            # stringify viewLayer
            viewLayer = ','.join(viewLayers)
        else:
            #create list
            pageinfo['viewLayers'] = [viewLayer]

        pageinfo['viewLayer'] = viewLayer
        pageinfo['tocMode'] = tocMode

        # TODO: unify current and pn!
        current = getInt(current)
        pageinfo['current'] = current
        pageinfo['pn'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # if start is empty use the first page of the group around current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        # get number of pages
        np = int(docinfo.get('numPages', 0))
        if np == 0:
            # try numTextPages
            np = docinfo.get('numTextPages', 0)
            if np != 0:
                docinfo['numPages'] = np

        # cache table of contents
        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
        pageinfo['numgroups'] = int(np / grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
        # more page parameters
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        if docinfo.get('pageNumbers'):
            # get original page numbers
            pageNumber = docinfo['pageNumbers'].get(current, None)
            if pageNumber is not None:
                pageinfo['pageNumberOrig'] = pageNumber['no']
                pageinfo['pageNumberOrigNorm'] = pageNumber['non']

        # cache search results
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        query = self.REQUEST.get('query',None)
        pageinfo['query'] = query
        if query:
            queryType = self.REQUEST.get('queryType', 'fulltextMorph')
            pageinfo['queryType'] = queryType
            pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
            self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

        # highlighting
        highlightQuery = self.REQUEST.get('highlightQuery', None)
        if highlightQuery:
            pageinfo['highlightQuery'] = highlightQuery
            pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
            pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

        return pageinfo


    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """returns dict with array of page informations for one screenfull of thumbnails

        Keys: 'batches' (start/end of every batch), 'pages' (rows of
        {'idx': n} dicts, idx None outside minIdx..maxIdx), 'prevStart' and
        'nextStart' (None when there is no previous/next batch).  With
        pageZero an empty zeroth slot precedes page 1; for right-to-left
        page flow each row is reversed."""
        batch = {}
        grpsize = rows * cols
        if maxIdx == 0:
            maxIdx = start + grpsize

        nb = int(math.ceil(maxIdx / float(grpsize)))
        # list of all batch start and end points
        if pageZero:
            ofs = 0
        else:
            ofs = 1

        batches = []
        for i in range(nb):
            s = i * grpsize + ofs
            e = min((i + 1) * grpsize + ofs - 1, maxIdx)
            batches.append({'start':s, 'end':e})

        batch['batches'] = batches

        if pageZero and start == 1:
            # correct beginning
            idx = 0
        else:
            idx = start

        pages = []
        for r in range(rows):
            row = []
            for c in range(cols):
                if idx < minIdx or idx > maxIdx:
                    page = {'idx':None}
                else:
                    page = {'idx':idx}

                idx += 1
                if pageFlowLtr:
                    row.append(page)
                else:
                    row.insert(0, page)

            pages.append(row)

        if start > 1:
            batch['prevStart'] = max(start - grpsize, 1)
        else:
            batch['prevStart'] = None

        if start + grpsize <= maxIdx:
            batch['nextStart'] = start + grpsize
        else:
            batch['nextStart'] = None

        batch['pages'] = pages
        return batch

    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """returns dict with information for one screenfull of data.

        Keys: 'batches' (start/end of every batch), 'this' (elements of the
        current batch), 'prevStart' and 'nextStart'.  With fullData the
        elements are looked up in data by absolute index, otherwise by
        position inside the batch (starting at 0); without data the
        elements are the indexes plus one."""
        batch = {}
        if end == 0:
            end = start + size

        nb = int(math.ceil(end / float(size)))
        # list of all batch start and end points
        batches = []
        for i in range(nb):
            s = i * size + 1
            e = min((i + 1) * size, end)
            batches.append({'start':s, 'end':e})

        batch['batches'] = batches
        # list of elements in this batch
        this = []
        j = 0
        for i in range(start, min(start+size, end+1)):
            if data:
                if fullData:
                    d = data.get(i, None)
                else:
                    d = data.get(j, None)
                    j += 1

            else:
                d = i+1

            this.append(d)

        batch['this'] = this
        if start > 1:
            batch['prevStart'] = max(start - size, 1)
        else:
            batch['prevStart'] = None

        if start + size < end:
            batch['nextStart'] = start + size
        else:
            batch['nextStart'] = None

        logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
        return batch


    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
        """change the configuration of the document viewer

        availableLayers is a JSON string passed to setAvailableLayers.
        Redirects to manage_main if RESPONSE is given."""
        self.title = title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        self.setAvailableLayers(availableLayers)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
985
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    # bind the page template to this container before calling it
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
990
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """create a document viewer with the given id and add it to this container"""
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    # without a RESPONSE there is nothing to redirect
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: