Context Navigation

source: documentViewer/documentViewer.py @ 528:f8a5f63eafc0

Last change on this file since 528:f8a5f63eafc0 was 528:f8a5f63eafc0, checked in by casties, 12 years ago
new viewMode=thumbs.
File size: 37.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
23	def serializeNode(node, encoding="utf-8"):
24	"""returns a string containing node as XML"""
25	s = ET.tostring(node)
26
27	# 4Suite:
28	# stream = cStringIO.StringIO()
29	# Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
30	# s = stream.getvalue()
31	# stream.close()
32	return s
33
34	def browserCheck(self):
35	"""check the browsers request to find out the browser type"""
36	bt = {}
37	ua = self.REQUEST.get_header("HTTP_USER_AGENT")
38	bt['ua'] = ua
39	bt['isIE'] = False
40	bt['isN4'] = False
41	bt['versFirefox']=""
42	bt['versIE']=""
43	bt['versSafariChrome']=""
44	bt['versOpera']=""
45
46	if string.find(ua, 'MSIE') > -1:
47	bt['isIE'] = True
48	else:
49	bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
50	# Safari oder Chrome identification
51	try:
52	nav = ua[string.find(ua, '('):]
53	nav1=ua[string.find(ua,')'):]
54	nav2=nav1[string.find(nav1,'('):]
55	nav3=nav2[string.find(nav2,')'):]
56	ie = string.split(nav, "; ")[1]
57	ie1 =string.split(nav1, " ")[2]
58	ie2 =string.split(nav3, " ")[1]
59	ie3 =string.split(nav3, " ")[2]
60	if string.find(ie3, "Safari") >-1:
61	bt['versSafariChrome']=string.split(ie2, "/")[1]
62	except: pass
63	# IE identification
64	try:
65	nav = ua[string.find(ua, '('):]
66	ie = string.split(nav, "; ")[1]
67	if string.find(ie, "MSIE") > -1:
68	bt['versIE'] = string.split(ie, " ")[1]
69	except:pass
70	# Firefox identification
71	try:
72	nav = ua[string.find(ua, '('):]
73	nav1=ua[string.find(ua,')'):]
74	if string.find(ie1, "Firefox") >-1:
75	nav5= string.split(ie1, "/")[1]
76	logging.debug("FIREFOX: %s"%(nav5))
77	bt['versFirefox']=nav5[0:3]
78	except:pass
79	#Opera identification
80	try:
81	if string.find(ua,"Opera") >-1:
82	nav = ua[string.find(ua, '('):]
83	nav1=nav[string.find(nav,')'):]
84	bt['versOpera']=string.split(nav1,"/")[2]
85	except:pass
86
87	bt['isMac'] = string.find(ua, 'Macintosh') > -1
88	bt['isWin'] = string.find(ua, 'Windows') > -1
89	bt['isIEWin'] = bt['isIE'] and bt['isWin']
90	bt['isIEMac'] = bt['isIE'] and bt['isMac']
91	bt['staticHTML'] = False
92
93	return bt
94
95	def getParentPath(path, cnt=1):
96	"""returns pathname shortened by cnt"""
97	# make sure path doesn't end with /
98	path = path.rstrip('/')
99	# split by /, shorten, and reassemble
100	return '/'.join(path.split('/')[0:-cnt])
101
102	##
103	## documentViewer class
104	##
105	class documentViewer(Folder):
106	"""document viewer"""
107	meta_type="Document viewer"
108
109	security=ClassSecurityInfo()
110	manage_options=Folder.manage_options+(
111	{'label':'Configuration','action':'changeDocumentViewerForm'},
112	)
113
114	metadataService = None
115	"""MetaDataFolder instance"""
116
117
118	#
119	# templates and forms
120	#
121	# viewMode templates
122	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
123	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
124	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
125	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
126	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
127	# available layer types
128	builtinLayers = {'text': ['dict','search','gis','annotator'],
129	'xml': None, 'images': None, 'index': None}
130	availableLayers = builtinLayers;
131	# layer templates
132	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
133	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
134	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
135	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
136	# toc templates
137	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
138	toc_text = PageTemplateFile('zpt/toc_text', globals())
139	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
140	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
141	toc_none = PageTemplateFile('zpt/toc_none', globals())
142	# other templates
143	common_template = PageTemplateFile('zpt/common_template', globals())
144	info_xml = PageTemplateFile('zpt/info_xml', globals())
145	docuviewer_css = ImageFile('css/docuviewer.css',globals())
146	# make ImageFile better for development
147	docuviewer_css.index_html = refreshingImageFileIndexHtml
148	jquery_js = ImageFile('js/jquery.js',globals())
149
150
151	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
152	"""init document viewer"""
153	self.id=id
154	self.title=title
155	self.thumbcols = thumbcols
156	self.thumbrows = thumbrows
157	# authgroups is list of authorized groups (delimited by ,)
158	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
159	# create template folder so we can always use template.something
160
161	templateFolder = Folder('template')
162	self['template'] = templateFolder # Zope-2.12 style
163	#self._setObject('template',templateFolder) # old style
164	try:
165	import MpdlXmlTextServer
166	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
167	templateFolder['fulltextclient'] = textServer
168	#templateFolder._setObject('fulltextclient',textServer)
169	except Exception, e:
170	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
171
172	try:
173	from Products.zogiLib.zogiLib import zogiLib
174	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
175	templateFolder['zogilib'] = zogilib
176	#templateFolder._setObject('zogilib',zogilib)
177	except Exception, e:
178	logging.error("Unable to create zogiLib for zogilib: "+str(e))
179
180	try:
181	# assume MetaDataFolder instance is called metadata
182	self.metadataService = getattr(self, 'metadata')
183	except Exception, e:
184	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
185
186	if digilibBaseUrl is not None:
187	self.digilibBaseUrl = digilibBaseUrl
188
189
190	# proxy text server methods to fulltextclient
191	def getTextPage(self, **args):
192	"""returns full text content of page"""
193	return self.template.fulltextclient.getTextPage(**args)
194
195	def getSearchResults(self, **args):
196	"""loads list of search results and stores XML in docinfo"""
197	return self.template.fulltextclient.getSearchResults(**args)
198
199	def getResultsPage(self, **args):
200	"""returns one page of the search results"""
201	return self.template.fulltextclient.getResultsPage(**args)
202
203	def getTextInfo(self, **args):
204	"""returns document info from the text server"""
205	return self.template.fulltextclient.getTextInfo(**args)
206
207	def getToc(self, **args):
208	"""loads table of contents and stores XML in docinfo"""
209	return self.template.fulltextclient.getToc(**args)
210
211	def getTocPage(self, **args):
212	"""returns one page of the table of contents"""
213	return self.template.fulltextclient.getTocPage(**args)
214
215	def getPlacesOnPage(self, **args):
216	"""get list of gis places on one page"""
217	return self.template.fulltextclient.getPlacesOnPage(**args)
218
219	#WTF?
220	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
221	security.declareProtected('View','thumbs_rss')
222	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
223	'''
224	view it
225	@param mode: defines how to access the document behind url
226	@param url: url which contains display information
227	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
228
229	'''
230	logging.debug("HHHHHHHHHHHHHH:load the rss")
231	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
232
233	if not hasattr(self, 'template'):
234	# create template folder if it doesn't exist
235	self.manage_addFolder('template')
236
237	if not self.digilibBaseUrl:
238	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
239
240	docinfo = self.getDocinfo(mode=mode,url=url)
241	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
242	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
243	''' ZDES '''
244	pt = getattr(self.template, 'thumbs_main_rss')
245
246	if viewMode=="auto": # automodus gewaehlt
247	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
248	viewMode="text"
249	else:
250	viewMode="images"
251
252	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
253
254
255	security.declareProtected('View','index_html')
256	def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
257	"""
258	view page
259	@param url: url which contains display information
260	@param mode: defines how to access the document behind url
261	@param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
262	@param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
263	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
264	"""
265
266	logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))
267
268	if not hasattr(self, 'template'):
269	# this won't work
270	logging.error("template folder missing!")
271	return "ERROR: template folder missing!"
272
273	if not getattr(self, 'digilibBaseUrl', None):
274	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
275
276	docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
277
278	# auto viewMode: text if there is a text else images
279	if viewMode=="auto":
280	if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
281	viewMode = "text"
282	if viewLayer is None:
283	viewLayer = "dict"
284	else:
285	viewMode = "images"
286
287	elif viewMode == "text_dict":
288	# legacy fix
289	viewMode = "text"
290	viewLayer = "dict"
291
292	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)
293
294	# get template /template/viewer_$viewMode
295	pt = getattr(self.template, 'viewer_%s'%viewMode, None)
296	if pt is None:
297	logging.error("No template for viewMode=%s!"%viewMode)
298	# TODO: error page?
299	return "No template for viewMode=%s!"%viewMode
300
301	# and execute with parameters
302	return pt(docinfo=docinfo, pageinfo=pageinfo)
303
304	#WTF?
305	def generateMarks(self,mk):
306	ret=""
307	if mk is None:
308	return ""
309	if not isinstance(mk, list):
310	mk=[mk]
311	for m in mk:
312	ret+="mk=%s"%m
313	return ret
314
315
316	def getAvailableLayers(self):
317	"""returns dict with list of available layers per viewMode"""
318	return self.availableLayers
319
320	def getBrowser(self):
321	"""getBrowser the version of browser """
322	bt = browserCheck(self)
323	logging.debug("BROWSER VERSION: %s"%(bt))
324	return bt
325
326	def findDigilibUrl(self):
327	"""try to get the digilib URL from zogilib"""
328	url = self.template.zogilib.getDLBaseUrl()
329	return url
330
331	def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
332	"""returns URL to digilib Scaler with params"""
333	url = None
334	if docinfo is not None:
335	url = docinfo.get('imageURL', None)
336
337	if url is None:
338	url = "%s/servlet/Scaler?"%self.digilibBaseUrl
339	if fn is None and docinfo is not None:
340	fn = docinfo.get('imagePath','')
341
342	url += "fn=%s"%fn
343
344	if pn:
345	url += "&pn=%s"%pn
346
347	url += "&dw=%s&dh=%s"%(dw,dh)
348	return url
349
350	def getDocumentViewerURL(self):
351	"""returns the URL of this instance"""
352	return self.absolute_url()
353
354	def getStyle(self, idx, selected, style=""):
355	"""returns a string with the given style and append 'sel' if idx == selected."""
356	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
357	if idx == selected:
358	return style + 'sel'
359	else:
360	return style
361
362	def getParams(self, param=None, val=None, params=None, duplicates=None):
363	"""returns dict with URL parameters.
364
365	Takes URL parameters and additionally param=val or dict params.
366	Deletes key if value is None."""
367	# copy existing request params
368	newParams=self.REQUEST.form.copy()
369	# change single param
370	if param is not None:
371	if val is None:
372	if newParams.has_key(param):
373	del newParams[param]
374	else:
375	newParams[param] = str(val)
376
377	# change more params
378	if params is not None:
379	for (k, v) in params.items():
380	if v is None:
381	# val=None removes param
382	if newParams.has_key(k):
383	del newParams[k]
384
385	else:
386	newParams[k] = v
387
388	if duplicates:
389	# eliminate lists (coming from duplicate keys)
390	for (k,v) in newParams.items():
391	if isinstance(v, list):
392	if duplicates == 'comma':
393	# make comma-separated list of non-empty entries
394	newParams[k] = ','.join([t for t in v if t])
395	elif duplicates == 'first':
396	# take first non-empty entry
397	newParams[k] = [t for t in v if t][0]
398
399	return newParams
400
401	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
402	"""returns URL to documentviewer with parameter param set to val or from dict params"""
403	urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
404	# quote values and assemble into query string (not escaping '/')
405	ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
406	if baseUrl is None:
407	baseUrl = self.getDocumentViewerURL()
408
409	url = "%s?%s"%(baseUrl, ps)
410	return url
411
412	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
413	"""link to documentviewer with parameter param set to val"""
414	return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
415
416
417	def setAvailableLayers(self, newLayerString=None):
418	"""sets availableLayers to newLayerString or tries to autodetect available layers.
419	assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
420	newLayerString is parsed as JSON."""
421	if newLayerString is not None:
422	try:
423	layers = json.loads(newLayerString)
424	if 'text' in layers and 'images' in layers:
425	self.availableLayers = layers
426	return
427	except:
428	pass
429
430	logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))
431
432	# start with builtin layers
433	self.availableLayers = self.builtinLayers.copy()
434	# add layers from templates
435	for t in self.template:
436	if t.startswith('layer_'):
437	try:
438	(x, m, l) = t.split('_', 3)
439	if m not in self.availableLayers:
440	# mode m doesn't exist -> new list
441	self.availableLayers[m] = [l]
442
443	else:
444	# m exists -> append
445	if l not in self.availableLayers[m]:
446	self.availableLayers[m].append()
447
448	except:
449	pass
450
451	def getAvailableLayersJson(self):
452	"""returns available layers as JSON string."""
453	return json.dumps(self.availableLayers)
454
455
456	def getInfo_xml(self,url,mode):
457	"""returns info about the document as XML"""
458	if not self.digilibBaseUrl:
459	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
460
461	docinfo = self.getDocinfo(mode=mode,url=url)
462	pt = getattr(self.template, 'info_xml')
463	return pt(docinfo=docinfo)
464
465	def getAuthenticatedUser(self, anon=None):
466	"""returns the authenticated user object or None. (ignores Zopes anonymous user)"""
467	user = getSecurityManager().getUser()
468	if user is not None and user.getUserName() != "Anonymous User":
469	return user
470	else:
471	return anon
472
473	def isAccessible(self, docinfo):
474	"""returns if access to the resource is granted"""
475	access = docinfo.get('accessType', None)
476	logging.debug("documentViewer (accessOK) access type %s"%access)
477	if access == 'free':
478	logging.debug("documentViewer (accessOK) access is free")
479	return True
480
481	elif access is None or access in self.authgroups:
482	# only local access -- only logged in users
483	user = self.getAuthenticatedUser()
484	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
485	return (user is not None)
486
487	logging.error("documentViewer (accessOK) unknown access type %s"%access)
488	return False
489
490
491	def getDocinfo(self, mode, url, tocMode=None):
492	"""returns docinfo depending on mode"""
493	logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
494	# look for cached docinfo in session
495	if self.REQUEST.SESSION.has_key('docinfo'):
496	docinfo = self.REQUEST.SESSION['docinfo']
497	# check if its still current
498	if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
499	logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
500	return docinfo
501
502	# new docinfo
503	docinfo = {'mode': mode, 'url': url}
504	# add self url
505	docinfo['viewerUrl'] = self.getDocumentViewerURL()
506	docinfo['digilibBaseUrl'] = self.digilibBaseUrl
507	# get index.meta DOM
508	docUrl = None
509	metaDom = None
510	if mode=="texttool":
511	# url points to document dir or index.meta
512	metaDom = self.metadataService.getDomFromPathOrUrl(url)
513	docUrl = url.replace('/index.meta', '')
514	if metaDom is None:
515	raise IOError("Unable to find index.meta for mode=texttool!")
516
517	elif mode=="imagepath":
518	# url points to folder with images, index.meta optional
519	# asssume index.meta in parent dir
520	docUrl = getParentPath(url)
521	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
522
523	elif mode=="filepath":
524	# url points to image file, index.meta optional
525	# asssume index.meta is two path segments up
526	docUrl = getParentPath(url, 2)
527	metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
528
529	else:
530	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
531	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
532
533	docinfo['documentUrl'] = docUrl
534	# process index.meta contents
535	if metaDom is not None and metaDom.tag == 'resource':
536	# document directory name and path
537	resource = self.metadataService.getResourceData(dom=metaDom)
538	if resource:
539	docinfo = self.getDocinfoFromResource(docinfo, resource)
540
541	# texttool info
542	texttool = self.metadataService.getTexttoolData(dom=metaDom)
543	if texttool:
544	docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
545	# document info (including toc) from full text
546	if docinfo.get('textURLPath', None):
547	docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)
548
549	# bib info
550	bib = self.metadataService.getBibData(dom=metaDom)
551	if bib:
552	docinfo = self.getDocinfoFromBib(docinfo, bib)
553	else:
554	# no bib - try info.xml
555	docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
556
557	# auth info
558	access = self.metadataService.getAccessData(dom=metaDom)
559	if access:
560	docinfo = self.getDocinfoFromAccess(docinfo, access)
561
562	# attribution info
563	attribution = self.metadataService.getAttributionData(dom=metaDom)
564	if attribution:
565	logging.debug("getDocinfo: attribution=%s"%repr(attribution))
566	docinfo['attribution'] = attribution
567	#docinfo = self.getDocinfoFromAccess(docinfo, access)
568
569	# copyright info
570	copyright = self.metadataService.getCopyrightData(dom=metaDom)
571	if copyright:
572	logging.debug("getDocinfo: copyright=%s"%repr(copyright))
573	docinfo['copyright'] = copyright
574	#docinfo = self.getDocinfoFromAccess(docinfo, access)
575
576	# image path
577	if mode != 'texttool':
578	# override image path from texttool with url TODO: how about mode=auto?
579	docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
580
581	# number of images from digilib
582	if docinfo.get('imagePath', None):
583	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
584	docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
585
586	# check numPages
587	if docinfo.get('numPages', 0) == 0:
588	if docinfo.get('numTextPages', 0) > 0:
589	# replace with numTextPages (text-only?)
590	docinfo['numPages'] = docinfo['numTextPages']
591
592	logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
593	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
594	# store in session
595	self.REQUEST.SESSION['docinfo'] = docinfo
596	return docinfo
597
598
599	def getDocinfoFromResource(self, docinfo, resource):
600	"""reads contents of resource element into docinfo"""
601	docName = resource.get('name', None)
602	docinfo['documentName'] = docName
603	docPath = resource.get('archive-path', None)
604	if docPath:
605	# clean up document path
606	if docPath[0] != '/':
607	docPath = '/' + docPath
608
609	if docName and (not docPath.endswith(docName)):
610	docPath += "/" + docName
611
612	else:
613	# use docUrl as docPath
614	docUrl = docinfo['documentURL']
615	if not docUrl.startswith('http:'):
616	docPath = docUrl
617	if docPath:
618	# fix URLs starting with /mpiwg/online
619	docPath = docPath.replace('/mpiwg/online', '', 1)
620
621	docinfo['documentPath'] = docPath
622	return docinfo
623
624	def getDocinfoFromTexttool(self, docinfo, texttool):
625	"""reads contents of texttool element into docinfo"""
626	# image dir
627	imageDir = texttool.get('image', None)
628	docPath = docinfo.get('documentPath', None)
629	if imageDir and docPath:
630	#print "image: ", imageDir, " archivepath: ", archivePath
631	imageDir = os.path.join(docPath, imageDir)
632	imageDir = imageDir.replace('/mpiwg/online', '', 1)
633	docinfo['imagePath'] = imageDir
634
635	# old style text URL
636	textUrl = texttool.get('text', None)
637	if textUrl and docPath:
638	if urlparse.urlparse(textUrl)[0] == "": #keine url
639	textUrl = os.path.join(docPath, textUrl)
640
641	docinfo['textURL'] = textUrl
642
643	# new style text-url-path
644	textUrl = texttool.get('text-url-path', None)
645	if textUrl:
646	docinfo['textURLPath'] = textUrl
647
648	# page flow
649	docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
650
651	# odd pages are left
652	docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
653
654	# number of title page (0: not defined)
655	docinfo['titlePage'] = texttool.get('title-scan-no', 0)
656
657	# old presentation stuff
658	presentation = texttool.get('presentation', None)
659	if presentation and docPath:
660	if presentation.startswith('http:'):
661	docinfo['presentationUrl'] = presentation
662	else:
663	docinfo['presentationUrl'] = os.path.join(docPath, presentation)
664
665
666	return docinfo
667
668	def getDocinfoFromBib(self, docinfo, bib):
669	"""reads contents of bib element into docinfo"""
670	logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
671	# put all raw bib fields in dict "bib"
672	docinfo['bib'] = bib
673	bibtype = bib.get('@type', None)
674	docinfo['bibType'] = bibtype
675	# also store DC metadata for convenience
676	dc = self.metadataService.getDCMappedData(bib)
677	docinfo['creator'] = dc.get('creator',None)
678	docinfo['title'] = dc.get('title',None)
679	docinfo['date'] = dc.get('date',None)
680	return docinfo
681
682	def getDocinfoFromAccess(self, docinfo, acc):
683	"""reads contents of access element into docinfo"""
684	#TODO: also read resource type
685	logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
686	try:
687	acctype = acc['@attr']['type']
688	if acctype:
689	access=acctype
690	if access in ['group', 'institution']:
691	access = acc['name'].lower()
692
693	docinfo['accessType'] = access
694
695	except:
696	pass
697
698	return docinfo
699
700	def getDocinfoFromDigilib(self, docinfo, path):
701	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
702	# fetch data
703	txt = getHttpData(infoUrl)
704	if not txt:
705	logging.error("Unable to get dir-info from %s"%(infoUrl))
706	return docinfo
707
708	dom = ET.fromstring(txt)
709	size = getText(dom.find("size"))
710	logging.debug("getDocinfoFromDigilib: size=%s"%size)
711	if size:
712	docinfo['numPages'] = int(size)
713	else:
714	docinfo['numPages'] = 0
715
716	# TODO: produce and keep list of image names and numbers
717	return docinfo
718
719
720	def getDocinfoFromPresentationInfoXml(self,docinfo):
721	"""gets DC-like bibliographical information from the presentation entry in texttools"""
722	url = docinfo.get('presentationUrl', None)
723	if not url:
724	logging.error("getDocinfoFromPresentation: no URL!")
725	return docinfo
726
727	dom = None
728	metaUrl = None
729	if url.startswith("http://"):
730	# real URL
731	metaUrl = url
732	else:
733	# online path
734
735	server=self.digilibBaseUrl+"/servlet/Texter?fn="
736	metaUrl=server+url
737
738	txt=getHttpData(metaUrl)
739	if txt is None:
740	logging.error("Unable to read info.xml from %s"%(url))
741	return docinfo
742
743	dom = ET.fromstring(txt)
744	docinfo['creator']=getText(dom.find(".//author"))
745	docinfo['title']=getText(dom.find(".//title"))
746	docinfo['date']=getText(dom.find(".//date"))
747	return docinfo
748
749
750	def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
751	"""returns pageinfo with the given parameters"""
752	logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
753	pageinfo = {}
754	pageinfo['viewMode'] = viewMode
755	# split viewLayer if necessary
756	if isinstance(viewLayer,basestring):
757	viewLayer = viewLayer.split(',')
758
759	if isinstance(viewLayer, list):
760	logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
761	# save (unique) list in viewLayers
762	seen = set()
763	viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
764	pageinfo['viewLayers'] = viewLayers
765	# stringify viewLayer
766	viewLayer = ','.join(viewLayers)
767	else:
768	#create list
769	pageinfo['viewLayers'] = [viewLayer]
770
771	pageinfo['viewLayer'] = viewLayer
772	pageinfo['tocMode'] = tocMode
773
774	# TODO: unify current and pn!
775	current = getInt(current)
776	pageinfo['current'] = current
777	pageinfo['pn'] = current
778	rows = int(rows or self.thumbrows)
779	pageinfo['rows'] = rows
780	cols = int(cols or self.thumbcols)
781	pageinfo['cols'] = cols
782	grpsize = cols * rows
783	pageinfo['groupsize'] = grpsize
784	# is start is empty use one around current
785	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
786	# int(current / grpsize) * grpsize +1))
787	pageinfo['start'] = start
788	# get number of pages
789	np = int(docinfo.get('numPages', 0))
790	if np == 0:
791	# try numTextPages
792	np = docinfo.get('numTextPages', 0)
793	if np != 0:
794	docinfo['numPages'] = np
795
796	# cache table of contents
797	pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
798	pageinfo['numgroups'] = int(np / grpsize)
799	if np % grpsize > 0:
800	pageinfo['numgroups'] += 1
801
802	pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
803	oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
804	# add zeroth page for two columns
805	pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
806	pageinfo['pageZero'] = pageZero
807	pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
808	# more page parameters
809	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
810	if docinfo.get('pageNumbers'):
811	# get original page numbers
812	pageNumber = docinfo['pageNumbers'].get(current, None)
813	if pageNumber is not None:
814	pageinfo['pageNumberOrig'] = pageNumber['no']
815	pageinfo['pageNumberOrigNorm'] = pageNumber['non']
816
817	# cache search results
818	pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
819	query = self.REQUEST.get('query',None)
820	pageinfo['query'] = query
821	if query:
822	queryType = self.REQUEST.get('queryType', 'fulltextMorph')
823	pageinfo['queryType'] = queryType
824	pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
825	self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
826
827	# highlighting
828	highlightQuery = self.REQUEST.get('highlightQuery', None)
829	if highlightQuery:
830	pageinfo['highlightQuery'] = highlightQuery
831	pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
832	pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
833
834	return pageinfo
835
836
837	def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
838	"""returns dict with array of page informations for one screenfull of thumbnails"""
839	batch = {}
840	grpsize = rows * cols
841	if maxIdx == 0:
842	maxIdx = start + grpsize
843
844	nb = int(math.ceil(maxIdx / float(grpsize)))
845	# list of all batch start and end points
846	batches = []
847	if pageZero:
848	ofs = 0
849	else:
850	ofs = 1
851
852	for i in range(nb):
853	s = i * grpsize + ofs
854	e = min((i + 1) * grpsize + ofs - 1, maxIdx)
855	batches.append({'start':s, 'end':e})
856
857	batch['batches'] = batches
858
859	pages = []
860	if pageZero and start == 1:
861	# correct beginning
862	idx = 0
863	else:
864	idx = start
865
866	for r in range(rows):
867	row = []
868	for c in range(cols):
869	if idx < minIdx or idx > maxIdx:
870	page = {'idx':None}
871	else:
872	page = {'idx':idx}
873
874	idx += 1
875	if pageFlowLtr:
876	row.append(page)
877	else:
878	row.insert(0, page)
879
880	pages.append(row)
881
882	if start > 1:
883	batch['prevStart'] = max(start - grpsize, 1)
884	else:
885	batch['prevStart'] = None
886
887	if start + grpsize <= maxIdx:
888	batch['nextStart'] = start + grpsize
889	else:
890	batch['nextStart'] = None
891
892	batch['pages'] = pages
893	return batch
894
895	def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
896	"""returns dict with information for one screenfull of data."""
897	batch = {}
898	if end == 0:
899	end = start + size
900
901	nb = int(math.ceil(end / float(size)))
902	# list of all batch start and end points
903	batches = []
904	for i in range(nb):
905	s = i * size + 1
906	e = min((i + 1) * size, end)
907	batches.append({'start':s, 'end':e})
908
909	batch['batches'] = batches
910	# list of elements in this batch
911	this = []
912	j = 0
913	for i in range(start, min(start+size, end+1)):
914	if data:
915	if fullData:
916	d = data.get(i, None)
917	else:
918	d = data.get(j, None)
919	j += 1
920
921	else:
922	d = i+1
923
924	this.append(d)
925
926	batch['this'] = this
927	if start > 1:
928	batch['prevStart'] = max(start - size, 1)
929	else:
930	batch['prevStart'] = None
931
932	if start + size < end:
933	batch['nextStart'] = start + size
934	else:
935	batch['nextStart'] = None
936
937	logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
938	return batch
939
940
941	security.declareProtected('View management screens','changeDocumentViewerForm')
942	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
943
944	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
945	"""init document viewer"""
946	self.title=title
947	self.digilibBaseUrl = digilibBaseUrl
948	self.thumbrows = thumbrows
949	self.thumbcols = thumbcols
950	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
951	try:
952	# assume MetaDataFolder instance is called metadata
953	self.metadataService = getattr(self, 'metadata')
954	except Exception, e:
955	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
956
957	self.setAvailableLayers(availableLayers)
958
959	if RESPONSE is not None:
960	RESPONSE.redirect('manage_main')
961
962	def manage_AddDocumentViewerForm(self):
963	"""add the viewer form"""
964	pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
965	return pt()
966
967	def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
968	"""add the viewer"""
969	newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
970	self._setObject(id,newObj)
971
972	if RESPONSE is not None:
973	RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: