Context Navigation

source: documentViewer/documentViewer.py @ 532:0b8bed1223ad

Last change on this file since 532:0b8bed1223ad was 532:0b8bed1223ad, checked in by casties, 12 years ago
new layout and styles.
File size: 37.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The ``encoding`` argument is accepted for interface compatibility
    but is not used: the node is serialized with ElementTree's default
    settings.
    """
    # NOTE: the former 4Suite variant (Ft.Xml.Domlette.Print into a
    # cStringIO stream) passed ``encoding`` on; the ElementTree call
    # below does not.
    return ET.tostring(node)
33
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    @param self: object providing REQUEST.get_header()
    @return: dict with the raw header ('ua'), the boolean flags 'isIE',
        'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' and
        the version strings 'versFirefox', 'versIE', 'versSafariChrome',
        'versOpera' (empty string when not detected).
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the check
    uaStr = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in uaStr:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in uaStr

    # rest of the header starting at the first "(" resp. the first ")".
    # find() returns -1 when the character is missing, which yields just
    # the last character; the index lookups below then fail and the
    # corresponding version stays empty.
    fromParen = uaStr[uaStr.find('('):]
    afterParen = uaStr[uaStr.find(')'):]

    # Safari or Chrome identification: tokens after the second "(...)" group
    try:
        secondGroup = afterParen[afterParen.find('('):]
        tail = secondGroup[secondGroup.find(')'):]
        tokens = tail.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry, e.g. "MSIE 8.0"
    try:
        entry = fromParen.split("; ")[1]
        if "MSIE" in entry:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ")".
    # The token is computed here so that the result does not depend on
    # the Safari/Chrome detection above having succeeded.
    try:
        product = afterParen.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in uaStr:
            rest = fromParen[fromParen.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in uaStr
    bt['isWin'] = 'Windows' in uaStr
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
94
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are ignored; segments are separated by '/'.
    """
    segments = path.rstrip('/').split('/')
    # drop the last cnt segments and put the rest back together
    return '/'.join(segments[:-cnt])
101
102	##
103	## documentViewer class
104	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a 'Configuration' tab to the management tabs inherited from Folder
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types: viewMode -> list of layer names (or None)
    builtinLayers = {
        'text': ['dict', 'search', 'gis', 'annotator'],
        'xml': None,
        'images': None,
        'index': None,
    }
    # default for instances that have not been configured (same object as builtinLayers)
    availableLayers = builtinLayers
    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
149
150
151	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
152	"""init document viewer"""
153	self.id=id
154	self.title=title
155	self.thumbcols = thumbcols
156	self.thumbrows = thumbrows
157	# authgroups is list of authorized groups (delimited by ,)
158	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
159	# create template folder so we can always use template.something
160
161	templateFolder = Folder('template')
162	self['template'] = templateFolder # Zope-2.12 style
163	#self._setObject('template',templateFolder) # old style
164	try:
165	import MpdlXmlTextServer
166	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
167	templateFolder['fulltextclient'] = textServer
168	#templateFolder._setObject('fulltextclient',textServer)
169	except Exception, e:
170	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
171
172	try:
173	from Products.zogiLib.zogiLib import zogiLib
174	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
175	templateFolder['zogilib'] = zogilib
176	#templateFolder._setObject('zogilib',zogilib)
177	except Exception, e:
178	logging.error("Unable to create zogiLib for zogilib: "+str(e))
179
180	try:
181	# assume MetaDataFolder instance is called metadata
182	self.metadataService = getattr(self, 'metadata')
183	except Exception, e:
184	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
185
186	if digilibBaseUrl is not None:
187	self.digilibBaseUrl = digilibBaseUrl
188
189
190	# proxy text server methods to fulltextclient
191	def getTextPage(self, **args):
192	"""returns full text content of page"""
193	return self.template.fulltextclient.getTextPage(**args)
194
195	def getSearchResults(self, **args):
196	"""loads list of search results and stores XML in docinfo"""
197	return self.template.fulltextclient.getSearchResults(**args)
198
199	def getResultsPage(self, **args):
200	"""returns one page of the search results"""
201	return self.template.fulltextclient.getResultsPage(**args)
202
203	def getTextInfo(self, **args):
204	"""returns document info from the text server"""
205	return self.template.fulltextclient.getTextInfo(**args)
206
207	def getToc(self, **args):
208	"""loads table of contents and stores XML in docinfo"""
209	return self.template.fulltextclient.getToc(**args)
210
211	def getTocPage(self, **args):
212	"""returns one page of the table of contents"""
213	return self.template.fulltextclient.getTocPage(**args)
214
215	def getPlacesOnPage(self, **args):
216	"""get list of gis places on one page"""
217	return self.template.fulltextclient.getPlacesOnPage(**args)
218
219	#WTF?
220	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
221	security.declareProtected('View','thumbs_rss')
222	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
223	'''
224	view it
225	@param mode: defines how to access the document behind url
226	@param url: url which contains display information
227	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
228
229	'''
230	logging.debug("HHHHHHHHHHHHHH:load the rss")
231	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
232
233	if not hasattr(self, 'template'):
234	# create template folder if it doesn't exist
235	self.manage_addFolder('template')
236
237	if not self.digilibBaseUrl:
238	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
239
240	docinfo = self.getDocinfo(mode=mode,url=url)
241	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
242	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
243	''' ZDES '''
244	pt = getattr(self.template, 'thumbs_main_rss')
245
246	if viewMode=="auto": # automodus gewaehlt
247	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
248	viewMode="text"
249	else:
250	viewMode="images"
251
252	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
253
254
255	security.declareProtected('View','index_html')
256	def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
257	"""
258	view page
259	@param url: url which contains display information
260	@param mode: defines how to access the document behind url
261	@param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
262	@param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
263	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
264	"""
265
266	logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))
267
268	if not hasattr(self, 'template'):
269	# this won't work
270	logging.error("template folder missing!")
271	return "ERROR: template folder missing!"
272
273	if not getattr(self, 'digilibBaseUrl', None):
274	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
275
276	docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
277
278	# auto viewMode: text if there is a text else images
279	if viewMode=="auto":
280	if docinfo.get('textURLPath', None):
281	# docinfo.get('textURL', None) not implemented yet
282	viewMode = "text"
283	if viewLayer is None:
284	viewLayer = "dict"
285	else:
286	viewMode = "images"
287
288	elif viewMode == "text_dict":
289	# legacy fix
290	viewMode = "text"
291	viewLayer = "dict"
292
293	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)
294
295	# get template /template/viewer_$viewMode
296	pt = getattr(self.template, 'viewer_%s'%viewMode, None)
297	if pt is None:
298	logging.error("No template for viewMode=%s!"%viewMode)
299	# TODO: error page?
300	return "No template for viewMode=%s!"%viewMode
301
302	# and execute with parameters
303	return pt(docinfo=docinfo, pageinfo=pageinfo)
304
305	#WTF?
306	def generateMarks(self,mk):
307	ret=""
308	if mk is None:
309	return ""
310	if not isinstance(mk, list):
311	mk=[mk]
312	for m in mk:
313	ret+="mk=%s"%m
314	return ret
315
316
317	def getAvailableLayers(self):
318	"""returns dict with list of available layers per viewMode"""
319	return self.availableLayers
320
321	def getBrowser(self):
322	"""getBrowser the version of browser """
323	bt = browserCheck(self)
324	logging.debug("BROWSER VERSION: %s"%(bt))
325	return bt
326
327	def findDigilibUrl(self):
328	"""try to get the digilib URL from zogilib"""
329	url = self.template.zogilib.getDLBaseUrl()
330	return url
331
332	def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
333	"""returns URL to digilib Scaler with params"""
334	url = None
335	if docinfo is not None:
336	url = docinfo.get('imageURL', None)
337
338	if url is None:
339	url = "%s/servlet/Scaler?"%self.digilibBaseUrl
340	if fn is None and docinfo is not None:
341	fn = docinfo.get('imagePath','')
342
343	url += "fn=%s"%fn
344
345	if pn:
346	url += "&pn=%s"%pn
347
348	url += "&dw=%s&dh=%s"%(dw,dh)
349	return url
350
351	def getDocumentViewerURL(self):
352	"""returns the URL of this instance"""
353	return self.absolute_url()
354
355	def getStyle(self, idx, selected, style=""):
356	"""returns a string with the given style and append 'sel' if idx == selected."""
357	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
358	if idx == selected:
359	return style + 'sel'
360	else:
361	return style
362
363	def getParams(self, param=None, val=None, params=None, duplicates=None):
364	"""returns dict with URL parameters.
365
366	Takes URL parameters and additionally param=val or dict params.
367	Deletes key if value is None."""
368	# copy existing request params
369	newParams=self.REQUEST.form.copy()
370	# change single param
371	if param is not None:
372	if val is None:
373	if newParams.has_key(param):
374	del newParams[param]
375	else:
376	newParams[param] = str(val)
377
378	# change more params
379	if params is not None:
380	for (k, v) in params.items():
381	if v is None:
382	# val=None removes param
383	if newParams.has_key(k):
384	del newParams[k]
385
386	else:
387	newParams[k] = v
388
389	if duplicates:
390	# eliminate lists (coming from duplicate keys)
391	for (k,v) in newParams.items():
392	if isinstance(v, list):
393	if duplicates == 'comma':
394	# make comma-separated list of non-empty entries
395	newParams[k] = ','.join([t for t in v if t])
396	elif duplicates == 'first':
397	# take first non-empty entry
398	newParams[k] = [t for t in v if t][0]
399
400	return newParams
401
402	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
403	"""returns URL to documentviewer with parameter param set to val or from dict params"""
404	urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
405	# quote values and assemble into query string (not escaping '/')
406	ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
407	if baseUrl is None:
408	baseUrl = self.getDocumentViewerURL()
409
410	url = "%s?%s"%(baseUrl, ps)
411	return url
412
413	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
414	"""link to documentviewer with parameter param set to val"""
415	return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
416
417
418	def setAvailableLayers(self, newLayerString=None):
419	"""sets availableLayers to newLayerString or tries to autodetect available layers.
420	assumes layer templates have the form layer_{m}_{l} for layer l in mode m.
421	newLayerString is parsed as JSON."""
422	if newLayerString is not None:
423	try:
424	layers = json.loads(newLayerString)
425	if 'text' in layers and 'images' in layers:
426	self.availableLayers = layers
427	return
428	except:
429	pass
430
431	logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))
432
433	# start with builtin layers
434	self.availableLayers = self.builtinLayers.copy()
435	# add layers from templates
436	for t in self.template:
437	if t.startswith('layer_'):
438	try:
439	(x, m, l) = t.split('_', 3)
440	if m not in self.availableLayers:
441	# mode m doesn't exist -> new list
442	self.availableLayers[m] = [l]
443
444	else:
445	# m exists -> append
446	if l not in self.availableLayers[m]:
447	self.availableLayers[m].append()
448
449	except:
450	pass
451
452	def getAvailableLayersJson(self):
453	"""returns available layers as JSON string."""
454	return json.dumps(self.availableLayers)
455
456
457	def getInfo_xml(self,url,mode):
458	"""returns info about the document as XML"""
459	if not self.digilibBaseUrl:
460	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
461
462	docinfo = self.getDocinfo(mode=mode,url=url)
463	pt = getattr(self.template, 'info_xml')
464	return pt(docinfo=docinfo)
465
466	def getAuthenticatedUser(self, anon=None):
467	"""returns the authenticated user object or None. (ignores Zopes anonymous user)"""
468	user = getSecurityManager().getUser()
469	if user is not None and user.getUserName() != "Anonymous User":
470	return user
471	else:
472	return anon
473
474	def isAccessible(self, docinfo):
475	"""returns if access to the resource is granted"""
476	access = docinfo.get('accessType', None)
477	logging.debug("documentViewer (accessOK) access type %s"%access)
478	if access == 'free':
479	logging.debug("documentViewer (accessOK) access is free")
480	return True
481
482	elif access is None or access in self.authgroups:
483	# only local access -- only logged in users
484	user = self.getAuthenticatedUser()
485	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
486	return (user is not None)
487
488	logging.error("documentViewer (accessOK) unknown access type %s"%access)
489	return False
490
491
    def getDocinfo(self, mode, url, tocMode=None):
        """returns docinfo depending on mode

        Returns the dict describing the document behind url. A docinfo
        cached in the session is reused when its 'mode' and 'url' match;
        otherwise a new one is assembled from index.meta (via
        self.metadataService), the text server and digilib.

        @param mode: 'texttool' (url points to document dir or index.meta),
            'imagepath' (url points to folder with images) or
            'filepath' (url points to image file)
        @param url: location of the document, see mode
        @param tocMode: passed as mode to getTextInfo() when the document
            has a 'textURLPath'
        @return: docinfo dict; it is also stored in REQUEST.SESSION['docinfo']
        @raise IOError: mode is 'texttool' and no index.meta DOM was found
        @raise ValueError: mode is none of the three known modes
        """
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode=="texttool":
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode=="imagepath":
            # url points to folder with images, index.meta optional
            # asssume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        elif mode=="filepath":
            # url points to image file, index.meta optional
            # asssume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl
        # process index.meta contents
        # (only when the root element of index.meta is <resource>)
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info
            texttool = self.metadataService.getTexttoolData(dom=metaDom)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
                # document info (including toc) from full text
                if docinfo.get('textURLPath', None):
                    docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                docinfo = self.getDocinfoFromBib(docinfo, bib)
            else:
                # no bib - try info.xml
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution
                #docinfo = self.getDocinfoFromAccess(docinfo, access)

            # copyright info
            copyright = self.metadataService.getCopyrightData(dom=metaDom)
            if copyright:
                logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                docinfo['copyright'] = copyright
                #docinfo = self.getDocinfoFromAccess(docinfo, access)

        # image path
        if mode != 'texttool':
            # override image path from texttool with url parameter TODO: how about mode=auto?
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

        # check numPages
        if docinfo.get('numPages', 0) == 0:
            if docinfo.get('numTextPages', 0) > 0:
                # replace with numTextPages (text-only?)
                docinfo['numPages'] = docinfo['numTextPages']

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo
606
607
608	def getDocinfoFromResource(self, docinfo, resource):
609	"""reads contents of resource element into docinfo"""
610	docName = resource.get('name', None)
611	docinfo['documentName'] = docName
612	docPath = resource.get('archive-path', None)
613	if docPath:
614	# clean up document path
615	if docPath[0] != '/':
616	docPath = '/' + docPath
617
618	if docName and (not docPath.endswith(docName)):
619	docPath += "/" + docName
620
621	else:
622	# use docUrl as docPath
623	docUrl = docinfo['documentURL']
624	if not docUrl.startswith('http:'):
625	docPath = docUrl
626	if docPath:
627	# fix URLs starting with /mpiwg/online
628	docPath = docPath.replace('/mpiwg/online', '', 1)
629
630	docinfo['documentPath'] = docPath
631	return docinfo
632
633	def getDocinfoFromTexttool(self, docinfo, texttool):
634	"""reads contents of texttool element into docinfo"""
635	# image dir
636	imageDir = texttool.get('image', None)
637	docPath = docinfo.get('documentPath', None)
638	if imageDir and docPath:
639	#print "image: ", imageDir, " archivepath: ", archivePath
640	imageDir = os.path.join(docPath, imageDir)
641	imageDir = imageDir.replace('/mpiwg/online', '', 1)
642	docinfo['imagePath'] = imageDir
643
644	# old style text URL
645	textUrl = texttool.get('text', None)
646	if textUrl and docPath:
647	if urlparse.urlparse(textUrl)[0] == "": #keine url
648	textUrl = os.path.join(docPath, textUrl)
649
650	docinfo['textURL'] = textUrl
651
652	# new style text-url-path
653	textUrl = texttool.get('text-url-path', None)
654	if textUrl:
655	docinfo['textURLPath'] = textUrl
656
657	# page flow
658	docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
659
660	# odd pages are left
661	docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
662
663	# number of title page (0: not defined)
664	docinfo['titlePage'] = texttool.get('title-scan-no', 0)
665
666	# old presentation stuff
667	presentation = texttool.get('presentation', None)
668	if presentation and docPath:
669	if presentation.startswith('http:'):
670	docinfo['presentationUrl'] = presentation
671	else:
672	docinfo['presentationUrl'] = os.path.join(docPath, presentation)
673
674	return docinfo
675
676	def getDocinfoFromBib(self, docinfo, bib):
677	"""reads contents of bib element into docinfo"""
678	logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
679	# put all raw bib fields in dict "bib"
680	docinfo['bib'] = bib
681	bibtype = bib.get('@type', None)
682	docinfo['bibType'] = bibtype
683	# also store DC metadata for convenience
684	dc = self.metadataService.getDCMappedData(bib)
685	docinfo['creator'] = dc.get('creator',None)
686	docinfo['title'] = dc.get('title',None)
687	docinfo['date'] = dc.get('date',None)
688	return docinfo
689
690	def getDocinfoFromAccess(self, docinfo, acc):
691	"""reads contents of access element into docinfo"""
692	#TODO: also read resource type
693	logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
694	try:
695	acctype = acc['@attr']['type']
696	if acctype:
697	access=acctype
698	if access in ['group', 'institution']:
699	access = acc['name'].lower()
700
701	docinfo['accessType'] = access
702
703	except:
704	pass
705
706	return docinfo
707
708	def getDocinfoFromDigilib(self, docinfo, path):
709	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
710	# fetch data
711	txt = getHttpData(infoUrl)
712	if not txt:
713	logging.error("Unable to get dir-info from %s"%(infoUrl))
714	return docinfo
715
716	dom = ET.fromstring(txt)
717	size = getText(dom.find("size"))
718	logging.debug("getDocinfoFromDigilib: size=%s"%size)
719	if size:
720	docinfo['numPages'] = int(size)
721	else:
722	docinfo['numPages'] = 0
723
724	# TODO: produce and keep list of image names and numbers
725	return docinfo
726
727
728	def getDocinfoFromPresentationInfoXml(self,docinfo):
729	"""gets DC-like bibliographical information from the presentation entry in texttools"""
730	url = docinfo.get('presentationUrl', None)
731	if not url:
732	logging.error("getDocinfoFromPresentation: no URL!")
733	return docinfo
734
735	dom = None
736	metaUrl = None
737	if url.startswith("http://"):
738	# real URL
739	metaUrl = url
740	else:
741	# online path
742
743	server=self.digilibBaseUrl+"/servlet/Texter?fn="
744	metaUrl=server+url
745
746	txt=getHttpData(metaUrl)
747	if txt is None:
748	logging.error("Unable to read info.xml from %s"%(url))
749	return docinfo
750
751	dom = ET.fromstring(txt)
752	docinfo['creator']=getText(dom.find(".//author"))
753	docinfo['title']=getText(dom.find(".//title"))
754	docinfo['date']=getText(dom.find(".//date"))
755	return docinfo
756
757
    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
        """returns pageinfo with the given parameters

        Builds the dict with all page related parameters for the
        templates. Further parameters (tocPageSize, characterNormalization,
        resultPageSize, query, queryType, resultStart, highlightQuery,
        highlightElement, highlightElementPos) are read from self.REQUEST.

        @param current: number of the current page (stored as 'current' and 'pn')
        @param start: first page of the thumbnail batch; when it is not a
            usable number the start of the batch containing current is used
        @param rows: thumbnail rows (default self.thumbrows)
        @param cols: thumbnail columns (default self.thumbcols)
        @param docinfo: docinfo dict; 'numPages' is set from 'numTextPages'
            when it is 0 or missing
        @param viewMode: stored as 'viewMode'
        @param viewLayer: layer name, comma-separated string or list of
            layer names; stored as string ('viewLayer') and list ('viewLayers')
        @param tocMode: stored as 'tocMode'
        @return: pageinfo dict
        """
        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
        pageinfo = {}
        pageinfo['viewMode'] = viewMode
        # split viewLayer if necessary
        if isinstance(viewLayer,basestring):
            viewLayer = viewLayer.split(',')

        if isinstance(viewLayer, list):
            logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
            # save (unique) list in viewLayers
            # (seen.add() returns None, so "not seen.add(l)" is always true
            # and only records l; empty names are dropped, order is kept)
            seen = set()
            viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
            pageinfo['viewLayers'] = viewLayers
            # stringify viewLayer
            viewLayer = ','.join(viewLayers)
        else:
            #create list
            pageinfo['viewLayers'] = [viewLayer]

        pageinfo['viewLayer'] = viewLayer
        pageinfo['tocMode'] = tocMode

        # TODO: unify current and pn!
        current = getInt(current)
        pageinfo['current'] = current
        pageinfo['pn'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        # number of thumbnails on one screen
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # is start is empty use one around current
        # (default is the first page of the group that contains current)
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        # int(current / grpsize) * grpsize +1))
        pageinfo['start'] = start
        # get number of pages
        np = int(docinfo.get('numPages', 0))
        if np == 0:
            # try numTextPages
            np = docinfo.get('numTextPages', 0)
            if np != 0:
                docinfo['numPages'] = np

        # cache table of contents
        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
        # number of groups, rounded up
        pageinfo['numgroups'] = int(np / grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
        # more page parameters
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        if docinfo.get('pageNumbers'):
            # get original page numbers
            pageNumber = docinfo['pageNumbers'].get(current, None)
            if pageNumber is not None:
                pageinfo['pageNumberOrig'] = pageNumber['no']
                pageinfo['pageNumberOrigNorm'] = pageNumber['non']

        # cache search results
        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
        query = self.REQUEST.get('query',None)
        pageinfo['query'] = query
        if query:
            queryType = self.REQUEST.get('queryType', 'fulltextMorph')
            pageinfo['queryType'] = queryType
            pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
            # the return value is not used here
            self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

        # highlighting
        highlightQuery = self.REQUEST.get('highlightQuery', None)
        if highlightQuery:
            pageinfo['highlightQuery'] = highlightQuery
            pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
            pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

        return pageinfo
843
844
845	def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
846	"""returns dict with array of page informations for one screenfull of thumbnails"""
847	batch = {}
848	grpsize = rows * cols
849	if maxIdx == 0:
850	maxIdx = start + grpsize
851
852	nb = int(math.ceil(maxIdx / float(grpsize)))
853	# list of all batch start and end points
854	batches = []
855	if pageZero:
856	ofs = 0
857	else:
858	ofs = 1
859
860	for i in range(nb):
861	s = i * grpsize + ofs
862	e = min((i + 1) * grpsize + ofs - 1, maxIdx)
863	batches.append({'start':s, 'end':e})
864
865	batch['batches'] = batches
866
867	pages = []
868	if pageZero and start == 1:
869	# correct beginning
870	idx = 0
871	else:
872	idx = start
873
874	for r in range(rows):
875	row = []
876	for c in range(cols):
877	if idx < minIdx or idx > maxIdx:
878	page = {'idx':None}
879	else:
880	page = {'idx':idx}
881
882	idx += 1
883	if pageFlowLtr:
884	row.append(page)
885	else:
886	row.insert(0, page)
887
888	pages.append(row)
889
890	if start > 1:
891	batch['prevStart'] = max(start - grpsize, 1)
892	else:
893	batch['prevStart'] = None
894
895	if start + grpsize <= maxIdx:
896	batch['nextStart'] = start + grpsize
897	else:
898	batch['nextStart'] = None
899
900	batch['pages'] = pages
901	return batch
902
903	def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
904	"""returns dict with information for one screenfull of data."""
905	batch = {}
906	if end == 0:
907	end = start + size
908
909	nb = int(math.ceil(end / float(size)))
910	# list of all batch start and end points
911	batches = []
912	for i in range(nb):
913	s = i * size + 1
914	e = min((i + 1) * size, end)
915	batches.append({'start':s, 'end':e})
916
917	batch['batches'] = batches
918	# list of elements in this batch
919	this = []
920	j = 0
921	for i in range(start, min(start+size, end+1)):
922	if data:
923	if fullData:
924	d = data.get(i, None)
925	else:
926	d = data.get(j, None)
927	j += 1
928
929	else:
930	d = i+1
931
932	this.append(d)
933
934	batch['this'] = this
935	if start > 1:
936	batch['prevStart'] = max(start - size, 1)
937	else:
938	batch['prevStart'] = None
939
940	if start + size < end:
941	batch['nextStart'] = start + size
942	else:
943	batch['nextStart'] = None
944
945	logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
946	return batch
947
948
949	security.declareProtected('View management screens','changeDocumentViewerForm')
950	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
951
952	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
953	"""init document viewer"""
954	self.title=title
955	self.digilibBaseUrl = digilibBaseUrl
956	self.thumbrows = thumbrows
957	self.thumbcols = thumbcols
958	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
959	try:
960	# assume MetaDataFolder instance is called metadata
961	self.metadataService = getattr(self, 'metadata')
962	except Exception, e:
963	logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
964
965	self.setAvailableLayers(availableLayers)
966
967	if RESPONSE is not None:
968	RESPONSE.redirect('manage_main')
969
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before calling it
    return form.__of__(self)()
974
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    @param id: id of the new viewer
    @param imageScalerUrl: passed to the documentViewer constructor
    @param textServerName: passed to the documentViewer constructor
    @param title: title of the new viewer
    @param RESPONSE: when given, it is redirected to 'manage_main'
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: