Context Navigation

source: documentViewer/documentViewer.py @ 525:70c3ae5eac7c

Last change on this file since 525:70c3ae5eac7c was 525:70c3ae5eac7c, checked in by casties, 12 years ago
layers can have their own templates. first version of annotations layer.
File size: 35.1 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as an XML string.

    node: ElementTree element to serialize.
    encoding: character encoding handed to ElementTree (default "utf-8").
    """
    # the encoding argument used to be ignored, so the result was always
    # produced with ElementTree's default encoding
    return ET.tostring(node, encoding=encoding)
32
def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    Returns a dict with the raw header ('ua'), the boolean flags 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML'
    (always False) and the version strings 'versFirefox', 'versIE',
    'versSafariChrome' and 'versOpera', which stay empty when nothing
    could be parsed.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the viewer
    ua = rawUa or ""
    bt = {
        'ua': rawUa,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }
    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # rest of the header from the first '(' resp. the first ')' on;
    # str.find() returns -1 when missing, which leaves the last character
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification: tokens after the second ')'
    nav2 = nav1[nav1.find('('):]
    nav3 = nav2[nav2.find(')'):]
    webkitTokens = nav3.split(" ")
    if len(webkitTokens) > 2 and "Safari" in webkitTokens[2]:
        versParts = webkitTokens[1].split("/")
        if len(versParts) > 1:
            bt['versSafariChrome'] = versParts[1]

    # IE identification: second "; "-separated entry inside the parentheses
    platformParts = nav.split("; ")
    if len(platformParts) > 1 and "MSIE" in platformParts[1]:
        ieParts = platformParts[1].split(" ")
        if len(ieParts) > 1:
            bt['versIE'] = ieParts[1]

    # Firefox identification: third token after the first ')'
    geckoTokens = nav1.split(" ")
    if len(geckoTokens) > 2 and "Firefox" in geckoTokens[2]:
        ffParts = geckoTokens[2].split("/")
        if len(ffParts) > 1:
            logging.debug("FIREFOX: %s"%(ffParts[1]))
            # only the first three characters are kept (e.g. "12.")
            bt['versFirefox'] = ffParts[1][0:3]

    # Opera identification
    if "Opera" in ua:
        operaParts = nav[nav.find(')'):].split("/")
        if len(operaParts) > 2:
            bt['versOpera'] = operaParts[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False
    return bt
93
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    A trailing '/' is ignored. For cnt <= 0 the path is returned without
    being shortened.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice [0:-0] would be empty and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
104	class documentViewer(Folder):
105	"""document viewer"""
106	meta_type="Document viewer"
107
108	security=ClassSecurityInfo()
109	manage_options=Folder.manage_options+(
110	{'label':'Configuration','action':'changeDocumentViewerForm'},
111	)
112
113	metadataService = None
114	"""MetaDataFolder instance"""
115
116
117	#
118	# templates and forms
119	#
120	# viewMode templates
121	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
122	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
123	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
124	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
125	# available layer types
126	availableLayers = {'text': ['dict','search','gis','annotator'],
127	'xml': None, 'images': None, 'index': None}
128	# layer templates
129	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
130	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
131	# toc templates
132	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
133	toc_text = PageTemplateFile('zpt/toc_text', globals())
134	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
135	toc_none = PageTemplateFile('zpt/toc_none', globals())
136	# other templates
137	common_template = PageTemplateFile('zpt/common_template', globals())
138	search_template = PageTemplateFile('zpt/search_template', globals())
139	info_xml = PageTemplateFile('zpt/info_xml', globals())
140	docuviewer_css = ImageFile('css/docuviewer.css',globals())
141	# make ImageFile better for development
142	docuviewer_css.index_html = refreshingImageFileIndexHtml
143	jquery_js = ImageFile('js/jquery.js',globals())
144
145
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and the thumbnail grid size, splits authgroups (a
    comma-separated string) into a list of lower-case group names and
    creates the 'template' folder with the helper objects 'fulltextclient'
    and 'zogilib'. Failures while creating the helpers or looking up
    'metadata' are logged and otherwise ignored. digilibBaseUrl is only
    stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    tplFolder = Folder('template')
    self['template'] = tplFolder # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        tplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        tplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
183
184
185	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
189
def getSearchResults(self, **args):
    """Load the list of search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
193
def getResultsPage(self, **args):
    """Return one page of the search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
197
def getTextInfo(self, **args):
    """Return document info from the text server (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
201
def getToc(self, **args):
    """Load the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
205
def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
209
def getPlacesOnPage(self, **args):
    """Return the list of gis places on one page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
213
214	#WTF?
215	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
216	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the document with the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a text URL and 'images' otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may never have been set, so a plain attribute access
    # could raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # automatic mode: text when a text URL is known, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
248
249
250	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    Render one page of the document with the template viewer_$viewMode.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page

    Returns the rendered template or a plain error string when the
    template folder or the template for viewMode is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    if viewMode == "text_dict":
        # legacy name for text view with dictionary layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if not hasText:
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
298
299	#WTF?
def generateMarks(self,mk):
    # Builds the "mk=..." part of a query string from one mark or a list
    # of marks; returns "" when mk is None.
    # (kept without docstring on purpose: Zope publishes methods that
    # have a docstring)
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk=[mk]
    # several marks have to be separated, otherwise they run together
    # as "mk=amk=b"
    return "&".join(["mk=%s"%m for m in mk])
309
310
def getAvailableLayers(self):
    """Return the dict that maps each viewMode to its list of available layers."""
    layersByMode = self.availableLayers
    return layersByMode
314
def getBrowser(self):
    """Return the browser information dict computed by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
320
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
325
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return the URL of the digilib Scaler for one image.

    fn: image path; taken from docinfo['imagePath'] when None.
    pn: page number; left out of the URL when empty.
    dw, dh: requested width and height.
    docinfo: optional document info; its 'imageURL' is used as the start
        of the URL when present, in which case fn is not appended.
    """
    url = docinfo.get('imageURL', None) if docinfo is not None else None
    if url is None:
        # build the URL from the configured digilib base URL
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    pageParam = "&pn=%s"%pn if pn else ""
    return url + pageParam + "&dw=%s&dh=%s"%(dw,dh)
344
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
348
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
356
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters based on the current request.

    Starts with a copy of the request's form parameters and then applies
    param=val (val is stored as str) and the entries of the dict params.
    A value of None deletes the key.

    duplicates: how to treat list values (coming from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' if there is none); any other false
        value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty
                # entries used to raise IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
395
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL with the request parameters changed by param=val or params.

    The parameters come from getParams; values are quoted (leaving '/'
    unescaped) and joined with paramSep. baseUrl defaults to the URL of
    this viewer.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
406
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return the same link as getLink, with the parameters separated by '&amp;'."""
    # with a plain '&' this method would be identical to getLink; the
    # escaped separator lets the link be embedded in HTML/XML markup
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
410
411
def getInfo_xml(self,url,mode):
    """Return info about the document rendered by the template 'info_xml'.

    url: url which contains display information
    mode: defines how to access the document behind url
    """
    # the attribute may never have been set, so a plain attribute access
    # could raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
420
def isAccessible(self, docinfo):
    """Return whether access to the resource is granted.

    accessType 'free' always grants access. A missing accessType or one
    listed in self.authgroups grants access only to a user whose name is
    not "Anonymous User". Any other accessType is logged as an error and
    denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
441
442
443
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url.

    mode: 'texttool' (url points to document dir or index.meta; index.meta
        is required), 'imagepath' (url points to folder with images) or
        'filepath' (url points to an image file).
    url: location of the document, interpreted according to mode.
    tocMode: passed to getTextInfo as mode.

    A docinfo stored in the session is returned as is when its mode and
    url match. Otherwise a new docinfo is assembled from index.meta and
    digilib and stored in the session.

    Raises IOError when mode is 'texttool' and no index.meta is found and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo with the URLs of this viewer and of digilib
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode in ("imagepath", "filepath"):
        # imagepath: url points to folder with images, index.meta is
        # assumed in the parent dir; filepath: url points to image file,
        # index.meta is assumed two path segments up; optional in both cases
        if mode=="imagepath":
            docUrl = getParentPath(url)
        else:
            docUrl = getParentPath(url, 2)

        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        meta = self.metadataService
        # document directory name and path
        resource = meta.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = meta.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = meta.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = meta.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info and copyright info are stored as they are
        for (key, getterName) in (('attribution', 'getAttributionData'), ('copyright', 'getCopyrightData')):
            value = getattr(meta, getterName)(dom=metaDom)
            if value:
                logging.debug("getDocinfo: %s=%s"%(key, repr(value)))
                docinfo[key] = value

    # image path
    if mode != 'texttool':
        # override image path from texttool with url TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    # check numPages: replace with numTextPages (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
550
551
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute
    and extended by the document name if necessary). Without archive-path
    the document URL from docinfo is used as path unless it is a http(s)
    URL. A leading '/mpiwg/online' is removed from the path.
    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; the key is spelled 'documentUrl' where it
        # is set in getDocinfo, 'documentURL' is only kept as fallback
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
576
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' when
    the corresponding texttool entries are present (relative entries need
    docinfo['documentPath']) and always sets 'pageFlow', 'oddPage' and
    'titlePage' (0: not defined). Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        docinfo['imagePath'] = os.path.join(docPath, imageDir).replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme: path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    newTextUrl = texttool.get('text-url-path', None)
    if newTextUrl:
        docinfo['textURLPath'] = newTextUrl

    # page flow, position of odd pages, number of title page
    for (key, entry, default) in (('pageFlow', 'page-flow', 'ltr'),
                                  ('oddPage', 'odd-scan-position', 'left'),
                                  ('titlePage', 'title-scan-no', 0)):
        docinfo[key] = texttool.get(entry, default)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
620
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as docinfo['bib'], the '@type' entry as
    docinfo['bibType'] and the 'creator', 'title' and 'date' entries of
    the DC mapping supplied by the metadata service. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
634
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the type attribute of acc; for the
    types 'group' and 'institution' the lower-cased name of acc is used
    instead. docinfo stays unchanged when the type is empty or the
    element is incomplete. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # incomplete access element: best effort, but leave a trace
        # instead of swallowing every possible error silently
        logging.debug("getDocinfoFromAccess: incomplete access element: %s"%repr(e))

    return docinfo
652
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches dirInfo-xml.jsp from digilib for the directory `path` and
    # stores the content of its "size" element as docinfo['numPages']
    # (0 when it is empty). Returns docinfo unchanged when no data could
    # be fetched.
    # (kept without docstring on purpose: Zope publishes methods that
    # have a docstring)
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
671
672
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Read bibliographical information from the presentation info.xml.

    Fetches the file named by docinfo['presentationUrl'] (directly for a
    http(s) URL, else through the digilib Texter servlet) and stores the
    text of its author, title and date elements as 'creator', 'title'
    and 'date'. docinfo is returned unchanged when there is no URL or no
    data could be fetched.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # also covers an empty response, which cannot be parsed as XML
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
701
702
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current: current page number.
    start: first page of the thumbnail batch; when empty the batch around
        current is used.
    rows, cols: thumbnail grid size; default to self.thumbrows and
        self.thumbcols.
    docinfo: document info dict; its 'numPages' is updated from
        'numTextPages' when it is 0.
    viewMode, tocMode: stored in pageinfo as given.
    viewLayer: layer name, comma-separated names or list of names.

    Further parameters (tocPageSize, characterNormalization, query,
    queryType, resultPageSize, resultStart, highlightQuery,
    highlightElement, highlightElementPos) are read from the request.
    With a query getSearchResults is called.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list of non-empty layers in viewLayers
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if not layer or layer in seen:
                continue

            seen.add(layer)
            viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # if start is empty use the first page of the group around current
    groupNo = math.ceil(float(current)/float(grpsize))
    start = getInt(start, default=(groupNo*grpsize-(grpsize-1)))
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        # partly filled last group
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        origNumber = docinfo['pageNumbers'].get(current, None)
        if origNumber is not None:
            pageinfo['pageNumberOrig'] = origNumber['no']
            pageinfo['pageNumberOrigNorm'] = origNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
788
789
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    start: index of the first page of this batch.
    rows, cols: size of the thumbnail grid.
    pageFlowLtr: pages are filled into a row left-to-right if true, else
        right-to-left.
    pageZero: batches start with a zeroth page.
    minIdx, maxIdx: first and last valid page index; maxIdx=0 means
        unknown, start + rows * cols is assumed then.

    The result has the keys 'batches' (list of {'start', 'end'} for all
    batches), 'pages' (rows of {'idx'} dicts, idx is None outside of
    minIdx..maxIdx), 'prevStart' and 'nextStart' (None when there is no
    previous/next batch).
    """
    batch = {}
    grpsize = rows * cols
    sizeKnown = (maxIdx != 0)
    if not sizeKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    batch['batches'] = [{'start': i * grpsize + ofs,
                         'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
                        for i in range(nb)]

    if pageZero and start == 1:
        # correct beginning
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx': idx}
            else:
                page = {'idx': None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is a valid page index, so a next batch exists as soon as its
    # first page is <= maxIdx ('<' made a last batch with a single page
    # unreachable); with unknown size there is no next batch, as before
    if sizeKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
847
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    start: index of the first element of this batch.
    size: number of elements per batch.
    end: end index; 0 means start + size.
    data: optional sequence of elements; without data the numbers i+1 of
        the indexes i of this batch are used.
    fullData: data is indexed by absolute position if true, else data
        only holds the elements of this batch.

    The result has the keys 'batches' (list of {'start', 'end'} for all
    batches), 'this' (elements of this batch), 'prevStart' and
    'nextStart' (None when there is no previous/next batch).
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)} for n in range(nb)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        this = [i+1 for i in indexes]
    elif fullData:
        this = [data[i] for i in indexes]
    else:
        this = [data[j] for j in range(len(indexes))]

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {'batches': batches, 'this': this, 'prevStart': prevStart, 'nextStart': nextStart}
891
892
893	security.declareProtected('View management screens','changeDocumentViewerForm')
894	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
895
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of this document viewer.

    Stores title, digilibBaseUrl and the thumbnail grid size, splits
    authgroups (a comma-separated string) into a list of lower-case group
    names and looks up the 'metadata' object again. Redirects to
    'manage_main' when RESPONSE is given.
    """
    # NOTE(review): the defaults for thumbrows/thumbcols are the other way
    # round than in __init__ (thumbcols=2, thumbrows=5) -- confirm
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
911
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
916
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id in this container.

    Redirects to 'manage_main' when RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: