Context Navigation

source: documentViewer/documentViewer.py @ 518:91051b36b9cc

Last change on this file since 518:91051b36b9cc was 518:91051b36b9cc, checked in by casties, 12 years ago
uses xml info from doc-info.xql for table of contents now.
File size: 34.5 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element ``node`` serialized as an XML string.

    ``encoding`` is accepted for interface compatibility only: it is not
    handed to ``ET.tostring``, which is called with its own default.
    """
    # The former 4Suite implementation (Ft.Xml.Domlette.Print into a
    # cStringIO buffer) passed ``encoding`` on; the ElementTree call does not.
    return ET.tostring(node)
32
def browserCheck(self):
    """Inspect the User-Agent header of ``self.REQUEST`` and describe the browser.

    Returns a dict with the keys:
      ua                -- the raw header value (None if the header is missing)
      isIE, isN4        -- flags for 'MSIE' / 'Mozilla/4.' user agents
      versIE, versFirefox, versSafariChrome, versOpera
                        -- version strings, "" when not detected
      isMac, isWin, isIEWin, isIEMac
                        -- platform flags
      staticHTML        -- always False

    A request without User-Agent header no longer raises; all flags then
    keep their defaults.  The version parsing is deliberately unchanged
    (including its quirks) so that known user agents yield the same values.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header yields None: scan an empty string instead
    agent = ua or ""

    isIE = agent.find('MSIE') > -1
    bt = {
        'ua': ua,
        'isIE': isIE,
        # N4 is only considered when the agent is not IE
        'isN4': (not isIE) and agent.find('Mozilla/4.') > -1,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    # NOTE: str.find() returns -1 when nothing matches, so these slices then
    # keep only the last character of the string instead of failing.
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]

    # Safari or Chrome identification
    ie1 = None
    try:
        # an IndexError here (no "; " separated token) skips the Safari and
        # the Firefox detection, exactly as before
        ie = nav.split("; ")[1]
        # third token after the first ')': also used for Firefox below
        ie1 = nav1.split(" ")[2]
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        ie2 = nav3.split(" ")[1]
        ie3 = nav3.split(" ")[2]
        if ie3.find("Safari") > -1:
            bt['versSafariChrome'] = ie2.split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification (needs the token found during the Safari check)
    if ie1 is not None and ie1.find("Firefox") > -1:
        try:
            nav5 = ie1.split("/")[1]
        except IndexError:
            pass
        else:
            logging.debug("FIREFOX: %s"%(nav5))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nav5[0:3]

    # Opera identification
    if agent.find("Opera") > -1:
        try:
            bt['versOpera'] = nav[nav.find(')'):].split("/")[2]
        except IndexError:
            pass

    bt['isMac'] = agent.find('Macintosh') > -1
    bt['isWin'] = agent.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` '/'-separated segments removed.

    Trailing slashes are stripped before the segments are counted.
    """
    segments = path.rstrip('/').split('/')
    # a negative slice end drops the trailing cnt segments
    return '/'.join(segments[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # additional management tab that opens changeDocumentViewerForm
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    search_template = PageTemplateFile('zpt/search_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
132
133
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise a new document viewer.

    @param id: object id of the viewer
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the viewer
    @param digilibBaseUrl: stored only when it is not None
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-delimited list of authorized groups

    Failures while creating the helper objects are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # keep the group names stripped and lower-cased
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # create template folder so we can always use template.something
    tmpl = Folder('template')
    self['template'] = tmpl  # Zope-2.12 style (old style was _setObject)

    # full text client
    try:
        import MpdlXmlTextServer
        tmpl['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmpl['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service: assume the MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
171
172
173	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page.

    All keyword arguments are handed to template.fulltextclient.getTextPage.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
177
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo.

    All keyword arguments are handed to template.fulltextclient.getSearchResults.
    """
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
181
def getResultsPage(self, **args):
    """Return one page of the search results.

    All keyword arguments are handed to template.fulltextclient.getResultsPage.
    """
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
185
def getTextInfo(self, **args):
    """Return document info from the text server.

    All keyword arguments are handed to template.fulltextclient.getTextInfo.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
189
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo.

    All keyword arguments are handed to template.fulltextclient.getToc.
    """
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
193
def getTocPage(self, **args):
    """Return one page of the table of contents.

    All keyword arguments are handed to template.fulltextclient.getTocPage.
    """
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
197
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page.

    All keyword arguments are handed to template.fulltextclient.getPlacesOnPage.
    """
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
201
# TODO(review): RSS variant of the thumbnail view -- check whether it is still in use
203	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
204	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the document through the 'thumbs_main_rss' template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images', 'text' or 'auto'; 'auto' selects 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', otherwise
        'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default (as in index_html): the attribute does not exist
    # when the viewer was created without a digilibBaseUrl
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # automatic mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
236
237
238	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    Render one page of the document with the template 'viewer_<viewMode>'.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current

    Returns the rendered template, or a plain error string when the template
    folder or the template for viewMode is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    if viewMode == "text_dict":
        # legacy name for text view with dictionary layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if not hasText:
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
286
# TODO(review): purpose of generateMarks is unclear -- check whether it is still used
def generateMarks(self,mk):
    """Return a string with one "mk=<value>" fragment per mark.

    ``mk`` may be None (gives ""), a single value or a list of values.
    The fragments are concatenated without any separator.
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
297
298
def getBrowser(self):
    """Return the browser description dict computed by browserCheck() for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
304
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib (getDLBaseUrl)."""
    return self.template.zogilib.getDLBaseUrl()
309
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    @param fn: image path; taken from docinfo['imagePath'] when None
    @param pn: page number, only appended when true
    @param dw: requested width
    @param dh: requested height
    @param docinfo: optional docinfo; its 'imageURL' is used as base URL if set

    Without docinfo['imageURL'] the base is built from self.digilibBaseUrl
    and fn.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        # no prepared image URL: build one from the digilib base URL
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')
        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    pageParam = "&pn=%s"%pn if pn else ""
    return url + pageParam + "&dw=%s&dh=%s"%(dw,dh)
328
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
332
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    return style + 'sel' if idx == selected else style
340
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request form and applies the changes:

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str); None removes param
    @param params: dict of further changes; a value of None removes the key,
        other values are stored unchanged
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry; any other true value leaves lists alone

    With duplicates='first' a list without any non-empty entry now becomes ''
    (the same result 'comma' gives) instead of raising IndexError.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue
            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry, '' if there is none
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
379
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with changed parameters.

    The parameters come from getParams(param, val, params, duplicates); each
    value is passed through utf8ify and urllib.quote_plus (not escaping '/').

    @param baseUrl: URL to append the query to; default is the viewer URL
    @param paramSep: separator placed between the key=value pairs
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble key=value pairs
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
390
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the document viewer with parameter param set to val.

    Delegates to getLink with paramSep='&'.
    """
    # NOTE(review): the separator equals getLink's default although the
    # method name suggests an HTML-escaped ampersand -- confirm the intent.
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
394
395
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # getattr with default (as in index_html): the attribute does not exist
    # when the viewer was created without a digilibBaseUrl
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
404
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    docinfo['accessType'] decides:
      'free'                     -- always accessible
      missing or in authgroups   -- accessible for every user whose name is
                                    not "Anonymous User"
      anything else              -- not accessible (logged as error)
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
425
426
427
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url.

    @param mode: 'texttool' (url points to the document dir or index.meta),
        'imagepath' (url points to a folder with images, index.meta assumed
        in the parent dir) or 'filepath' (url points to an image file,
        index.meta assumed two path segments up)
    @param url: location of the document, interpreted according to mode
    @param tocMode: passed to getTextInfo as mode

    A docinfo stored in the session under 'docinfo' is reused when its mode
    and url match; a newly built docinfo is stored there.

    Raises IOError when no index.meta is found in mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including the URL of this viewer
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info; without bib try info.xml
        bib = self.metadataService.getBibData(dom=metaDom)
        if not bib:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

    # image path
    if mode != 'texttool':
        # override image path from texttool with url TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    # check numPages: replace with numTextPages if there are no images (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
534
535
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute and
    extended by the document name if it does not end with it).  Without
    archive-path the document URL stored in docinfo is used, unless it
    starts with 'http:'.  The first '/mpiwg/online' is removed from the path.

    Returns the modified docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath.  getDocinfo stores it under 'documentUrl';
        # the former lookup of 'documentURL' raised KeyError and is only
        # kept as a fallback
        docUrl = docinfo.get('documentUrl', None)
        if docUrl is None:
            docUrl = docinfo.get('documentURL', None)

        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs containing /mpiwg/online (first occurrence only)
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
560
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Keys written (when the corresponding data is present): 'imagePath',
    'textURL', 'textURLPath', 'presentationUrl'; always written: 'pageFlow'
    (default 'ltr'), 'oddPage' (default 'left') and 'titlePage' (default 0).
    Relative image, text and presentation locations are joined with
    docinfo['documentPath'].

    Returns the modified docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        docinfo['imagePath'] = os.path.join(docPath, imageDir).replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme: treat as path below the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
604
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as docinfo['bib'], the value of '@type' as
    docinfo['bibType'] and, for convenience, creator, title and date from
    metadataService.getDCMappedData(bib).

    Returns the modified docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
618
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    docinfo['accessType'] is set to acc['@attr']['type']; for the types
    'group' and 'institution' the lower-cased acc['name'] is used instead.
    Incomplete or malformed access data leaves docinfo unchanged.

    Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: only the errors caused by missing or oddly shaped
        # data are ignored; anything else is no longer swallowed
        logging.debug("getDocinfoFromAccess: unusable access data: %s"%repr(e))

    return docinfo
636
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages for the image directory ``path`` from digilib.

    Fetches dirInfo-xml.jsp from self.digilibBaseUrl and stores the content
    of its <size> element as int in docinfo['numPages'] (0 if it is empty).
    When nothing can be fetched an error is logged and docinfo is returned
    unchanged.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
655
656
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the document at docinfo['presentationUrl'] (directly if it starts
    with "http://", otherwise through the digilib Texter servlet) and stores
    the text of its first author, title and date elements as
    docinfo['creator'], docinfo['title'] and docinfo['date'].

    Without URL or without data an error is logged and docinfo is returned
    unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: read through digilib
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    # an empty answer is treated like no answer (as in getDocinfoFromDigilib)
    # instead of being handed to the XML parser
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
685
686
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters and the current request.

    @param current: current page number
    @param start: first page of the thumbnail group; default is the start of
        the group containing current
    @param rows: thumbnail rows, default self.thumbrows
    @param cols: thumbnail columns, default self.thumbcols
    @param docinfo: docinfo of the document; 'numPages' is set from
        'numTextPages' if it is 0
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: comma-separated string or list of layers
    @param tocMode: stored as pageinfo['tocMode']

    Further values (tocPageSize, characterNormalization, query parameters,
    highlighting) are read from self.REQUEST.  When the request has a query,
    getSearchResults is called.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    request = self.REQUEST
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save list of unique, non-empty layers (order preserved)
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in viewLayers:
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    # if start is empty use the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        # try numTextPages
        numPages = docinfo.get('numTextPages', 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    numgroups = int(numPages / grpsize)
    if numPages % grpsize > 0:
        # partly filled last group
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageNumbers = docinfo.get('pageNumbers')
    if pageNumbers:
        # get original page numbers
        pageNumber = pageNumbers.get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
772
773
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with the page information for one screenful of thumbnails.

    @param start: index of the first page on this screen
    @param rows: number of thumbnail rows
    @param cols: number of thumbnail columns
    @param pageFlowLtr: False fills each row from right to left
    @param pageZero: True starts the grid with an (empty) page 0
    @param minIdx: smallest valid page index
    @param maxIdx: largest valid page index (inclusive); 0 means unknown and
        is replaced by start + rows*cols

    Keys of the result:
      batches   -- list of {'start', 'end'} for all screens
      pages     -- list of rows, each a list of {'idx': n}; n is None for
                   positions outside minIdx..maxIdx
      prevStart -- start of the previous screen or None
      nextStart -- start of the next screen or None
    """
    batch = {}
    grpsize = rows * cols
    pageCountKnown = (maxIdx != 0)
    if not pageCountKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    ofs = 0 if pageZero else 1
    batch['batches'] = [
        {'start': i * grpsize + ofs, 'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
        for i in range(nb)
    ]

    # with a zeroth page the first screen begins at index 0
    idx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    batch['pages'] = pages

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is inclusive: there is a next screen as soon as its first page
    # exists (the former '<' hid the link when exactly one page followed).
    # With an unknown page count no next screen is offered, as before.
    nextStart = start + grpsize
    if pageCountKnown and nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    return batch
831
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with the information for one screenful of data.

    @param start: first index of this batch
    @param size: number of entries per batch
    @param end: end index; 0 is replaced by start + size
    @param data: optional sequence or mapping with the entries
    @param fullData: True reads data[i] for every index i of the batch,
        False reads data[0], data[1], ... instead

    Keys of the result:
      batches   -- list of {'start', 'end'} for all batches
      this      -- entries for the indexes start .. min(start+size, end)-1;
                   without data the value i+1 is used for index i
      prevStart -- start of the previous batch or None
      nextStart -- start of the next batch or None
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    batch = {}
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        this = [i + 1 for i in indexes]
    elif fullData:
        this = [data[i] for i in indexes]
    else:
        # data only holds this batch: count from its beginning
        this = [data[j] for j in range(len(indexes))]

    batch['this'] = this
    batch['prevStart'] = max(start - size, 1) if start > 1 else None
    batch['nextStart'] = start + size if start + size < end else None
    return batch
875
876
877	security.declareProtected('View management screens','changeDocumentViewerForm')
878	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
879
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    @param title: new title
    @param digilibBaseUrl: new digilib base URL (stored as given)
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma-delimited list of authorized groups
    @param RESPONSE: if given, redirected to 'manage_main'

    Also looks up the MetaDataFolder 'metadata' again; a failing lookup is
    logged only.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # group names are kept stripped and lower-cased
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
895
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this object before calling it
    return form.__of__(self)()
900
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to this object under ``id``.

    @param id: id of the new viewer
    @param imageScalerUrl: passed on to documentViewer
    @param textServerName: passed on to documentViewer
    @param title: title of the new viewer
    @param RESPONSE: if given, redirected to 'manage_main'
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: