Context Navigation

source: documentViewer/documentViewer.py @ 508:d5a47f82e755

elementtree

Last change on this file since 508:d5a47f82e755 was 508:d5a47f82e755, checked in by casties, 12 years ago
more cleanup. search works mostly now. layers work better now.
File size: 33.8 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return *node* serialized as an XML string.

    The ``encoding`` argument is accepted but not passed on:
    ``ET.tostring`` is called with its default encoding.
    """
    return ET.tostring(node)
32
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    Returns a dict with the keys ``ua`` (raw header value, may be None),
    ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac``,
    ``staticHTML`` (always False) and the version strings ``versIE``,
    ``versFirefox``, ``versSafariChrome`` and ``versOpera``, which stay
    empty when the respective browser is not recognised.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the parsing below
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': 'MSIE' in agent,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }
    if not bt['isIE']:
        bt['isN4'] = 'Mozilla/4.' in agent

    # Text from the first "(" and from the first ")" onwards. When find()
    # fails it returns -1, so the slice is just the last character and the
    # lookups below simply come up empty (IndexError).
    fromParen = agent[agent.find('('):]
    afterParen = agent[agent.find(')'):]

    # Safari or Chrome identification: the two tokens following the
    # second parenthesised group are "<Product>/<version> Safari/<x>"
    try:
        secondGroup = afterParen[afterParen.find('('):]
        tokens = secondGroup[secondGroup.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry is "MSIE <version>"
    try:
        entry = fromParen.split("; ")[1]
        if "MSIE" in entry:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ")" is
    # "Firefox/<version>". Computed here instead of relying on a variable
    # left over from the Safari block.
    try:
        product = afterParen.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            bt['versOpera'] = fromParen[fromParen.find(')'):].split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return pathname shortened by cnt trailing segments.

    Trailing slashes are removed first. A cnt of zero or less returns the
    (slash-stripped) path unchanged; a cnt larger than the number of
    segments returns the empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be an empty slice, so handle "no shortening" here
        return path

    # split by /, shorten, and reassemble
    segments = path.split('/')
    return '/'.join(segments[:max(len(segments) - cnt, 0)])
100
101	##
102	## documentViewer class
103	##
104	class documentViewer(Folder):
105	"""document viewer"""
106	meta_type="Document viewer"
107
108	security=ClassSecurityInfo()
109	manage_options=Folder.manage_options+(
110	{'label':'Configuration','action':'changeDocumentViewerForm'},
111	)
112
113	metadataService = None
114	"""MetaDataFolder instance"""
115
116	# templates and forms
117	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
118	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
119	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
120	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
121	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
122	toc_text = PageTemplateFile('zpt/toc_text', globals())
123	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
124	toc_none = PageTemplateFile('zpt/toc_none', globals())
125	common_template = PageTemplateFile('zpt/common_template', globals())
126	info_xml = PageTemplateFile('zpt/info_xml', globals())
127	docuviewer_css = ImageFile('css/docuviewer.css',globals())
128	# make ImageFile better for development
129	docuviewer_css.index_html = refreshingImageFileIndexHtml
130	jquery_js = ImageFile('js/jquery.js',globals())
131
132
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Init document viewer.

    Stores the display settings and creates the 'template' sub-folder with
    a 'fulltextclient' (MpdlXmlTextServer) and a 'zogilib' (zogiLib)
    object. Failure to create either helper is logged, not raised.

    @param id: Zope id of the viewer
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the viewer
    @param digilibBaseUrl: base URL of the digilib server (may be None)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma separated list of authorized groups
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    # always define the attribute (even as None) so that code reading
    # self.digilibBaseUrl directly does not fail with AttributeError
    self.digilibBaseUrl = digilibBaseUrl

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
170
171
172	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
176
def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
180
def getResultsPage(self, **args):
    """returns one page of the search results (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
184
def getToc(self, **args):
    """loads table of contents and stores XML in docinfo (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
188
def getTocPage(self, **args):
    """returns one page of the table of contents (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
192
def getPlacesOnPage(self, **args):
    """get list of gis places on one page (forwarded to template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
196
197	#WTF?
198	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
199	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto' (default). 'auto' becomes
        'text' if docinfo has a 'textURL' key or a non-empty
        'textURLPath', else 'images'.
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
231
232
233	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page

    Returns the rendered template /template/viewer_$viewMode, or an error
    string if the template folder or the template is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    configuredUrl = getattr(self, 'digilibBaseUrl', None)
    if not configuredUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    needsToc = (tocMode != "thumbs")
    if needsToc:
        # get table of contents
        self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
285
286	#WTF?
def generateMarks(self,mk):
    # Builds a string of "mk=<value>" entries, one per mark, concatenated
    # without any separator; None gives the empty string and a single
    # non-list value is treated as a one-element list.
    # NOTE: deliberately a comment, not a docstring -- Zope's publisher
    # only exposes objects that have a docstring, and the original method
    # had none.
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
296
297
def getBrowser(self):
    """returns the browser info dict produced by browserCheck for this request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
303
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib

    Returns the value of template.zogilib.getDLBaseUrl(), or None when
    there is no template folder or no zogilib object in it, so that
    callers can fall back to a default with ``findDigilibUrl() or ...``.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        return None

    return zogilib.getDLBaseUrl()
308
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    Uses docinfo['imageURL'] as base when available; otherwise builds
    "<digilibBaseUrl>/servlet/Scaler?fn=<fn>", taking fn from
    docinfo['imagePath'] when no fn is given. pn is appended only when
    it is true; dw and dh are always appended.
    """
    haveDocinfo = docinfo is not None
    url = docinfo.get('imageURL', None) if haveDocinfo else None

    if url is None:
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')
        url = base + "fn=%s"%fn

    pieces = [url]
    if pn:
        pieces.append("&pn=%s"%pn)
    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return "".join(pieces)
327
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
331
def getStyle(self, idx, selected, style=""):
    """returns the given style string, with 'sel' appended if idx == selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
339
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set (stored as str(val))
        or, with val=None, to remove
    @param val: value for param
    @param params: dict of further parameters; None values remove the key,
        other values are stored unchanged
    @param duplicates: how to collapse list values (duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry (empty string if there is none); any other
        true value leaves lists alone, a false value skips this step
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty
                # entries gives '' (same as 'comma') instead of IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
378
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    @param param, val, params, duplicates: passed on to getParams
    @param baseUrl: URL the query string is appended to (default: URL of
        this viewer)
    @param paramSep: separator between the key=value pairs
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        # Python 2 quote_plus needs a byte string: unicode input with
        # non-ASCII characters raises KeyError, so encode it as UTF-8.
        # Byte strings are quoted as they are, other values via str().
        if isinstance(v, unicode):
            v = v.encode('utf-8')
        elif not isinstance(v, str):
            v = str(v)
        pairs.append("%s=%s"%(k, urllib.quote_plus(v, '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    url = "%s?%s"%(baseUrl, ps)
    return url
389
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val"""
    # NOTE(review): despite the name, the separator passed here is the
    # plain '&', so this is getLink with its default paramSep -- confirm
    # whether an HTML-escaped separator was intended.
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
393
394
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Renders the 'info_xml' template with the docinfo for mode/url.
    """
    # getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
403
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' access is always granted. A missing access type, or one listed
    in self.authgroups, is granted to any user whose name is not
    "Anonymous User". Every other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    localOnly = access is None or access in self.authgroups
    if not localOnly:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
424
425
426
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo stored in the session under 'docinfo' is reused when its
    'mode' and 'url' match. Otherwise a new dict is assembled from the
    index.meta DOM supplied by self.metadataService and from digilib,
    stored in the session and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to image file)
    @param url: location of the document
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode in ("imagepath", "filepath"):
        # index.meta is optional here and assumed in the parent dir of an
        # image folder, or two path segments above an image file
        levels = 1 if mode == "imagepath" else 2
        docUrl = getParentPath(url, levels)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        md = self.metadataService

        # document directory name and path
        resourceData = md.getResourceData(dom=metaDom)
        if resourceData:
            docinfo = self.getDocinfoFromResource(docinfo, resourceData)

        # texttool info
        texttoolData = md.getTexttoolData(dom=metaDom)
        if texttoolData:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)

        # bib info
        bibData = md.getBibData(dom=metaDom)
        if bibData:
            docinfo = self.getDocinfoFromBib(docinfo, bibData)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        accessData = md.getAccessData(dom=metaDom)
        if accessData:
            docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # attribution info
        attributionData = md.getAttributionData(dom=metaDom)
        if attributionData:
            logging.debug("getDocinfo: attribution=%s"%repr(attributionData))
            docinfo['attribution'] = attributionData

        # copyright info
        copyrightData = md.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
524
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute
    and ending in the document name). Without an archive-path the
    document URL from docinfo is used, unless it starts with 'http:'.
    A leading '/mpiwg/online' is removed from the path.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if not docPath.startswith('/'):
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # ('documentURL' is still accepted in case other code sets it)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
549
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Sets 'imagePath', 'textURL' and 'presentationUrl' (each only when the
    texttool entry and docinfo['documentPath'] are both present),
    'textURLPath' (when present), and always 'pageFlow', 'oddPage' and
    'titlePage'.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        joined = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = joined.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL -- treat as path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow, position of odd pages, number of title page (0: not defined)
    for (key, attr, default) in (('pageFlow', 'page-flow', 'ltr'),
                                 ('oddPage', 'odd-scan-position', 'left'),
                                 ('titlePage', 'title-scan-no', 0)):
        docinfo[key] = texttool.get(attr, default)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)
        docinfo['presentationUrl'] = presentation

    return docinfo
593
def getDocinfoFromBib(self, docinfo, bib):
    """reads contents of bib element into docinfo

    Stores the raw bib dict as 'bib', its '@type' entry as 'bibType', and
    'creator', 'title' and 'date' from metadataService.getDCMappedData.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
607
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to acc['@attr']['type'], or for the types
    'group' and 'institution' to the lower-cased acc['name']. Missing or
    malformed access data leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: incomplete access data is not an error, but only
        # lookup problems are swallowed, not every exception
        logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
625
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches "<digilibBaseUrl>/dirInfo-xml.jsp?mo=dir&fn=<path>" and
    # stores the integer value of its <size> element as
    # docinfo['numPages'] (0 when the element has no text). docinfo is
    # returned unchanged when nothing could be fetched.
    # NOTE: deliberately a comment, not a docstring -- Zope's publisher
    # only exposes objects that have a docstring, and the original method
    # had none.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    response = getHttpData(infoUrl)
    if not response:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    sizeText = getText(ET.fromstring(response).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%sizeText)
    docinfo['numPages'] = int(sizeText) if sizeText else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
644
645
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml named by docinfo['presentationUrl'] (directly if
    it starts with "http://", else through the digilib Texter servlet)
    and stores the text of its first author, title and date elements as
    'creator', 'title' and 'date'. docinfo is returned unchanged when
    there is no presentation URL or nothing could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # also covers an empty response, which must not reach the parser;
        # log the URL that was actually requested
        logging.error("Unable to read info.xml from %s"%(metaUrl))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
674
675
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    The returned dict holds the view settings ('viewMode', 'viewLayer',
    'viewLayers', 'tocMode'), the thumbnail paging values ('current',
    'pn', 'rows', 'cols', 'groupsize', 'start', 'numgroups', 'pageZero',
    'pageBatch') and the query/toc parameters read from the request.

    @param current: current page number
    @param start: first page of the thumbnail group (default: start of
        the group containing current)
    @param rows, cols: thumbnail grid size (default: self.thumbrows and
        self.thumbcols)
    @param docinfo: docinfo dict of the document
    @param viewLayer: layer name, comma separated names, or list of names
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer, basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list of non-empty names in viewLayers
        layers = []
        for name in viewLayer:
            if name and name not in layers:
                layers.append(name)

        pageinfo['viewLayers'] = layers
        # stringify viewLayer
        viewLayer = ','.join(layers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo.update({
        'current': current,
        'pn': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
    })

    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # (cache text page as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    # number of groups, rounded up
    numgroups, rest = divmod(np, grpsize)
    if rest > 0:
        numgroups += 1
    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # TODO: do we need this here?
    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(request.get('searchPN','1'))

    # limit tocPN to the number of pages of the cached toc
    tocKey = 'tocSize_%s'%tocMode
    if tocKey in docinfo:
        tocSize = docinfo[tocKey]
        tocPageSize = pageinfo['tocPageSize']
        tocPages = tocSize/tocPageSize
        if tocSize%tocPageSize > 0:
            tocPages += 1

        pageinfo['tocPN'] = min(tocPages, pageinfo['tocPN'])

    return pageinfo
757
758
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    @param start: index of the first page on this screen
    @param rows, cols: size of the thumbnail grid
    @param pageFlowLtr: if False, each row is filled right to left
    @param pageZero: if True the batches start at 0, and a screen with
        start == 1 begins with an (empty) page 0
    @param minIdx, maxIdx: valid page index range (inclusive); maxIdx 0
        means unknown and is replaced by start + rows*cols
    @return: dict with 'batches' (list of {'start','end'}), 'pages'
        (rows of {'idx': n or None}), 'prevStart' and 'nextStart' (None
        when there is no previous/next screen)
    """
    batch = {}
    grpsize = rows * cols
    sizeKnown = (maxIdx != 0)
    if not sizeKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    ofs = 0 if pageZero else 1
    batch['batches'] = [
        {'start': i * grpsize + ofs, 'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
        for i in range(nb)
    ]

    if pageZero and start == 1:
        # correct beginning
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # outside the document: empty cell
                page = {'idx': None}
            else:
                page = {'idx': idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is inclusive, so a next screen exists as soon as its first
    # page is <= maxIdx. With an unknown size there is no next screen.
    if sizeKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
816
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    @param start: first index of this batch
    @param size: number of entries per batch
    @param end: end index; 0 means start + size
    @param data: optional sequence of entries. With fullData the entries
        are taken at the batch indexes, otherwise from position 0 on.
        Without data the entries are the indexes plus one.
    @return: dict with 'batches' (list of {'start','end'}), 'this' (the
        entries of this batch), 'prevStart' and 'nextStart'
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    allBatches = [
        {'start': n * size + 1, 'end': min((n + 1) * size, end)}
        for n in range(numBatches)
    ]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        entries = [i+1 for i in indexes]
    elif fullData:
        entries = [data[i] for i in indexes]
    else:
        entries = [data[j] for j in range(len(indexes))]

    prevStart = max(start - size, 1) if start > 1 else None
    nextStart = start + size if start + size < end else None

    return {
        'batches': allBatches,
        'this': entries,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
860
861
862	security.declareProtected('View management screens','changeDocumentViewerForm')
863	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
864
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    Stores the given settings on the instance and looks up the
    'metadata' object again as metadataService (a failed lookup is
    logged, not raised).

    @param title: new title
    @param digilibBaseUrl: base URL of the digilib server
    @param thumbrows, thumbcols: size of the thumbnail grid
    @param authgroups: comma separated list of authorized groups
    @param RESPONSE: if given, redirected to 'manage_main'
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
880
def manage_AddDocumentViewerForm(self):
    """add the viewer form (renders zpt/addDocumentViewer in the context of self)"""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    return formTemplate.__of__(self)()
885
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer: creates a documentViewer, stores it in self under id, and redirects RESPONSE (if given) to 'manage_main'"""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: