Context Navigation

source: documentViewer/documentViewer.py @ 511:551ca1641a5e

elementtree

Last change on this file since 511:551ca1641a5e was 511:551ca1641a5e, checked in by casties, 12 years ago
more cleanup. search really works now.
File size: 33.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as XML.

    @param node: ElementTree element to serialize
    @param encoding: kept in the signature from the former 4Suite based
        implementation; it is not passed on to ElementTree
    @return: the result of ET.tostring(node)
    """
    return ET.tostring(node)
32
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    @param self: object providing REQUEST.get_header()
    @return: dict with the keys 'ua' (raw header value, may be None),
        'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac',
        'staticHTML' (booleans) and 'versFirefox', 'versIE',
        'versSafariChrome', 'versOpera' (strings, "" if not detected)
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not crash the check
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    def tail(text, char):
        # rest of text starting at the first occurrence of char
        # (same slicing as before, including the find() == -1 case)
        return text[text.find(char):]

    afterOpen = tail(agent, '(')
    afterClose = tail(agent, ')')

    # every detection below is independent of the others; a user agent
    # that is too short for a pattern simply leaves the version empty

    # Safari or Chrome identification
    try:
        tokens = tail(tail(afterClose, '('), ')').split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification
    try:
        ieToken = afterOpen.split("; ")[1]
        if "MSIE" in ieToken:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification
    try:
        ffToken = afterClose.split(" ")[2]
        if "Firefox" in ffToken:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            bt['versOpera'] = tail(afterOpen, ')').split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    @param path: '/'-separated path; trailing slashes are ignored
    @param cnt: number of segments to remove from the end
    @return: the shortened path; '' if cnt removes all segments
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be an empty slice and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a "Configuration" tab to the management tabs of Folder
    manage_options = Folder.manage_options + (
        {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    search_template = PageTemplateFile('zpt/search_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
132
133
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the new object
    @param digilibBaseUrl: base URL of digilib (None: determined later)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: authorized groups (delimited by ,)
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # the helper objects are optional: a failure is logged, not raised
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    # always define the attribute (even as None) so that plain reads of
    # self.digilibBaseUrl elsewhere cannot raise AttributeError
    self.digilibBaseUrl = digilibBaseUrl
171
172
173	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """returns full text content of page (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**kwargs)
177
def getSearchResults(self, **kwargs):
    """loads list of search results and stores XML in docinfo (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**kwargs)
181
def getResultsPage(self, **kwargs):
    """returns one page of the search results (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**kwargs)
185
def getToc(self, **kwargs):
    """loads table of contents and stores XML in docinfo (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**kwargs)
189
def getTocPage(self, **kwargs):
    """returns one page of the table of contents (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**kwargs)
193
def getPlacesOnPage(self, **kwargs):
    """get list of gis places on one page (proxy for template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**kwargs)
197
198	#WTF?
thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: result of the thumbs_main_rss template
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # use getattr like index_html does: the attribute may not exist yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # text if a text URL is known for the document, else images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
232
233
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: output of the template viewer_$viewMode or an error message string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)

    if tocMode != "thumbs":
        # get table of contents
        self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
286
287	#WTF?
def generateMarks(self,mk):
    """Return the marks in mk as URL query parameters.

    @param mk: a single mark, a list of marks or None
    @return: "" for None, otherwise "mk=..." for every mark joined by '&'
        (without the separator several marks ran together as "mk=1mk=2")
    """
    if mk is None:
        return ""

    if not isinstance(mk, list):
        mk = [mk]

    return "&".join(["mk=%s"%m for m in mk])
297
298
def getBrowser(self):
    """returns the browser information dict produced by browserCheck"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
304
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib (template.zogilib.getDLBaseUrl())"""
    return self.template.zogilib.getDLBaseUrl()
309
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    @param fn: image path (used if docinfo has no 'imageURL')
    @param pn: page number (left out of the URL if empty)
    @param dw: width parameter
    @param dh: height parameter
    @param docinfo: optional dict; 'imageURL' is used as base URL if
        present, 'imagePath' replaces a missing fn
    """
    haveDocinfo = docinfo is not None
    scalerUrl = None
    if haveDocinfo:
        scalerUrl = docinfo.get('imageURL', None)

    if scalerUrl is None:
        # build the base URL from the configured digilib
        scalerUrl = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        scalerUrl += "fn=%s"%fn

    pieces = [scalerUrl]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
328
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
332
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended if idx == selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
340
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str); None removes param
    @param params: dict of parameters to change; None values remove the key
    @param duplicates: how to reduce list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry ('' if there is none)
    @return: new dict, the request form itself is not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty
                # entries yields '' (like 'comma') instead of IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
379
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    @param param, val, params, duplicates: see getParams
    @param baseUrl: URL in front of the query string (default: URL of this instance)
    @param paramSep: separator between the parameters
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        if not isinstance(v, bytes):
            # quote UTF-8 bytes: quoting unicode text directly fails for
            # non-ASCII characters in Python 2
            v = (u"%s"%(v,)).encode('utf-8')

        pairs.append("%s=%s"%(k, urllib.quote_plus(v, '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    url = "%s?%s"%(baseUrl, ps)
    return url
390
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val, for use in HTML

    Same as getLink but the parameters are separated by the escaped
    ampersand '&amp;' (with a plain '&' this method would not differ
    from getLink).
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
394
395
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of the info_xml template called with the docinfo
    """
    # use getattr like index_html does: the attribute may not exist yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
404
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    @param docinfo: dict; its 'accessType' entry decides:
        'free' grants access, no entry or one of self.authgroups grants
        access to every user but "Anonymous User", anything else is
        rejected as unknown
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
425
426
427
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    The docinfo dict is cached in the session and reused as long as
    mode and url stay the same.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to image file)
    @param url: url of the document
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        stillCurrent = (cached is not None
                        and cached.get('mode', None) == mode
                        and cached.get('url', None) == url)
        if stillCurrent:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo with self url
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
        'digilibBaseUrl': self.digilibBaseUrl,
    }

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        meta = self.metadataService

        # document directory name and path
        resource = meta.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = meta.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info
        bib = meta.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = meta.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = meta.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = meta.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
525
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets 'documentName' and 'documentPath' in docinfo.

    @param docinfo: docinfo dict (changed in place)
    @param resource: dict with the optional keys 'name' and 'archive-path'
    @return: docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it as 'documentUrl';
        # the spelling 'documentURL' that was read here before is never
        # set and raised KeyError, it is only kept as a fallback
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
550
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Sets 'pageFlow', 'oddPage' and 'titlePage' and, depending on the
    entries in texttool, 'imagePath', 'textURL', 'textURLPath' and
    'presentationUrl'.

    @param docinfo: docinfo dict (changed in place); 'documentPath' is
        used to resolve relative entries
    @param texttool: dict with the contents of the texttool element
    @return: docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
594
def getDocinfoFromBib(self, docinfo, bib):
    """reads contents of bib element into docinfo

    @param docinfo: docinfo dict (changed in place)
    @param bib: dict with the raw bib fields; '@type' is the bib type
    @return: docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
608
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the type of the access element or, for
    the types 'group' and 'institution', to the lowercased name.
    docinfo stays unchanged if acc has no usable type.

    @param docinfo: docinfo dict (changed in place)
    @param acc: dict with the contents of the access element
    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError):
        # incomplete access element: leave docinfo unchanged.
        # Only lookup errors are ignored so that other exceptions
        # are no longer swallowed
        pass

    return docinfo
626
def getDocinfoFromDigilib(self, docinfo, path):
    """reads the 'size' entry of digilib's dirInfo-xml.jsp for path into docinfo['numPages']

    @param docinfo: docinfo dict (changed in place)
    @param path: value of the fn parameter for digilib
    @return: docinfo (unchanged if no data could be loaded)
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    numPages = 0
    if size:
        numPages = int(size)

    docinfo['numPages'] = numPages
    # TODO: produce and keep list of image names and numbers
    return docinfo
645
646
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Loads the info.xml given in docinfo['presentationUrl'] and sets
    'creator', 'title' and 'date' in docinfo.

    @param docinfo: docinfo dict (changed in place)
    @return: docinfo (unchanged if there is no URL or no data)
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # also covers an empty response (like getDocinfoFromDigilib),
        # which can not be parsed as XML
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
675
676
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail batch (default: start of
        the batch around current)
    @param rows: thumbnail rows (default: self.thumbrows)
    @param cols: thumbnail columns (default: self.thumbcols)
    @param docinfo: docinfo dict of the document
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: layer name, comma separated layer names or list of
        layer names
    @param tocMode: stored as pageinfo['tocMode']
    @return: pageinfo dict
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list of non-empty layers in viewLayers
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in viewLayers:
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the start of the batch around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # cache text page as well
        pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    request = self.REQUEST

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    fullGroups, rest = divmod(np, grpsize)
    if rest > 0:
        # one more group for the remaining pages
        fullGroups += 1

    pageinfo['numgroups'] = fullGroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

        # highlighting
        highlightQuery = request.get('highlightQuery', None)
        if highlightQuery:
            pageinfo['highlightQuery'] = highlightQuery
            pageinfo['highlightElement'] = request.get('highlightElement', '')
            pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
756
757
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    @param start: index of the first page of this batch
    @param rows: number of rows
    @param cols: number of columns
    @param pageFlowLtr: pages of a row run left to right (else right to left)
    @param pageZero: start the first batch with an (empty) page 0
    @param minIdx: smallest valid page index
    @param maxIdx: largest valid page index (0: unknown)
    @return: dict with
        'batches': list of {'start', 'end'} for all batches,
        'pages': list of rows of {'idx': page index or None},
        'prevStart', 'nextStart': start of the neighbouring batches or None
    """
    batch = {}
    grpsize = rows * cols
    sizeKnown = (maxIdx != 0)
    if not sizeKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # batches start at 0 if there is a zeroth page
    ofs = 0 if pageZero else 1

    # list of all batch start and end points
    batch['batches'] = [
        {'start': i * grpsize + ofs, 'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
        for i in range(nb)
    ]

    if pageZero and start == 1:
        # correct beginning
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # placeholder outside of the valid range
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is inclusive: there is a next batch if its first page
    # (start + grpsize) still exists. With unknown size there is no
    # next batch (as before).
    if sizeKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
815
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    @param start: start of this batch
    @param size: size of a batch
    @param end: end of the data (0: start + size)
    @param data: optional list of elements
    @param fullData: data is indexed by the absolute position (else by
        the position inside this batch)
    @return: dict with
        'batches': list of {'start', 'end'} for all batches,
        'this': elements of this batch (element of data, or position + 1
            without data),
        'prevStart', 'nextStart': start of the neighbouring batches or None
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

    # list of elements in this batch
    this = []
    for (offset, pos) in enumerate(range(start, min(start+size, end))):
        if not data:
            element = pos + 1
        elif fullData:
            element = data[pos]
        else:
            element = data[offset]

        this.append(element)

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {
        'batches': batches,
        'this': this,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
859
860
# configuration form (the "Configuration" management tab)
security.declareProtected('View management screens', 'changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
863
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    @param title: new title
    @param digilibBaseUrl: new base URL of digilib
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: authorized groups (delimited by ,)
    @param RESPONSE: if given, redirects to manage_main
    """
    # NOTE(review): the defaults of thumbrows and thumbcols are the
    # reverse of the defaults in __init__ -- confirm which is intended
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = authgroups.split(',')
    self.authgroups = [g.strip().lower() for g in groups]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
879
def manage_AddDocumentViewerForm(self):
    """add the viewer form (renders zpt/addDocumentViewer in the context of self)"""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
884
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    @param id: id of the new documentViewer
    @param imageScalerUrl: passed on to documentViewer
    @param textServerName: passed on to documentViewer
    @param title: title of the new documentViewer
    @param RESPONSE: if given, redirects to manage_main
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: