Context Navigation

source: documentViewer/documentViewer.py @ 509:9d05befdd462

elementtree

Last change on this file since 509:9d05befdd462 was 509:9d05befdd462, checked in by casties, 12 years ago
try to get characterNormalization in search result working.
File size: 33.6 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to XML and return the result.

    @param node: ElementTree element to serialize
    @param encoding: accepted for interface compatibility only; it is not
        passed on to ET.tostring, which therefore uses its own default
    @return: the serialized XML as produced by ET.tostring
    """
    # the earlier 4Suite/Domlette printing code is no longer used;
    # ElementTree does the serialization
    return ET.tostring(node)
32
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    @return: dict with the raw header in 'ua', the flags 'isIE', 'isN4',
        'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' and the version
        strings 'versFirefox', 'versIE', 'versSafariChrome', 'versOpera'
        (empty string when the browser was not recognized)
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the searches below
    ua = rawUa or ""
    bt = {
        'ua': rawUa,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Text from the first '(' and from the first ')' onwards.  str.find()
    # returns -1 when the character is missing, so the slice then yields only
    # the last character and the token lookups below fail with IndexError.
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        # these two tokens are not used for the result, but they have always
        # been required to exist before a Safari/Chrome version is reported
        nav.split("; ")[1]
        nav1.split(" ")[2]
        tailTokens = nav3.split(" ")
        versionToken = tailTokens[1]
        if "Safari" in tailTokens[2]:
            bt['versSafariChrome'] = versionToken.split("/")[1]
    except IndexError:
        pass

    # IE identification
    try:
        ieToken = nav.split("; ")[1]
        if "MSIE" in ieToken:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification; the token is computed here instead of being
    # borrowed from the Safari block, which may have failed before setting it
    try:
        ffToken = nav1.split(" ")[2]
        if "Firefox" in ffToken:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            operaTail = nav[nav.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing path segments.

    @param path: '/'-separated path; trailing slashes are ignored
    @param cnt: number of segments to remove; for cnt <= 0 the path is
        returned without trailing slashes but otherwise unchanged
    @return: the shortened path ('' when cnt removes all segments)
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice ending at -0 would be empty and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # append a "Configuration" tab (action changeDocumentViewerForm) to the
    # management tabs of Folder
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
131
132
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer with its 'template' sub-folder.

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the object
    @param digilibBaseUrl: stored as self.digilibBaseUrl only when not None
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # keep the authorized groups as a list of trimmed lower-case names
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # full text client (failure is logged, not raised)
    try:
        import MpdlXmlTextServer
        templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib image viewer (failure is logged, not raised)
    try:
        from Products.zogiLib.zogiLib import zogiLib
        templateFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
170
171
172	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
176
def getSearchResults(self, **args):
    """Load the list of search results and store the XML in docinfo (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
180
def getResultsPage(self, **args):
    """Return one page of the search results (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
184
def getToc(self, **args):
    """Load the table of contents and store the XML in docinfo (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
188
def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
192
def getPlacesOnPage(self, **args):
    """Get the list of gis places on one page (delegates to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
196
197	#WTF?
198	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
199	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbs_main_rss template for a document
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when docinfo has a text URL and 'images' otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: result of the template called with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only assigned when a base URL was configured, so it
    # has to be read with a default (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # text if a text URL key is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
231
232
233	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: rendered template /template/viewer_$viewMode, or an error string
        when the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy name for text with dictionary layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if not hasText:
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
285
286	#WTF?
def generateMarks(self,mk):
    """Return the marks in mk as a string of 'mk=<value>' items.

    @param mk: single mark, list of marks, or None
    @return: '' for None, otherwise the 'mk=...' items concatenated
        directly, i.e. without any separator between them
    """
    if mk is None:
        return ""

    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
296
297
def getBrowser(self):
    """Return the browser information dict produced by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
303
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
308
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    @param fn: image path; when None it is taken from docinfo['imagePath']
    @param pn: page number; only appended when true
    @param dw: destination width
    @param dh: destination height
    @param docinfo: optional docinfo dict; its 'imageURL' (which already
        carries the fn parameter) is used as base when present
    @return: the Scaler URL
    """
    base = None
    if docinfo is not None:
        base = docinfo.get('imageURL', None)

    if base is None:
        # build the base from the configured digilib URL and the image path
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    pieces = [base]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
327
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
331
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
339
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set or delete
    @param val: value for param (stored as str); None deletes param
    @param params: dict of further parameters; a None value deletes the key
    @param duplicates: how to collapse list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' if there is none); lists are kept for
        any other value
    @return: new dict; the request form itself is not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; all entries may be empty
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
378
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or from dict params.

    @param param: single parameter name (see getParams)
    @param val: value for param (see getParams)
    @param params: dict of further parameters (see getParams)
    @param baseUrl: URL the query string is appended to; defaults to
        getDocumentViewerURL()
    @param paramSep: separator placed between the key=value pairs
    @param duplicates: handling of duplicate keys (see getParams)
    @return: baseUrl + '?' + query string
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (key, value) in urlParams.items():
        # quote the value, leaving '/' unescaped
        quoted = urllib.quote_plus(unicode(value),'/')
        pairs.append("%s=%s"%(key,quoted))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
389
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val.

    NOTE(review): the separator passed here is '&', the same value getLink
    uses by default, so this currently returns exactly what getLink returns
    -- confirm whether an HTML-escaped separator was intended.
    """
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
393
394
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of the info_xml template called with the docinfo
    """
    # digilibBaseUrl is only assigned when a base URL was configured, so it
    # has to be read with a default (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
403
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted.  A missing access type or one
    listed in self.authgroups is granted to any user whose name is not
    "Anonymous User".  Every other access type is refused and logged.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
424
425
426
def getDocinfo(self, mode, url):
    """Return the docinfo dict for a document, depending on mode.

    The result is stored in the session under 'docinfo' and returned from
    there as long as mode and url stay the same.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to image file)
    @param url: url which contains display information
    @raise IOError: no index.meta DOM found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session and check if it is still current
    cached = session.get('docinfo', None)
    if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
        logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
        return cached

    # new docinfo, including our own URL and the digilib URL
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
        'digilibBaseUrl': self.digilibBaseUrl,
    }

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath" or mode == "filepath":
        # index.meta is optional; assume it in the parent dir of an image
        # folder resp. two path segments above an image file
        if mode == "imagepath":
            docUrl = getParentPath(url)
        else:
            docUrl = getParentPath(url, 2)

        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        meta = self.metadataService

        # document directory name and path
        resource = meta.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = meta.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info, or info.xml as fallback
        bib = meta.getBibData(dom=metaDom)
        if not bib:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = meta.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = meta.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = meta.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
524
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] and docinfo['documentPath'].  The path is
    taken from the resource's 'archive-path' or, when that is missing, from
    the document URL in docinfo unless it starts with 'http:'.

    @param docinfo: docinfo dict (modified in place)
    @param resource: dict with the resource data ('name', 'archive-path')
    @return: docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl', the
        # 'documentURL' spelling that was read here before is still accepted
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
549
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' when
    the corresponding texttool entries exist, and always sets 'pageFlow',
    'oddPage' and 'titlePage' (with defaults 'ltr', 'left' and 0).

    @param docinfo: docinfo dict (modified in place)
    @param texttool: dict with the texttool data
    @return: docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl and docPath:
        scheme = urlparse.urlparse(textUrl)[0]
        if scheme == "":
            # not a URL: relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
593
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields under 'bib', the bib type under 'bibType' and
    the DC-mapped creator, title and date for convenience.

    @param docinfo: docinfo dict (modified in place)
    @param bib: dict with the bib data
    @return: docinfo
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,None)

    return docinfo
607
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type, or to the lower-cased
    name for the types 'group' and 'institution'.  docinfo is left
    unchanged when the type is empty or the element is incomplete.

    @param docinfo: docinfo dict (modified in place)
    @param acc: dict with the access data ('@attr' with 'type', and 'name')
    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # incomplete or oddly shaped access element: keep docinfo as it is,
        # but let unrelated errors propagate
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
625
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of images from digilib into docinfo['numPages'].

    Requests dirInfo-xml.jsp for path from the digilib server and uses the
    text of the 'size' element (0 when it is empty).  When no data could be
    fetched an error is logged and docinfo is returned unchanged.

    @param docinfo: docinfo dict (modified in place)
    @param path: image path, sent as the fn parameter
    @return: docinfo
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
644
645
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml named by docinfo['presentationUrl'] and stores the
    text of its author, title and date elements as 'creator', 'title' and
    'date'.  When there is no URL or nothing could be read, an error is
    logged and docinfo is returned unchanged.

    @param docinfo: docinfo dict (modified in place)
    @return: docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path, served by the digilib Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # covers None as well as an empty response, which cannot be parsed
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
674
675
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param current: current page number
    @param start: first page of the thumbnail group; defaults to the first
        page of the group that contains current
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo dict of the document
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: layer name, comma-separated layer names or list of names
    @param tocMode: stored as pageinfo['tocMode']
    @return: pageinfo dict
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save list of the non-empty layers without duplicates, keeping order
        seen = set()
        layers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                layers.append(layer)

        pageinfo['viewLayers'] = layers
        # stringify viewLayer
        viewLayer = ','.join(layers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # (cache text page as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        numPages = int(docinfo.get('numPages', 0))

    # number of groups, rounded up
    numgroups = int(numPages / grpsize)
    if numPages % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

    # TODO: do we need this here?
    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
    pageinfo['resultPN'] = getInt(request.get('resultPN','1'))

    # limit tocPN TODO: do we need this?
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        # cached toc: number of toc pages, rounded up
        tocPages, remainder = divmod(docinfo[tocSizeKey], pageinfo['tocPageSize'])
        if remainder > 0:
            tocPages += 1

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
753
754
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with the page information for one screenful of thumbnails.

    @param start: index of the first page on this screen
    @param rows: number of thumbnail rows
    @param cols: number of thumbnail columns
    @param pageFlowLtr: True fills each row left to right, False right to left
    @param pageZero: True starts the first screen with an empty zeroth cell
    @param minIdx: lowest valid page index
    @param maxIdx: highest valid page index; 0 means unknown and is replaced
        by start + rows*cols
    @return: dict with 'batches' (list of {'start','end'} for all screens),
        'pages' (rows of {'idx': n}, idx None for cells outside
        minIdx..maxIdx), 'prevStart' and 'nextStart' (None when there is no
        previous/next screen)
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    nb = int(math.ceil(maxIdx / float(grpsize)))
    ofs = 0 if pageZero else 1
    batch['batches'] = [
        {'start': i * grpsize + ofs, 'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
        for i in range(nb)
    ]

    # with a zeroth page the first screen starts one cell early
    idx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx': None}
            else:
                page = {'idx': idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is a valid page index, so a next screen exists as soon as its
    # first page does not lie beyond maxIdx ('<' made a last screen holding
    # exactly one page unreachable)
    # NOTE(review): with pageZero the 'batches' entries start at multiples of
    # the group size while nextStart advances from start -- confirm which of
    # the two the templates use for navigation.
    if start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
812
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with the information for one screenful of data.

    @param start: first index of this batch
    @param size: number of entries per batch
    @param end: upper limit; 0 is replaced by start + size
    @param data: optional sequence of entries; when empty or None the
        numbers i+1 for the indexes i of this batch are used instead
    @param fullData: True: data is indexed with the batch indexes;
        False: data holds only this batch and is indexed from 0
    @return: dict with 'batches' (list of {'start','end'}), 'this' (entries
        of this batch), 'prevStart' and 'nextStart' (None when there is no
        previous/next batch)
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)} for n in range(numBatches)]

    # list of elements in this batch
    stop = min(start+size, end)
    if not data:
        this = [i + 1 for i in range(start, stop)]
    elif fullData:
        this = [data[i] for i in range(start, stop)]
    else:
        this = [data[j] for j in range(stop - start)]

    prevStart = max(start - size, 1) if start > 1 else None
    nextStart = start + size if start + size < end else None

    return {
        'batches': batches,
        'this': this,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
856
857
858	security.declareProtected('View management screens','changeDocumentViewerForm')
859	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
860
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store the settings from the configuration form.

    @param title: new title
    @param digilibBaseUrl: new digilib base URL (stored even when None)
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma-separated list of authorized groups
    @param RESPONSE: when given, it is redirected to 'manage_main'
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # keep the authorized groups as a list of trimmed lower-case names
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
876
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
881
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to self under the given id.

    @param id: id of the new viewer
    @param imageScalerUrl: passed on to the documentViewer constructor
    @param textServerName: passed on to the documentViewer constructor
    @param title: title of the new viewer
    @param RESPONSE: when given, it is redirected to 'manage_main'
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: