Context Navigation

source: documentViewer/documentViewer.py @ 483:ab9b34a1c62a

elementtree

Last change on this file since 483:ab9b34a1c62a was 483:ab9b34a1c62a, checked in by casties, 13 years ago
more new templates
File size: 35.4 KB

Line
1	from OFS.Folder import Folder
2	from OFS.Image import File
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	#from Ft.Xml import EMPTY_NAMESPACE, Parse
10	#import Ft.Xml.Domlette
11
12	import xml.etree.ElementTree as ET
13
14	import os.path
15	import sys
16	import urllib
17	import logging
18	import math
19	import urlparse
20	import re
21	import string
22
23	from SrvTxtUtils import getInt, getText, getHttpData
24
def logger(txt, method, txt2):
    """Log the concatenation of txt and txt2 on the root logger.

    @param txt: first part of the message
    @param method: logging level (e.g. logging.DEBUG); when it is not an
        integer the message is logged at INFO
    @param txt2: second part of the message
    """
    if isinstance(method, int):
        level = method
    else:
        # keep the historic behaviour for callers passing something else
        level = logging.INFO
    logging.log(level, txt + txt2)
28
29
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element node serialized as XML.

    NOTE(review): encoding is accepted for compatibility but is not passed
    on to ET.tostring, so ElementTree's default encoding applies.
    """
    return ET.tostring(node)
40
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the User-Agent header from self.REQUEST and returns a dict with
    the keys ua, isIE, isN4, versFirefox, versIE, versSafariChrome,
    versOpera, isMac, isWin, isIEWin, isIEMac and staticHTML.
    Version entries stay "" when the browser could not be identified.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the sniffing
    ua = rawUa or ""

    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    def tail(text, marker):
        # text from the first occurrence of marker on; when marker is
        # missing find() returns -1 and only the last character remains
        return text[text.find(marker):]

    if ua.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (ua.find('Mozilla/4.') > -1)

    fromOpen = tail(ua, '(')
    fromClose = tail(ua, ')')

    # Safari or Chrome identification: tokens after the second (...) group
    try:
        tokens = tail(tail(fromClose, '('), ')').split(" ")
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry of the first group
    try:
        entry = fromOpen.split("; ")[1]
        if entry.find("MSIE") > -1:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first (...) group
    # (computed here instead of relying on a variable of the Safari check)
    try:
        token = fromClose.split(" ")[2]
        if token.find("Firefox") > -1:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "3.6", "10.")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if ua.find("Opera") > -1:
            bt['versOpera'] = tail(fromOpen, ')').split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = ua.find('Macintosh') > -1
    bt['isWin'] = ua.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
101
def getParentPath(path, cnt=1):
    """returns pathname shortened by cnt

    Trailing slashes are removed first. With cnt <= 0 the (stripped) path
    is returned unchanged; if cnt exceeds the number of segments the
    result is the empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice [0:-0] would be empty, so handle "nothing to remove" here
        return path
    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[0:-cnt])
108
109
110	##
111	## documentViewer class
112	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance; looked up as attribute 'metadata' in
    # __init__ and changeDocumentViewer
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    # TODO: can this be nicer?
    docuviewer_css = File('docuviewer_css', '', open(os.path.join(package_home(globals()), 'css/docuviewer.css')), content_type='text/css')

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
        """init document viewer

        Creates the sub-folder 'template' and tries to add the helper
        objects 'fulltextclient' and 'zogilib' to it; failures are logged
        and do not abort the construction.
        """
        self.id = id
        self.title = title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template', templateFolder) # old style

        # NOTE: sys.exc_info() is used instead of binding the exception in
        # the except clause so the code parses on every Python version
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
            templateFolder._setObject('fulltextclient', textServer)
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib', zogilib)
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to create zogiLib for zogilib: "+str(e))

        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        # the attribute stays unset when no URL is given; readers must use
        # _ensureDigilibBaseUrl() or getattr()
        if digilibBaseUrl is not None:
            self.digilibBaseUrl = digilibBaseUrl

    def _ensureDigilibBaseUrl(self, fallback):
        """sets self.digilibBaseUrl from zogilib or fallback if it is unset or empty"""
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback

    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page (proxy for fulltextclient.getTextPage)"""
        return self.template.fulltextclient.getTextPage(**args)

    def getOrigPages(self, **args):
        """get original pages (proxy for fulltextclient.getOrigPages)"""
        return self.template.fulltextclient.getOrigPages(**args)

    def getOrigPagesNorm(self, **args):
        """get original pages norm (proxy for fulltextclient.getOrigPagesNorm)"""
        return self.template.fulltextclient.getOrigPagesNorm(**args)

    def getQuery(self, **args):
        """get query in search (proxy for fulltextclient.getQuery)"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search (proxy for fulltextclient.getSearch)"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places (proxy for fulltextclient.getGisPlaces)"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places (proxy for fulltextclient.getAllGisPlaces)"""
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getWordInfo(self, **args):
        """get word info (proxy for fulltextclient.getWordInfo)"""
        return self.template.fulltextclient.getWordInfo(**args)

    def getLemma(self, **args):
        """get lemma (proxy for fulltextclient.getLemma)"""
        return self.template.fulltextclient.getLemma(**args)

    def getLemmaQuery(self, **args):
        """get lemma query (proxy for fulltextclient.getLemmaQuery)"""
        return self.template.fulltextclient.getLemmaQuery(**args)

    def getLex(self, **args):
        """get lex (proxy for fulltextclient.getLex)"""
        return self.template.fulltextclient.getLex(**args)

    def getToc(self, **args):
        """get toc (proxy for fulltextclient.getToc)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage (proxy for fulltextclient.getTocPage)"""
        return self.template.fulltextclient.getTocPage(**args)

    security.declareProtected('View', 'thumbs_rss')
    def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)
        @param start: first page of the thumbnail batch (computed from pn if None)
        @param pn: current page number
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode, url=url)
        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode == "auto":
            # auto mode: text if a text URL is known, images otherwise
            if "textURL" in docinfo or docinfo.get('textURLPath', None):
                viewMode = "text"
            else:
                viewMode = "images"

        return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)

    security.declareProtected('View', 'index_html')
    def index_html(self, url, mode="texttool", viewMode="auto", viewType=None, tocMode="thumbs", start=1, pn=1):
        """
        view page
        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        @param viewMode: 'images': display images, 'text': display text, default is 'auto'
        @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        """
        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode, url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode == "auto":
            # auto viewMode: text if there is a text else images
            if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
                viewMode = "text"
                viewType = "dict"
            else:
                viewMode = "images"
        elif viewMode == "text_dict":
            # legacy fix
            viewMode = "text"
            viewType = "dict"

        # stringify viewType
        if isinstance(viewType, list):
            logging.debug("index_html: viewType is list:%s"%viewType)
            viewType = ','.join([t for t in viewType if t])

        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

        # get template /template/viewer_$viewMode
        pt = getattr(self.template, 'viewer_%s'%viewMode, None)
        if pt is None:
            logging.error("No template for viewMode=%s!"%viewMode)
            # TODO: error page?
            return "No template for viewMode=%s!"%viewMode

        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    def generateMarks(self, mk):
        # Returns "mk=<value>" for every entry of mk (a single value or a
        # list); None gives "".
        # NOTE(review): entries are concatenated without a separator.
        # (kept as comment: a docstring would make the method publishable)
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk = [mk]
        return "".join(["mk=%s"%m for m in mk])

    def getBrowser(self):
        """getBrowser the version of browser """
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        Returns None when there is no template folder or no zogilib in it.
        """
        template = getattr(self, 'template', None)
        zogilib = getattr(template, 'zogilib', None)
        if zogilib is None:
            return None
        return zogilib.getDLBaseUrl()

    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """returns URL to digilib Scaler with params

        Uses docinfo['imageURL'] when available, otherwise builds the URL
        from self.digilibBaseUrl and fn (or docinfo['imagePath']).
        """
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            if fn is None and docinfo is not None:
                fn = docinfo.get('imagePath', '')
            url = "%s/servlet/Scaler?"%self.digilibBaseUrl
            url += "fn=%s"%fn

        if pn:
            url += "&pn=%s"%pn

        url += "&dw=%s&dh=%s"%(dw,dh)
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """returns dict with URL parameters.

        Takes URL parameters and additionally param=val or dict params.
        Deletes key if value is None.
        @param duplicates: 'comma' joins list values (from duplicate keys)
            into a comma-separated string of the non-empty entries,
            'first' takes the first non-empty entry ('' if there is none)
        """
        # copy existing request params
        newParams = self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                newParams.pop(param, None)
            else:
                newParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    newParams.pop(k, None)
                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys)
            for (k, v) in list(newParams.items()):
                if not isinstance(v, list):
                    continue
                nonEmpty = [t for t in v if t]
                if duplicates == 'comma':
                    newParams[k] = ','.join(nonEmpty)
                elif duplicates == 'first':
                    # a list of only empty entries must not raise
                    if nonEmpty:
                        newParams[k] = nonEmpty[0]
                    else:
                        newParams[k] = ''

        return newParams

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
        """returns URL to documentviewer with parameter param set to val or from dict params"""
        def quoteValue(v):
            # quote_plus needs a byte string: encode unicode as UTF-8 so
            # non-ASCII values don't raise
            if not isinstance(v, basestring):
                v = str(v)
            if isinstance(v, unicode):
                v = v.encode('utf-8')
            return urllib.quote_plus(v, '/')

        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
        # quote values and assemble into query string (not escaping '/')
        ps = paramSep.join(["%s=%s"%(k, quoteValue(v)) for (k, v) in urlParams.items()])
        if baseUrl is None:
            baseUrl = self.getDocumentViewerURL()

        return "%s?%s"%(baseUrl, ps)

    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
        """link to documentviewer with parameter param set to val"""
        # NOTE(review): passes the same separator as getLink's default;
        # the name suggests an HTML-escaped ampersand was intended - confirm
        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)

    def getInfo_xml(self, url, mode):
        """returns info about the document as XML"""
        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode, url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' is always accessible; no access type or one of
        self.authgroups requires a user other than "Anonymous User";
        every other access type is refused.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The docinfo dict is cached in the session and reused as long as
        mode and url are the same.
        @param mode: one of 'texttool', 'imagepath', 'filepath'
        @param url: document location, interpreted according to mode
        Raises IOError when index.meta is missing in mode 'texttool' and
        ValueError for an unknown mode.
        """
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        session = self.REQUEST.SESSION
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode == "texttool":
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode == "imagepath":
            # url points to folder with images, index.meta optional
            # assume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        elif mode == "filepath":
            # url points to image file, index.meta optional
            # assume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl
        # process index.meta contents
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info
            texttool = self.metadataService.getTexttoolData(dom=metaDom)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                docinfo = self.getDocinfoFromBib(docinfo, bib)
            else:
                # no bib - try info.xml
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution

            # copyright info
            copyright = self.metadataService.getCopyrightData(dom=metaDom)
            if copyright:
                logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                docinfo['copyright'] = copyright

        # image path
        if mode != 'texttool':
            # override image path from texttool with url
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getDocinfoFromResource(self, docinfo, resource):
        """reads contents of resource element into docinfo

        Sets 'documentName' and 'documentPath'.
        """
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath; getDocinfo stores it under
            # 'documentUrl' (the former key 'documentURL' never existed)
            docUrl = docinfo.get('documentUrl', None)
            if docUrl and not docUrl.startswith('http:'):
                docPath = docUrl

        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo

    def getDocinfoFromTexttool(self, docinfo, texttool):
        """reads contents of texttool element into docinfo"""
        docPath = docinfo.get('documentPath', None)

        # image dir
        imageDir = texttool.get('image', None)
        if imageDir and docPath:
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        # old style text URL
        textUrl = texttool.get('text', None)
        if textUrl and docPath:
            if urlparse.urlparse(textUrl)[0] == "":
                # no URL scheme: path is relative to the document
                textUrl = os.path.join(docPath, textUrl)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrlPath = texttool.get('text-url-path', None)
        if textUrlPath:
            docinfo['textURLPath'] = textUrlPath

        # page flow
        docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')

        # odd pages are left
        docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')

        # number of title page (0: not defined)
        docinfo['titlePage'] = texttool.get('title-scan-no', 0)

        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if presentation and docPath:
            if presentation.startswith('http:'):
                docinfo['presentationUrl'] = presentation
            else:
                docinfo['presentationUrl'] = os.path.join(docPath, presentation)

        return docinfo

    def getDocinfoFromBib(self, docinfo, bib):
        """reads contents of bib element into docinfo"""
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # put all raw bib fields in dict "bib"
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # also store DC metadata for convenience
        dc = self.metadataService.getDCMappedData(bib)
        for key in ('creator', 'title', 'date'):
            docinfo[key] = dc.get(key, None)
        return docinfo

    def getDocinfoFromAccess(self, docinfo, acc):
        """reads contents of access element into docinfo

        Sets 'accessType'; incomplete access data leaves docinfo unchanged.
        """
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            acctype = acc['@attr']['type']
            if acctype:
                access = acctype
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (KeyError, IndexError, TypeError, AttributeError):
            # best effort: missing or malformed entries are ignored
            logging.debug("getDocinfoFromAccess: incomplete access data")

        return docinfo

    def getDocinfoFromDigilib(self, docinfo, path):
        # Asks digilib's dirInfo-xml.jsp for the directory path and stores
        # the content of its <size> element as docinfo['numPages'].
        # (kept as comment: a docstring would make the method publishable)
        infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
        # fetch data
        txt = getHttpData(infoUrl)
        if not txt:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        numPages = 0
        if size:
            try:
                numPages = int(size)
            except ValueError:
                # unparsable size counts as unknown instead of failing
                logging.error("getDocinfoFromDigilib: invalid size=%s"%size)

        docinfo['numPages'] = numPages

        # TODO: produce and keep list of image names and numbers
        return docinfo

    def getDocinfoFromPresentationInfoXml(self, docinfo):
        """gets DC-like bibliographical information from the presentation entry in texttools"""
        url = docinfo.get('presentationUrl', None)
        if not url:
            logging.error("getDocinfoFromPresentation: no URL!")
            return docinfo

        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

        txt = getHttpData(metaUrl)
        if not txt:
            # also covers an empty response, which can't be parsed as XML
            logging.error("Unable to read info.xml from %s"%(url))
            return docinfo

        dom = ET.fromstring(txt)
        docinfo['creator'] = getText(dom.find(".//author"))
        docinfo['title'] = getText(dom.find(".//title"))
        docinfo['date'] = getText(dom.find(".//date"))
        return docinfo

    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
        """returns pageinfo with the given parameters

        @param current: current page number
        @param start: first page of the thumbnail batch; defaults to the
            start of the batch containing current
        @param rows: thumbnail rows (default self.thumbrows)
        @param cols: thumbnail columns (default self.thumbcols)
        @param docinfo: docinfo dict as returned by getDocinfo
        """
        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
        if docinfo is None:
            docinfo = {}

        pageinfo = {}
        pageinfo['viewMode'] = viewMode
        pageinfo['viewType'] = viewType
        pageinfo['tocMode'] = tocMode

        current = getInt(current)
        pageinfo['current'] = current
        pageinfo['pn'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # if start is empty use the batch around current; math.ceil returns
        # a float, so convert to int and never go below page 1
        groupNo = int(math.ceil(float(current) / float(grpsize)))
        defaultStart = max(groupNo * grpsize - (grpsize - 1), 1)
        start = getInt(start, default=defaultStart)
        pageinfo['start'] = start

        numPages = int(docinfo.get('numPages', 0))
        if numPages == 0:
            # numPages unknown - maybe we can get it from text page
            if docinfo.get('textURLPath', None):
                # cache text page as well
                pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
                numPages = int(docinfo.get('numPages', 0))

        numGroups = numPages // grpsize
        if numPages % grpsize > 0:
            numGroups += 1
        pageinfo['numgroups'] = numGroups

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

        # TODO: do we need this here?
        request = self.REQUEST
        pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
        pageinfo['query'] = request.get('query','')
        pageinfo['queryType'] = request.get('queryType','')
        pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
        pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
        pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
        pageinfo['searchPN'] = getInt(request.get('searchPN','1'))

        # limit tocPN to the number of pages of the cached toc
        tocSizeKey = 'tocSize_%s'%tocMode
        if tocSizeKey in docinfo:
            tocSize = docinfo[tocSizeKey]
            tocPageSize = pageinfo['tocPageSize']
            if tocPageSize > 0:
                # number of toc pages, rounded up
                tocPages = (tocSize + tocPageSize - 1) // tocPageSize
                pageinfo['tocPN'] = min(tocPages, pageinfo['tocPN'])

        return pageinfo

    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """returns dict with array of page informations for one screenfull of thumbnails

        Keys: 'batches' (start/end of every batch), 'pages' (rows of
        {'idx': n}, idx is None outside minIdx..maxIdx), 'prevStart' and
        'nextStart' (None at the ends).
        @param maxIdx: last page number; 0 means unknown
        """
        batch = {}
        grpsize = rows * cols
        maxIdxKnown = (maxIdx != 0)
        if not maxIdxKnown:
            maxIdx = start + grpsize

        nb = int(math.ceil(maxIdx / float(grpsize)))
        # batches start at 0 when there is a zeroth page
        if pageZero:
            ofs = 0
        else:
            ofs = 1

        # list of all batch start and end points
        batches = []
        for i in range(nb):
            first = i * grpsize + ofs
            last = min((i + 1) * grpsize + ofs - 1, maxIdx)
            batches.append({'start': first, 'end': last})

        batch['batches'] = batches

        # NOTE(review): with pageZero and start == 1 the screen shows
        # idx 0..grpsize-1 while nextStart is grpsize+1 - confirm intent
        if pageZero and start == 1:
            # correct beginning
            idx = 0
        else:
            idx = start

        pages = []
        for r in range(rows):
            row = []
            for c in range(cols):
                if idx < minIdx or idx > maxIdx:
                    page = {'idx': None}
                else:
                    page = {'idx': idx}

                idx += 1
                if pageFlowLtr:
                    row.append(page)
                else:
                    row.insert(0, page)

            pages.append(row)

        if start > 1:
            batch['prevStart'] = max(start - grpsize, 1)
        else:
            batch['prevStart'] = None

        # maxIdx is inclusive: there is a next batch as long as its first
        # page exists (unknown page count never offers a next batch)
        if maxIdxKnown and start + grpsize <= maxIdx:
            batch['nextStart'] = start + grpsize
        else:
            batch['nextStart'] = None

        batch['pages'] = pages
        return batch

    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """returns dict with information for one screenfull of data.

        Keys: 'batches', 'this' (elements of this batch), 'prevStart' and
        'nextStart'.
        @param data: sequence to take the elements from; without data the
            elements are the numbers i+1
        @param fullData: True if data is indexed by absolute position,
            False if data only holds the current batch
        """
        batch = {}
        if end == 0:
            end = start + size

        nb = int(math.ceil(end / float(size)))
        # list of all batch start and end points
        batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

        # list of elements in this batch
        this = []
        for (j, i) in enumerate(range(start, min(start + size, end))):
            if not data:
                this.append(i + 1)
            elif fullData:
                this.append(data[i])
            else:
                this.append(data[j])

        batch['this'] = this

        if start > 1:
            batch['prevStart'] = max(start - size, 1)
        else:
            batch['prevStart'] = None

        if start + size < end:
            batch['nextStart'] = start + size
        else:
            batch['nextStart'] = None

        return batch

    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
        """change the configuration of the document viewer"""
        # NOTE(review): the thumbrows/thumbcols defaults are swapped
        # compared to __init__ (thumbcols=2, thumbrows=5) - confirm
        self.title = title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
909
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before calling it
    return form.__of__(self)()
914
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """create a documentViewer with the given id in self"""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    # without a RESPONSE there is nothing to redirect
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
922
923	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass used for the document viewer's templates"""

    meta_type = "DocumentViewer Template"
927
928
def manage_addDocumentViewerTemplateForm(self):
    """render the form for adding a DocumentViewerTemplate"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # wrap the template in the context of self before calling it
    return form.__of__(self)()
933
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with zpt/viewer_main.zpt.

    The template is stored in self under id; title is set when given.
    With a REQUEST the browser is redirected to the new object's
    manage_main. text and submit are accepted but not used.
    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()), 'zpt/viewer_main.zpt'), 'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
953
954
955

Note: See TracBrowser for help on using the repository browser.

Download in other formats: