Context Navigation

source: documentViewer/documentViewer.py @ 501:29c6d09a506c

elementtree

Last change on this file since 501:29c6d09a506c was 501:29c6d09a506c, checked in by casties, 12 years ago
more cleanup. viewMode=index works now. moved common template parts in common_template.
File size: 33.8 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return the given ElementTree node serialized as XML.

    The ``encoding`` argument is accepted for interface compatibility but
    is not handed to ``ET.tostring``; the serializer's default is used.
    """
    # The former 4Suite implementation printed the node into a cStringIO
    # stream via Ft.Xml.Domlette.Print(node, stream=..., encoding=encoding).
    xmlText = ET.tostring(node)
    return xmlText
32
def browserCheck(self):
    """Check the browser's request to find out the browser type.

    Reads the HTTP_USER_AGENT header from ``self.REQUEST`` and returns a
    dict with these keys:

    - 'ua': the raw header value (may be None)
    - 'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac': booleans
    - 'versIE', 'versFirefox', 'versSafariChrome', 'versOpera': version
      strings, empty when the browser was not recognised
    - 'staticHTML': always False

    A missing header is treated like an empty one instead of raising.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # parse an empty string when the client sent no User-Agent at all
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # Text from the first '(' resp. the first ')' to the end of the string.
    # str.find() yields -1 when the character is missing, so the slice is
    # then just the last character; the lookups below fail with IndexError
    # in that case and the version simply stays empty.
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]

    # Safari or Chrome identification: tokens after the second ')'
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated field inside the parentheses
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'.
    # Computed here on its own so it no longer depends on the Safari
    # section having run far enough to define the token.
    try:
        ie1 = nav1.split(" ")[2]
        if "Firefox" in ie1:
            nav5 = ie1.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            bt['versFirefox'] = nav5[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            afterParen = nav[nav.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return ``path`` shortened by ``cnt`` trailing segments.

    Trailing slashes are removed first. A ``cnt`` of zero (or less)
    returns the path itself (without trailing slashes) instead of an
    empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be an empty slice and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer

    Folder subclass that bundles the page templates, the stylesheet and
    the script file used to display a document.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds a 'main config' tab pointing at changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # set from the attribute named 'metadata' in __init__ and changeDocumentViewer
    metadataService = None
    """MetaDataFolder instance"""

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
130
131
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses ``authgroups``
    (comma-separated) into a list of lower-cased names and creates the
    'template' sub-folder with a text server client and a zogilib
    instance. Failures while creating those helpers are logged, not
    raised. ``digilibBaseUrl`` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # list of authorized groups
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # template folder, so template.something can always be used
    # (item assignment is the Zope-2.12 way; formerly _setObject)
    templateFolder = Folder('template')
    self['template'] = templateFolder

    try:
        import MpdlXmlTextServer
        templateFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        templateFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # the MetaDataFolder instance is expected under the name 'metadata'
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
169
170
171	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """returns full text content of page (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**kwargs)

def getToc(self, **kwargs):
    """returns the full table of contents in internal format (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**kwargs)

def getTocPage(self, **kwargs):
    """returns one page of the table of contents (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**kwargs)

# The remaining proxies were marked as questionable by the original author.
def getQuery(self, **kwargs):
    """get query in search (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**kwargs)

def getSearch(self, **kwargs):
    """get search (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**kwargs)

def getGisPlaces(self, **kwargs):
    """get gis places (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**kwargs)

def getAllGisPlaces(self, **kwargs):
    """get all gis places (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**kwargs)

def getWordInfo(self, **kwargs):
    """get translate (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getWordInfo(**kwargs)

def getLemma(self, **kwargs):
    """get lemma (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**kwargs)

def getLemmaQuery(self, **kwargs):
    """get query (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**kwargs)

def getLex(self, **kwargs):
    """get lex (from template.fulltextclient)"""
    textServer = self.template.fulltextclient
    return textServer.getLex(**kwargs)
223
224	#WTF?
225	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
226	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' selects 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ only sets digilibBaseUrl when one was given, so the attribute
    # may be missing altogether: use getattr as index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # automatic mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
258
259
260	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
    """
    Show one page of a document with the template viewer_<viewMode>.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # every other tocMode needs the table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # text (with dictionary) if the document has a text, images otherwise
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"
    elif viewMode == "text_dict":
        # legacy name for text with dictionary
        viewMode, viewType = "text", "dict"

    if isinstance(viewType, list):
        # repeated request parameter: join the non-empty entries
        logging.debug("index_html: viewType is list:%s"%viewType)
        nonEmpty = [t for t in viewType if t]
        viewType = ','.join(nonEmpty)

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # look up /template/viewer_$viewMode and run it
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    # TODO: error page?
    logging.error("No template for viewMode=%s!"%viewMode)
    return "No template for viewMode=%s!"%viewMode
316
def generateMarks(self,mk):
    # Builds the "mk=..." text for one mark or a list of marks; None gives "".
    # Documented with comments only: the original has no docstring, and in
    # Zope a docstring decides whether a method can be published.
    # NOTE(review): several marks are concatenated without any separator
    # ("mk=amk=b") -- confirm whether "&" or "," was intended.
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
326
327
def getBrowser(self):
    """Return the browser information dict produced by browserCheck (and log it)."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
333
def findDigilibUrl(self):
    """Ask template.zogilib for the digilib base URL and return it."""
    return self.template.zogilib.getDLBaseUrl()
338
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with page number and size parameters.

    The base is docinfo['imageURL'] when docinfo provides one; otherwise it
    is built from self.digilibBaseUrl and ``fn`` (falling back to
    docinfo['imagePath'] when ``fn`` is None). ``pn`` is only added when
    it is true; ``dw`` and ``dh`` are always added.
    """
    haveDocinfo = docinfo is not None
    url = None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    pieces = [url]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
357
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
361
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if idx != selected:
        return style

    return style + 'sel'
369
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Starts from a copy of the request's form parameters, then applies
    ``param=val`` (value stored as str) and the entries of dict ``params``.
    A value of None deletes the key.

    ``duplicates`` controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry (or '' when all entries are empty). Any other value
    leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # a list of only empty entries has no first element
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
408
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the viewer built from the getParams() result.

    ``param``, ``val``, ``params`` and ``duplicates`` are handed to
    getParams; the resulting values are quoted (keeping '/') and joined
    with ``paramSep``. ``baseUrl`` defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)

    # NOTE(review): unicode(v) followed by quote_plus presumably fails for
    # non-ASCII values under Python 2 -- confirm and encode if needed.
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k,urllib.quote_plus(unicode(v),'/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
419
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments with paramSep fixed to '&'."""
    # NOTE(review): '&' is also getLink's default separator, so this is
    # currently the same as getLink -- confirm whether '&amp;' was meant.
    separator = '&'
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep=separator, duplicates=duplicates)
423
424
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Renders the template 'info_xml' with the docinfo for ``mode``/``url``.
    """
    # __init__ only sets digilibBaseUrl when one was given, so the attribute
    # may be missing altogether: use getattr as index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
433
def isAccessible(self, docinfo):
    """Tell whether access to the resource described by docinfo is granted.

    'free' is always accessible. A missing access type or one listed in
    self.authgroups needs a user whose name is not "Anonymous User".
    Any other access type is refused and logged as an error.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
454
455
456
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    Returns the docinfo dict cached in the session when its 'mode' and
    'url' match the arguments. Otherwise builds a new dict from the
    index.meta DOM delivered by self.metadataService and from digilib,
    stores it in the session and returns it.

    Raises IOError when no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # note the key spelling 'documentUrl'
    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    # NOTE(review): the nesting of the sections below under this condition
    # was reconstructed from an unindented listing -- confirm.
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info (stored as delivered by the metadata service)
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution
            #docinfo = self.getDocinfoFromAccess(docinfo, access)

        # copyright info (stored as delivered by the metadata service)
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright
            #docinfo = self.getDocinfoFromAccess(docinfo, access)

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    if docinfo.get('imagePath', None):
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
554
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute,
    with the document name appended when missing). Without an archive
    path the document URL stored in docinfo is used, unless it starts
    with 'http:'. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if not docPath.startswith('/'):
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it as 'documentUrl'; the
        # formerly used key 'documentURL' is still honoured if present.
        docUrl = docinfo.get('documentUrl', docinfo.get('documentURL', None))
        if docUrl and not docUrl.startswith('http:'):
            # fix URLs starting with /mpiwg/online
            # NOTE(review): prefix is only stripped in this fallback branch,
            # as reconstructed from the unindented original -- confirm.
            docPath = docUrl.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
579
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Copy the settings of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' and 'presentationUrl' (each only when the
    texttool entry and docinfo['documentPath'] are both present),
    'textURLPath' (when given) and always 'pageFlow', 'oddPage' and
    'titlePage'. Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL: treat as path below the document path
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow, position of odd pages,
    # number of title page (0: not defined)
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
623
def getDocinfoFromBib(self, docinfo, bib):
    """Store the bib data and its DC-mapped creator, title and date in docinfo.

    The raw bib dict goes into docinfo['bib'], its '@type' entry into
    docinfo['bibType']. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # DC mapping of the bib fields, for convenience
    dc = self.metadataService.getDCMappedData(bib)
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field,None)

    return docinfo
637
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to acc['@attr']['type']; for the types
    'group' and 'institution' the lower-cased acc['name'] is used instead.
    Incomplete access data leaves docinfo unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # best effort: missing or malformed entries just mean "no access info"
        logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
655
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches dirInfo-xml.jsp for path from the digilib server and stores the
    # content of its <size> element as docinfo['numPages'] (0 when empty).
    # Returns docinfo; it is returned unchanged when nothing could be fetched.
    infoUrl = "".join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    numPages = 0
    if size:
        numPages = int(size)

    docinfo['numPages'] = numPages
    # TODO: produce and keep list of image names and numbers
    return docinfo
674
675
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml named by docinfo['presentationUrl'] and stores the
    text of its first author, title and date elements as 'creator',
    'title' and 'date'. Returns docinfo; it is returned unchanged when no
    URL is set or nothing could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through the digilib server
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if not txt:
        # an empty answer cannot be parsed either
        # (same check as in getDocinfoFromDigilib)
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
704
705
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds the dict describing the current view: view and toc modes,
    current page, thumbnail grid (rows, cols, groupsize, start), number
    of groups, the page batch from getPageBatch, and search/toc
    settings read from the request.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    pageinfo['viewType'] = viewType
    pageinfo['tocMode'] = tocMode

    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    # grid size falls back to the values configured on the viewer
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    # NOTE(review): the default is a float (math.ceil) -- whether getInt
    # converts it is not visible here.
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start

    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # numPages unknown - maybe we can get it from text page
        if docinfo.get('textURLPath', None):
            # cache text page as well
            pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
            # read numPages again after fetching the text page
            np = int(docinfo.get('numPages', 0))

    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # TODO: do we need this here?
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))

    # limit tocPN to the number of toc pages, if the toc size is in docinfo
    if 'tocSize_%s'%tocMode in docinfo:
        tocSize = docinfo['tocSize_%s'%tocMode]
        tocPageSize = pageinfo['tocPageSize']
        # cached toc
        # NOTE(review): '/' presumably relies on Python 2 integer division
        if tocSize%tocPageSize>0:
            tocPages=tocSize/tocPageSize+1
        else:
            tocPages=tocSize/tocPageSize

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
771
772
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    The dict has the keys:

    - 'batches': list of {'start', 'end'} for every group of rows*cols pages
    - 'pages': ``rows`` lists of ``cols`` dicts {'idx': n}; 'idx' is None
      for positions outside minIdx..maxIdx. Rows are reversed when
      ``pageFlowLtr`` is false.
    - 'prevStart', 'nextStart': start of the neighbouring groups or None

    ``maxIdx`` of 0 means the number of pages is unknown; one group after
    ``start`` is assumed then and 'nextStart' stays None.
    """
    batch = {}
    grpsize = rows * cols
    countKnown = (maxIdx != 0)
    if not countKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    # (groups start at 0 when there is a zeroth page, else at 1)
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    batch['batches'] = [{'start': i * grpsize + ofs,
                         'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
                        for i in range(nb)]

    if pageZero and start == 1:
        # correct beginning
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # With a known page count the page at start+grpsize itself still has
    # to be reachable, hence "<=" (it was "<", which dropped the last page
    # whenever maxIdx == start + grpsize).
    # NOTE(review): with pageZero the groups in 'batches' start at 0, 10, ...
    # while nextStart counts from start -- confirm which one templates use.
    if countKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
830
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    Keys: 'batches' (list of {'start', 'end'} per group of ``size``),
    'this' (the elements of the current group), 'prevStart' and
    'nextStart' (start of the neighbouring groups or None).

    Without ``data`` the elements are the numbers i+1 for i in the
    current range; with ``data`` they are data[i] (``fullData``) or the
    leading entries of ``data`` (``fullData`` false). ``end`` of 0 is
    replaced by start + size.
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    nb = int(math.ceil(end / float(size)))
    batches = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

    # list of elements in this batch
    this = []
    for (pos, i) in enumerate(range(start, min(start+size, end))):
        if not data:
            d = i+1
        elif fullData:
            d = data[i]
        else:
            d = data[pos]

        this.append(d)

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {'batches': batches, 'this': this, 'prevStart': prevStart, 'nextStart': nextStart}
874
875
876	security.declareProtected('View management screens','changeDocumentViewerForm')
877	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
878
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the viewer configuration.

    Stores title, digilibBaseUrl and the thumbnail grid size, parses the
    comma-separated ``authgroups`` into a list of lower-cased names and
    looks up the metadata service again. Redirects to 'manage_main' when
    a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    try:
        # the MetaDataFolder instance is expected under the name 'metadata'
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
894
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    return template.__of__(self)()
899
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: