Context Navigation

source: documentViewer/documentViewer.py @ 490:6f116b86a226

elementtree

Last change on this file since 490:6f116b86a226 was 490:6f116b86a226, checked in by casties, 13 years ago
More new template stuff. Moved the ImageFile index method to SrvTxtUtils.
File size: 34.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree node serialized as an XML string.

    The ``encoding`` argument is accepted for compatibility but is not
    used: the node is serialized with the defaults of ``ET.tostring``.
    """
    # An older 4Suite (Ft.Xml.Domlette.Print) implementation that passed
    # ``encoding`` on used to live here.
    return ET.tostring(node)
32
def browserCheck(self):
    """Check the User-Agent header of the current request.

    Returns a dict with the keys
      'ua'                -- the raw header value (may be None)
      'isIE', 'isN4'      -- booleans from 'MSIE' / 'Mozilla/4.' in the header
      'isMac', 'isWin'    -- booleans from 'Macintosh' / 'Windows' in the header
      'isIEWin', 'isIEMac', 'staticHTML' (always False)
      'versIE', 'versFirefox', 'versSafariChrome', 'versOpera'
                          -- version strings, "" when not detected

    A missing header is treated like an empty one, and every browser probe
    runs independently of the others.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # run the detection on a string even if the client sent no User-Agent
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if agent.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = agent.find('Mozilla/4.') > -1

    # nav: header from the first '(' on; nav1: header from the first ')' on.
    # str.find returns -1 when nothing is found, so these slices then hold
    # at most the last character and the probes below simply find nothing.
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]

    # Safari or Chrome identification: "... (KHTML, like Gecko) <name>/<version> Safari/<build>"
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry is "MSIE <version>"
    try:
        ie = nav.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third blank-separated token after the first ')'
    try:
        product = nav1.split(" ")[2]
        if product.find("Firefox") > -1:
            nav5 = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nav5[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if agent.find("Opera") > -1:
            afterParen = nav[nav.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = agent.find('Macintosh') > -1
    bt['isWin'] = agent.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    Trailing slashes are ignored. A cnt of zero or less leaves the path
    as it is (apart from the stripped trailing slashes); a cnt larger than
    the number of segments yields an empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice ending at -0 is empty and would wipe the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance; None until one is found under the name 'metadata'
    metadataService = None

    # page templates for the viewer frames
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())

    # page templates for the tables of contents
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())

    # page templates for the main page area
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())

    # remaining templates
    head_main = PageTemplateFile('zpt/head_main', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # stylesheet; index_html is swapped for the refreshing variant to make
    # ImageFile better for development
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    docuviewer_css.index_html = refreshingImageFileIndexHtml
138
139
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, splits the comma-delimited
    authgroups string into a list of lower-case group names and creates
    the 'template' sub-folder with a 'fulltextclient' text server and a
    'zogilib' object inside. Failures while creating either helper or
    while looking up 'metadata' are logged and otherwise ignored.
    digilibBaseUrl is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows

    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder) # old style

    # text server proxy
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # digilib client
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service: assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
177
178
179	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """Return getTextPage() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**kwargs)

def getOrigPages(self, **kwargs):
    """Return getOrigPages() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**kwargs)

def getOrigPagesNorm(self, **kwargs):
    """Return getOrigPagesNorm() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**kwargs)

def getQuery(self, **kwargs):
    """Return getQuery() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**kwargs)

def getSearch(self, **kwargs):
    """Return getSearch() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**kwargs)

def getGisPlaces(self, **kwargs):
    """Return getGisPlaces() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**kwargs)

def getAllGisPlaces(self, **kwargs):
    """Return getAllGisPlaces() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**kwargs)

def getWordInfo(self, **kwargs):
    """Return getWordInfo() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getWordInfo(**kwargs)

def getLemma(self, **kwargs):
    """Return getLemma() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**kwargs)

def getLemmaQuery(self, **kwargs):
    """Return getLemmaQuery() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**kwargs)

def getLex(self, **kwargs):
    """Return getLex() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**kwargs)

def getToc(self, **kwargs):
    """Return getToc() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**kwargs)

def getTocPage(self, **kwargs):
    """Return getTocPage() of the fulltextclient (arguments passed through)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**kwargs)
231
232
233	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    """
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current page

    With viewMode "auto" the result is "text" when the docinfo has a
    'textURL' key or a non-empty 'textURLPath', otherwise "images".
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when one was given, so look it
    # up defensively (same as index_html does)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode selected: text if a text URL is set, else images
        if "textURL" in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)
265
266
267	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewType=None, tocMode="thumbs", start=1, pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current page

    Returns the output of the template 'viewer_<viewMode>' from the
    template folder, or an error string when the template folder or the
    template is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)

    # every tocMode except thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy name for text view with dictionary
        viewMode, viewType = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"

    # a list of viewTypes becomes a comma-separated string of its non-empty entries
    if isinstance(viewType, list):
        logging.debug("index_html: viewType is list:%s"%viewType)
        viewType = ','.join([t for t in viewType if t])

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # look up template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
323
def generateMarks(self, mk):
    # Builds the "mk=..." query-string fragment for a single mark or a list
    # of marks. None yields an empty string.
    if mk is None:
        return ""

    if not isinstance(mk, list):
        mk = [mk]

    # join with '&' so that several marks stay separate parameters
    # (plain concatenation would give "mk=amk=b")
    return "&".join(["mk=%s"%m for m in mk])
333
334
def getBrowser(self):
    """Return the browser description dict of browserCheck() for this request and log it at debug level."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
340
def findDigilibUrl(self):
    """Try to get the digilib base URL from the zogilib object in the template folder.

    Returns the result of zogilib.getDLBaseUrl(), or None when there is no
    template folder or no zogilib object in it, so that callers can fall
    back to a default with ``findDigilibUrl() or default``.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        return None

    return zogilib.getDLBaseUrl()
345
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    When docinfo carries an 'imageURL' that URL is the base and fn is not
    used. Otherwise the base is built from self.digilibBaseUrl and fn,
    with fn defaulting to docinfo's 'imagePath' when docinfo is given.
    pn is only appended when it is true; dw and dh are always appended.
    """
    scalerUrl = None
    if docinfo is not None:
        scalerUrl = docinfo.get('imageURL', None)

    if scalerUrl is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        scalerUrl = ("%s/servlet/Scaler?"%self.digilibBaseUrl) + ("fn=%s"%fn)

    suffix = []
    if pn:
        suffix.append("&pn=%s"%pn)

    suffix.append("&dw=%s&dh=%s"%(dw,dh))
    return scalerUrl + "".join(suffix)
364
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
368
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
376
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters.

    Starts from a copy of the current request's form parameters and
    applies the changes:
      param/val  -- set param to str(val); remove param when val is None
      params     -- dict of further changes; a value of None removes the key,
                    other values are stored unchanged
      duplicates -- how to flatten list values (from repeated keys):
                    'comma' joins the non-empty entries with ',',
                    'first' takes the first non-empty entry ('' if there is
                    none); any other true value leaves lists untouched
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # a list of only empty entries has no first entry to take
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
415
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); values are quoted with
    quote_plus (leaving '/' unescaped) and joined with paramSep. Keys are
    used as they are. baseUrl defaults to getDocumentViewerURL().
    Text values are encoded as UTF-8 before quoting so that non-ASCII
    characters do not raise an error.
    """
    try:
        from urllib import quote_plus        # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    def quoteValue(value):
        # hand quote_plus UTF-8 bytes: under Python 2 it cannot quote
        # non-ASCII unicode strings
        if isinstance(value, type(u'')):
            value = value.encode('utf-8')
        elif not isinstance(value, bytes):
            value = str(value)

        return quote_plus(value, '/')

    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k, quoteValue(v)) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
426
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return the same link as getLink(), with the parameters separated by '&amp;'.

    The entity-escaped separator makes the URL usable inside HTML/XML
    attribute values.
    """
    # NOTE(review): the separator read as a plain '&' here, which made this
    # method identical to getLink(); '&amp;' is what the method name implies.
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
430
431
def getInfo_xml(self, url, mode):
    """Return info about the document as XML.

    Renders the 'info_xml' template from the template folder with the
    docinfo for the given url and mode.
    """
    # digilibBaseUrl is only set by __init__ when one was given, so look it
    # up defensively (same as index_html does)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
440
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    accessType 'free' is always granted. A missing accessType or one that
    is listed in self.authgroups is granted to every user except
    "Anonymous User". Any other accessType is logged as an error and
    denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return user is not None and (user.getUserName() != "Anonymous User")
461
462
463
def getDocinfo(self, mode, url):
    """Return the docinfo dict for the document at url, read according to mode.

    mode 'texttool':  url points to the document dir or its index.meta
                      (IOError when no index.meta can be read)
    mode 'imagepath': url points to a folder with images; index.meta is
                      looked for one path segment up
    mode 'filepath':  url points to an image file; index.meta is looked for
                      two path segments up
    Any other mode raises ValueError.

    A docinfo stored in the session for the same mode and url is returned
    as it is; a newly built docinfo is stored in the session.
    """
    # NOTE(review): block nesting was reconstructed from flattened source -- confirm against the repository
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including the viewer's own URL
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
        'digilibBaseUrl': self.digilibBaseUrl,
    }

    # get index.meta DOM
    metadata = self.metadataService
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = metadata.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = metadata.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = metadata.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info, or info.xml when there is no bib
        bib = metadata.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = metadata.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = metadata.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = metadata.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
561
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from the resource's 'name' and
    docinfo['documentPath'] from its 'archive-path' (made absolute, with
    the name appended when the path does not end with it). Without an
    archive-path the document URL from docinfo is used unless it starts
    with 'http:'. A leading '/mpiwg/online' is removed from the path.
    documentPath is None when no path could be determined.
    """
    # NOTE(review): block nesting was reconstructed from flattened source -- confirm against the repository
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the old spelling 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
586
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Path-like entries ('image', 'text', 'presentation') are only used when
    docinfo has a 'documentPath'; relative ones are joined onto it.
    Always sets 'pageFlow' (default 'ltr'), 'oddPage' (default 'left') and
    'titlePage' (default 0).
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        docinfo['imagePath'] = os.path.join(docPath, imageDir).replace('/mpiwg/online', '', 1)

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl and docPath:
        isPlainPath = urlparse.urlparse(textUrl)[0] == ""
        if isPlainPath:
            # not a URL: relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
630
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    'creator', 'title' and 'date' entries of the metadata service's
    DC-mapped data (None where missing).
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)

    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
644
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'].
    For the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead. Incomplete access data leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (LookupError, TypeError, AttributeError) as e:
        # best effort: missing or oddly shaped access data is not an error,
        # but leave a trace instead of hiding every exception
        logging.debug("getDocinfoFromAccess: unable to read access type: %s"%repr(e))

    return docinfo
662
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches digilib's dirInfo-xml.jsp for path and stores the text of its
    # "size" element as docinfo['numPages'] (0 when that text is empty).
    # docinfo comes back unchanged when nothing could be fetched.
    infoUrl = "".join((self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path))

    # fetch data
    response = getHttpData(infoUrl)
    if not response:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    sizeNode = ET.fromstring(response).find("size")
    size = getText(sizeNode)
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
681
682
def getDocinfoFromPresentationInfoXml(self, docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the XML behind docinfo['presentationUrl'] (directly for
    'http://' URLs, through digilib's Texter servlet otherwise) and stores
    the text of its first author, title and date elements as 'creator',
    'title' and 'date'. docinfo comes back unchanged when there is no
    presentation URL or nothing could be fetched.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    # an empty response is as useless as none at all and would only make
    # the XML parser fail
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
711
712
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    pageinfo holds the view settings (viewMode, viewType, tocMode), the
    current page ('current' and 'pn'), the thumbnail grid ('rows', 'cols',
    'groupsize', 'start', 'numgroups', 'pageZero', 'pageBatch') and the
    search/toc parameters read from the request. rows and cols default to
    self.thumbrows and self.thumbcols; an empty start defaults to the
    first page of the group containing current.
    """
    # NOTE(review): block nesting was reconstructed from flattened source -- confirm against the repository
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)

    pageinfo = {
        'viewMode': viewMode,
        'viewType': viewType,
        'tocMode': tocMode,
        'current': current,
        'pn': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
    }

    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # (cache text page as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1
    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # TODO: do we need this here?
    request = self.REQUEST
    # request parameters copied as they are
    for (key, default) in (('characterNormalization', 'reg'),
                           ('query', ''),
                           ('queryType', ''),
                           ('querySearch', 'fulltext'),
                           ('highlightQuery', '')):
        pageinfo[key] = request.get(key, default)

    # request parameters converted with getInt
    for (key, default) in (('tocPageSize', 30),
                           ('queryPageSize', 10),
                           ('tocPN', '1'),
                           ('searchPN', '1')):
        pageinfo[key] = getInt(request.get(key, default))

    # limit tocPN to the number of pages of a cached toc
    tocKey = 'tocSize_%s'%tocMode
    if tocKey in docinfo:
        tocSize = docinfo[tocKey]
        tocPageSize = pageinfo['tocPageSize']
        tocPages = tocSize/tocPageSize
        if tocSize%tocPageSize > 0:
            tocPages = tocPages+1

        pageinfo['tocPN'] = min(tocPages, pageinfo['tocPN'])

    return pageinfo
778
779
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    The dict has the keys
      'batches'   -- list of {'start', 'end'} dicts, one per group of
                     rows*cols pages up to maxIdx
      'pages'     -- rows lists of cols {'idx': n} dicts; idx is None for
                     numbers outside minIdx..maxIdx; each row is reversed
                     when pageFlowLtr is false
      'prevStart' -- start of the previous screen, None on the first one
      'nextStart' -- start of the next screen, None when maxIdx is reached

    maxIdx=0 stands for start + rows*cols. With pageZero the grid begins
    with an empty page 0, so the first screen (start=1) covers
    0..rows*cols-1 and the groups in 'batches' start at 0.
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    nb = int(math.ceil(maxIdx / float(grpsize)))
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    # first number shown on this screen
    if pageZero and start == 1:
        # correct beginning
        first = 0
    else:
        first = start

    pages = []
    idx = first
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # the next screen begins right after the last number of this one and
    # exists as long as that number is not beyond maxIdx (which is inclusive)
    nextStart = first + grpsize
    if nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
837
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    The dict has the keys
      'batches'   -- list of {'start', 'end'} dicts, one per group of size
                     entries up to end
      'this'      -- the entries of this batch: data[i] for the indexes i
                     from start up to (not including) min(start+size, end)
                     when fullData is true, the leading entries of data
                     when it is false, and i+1 when there is no data
      'prevStart' -- start of the previous batch, None on the first one
      'nextStart' -- start of the next batch, None when end is reached

    end=0 stands for start + size.
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    nb = int(math.ceil(end / float(size)))
    batches = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        this = [i+1 for i in indexes]
    elif fullData:
        # data covers everything: pick by absolute index
        this = [data[i] for i in indexes]
    else:
        # data only holds this batch: pick by position
        this = [data[pos] for (pos, i) in enumerate(indexes)]

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {
        'batches': batches,
        'this': this,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
881
882
883	security.declareProtected('View management screens','changeDocumentViewerForm')
884	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
885
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Change the settings of the document viewer.

    Stores title, digilibBaseUrl and the thumbnail grid size as given,
    splits the comma-delimited authgroups string into a list of lower-case
    group names and looks up the metadata service again. Redirects to
    'manage_main' when a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
901
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    return formTemplate.__of__(self)()
906
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """Create a document viewer with the given id and add it to self.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: