Context Navigation

source: documentViewer/documentViewer.py @ 488:ec3d848fe9e8

elementtree

Last change on this file since 488:ec3d848fe9e8 was 488:ec3d848fe9e8, checked in by casties, 13 years ago
more new templates
File size: 34.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	#from Products.ZSimpleFile.ZSimpleFile import ZSimpleFile
6	from AccessControl import ClassSecurityInfo
7	from AccessControl import getSecurityManager
8	from Globals import package_home
9
10	import xml.etree.ElementTree as ET
11
12	import os.path
13	import sys
14	import urllib
15	import logging
16	import math
17	import urlparse
18	import re
19	import string
20
21	from SrvTxtUtils import getInt, getText, getHttpData
22
def logger(txt, method, txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    The `method` argument is accepted but not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element `node` serialized as XML.

    `encoding` is not handed on to ElementTree; the result is whatever
    ET.tostring() produces by default.
    """
    # The earlier 4Suite implementation printed the node into a cStringIO
    # stream using `encoding`; ElementTree does the whole job in one call.
    return ET.tostring(node)
38
def browserCheck(self):
    """Inspect the request's User-Agent header and return a dict of browser flags.

    Keys of the returned dict:
      ua                -- the raw header value (None if the header is missing)
      isIE, isN4        -- True for 'MSIE' / 'Mozilla/4.' user agents
      versIE, versFirefox, versSafariChrome, versOpera
                        -- version strings, "" when not detected
      isMac, isWin, isIEWin, isIEMac, staticHTML

    A missing User-Agent header is treated like an empty string instead of
    raising, and all parsing is done with explicit length checks instead of
    bare try/except blocks.
    """
    raw_ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    ua = raw_ua or ""

    bt = {}
    bt['ua'] = raw_ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # NOTE: str.find() returns -1 when the character is missing, so the slices
    # below then keep only the last character; this matches the historical
    # behaviour and simply leads to "nothing detected".
    nav = ua[ua.find('('):]      # from the first '(' on
    nav1 = ua[ua.find(')'):]     # from the first ')' on
    platformParts = nav.split("; ")
    productParts = nav1.split(" ")

    # Safari or Chrome identification
    # (the third token after the first ')' is also what the Firefox check uses)
    ie1 = ""
    if len(platformParts) > 1 and len(productParts) > 2:
        ie1 = productParts[2]
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tail = nav3.split(" ")
        if len(tail) > 2 and "Safari" in tail[2]:
            version = tail[1].split("/")
            if len(version) > 1:
                bt['versSafariChrome'] = version[1]

    # IE identification
    if len(platformParts) > 1 and "MSIE" in platformParts[1]:
        ieTokens = platformParts[1].split(" ")
        if len(ieTokens) > 1:
            bt['versIE'] = ieTokens[1]

    # Firefox identification
    if "Firefox" in ie1:
        ffTokens = ie1.split("/")
        if len(ffTokens) > 1:
            nav5 = ffTokens[1]
            logging.debug("FIREFOX: %s" % (nav5))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nav5[0:3]

    # Opera identification
    if "Opera" in ua:
        operaTail = nav[nav.find(')'):]
        operaParts = operaTail.split("/")
        if len(operaParts) > 2:
            bt['versOpera'] = operaParts[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
99
def getParentPath(path, cnt=1):
    """Return `path` with its last `cnt` '/'-separated segments removed.

    Trailing slashes are stripped before the segments are counted.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
106
107
108	##
109	## documentViewer class
110	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # Folder's management options plus a 'main config' entry that points at
    # changeDocumentViewerForm
    manage_options = Folder.manage_options + (
        {'label':'main config','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (set from the attribute named 'metadata')
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    # TODO: can this be nicer?
    docuviewer_css = ImageFile('css/docuviewer.css', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
144
145
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer and its 'template' sub-folder.

    Creates a Folder 'template' and tries to add a 'fulltextclient'
    (MpdlXmlTextServer) and a 'zogilib' (zogiLib) object to it.  Each of
    these steps, as well as the lookup of the 'metadata' attribute, is
    best-effort: a failure is logged and initialisation continues.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups arrives as one string of authorized groups delimited by ','
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder) # old style

    # full text client
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # zogilib for image display
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    # digilibBaseUrl is only stored when one was given
    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
183
184
185	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward **args to template.fulltextclient.getTextPage() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getTextPage(**args)
189
def getOrigPages(self, **args):
    """Forward **args to template.fulltextclient.getOrigPages() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getOrigPages(**args)
193
def getOrigPagesNorm(self, **args):
    """Forward **args to template.fulltextclient.getOrigPagesNorm() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getOrigPagesNorm(**args)
197
def getQuery(self, **args):
    """Forward **args to template.fulltextclient.getQuery() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getQuery(**args)
201
def getSearch(self, **args):
    """Forward **args to template.fulltextclient.getSearch() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getSearch(**args)
205
def getGisPlaces(self, **args):
    """Forward **args to template.fulltextclient.getGisPlaces() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getGisPlaces(**args)
209
def getAllGisPlaces(self, **args):
    """Forward **args to template.fulltextclient.getAllGisPlaces() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getAllGisPlaces(**args)
213
def getWordInfo(self, **args):
    """Forward **args to template.fulltextclient.getWordInfo() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getWordInfo(**args)
217
def getLemma(self, **args):
    """Forward **args to template.fulltextclient.getLemma() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLemma(**args)
221
def getLemmaQuery(self, **args):
    """Forward **args to template.fulltextclient.getLemmaQuery() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLemmaQuery(**args)
225
def getLex(self, **args):
    """Forward **args to template.fulltextclient.getLex() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLex(**args)
229
def getToc(self, **args):
    """Forward **args to template.fulltextclient.getToc() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getToc(**args)
233
def getTocPage(self, **args):
    """Forward **args to template.fulltextclient.getTocPage() and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getTocPage(**args)
237
238
239	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the template 'thumbs_main_rss' for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto' (default). 'auto' becomes
        'text' when docinfo has a 'textURL' key or a non-empty 'textURLPath',
        otherwise 'images'.
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set in __init__ when a value was given, so it may
    # be missing altogether; use getattr() with a default like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
271
272
273	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @return: output of the template 'viewer_<viewMode>', or an error string
        when the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewType = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"

    # stringify viewType
    if isinstance(viewType, list):
        logging.debug("index_html: viewType is list:%s"%viewType)
        viewType = ','.join([t for t in viewType if t])

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
329
def generateMarks(self,mk):
    # Builds "mk=<value>" for every entry of mk (a single value or a list)
    # and returns the pieces concatenated; None gives "".
    # NOTE(review): the pieces are joined without any separator
    # ("mk=1mk=2") -- confirm with the calling templates whether that is
    # intended.
    # NOTE(review): kept without a docstring like the original; adding one
    # may make the method publishable through Zope -- confirm before changing.
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
339
340
def getBrowser(self):
    """Return the browser information dict that browserCheck() builds for this object."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
346
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
351
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler servlet with parameters appended.

    The base is docinfo['imageURL'] when docinfo provides one; otherwise it is
    built from self.digilibBaseUrl and fn (falling back to
    docinfo['imagePath'] when fn is None).  '&pn=' is appended only for a
    true pn, '&dw=' and '&dh=' are always appended.
    """
    haveDocinfo = docinfo is not None
    url = docinfo.get('imageURL', None) if haveDocinfo else None

    if url is None:
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')
        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    if pn:
        url = url + "&pn=%s"%pn

    return url + "&dw=%s&dh=%s"%(dw,dh)
370
def getDocumentViewerURL(self):
    """Return the URL of this instance as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
374
def getStyle(self, idx, selected, style=""):
    """Return `style`, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
382
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request form parameters and then applies
    param=val and/or the dict params.  A value of None deletes the key.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str(val)); None removes param
    @param params: dict of further parameters; values are stored unchanged,
        None removes the key
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry ('' when all entries are empty); any other
        true value leaves lists untouched
    @return: new dict, the request form itself is not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list with only empty entries
                # collapses to '' (as it does for 'comma') instead of raising
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
421
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or from dict params.

    @param param, val, params, duplicates: passed on to getParams()
    @param baseUrl: URL to append the query string to; defaults to
        getDocumentViewerURL()
    @param paramSep: separator placed between the key=value pairs
    @return: baseUrl + '?' + query string; values are quoted with
        quote_plus (not escaping '/'), keys are used as they are
    """
    try:
        from urllib import quote_plus        # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    textType = type(u'')

    def quoteValue(v):
        # quote_plus() fails on non-ASCII unicode text in Python 2, so text is
        # encoded as UTF-8 first; byte strings are quoted as they are and
        # anything else is stringified.
        if isinstance(v, textType):
            v = v.encode('utf-8')
        elif not isinstance(v, bytes):
            v = str(v)
        return quote_plus(v, '/')

    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k, quoteValue(v)) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    url = "%s?%s"%(baseUrl, ps)
    return url
432
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments, called with paramSep='&'."""
    # NOTE(review): the name suggests an HTML-escaped separator, but the
    # separator passed here is a plain '&' -- confirm which one is intended.
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
436
437
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of the template 'info_xml' called with the docinfo
    """
    # digilibBaseUrl may never have been set on this instance, so look it up
    # with a default (as index_html does) instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
446
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    - accessType 'free': always True
    - accessType missing or one of self.authgroups: True only for a user
      whose name is not "Anonymous User"
    - anything else: False (logged as unknown access type)
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
467
468
469
def getDocinfo(self, mode, url):
    """Return the docinfo dict for the document behind url, depending on mode.

    A docinfo cached in the session is reused when its 'mode' and 'url' match.
    Otherwise a new docinfo is assembled from index.meta (via
    self.metadataService) and digilib, stored in the session and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or
        'filepath' (url points to an image file)
    @param url: location of the document
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including self url
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional;
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional;
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        meta = self.metadataService

        # document directory name and path
        resource = meta.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = meta.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info; without bib try info.xml
        bib = meta.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = meta.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = meta.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = meta.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
567
def getDocinfoFromResource(self, docinfo, resource):
    """Read contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'].  Without an
    archive-path the document URL stored in docinfo is used as path, unless
    it is an http: URL.

    @param docinfo: dict to update (modified in place)
    @param resource: dict with the resource data
    @return: docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl',
        # the old spelling 'documentURL' is still accepted as a fallback
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
592
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read contents of the texttool element into docinfo.

    Keys written: 'imagePath', 'textURL', 'presentationUrl' (each only when
    the texttool entry and docinfo['documentPath'] are both set),
    'textURLPath' (when set), and always 'pageFlow', 'oddPage', 'titlePage'.

    @param docinfo: dict to update (modified in place)
    @param texttool: dict with the texttool data
    @return: docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme: treat as path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
636
def getDocinfoFromBib(self, docinfo, bib):
    """Read contents of the bib element into docinfo.

    Stores the raw bib dict as docinfo['bib'], its '@type' as
    docinfo['bibType'] and, for convenience, 'creator', 'title' and 'date'
    from the DC mapping returned by metadataService.getDCMappedData(bib).
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
650
def getDocinfoFromAccess(self, docinfo, acc):
    """Read contents of the access element into docinfo.

    Sets docinfo['accessType'] to acc['@attr']['type'], or to the
    lower-cased acc['name'] when the type is 'group' or 'institution'.
    Incomplete access data leaves docinfo unchanged.

    @param docinfo: dict to update (modified in place)
    @param acc: dict with the access data
    @return: docinfo
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError) as err:
        # best effort: missing or malformed access data is not an error for
        # the caller, but it is logged instead of being silently swallowed
        logging.debug("getDocinfoFromAccess: unable to read access type (%s)"%repr(err))

    return docinfo
668
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp for the directory `path` and stores the
    # content of the <size> element as docinfo['numPages'] (0 when empty).
    # When no data comes back, docinfo is returned unchanged.
    # NOTE(review): kept without a docstring like the original; adding one
    # may make the method publishable through Zope -- confirm before changing.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
687
688
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml behind docinfo['presentationUrl'] and stores the text
    of its first author, title and date elements as docinfo['creator'],
    docinfo['title'] and docinfo['date'].  docinfo is returned unchanged when
    there is no presentation URL or no data could be read.

    @param docinfo: dict to update (modified in place)
    @return: docinfo
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through the digilib Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    # an empty response is treated like a missing one (as in
    # getDocinfoFromDigilib) instead of being handed to the XML parser
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
717
718
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param current: current page number
    @param start: first page of the thumbnail group; when empty, the first
        page of the group that contains current is used
    @param rows, cols: thumbnail grid size, defaulting to self.thumbrows and
        self.thumbcols
    @param docinfo: docinfo dict of the document (None is treated as empty)
    @param viewMode, viewType, tocMode: stored in pageinfo as given
    @return: dict with paging information ('current', 'pn', 'rows', 'cols',
        'groupsize', 'start', 'numgroups', 'pageZero', 'pageBatch', ...) and
        the query and toc parameters taken from the request
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
    if docinfo is None:
        # the signature allows docinfo to be left out
        docinfo = {}

    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    pageinfo['viewType'] = viewType
    pageinfo['tocMode'] = tocMode

    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current;
    # math.ceil() returns a float in Python 2, so convert to int to keep
    # page numbers integral
    defaultStart = int(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))
    start = getInt(start, default=defaultStart)
    pageinfo['start'] = start

    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # numPages unknown - maybe we can get it from text page
        if docinfo.get('textURLPath', None):
            # cache text page as well
            pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
            np = int(docinfo.get('numPages', 0))

    # number of groups, rounded up
    pageinfo['numgroups'] = np // grpsize
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # TODO: do we need this here?
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))

    # limit tocPN to the number of toc pages (only for a cached toc)
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        tocSize = docinfo[tocSizeKey]
        tocPageSize = pageinfo['tocPageSize']
        # tocPageSize comes from the request; skip the limit instead of
        # dividing by zero
        if tocPageSize > 0:
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1

            pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
784
785
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with page information for one screenful of thumbnails.

    @param start: number of the first page on this screen
    @param rows, cols: size of the thumbnail grid
    @param pageFlowLtr: False fills each row from right to left
    @param pageZero: True adds a page 0 in front of page 1 (and makes the
        batches start at multiples of rows*cols)
    @param minIdx, maxIdx: smallest and largest valid page number (both
        inclusive); maxIdx 0 means the number of pages is unknown
    @return: dict with
        'batches'   -- list of {'start':..., 'end':...} for all screens
        'pages'     -- rows x cols grid of {'idx': number or None}
        'prevStart' -- start of the previous screen or None
        'nextStart' -- start of the next screen or None
    """
    batch = {}
    grpsize = rows * cols
    maxKnown = (maxIdx != 0)
    if not maxKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    # the first screen starts with page 0 when there is a zeroth page
    firstWithZero = pageZero and start == 1
    if firstWithZero:
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # The first screen with a zeroth page only shows pages 0..grpsize-1, so
    # the next screen has to start at grpsize there.
    if firstWithZero:
        nextStart = grpsize
    else:
        nextStart = start + grpsize

    # maxIdx is inclusive: a next screen exists as soon as its first page is
    # a valid page.  With an unknown number of pages no next screen is
    # offered (unchanged behaviour).
    if maxKnown and nextStart <= maxIdx:
        batch['nextStart'] = nextStart
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
843
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    @param start: index of the first element of this batch
    @param size: number of elements per batch
    @param end: end index (exclusive for the elements of this batch);
        0 means start + size
    @param data: optional sequence to take the elements from; without data
        the elements are the numbers i+1 for each index i
    @param fullData: True indexes data with the absolute index, False with a
        counter starting at 0
    @return: dict with 'batches' (list of {'start','end'}), 'this' (elements
        of this batch), 'prevStart' and 'nextStart' (index or None)
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)} for n in range(numBatches)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        this = [i + 1 for i in indexes]
    elif fullData:
        this = [data[i] for i in indexes]
    else:
        this = [data[j] for j in range(len(indexes))]

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    batch = {}
    batch['batches'] = batches
    batch['this'] = this
    batch['prevStart'] = prevStart
    batch['nextStart'] = nextStart
    return batch
887
888
889	security.declareProtected('View management screens','changeDocumentViewerForm')
890	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
891
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbrows and thumbcols as given, splits
    authgroups (delimited by ',') into a list of lower-cased names and looks
    up the 'metadata' attribute again.  Redirects to 'manage_main' when a
    RESPONSE is given.
    """
    # NOTE(review): the defaults for thumbrows/thumbcols (2/5) are the
    # reverse of the ones in __init__ (5/2) -- confirm which is intended.
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
907
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form for adding a viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
912
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: