Context Navigation

source: documentViewer/documentViewer.py @ 498:3146b4e7b6a5

elementtree

Last change on this file since 498:3146b4e7b6a5 was 498:3146b4e7b6a5, checked in by casties, 12 years ago
more clean up. works with new jquery digilib.
File size: 34.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """returns a string containing node as XML

    node is handed to ElementTree's tostring() with its default settings.
    The encoding argument is accepted but not used by this implementation.
    """
    # (an older 4Suite/Domlette based serializer used the encoding argument)
    serialized = ET.tostring(node)
    return serialized
32
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict with:
    'ua' (the raw header value), the flags 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' (always False) and the version strings
    'versIE', 'versFirefox', 'versSafariChrome', 'versOpera' ("" if the
    browser was not recognized).
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }
    # a request without User-Agent header yields None -- parse "" instead of crashing
    agent = ua or ""

    def tail(text, char):
        # text starting at the first occurrence of char
        # (find() returns -1 if char is missing, which leaves the last character)
        return text[text.find(char):]

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # every detection below is self-contained; an agent string that does not
    # have the expected shape runs out of tokens (IndexError) and is skipped.

    # Safari or Chrome identification: tokens after the second (...) group
    try:
        tokens = tail(tail(tail(agent, ')'), '('), ')').split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry inside the first (...) group
    try:
        entry = tail(agent, '(').split("; ")[1]
        if "MSIE" in entry:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ")"
    try:
        product = tail(agent, ')').split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters of the version are kept
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification: text after the second "/" following the first (...) group
    try:
        if "Opera" in agent:
            bt['versOpera'] = tail(tail(agent, '('), ')').split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """returns pathname shortened by cnt

    Trailing slashes are ignored; the last cnt '/'-separated segments are
    dropped and the remaining segments are joined with '/' again.
    """
    segments = path.rstrip('/').split('/')
    remaining = segments[:-cnt]
    return '/'.join(remaining)
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance (looked up as attribute 'metadata')
    metadataService = None
    # base URL of the digilib server. Class-level default so that instances
    # created without digilibBaseUrl can be read without AttributeError.
    digilibBaseUrl = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        Creates the 'template' sub-folder and tries to add a 'fulltextclient'
        (MpdlXmlTextServer) and a 'zogilib' (zogiLib) object to it. Failures
        to create either object are logged and otherwise ignored.
        """
        self.id=id
        self.title=title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        self['template'] = templateFolder # Zope-2.12 style
        #self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder['fulltextclient'] = textServer
        except Exception as e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder['zogilib'] = zogilib
        except Exception as e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))

        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if digilibBaseUrl is not None:
            self.digilibBaseUrl = digilibBaseUrl


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """returns full text content of page"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query in search"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places """
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getWordInfo(self, **args):
        """get translate"""
        return self.template.fulltextclient.getWordInfo(**args)

    def getLemma(self, **args):
        """get lemma"""
        return self.template.fulltextclient.getLemma(**args)

    def getLemmaQuery(self, **args):
        """get query"""
        return self.template.fulltextclient.getLemmaQuery(**args)

    def getLex(self, **args):
        """get lex"""
        return self.template.fulltextclient.getLex(**args)

    def getToc(self, **args):
        """returns the full table of contents (in internal format)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """returns one page of the table of contents"""
        return self.template.fulltextclient.getTocPage(**args)


    def _ensureDigilibBaseUrl(self, fallback):
        # sets self.digilibBaseUrl from zogilib (or fallback) if it is not configured.
        # (comment instead of docstring: internal helper, not meant to be published)
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto":
            # auto mode: text if a text URL is set, else images
            if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
        """
        view page
        @param url: url which contains display information
        @param mode: defines how to access the document behind url
        @param viewMode: 'images': display images, 'text': display text, default is 'auto'
        @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        """

        logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        # auto viewMode: text if there is a text else images
        if viewMode=="auto":
            if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
                viewMode = "text"
                viewType = "dict"
            else:
                viewMode = "images"

        elif viewMode == "text_dict":
            # legacy fix
            viewMode = "text"
            viewType = "dict"

        # stringify viewType
        if isinstance(viewType, list):
            logging.debug("index_html: viewType is list:%s"%viewType)
            viewType = ','.join([t for t in viewType if t])

        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

        # get template /template/viewer_$viewMode
        pt = getattr(self.template, 'viewer_%s'%viewMode, None)
        if pt is None:
            logging.error("No template for viewMode=%s!"%viewMode)
            # TODO: error page?
            return "No template for viewMode=%s!"%viewMode

        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    def generateMarks(self,mk):
        # returns the marks in mk (single value or list) as "mk=..." query
        # parameters. Several marks are joined with '&' so that each one stays a
        # separate parameter. Returns "" if mk is None.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return '&'.join(["mk=%s"%m for m in mk])


    def getBrowser(self):
        """getBrowser the version of browser """
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        Returns None if there is no template folder or no zogilib object in it."""
        templateFolder = getattr(self, 'template', None)
        zogilib = getattr(templateFolder, 'zogilib', None)
        if zogilib is None:
            logging.debug("findDigilibUrl: no zogilib in template folder")
            return None

        return zogilib.getDLBaseUrl()

    def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
        """returns URL to digilib Scaler with params

        Uses docinfo['imageURL'] as base if it is set. Otherwise the URL is
        built from digilibBaseUrl and fn (or docinfo['imagePath'] if fn is None).
        pn (if set), dw and dh are appended as parameters."""
        url = None
        if docinfo is not None:
            url = docinfo.get('imageURL', None)

        if url is None:
            url = "%s/servlet/Scaler?"%self.digilibBaseUrl
            if fn is None and docinfo is not None:
                fn = docinfo.get('imagePath','')

            url += "fn=%s"%fn

        if pn:
            url += "&pn=%s"%pn

        url += "&dw=%s&dh=%s"%(dw,dh)
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'

        return style

    def getParams(self, param=None, val=None, params=None, duplicates=None):
        """returns dict with URL parameters.

        Takes URL parameters and additionally param=val or dict params.
        Deletes key if value is None.
        If duplicates is 'comma' list values are turned into a comma-separated
        string of their non-empty entries, if it is 'first' into their first
        non-empty entry ('' if all entries are empty)."""
        # copy existing request params
        newParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                if param in newParams:
                    del newParams[param]
            else:
                newParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    if k in newParams:
                        del newParams[k]

                else:
                    newParams[k] = v

        if duplicates:
            # eliminate lists (coming from duplicate keys)
            for (k,v) in list(newParams.items()):
                if not isinstance(v, list):
                    continue

                nonEmpty = [t for t in v if t]
                if duplicates == 'comma':
                    # make comma-separated list of non-empty entries
                    newParams[k] = ','.join(nonEmpty)
                elif duplicates == 'first':
                    # take first non-empty entry (avoid IndexError if there is none)
                    if nonEmpty:
                        newParams[k] = nonEmpty[0]
                    else:
                        newParams[k] = ''

        return newParams

    def _quoteParam(self, value):
        # returns value quoted for use in a query string (not escaping '/').
        # Python 2 urllib.quote_plus fails on unicode strings with non-ASCII
        # characters, so unicode is encoded as UTF-8 before quoting.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        else:
            value = str(value)

        return urllib.quote_plus(value, '/')

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
        """returns URL to documentviewer with parameter param set to val or from dict params"""
        urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
        # quote values and assemble into query string
        ps = paramSep.join(["%s=%s"%(k, self._quoteParam(v)) for (k, v) in urlParams.items()])
        if baseUrl is None:
            baseUrl = self.getDocumentViewerURL()

        return "%s?%s"%(baseUrl, ps)

    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
        """link to documentviewer with parameter param set to val"""
        # NOTE(review): as written this uses the same separator as getLink's
        # default; the name suggests an HTML-escaped separator -- confirm.
        return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)


    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""
        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        Access type 'free' is always granted. No access type or an access type
        in authgroups is granted to every user that is not "Anonymous User".
        Any other access type is denied."""
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False

            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False



    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        mode is one of 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath' (url
        points to image file). The docinfo is cached in the session and reused
        as long as mode and url stay the same.
        Raises IOError if no index.meta is found for mode 'texttool' and
        ValueError for an unknown mode."""
        logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        docinfo['digilibBaseUrl'] = self.digilibBaseUrl
        # get index.meta DOM
        docUrl = None
        metaDom = None
        if mode=="texttool":
            # url points to document dir or index.meta
            metaDom = self.metadataService.getDomFromPathOrUrl(url)
            docUrl = url.replace('/index.meta', '')
            if metaDom is None:
                raise IOError("Unable to find index.meta for mode=texttool!")

        elif mode=="imagepath":
            # url points to folder with images, index.meta optional
            # assume index.meta in parent dir
            docUrl = getParentPath(url)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        elif mode=="filepath":
            # url points to image file, index.meta optional
            # assume index.meta is two path segments up
            docUrl = getParentPath(url, 2)
            metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        docinfo['documentUrl'] = docUrl
        # process index.meta contents
        if metaDom is not None and metaDom.tag == 'resource':
            # document directory name and path
            resource = self.metadataService.getResourceData(dom=metaDom)
            if resource:
                docinfo = self.getDocinfoFromResource(docinfo, resource)

            # texttool info
            texttool = self.metadataService.getTexttoolData(dom=metaDom)
            if texttool:
                docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

            # bib info
            bib = self.metadataService.getBibData(dom=metaDom)
            if bib:
                docinfo = self.getDocinfoFromBib(docinfo, bib)
            else:
                # no bib - try info.xml
                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

            # auth info
            access = self.metadataService.getAccessData(dom=metaDom)
            if access:
                docinfo = self.getDocinfoFromAccess(docinfo, access)

            # attribution info
            attribution = self.metadataService.getAttributionData(dom=metaDom)
            if attribution:
                logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                docinfo['attribution'] = attribution

            # copyright info
            copyright = self.metadataService.getCopyrightData(dom=metaDom)
            if copyright:
                logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                docinfo['copyright'] = copyright

        # image path
        if mode != 'texttool':
            # override image path from texttool with url
            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getDocinfoFromResource(self, docinfo, resource):
        """reads contents of resource element into docinfo

        Sets 'documentName' and 'documentPath'. The path is taken from
        'archive-path' or, if that is missing, from docinfo['documentUrl']
        unless that is a http(s) URL."""
        docName = resource.get('name', None)
        docinfo['documentName'] = docName
        docPath = resource.get('archive-path', None)
        if docPath:
            # clean up document path
            if docPath[0] != '/':
                docPath = '/' + docPath

            if docName and (not docPath.endswith(docName)):
                docPath += "/" + docName

        else:
            # use docUrl as docPath (getDocinfo stores it as 'documentUrl')
            docUrl = docinfo.get('documentUrl', None)
            if docUrl and not docUrl.startswith(('http:', 'https:')):
                docPath = docUrl

        if docPath:
            # fix URLs starting with /mpiwg/online
            docPath = docPath.replace('/mpiwg/online', '', 1)

        docinfo['documentPath'] = docPath
        return docinfo

    def getDocinfoFromTexttool(self, docinfo, texttool):
        """reads contents of texttool element into docinfo"""
        docPath = docinfo.get('documentPath', None)

        # image dir
        imageDir = texttool.get('image', None)
        if imageDir and docPath:
            imageDir = os.path.join(docPath, imageDir)
            imageDir = imageDir.replace('/mpiwg/online', '', 1)
            docinfo['imagePath'] = imageDir

        # old style text URL
        textUrl = texttool.get('text', None)
        if textUrl and docPath:
            if urlparse.urlparse(textUrl)[0] == "":
                # no URL scheme: path relative to the document
                textUrl = os.path.join(docPath, textUrl)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrlPath = texttool.get('text-url-path', None)
        if textUrlPath:
            docinfo['textURLPath'] = textUrlPath

        # page flow
        docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')

        # odd pages are left
        docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')

        # number of title page (0: not defined)
        docinfo['titlePage'] = texttool.get('title-scan-no', 0)

        # old presentation stuff
        presentation = texttool.get('presentation', None)
        if presentation and docPath:
            if presentation.startswith(('http:', 'https:')):
                docinfo['presentationUrl'] = presentation
            else:
                docinfo['presentationUrl'] = os.path.join(docPath, presentation)

        return docinfo

    def getDocinfoFromBib(self, docinfo, bib):
        """reads contents of bib element into docinfo"""
        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
        # put all raw bib fields in dict "bib"
        docinfo['bib'] = bib
        docinfo['bibType'] = bib.get('@type', None)
        # also store DC metadata for convenience
        dc = self.metadataService.getDCMappedData(bib)
        for field in ('creator', 'title', 'date'):
            docinfo[field] = dc.get(field, None)

        return docinfo

    def getDocinfoFromAccess(self, docinfo, acc):
        """reads contents of access element into docinfo

        Sets docinfo['accessType'] to the access type or, for types 'group'
        and 'institution', to the lower-cased name. Access data without the
        expected entries leaves docinfo unchanged."""
        #TODO: also read resource type
        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
        try:
            access = acc['@attr']['type']
            if access:
                if access in ['group', 'institution']:
                    access = acc['name'].lower()

                docinfo['accessType'] = access

        except (LookupError, TypeError, AttributeError) as e:
            # best effort: incomplete access data is ignored
            logging.debug("getDocinfoFromAccess: unable to read access data: %s"%repr(e))

        return docinfo

    def getDocinfoFromDigilib(self, docinfo, path):
        # reads the number of images in directory path from digilib's
        # dirInfo-xml.jsp into docinfo['numPages'] (0 if there is no size).
        # docinfo is returned unchanged if digilib returns no data.
        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
        # fetch data
        txt = getHttpData(infoUrl)
        if not txt:
            logging.error("Unable to get dir-info from %s"%(infoUrl))
            return docinfo

        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        if size:
            docinfo['numPages'] = int(size)
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers
        return docinfo


    def getDocinfoFromPresentationInfoXml(self,docinfo):
        """gets DC-like bibliographical information from the presentation entry in texttools"""
        url = docinfo.get('presentationUrl', None)
        if not url:
            logging.error("getDocinfoFromPresentation: no URL!")
            return docinfo

        if url.startswith(("http://", "https://")):
            # real URL
            metaUrl = url
        else:
            # online path: read the file through digilib's Texter servlet
            metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

        txt=getHttpData(metaUrl)
        if txt is None:
            logging.error("Unable to read info.xml from %s"%(url))
            return docinfo

        dom = ET.fromstring(txt)
        docinfo['creator']=getText(dom.find(".//author"))
        docinfo['title']=getText(dom.find(".//title"))
        docinfo['date']=getText(dom.find(".//date"))
        return docinfo


    def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
        """returns pageinfo with the given parameters

        pageinfo contains the view settings, the current page, the thumbnail
        grid (rows, cols, groupsize, start, numgroups, pageBatch) and the query
        and table-of-contents parameters read from the request."""
        logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
        pageinfo = {}
        pageinfo['viewMode'] = viewMode
        pageinfo['viewType'] = viewType
        pageinfo['tocMode'] = tocMode

        current = getInt(current)
        pageinfo['current'] = current
        pageinfo['pn'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # if start is empty use the first page of the group around current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start

        numPages = int(docinfo.get('numPages', 0))
        if numPages == 0:
            # numPages unknown - maybe we can get it from text page
            if docinfo.get('textURLPath', None):
                # cache text page as well
                pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
                numPages = int(docinfo.get('numPages', 0))

        # number of thumbnail groups (rounded up)
        pageinfo['numgroups'] = numPages // grpsize
        if numPages % grpsize > 0:
            pageinfo['numgroups'] += 1

        pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
        oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
        # add zeroth page for two columns
        pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
        pageinfo['pageZero'] = pageZero
        pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

        # TODO: do we need this here?
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        pageinfo['query'] = self.REQUEST.get('query','')
        pageinfo['queryType'] = self.REQUEST.get('queryType','')
        pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
        pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
        pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
        pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
        pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))

        # limit tocPN to the number of pages of the cached toc
        tocSizeKey = 'tocSize_%s'%tocMode
        tocPageSize = pageinfo['tocPageSize']
        # a page size of 0 from the request would divide by zero: skip the limit then
        if tocSizeKey in docinfo and tocPageSize > 0:
            # presumably an integer count of toc entries -- TODO confirm
            tocSize = docinfo[tocSizeKey]
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1

            pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

        return pageinfo


    def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
        """returns dict with array of page informations for one screenfull of thumbnails

        The dict contains:
        'batches': list of {'start','end'} for all screens up to maxIdx,
        'pages': rows x cols grid of {'idx': n} (None outside minIdx..maxIdx),
                 rows are reversed if pageFlowLtr is false,
        'prevStart', 'nextStart': start of previous/next screen or None."""
        batch = {}
        grpsize = rows * cols
        if maxIdx == 0:
            maxIdx = start + grpsize

        nb = int(math.ceil(maxIdx / float(grpsize)))
        # list of all batch start and end points
        if pageZero:
            ofs = 0
        else:
            ofs = 1

        batches = []
        for i in range(nb):
            s = i * grpsize + ofs
            e = min((i + 1) * grpsize + ofs - 1, maxIdx)
            batches.append({'start':s, 'end':e})

        batch['batches'] = batches

        if pageZero and start == 1:
            # correct beginning
            idx = 0
        else:
            idx = start

        pages = []
        for r in range(rows):
            row = []
            for c in range(cols):
                if idx < minIdx or idx > maxIdx:
                    page = {'idx':None}
                else:
                    page = {'idx':idx}

                idx += 1
                if pageFlowLtr:
                    row.append(page)
                else:
                    row.insert(0, page)

            pages.append(row)

        batch['pages'] = pages

        if start > 1:
            batch['prevStart'] = max(start - grpsize, 1)
        else:
            batch['prevStart'] = None

        # idx is now the first page after this screen. There is a next screen
        # as long as that page exists -- also if it is the very last page, and
        # also if this screen started with page zero.
        if idx <= maxIdx:
            batch['nextStart'] = idx
        else:
            batch['nextStart'] = None

        return batch

    def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
        """returns dict with information for one screenfull of data.

        The dict contains:
        'batches': list of {'start','end'} for all screens up to end,
        'this': the elements of this screen. They are taken from data by
                position start.. if fullData is true, else from the beginning
                of data. Without data the numbers start+1.. are used.
        'prevStart', 'nextStart': start of previous/next screen or None."""
        batch = {}
        if end == 0:
            end = start + size

        nb = int(math.ceil(end / float(size)))
        # list of all batch start and end points
        batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

        # list of elements in this batch
        this = []
        j = 0
        for i in range(start, min(start+size, end)):
            if data:
                if fullData:
                    d = data[i]
                else:
                    d = data[j]
                    j += 1

            else:
                d = i+1

            this.append(d)

        batch['this'] = this
        if start > 1:
            batch['prevStart'] = max(start - size, 1)
        else:
            batch['prevStart'] = None

        if start + size < end:
            batch['nextStart'] = start + size
        else:
            batch['nextStart'] = None

        return batch


    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """init document viewer"""
        # NOTE(review): the defaults for thumbrows/thumbcols are the reverse of
        # the ones in __init__ (thumbcols=2, thumbrows=5) -- confirm intended.
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
902
def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    # load the add-form template, bind it to this container and render it
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundForm = addForm.__of__(self)
    return boundForm()
907
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer"""
    # create the new viewer and store it in this container under id
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    # nothing to redirect when called without a response object
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: