Context Navigation

source: documentViewer/documentViewer.py @ 516:7d7b639d7be7

Last change on this file since 516:7d7b639d7be7 was 516:7d7b639d7be7, checked in by casties, 12 years ago
add methods to use doc-info.xql. read list of page numbers from doc-info.xql. add original page numbers to thumbs.
File size: 34.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """returns a string containing node as XML

    @param node: ElementTree element to serialize
    @param encoding: accepted for compatibility only; it is not handed to
        ElementTree, so ET.tostring's default encoding applies
    """
    # the earlier 4Suite based implementation (Ft.Xml.Domlette.Print) was the
    # one that made use of the encoding argument
    return ET.tostring(node)
32
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict
    with the keys 'ua', 'isIE', 'isN4', 'versFirefox', 'versIE',
    'versSafariChrome', 'versOpera', 'isMac', 'isWin', 'isIEWin', 'isIEMac'
    and 'staticHTML'. Version entries stay "" when the browser is not
    recognized. A missing header is treated like an empty user agent
    instead of raising.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt = {}
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # parse an empty string when the header is missing
    agent = ua or ""

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # user agent from the first "(" and from the first ")" onwards.
    # find() returns -1 when the character is missing, which makes the
    # slice start at the last character (same as the previous code).
    afterOpen = agent[agent.find('('):]
    afterClose = agent[agent.find(')'):]

    # Safari or Chrome identification: look at the two tokens following
    # the second parenthesized group, e.g. ") Version/5.1 Safari/534"
    try:
        secondGroup = afterClose[afterClose.find('('):]
        tokens = secondGroup[secondGroup.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification: second "; "-separated field, e.g. "MSIE 8.0"
    try:
        field = afterOpen.split("; ")[1]
        if "MSIE" in field:
            bt['versIE'] = field.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ")".
    # The token is computed here instead of relying on a variable left
    # over from the Safari check.
    try:
        token = afterClose.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (as before)
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            rest = afterOpen[afterOpen.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """returns pathname shortened by cnt

    Trailing slashes are ignored. cnt path segments are removed from the
    end; cnt <= 0 returns the path (without trailing slash) unshortened and
    a cnt larger than the number of segments returns ''.
    """
    # make sure path doesn't end with /
    parts = path.rstrip('/').split('/')
    # number of leading segments to keep. (a plain [0:-cnt] slice would be
    # empty for cnt=0)
    keep = max(len(parts) - max(cnt, 0), 0)
    return '/'.join(parts[:keep])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a "Configuration" tab to the management tabs of Folder
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    search_template = PageTemplateFile('zpt/search_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
132
133
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the object
    @param digilibBaseUrl: base URL of digilib; only stored when not None
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated names of authorized groups
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style
    #self._setObject('template',templateFolder) # old style

    # the helper objects are optional: failures are logged and the
    # viewer is created without them
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
        #templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
        #templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
171
172
173	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """returns full text content of page (proxy for template.fulltextclient.getTextPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
177
def getSearchResults(self, **args):
    """loads list of search results and stores XML in docinfo (proxy for template.fulltextclient.getSearchResults)"""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
181
def getResultsPage(self, **args):
    """returns one page of the search results (proxy for template.fulltextclient.getResultsPage)"""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
185
def getTextInfo(self, **args):
    """returns document info from the text server (proxy for template.fulltextclient.getTextInfo)"""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
189
def getToc(self, **args):
    """loads table of contents and stores XML in docinfo (proxy for template.fulltextclient.getToc)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
193
def getTocPage(self, **args):
    """returns one page of the table of contents (proxy for template.fulltextclient.getTocPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
197
def getPlacesOnPage(self, **args):
    """get list of gis places on one page (proxy for template.fulltextclient.getPlacesOnPage)"""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
201
202	#WTF?
203	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
204	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: result of the template/thumbs_main_rss template
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when it was given, so it
    # may be missing altogether (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text URL set and text viewer configured
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
236
237
238	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: output of template/viewer_$viewMode or an error message string
    """

    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy name for text view with dict layer
        viewMode, viewLayer = "text", "dict"

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
290
291	#WTF?
def generateMarks(self,mk):
    # Returns a query string fragment with one "mk=..." parameter per mark.
    # mk may be None (returns ""), a single value or a list of values.
    # Several marks are joined with "&"; a single mark yields "mk=<value>"
    # exactly as before.
    # NOTE(review): deliberately documented with comments only - adding a
    # docstring would presumably make this method publishable through Zope.
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
301
302
def getBrowser(self):
    """returns the browser information dict produced by browserCheck for the current request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
308
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib (template.zogilib.getDLBaseUrl)"""
    return self.template.zogilib.getDLBaseUrl()
313
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """returns URL to digilib Scaler with params

    @param fn: image path; taken from docinfo['imagePath'] when None
    @param pn: page number; only added when true
    @param dw: value of the dw parameter
    @param dh: value of the dh parameter
    @param docinfo: optional docinfo; its 'imageURL' is used as base when set
    """
    haveDocinfo = docinfo is not None
    url = None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)

    if url is None:
        # no prepared image URL: build one from the digilib base URL
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        url = "%s/servlet/Scaler?"%self.digilibBaseUrl + "fn=%s"%fn

    if pn:
        url = url + "&pn=%s"%pn

    return url + "&dw=%s&dh=%s"%(dw,dh)
332
def getDocumentViewerURL(self):
    """returns the URL of this instance (self.absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
336
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended when idx equals selected"""
    isSelected = (idx == selected)
    return style + 'sel' if isSelected else style
344
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set or delete
    @param val: value for param, stored as str(val); None deletes param
    @param params: dict of further parameters; values are stored
        unchanged, None deletes the key
    @param duplicates: how to reduce list values (from duplicate keys):
        'comma' joins the non-empty entries with ',', 'first' takes the
        first non-empty entry ('' when there is none); any other true
        value leaves lists untouched
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry. a list without non-empty
                # entries gives '' (like 'comma') instead of an IndexError
                if entries:
                    newParams[k] = entries[0]
                else:
                    newParams[k] = ''

    return newParams
383
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    @param baseUrl: URL in front of the query string; defaults to getDocumentViewerURL()
    @param paramSep: separator between the key=value pairs
    @param duplicates: passed on to getParams
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values (not escaping '/') and collect the key=value pairs
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, paramSep.join(pairs))
394
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val (getLink with paramSep '&')"""
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
398
399
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of the template/info_xml template for the docinfo
    """
    # digilibBaseUrl is only set by __init__ when it was given, so it
    # may be missing altogether (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
408
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' access is always granted. A missing access type or one listed
    in self.authgroups is granted to every user except "Anonymous User".
    Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
429
430
431
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    The docinfo dict is cached in the session under 'docinfo' and reused
    as long as mode and url stay the same.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to image file)
    @param url: location of the document
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    docinfo = session.get('docinfo', None)
    # check if its still current
    if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
        logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
        return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info (local name avoids shadowing the builtin)
        copyrightData = self.metadataService.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    if docinfo.get('imagePath', None):
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
538
539
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] and docinfo['documentPath'].

    @param docinfo: docinfo dict (modified and returned)
    @param resource: dict with the optional keys 'name' and 'archive-path'
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it as 'documentUrl';
        # the old spelling 'documentURL' is still accepted.
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
564
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    @param docinfo: docinfo dict (modified and returned); its
        'documentPath' is the base for relative paths
    @param texttool: dict with the texttool entries
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl and docPath:
        scheme = urlparse.urlparse(textUrl)[0]
        if scheme == "":
            # not a URL: path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')

    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')

    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
608
def getDocinfoFromBib(self, docinfo, bib):
    """reads contents of bib element into docinfo

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    fields 'creator', 'title' and 'date' from
    metadataService.getDCMappedData(bib).
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
622
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the access type from acc['@attr']['type'].
    For the types 'group' and 'institution' the lowercased acc['name'] is
    used instead. Incomplete access data leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (LookupError, TypeError, AttributeError) as e:
        # missing or malformed entries: best effort, keep docinfo as it is
        logging.debug("getDocinfoFromAccess: unable to read access data: %s"%e)

    return docinfo
640
def getDocinfoFromDigilib(self, docinfo, path):
    # Reads the number of images for path from digilib's dirInfo-xml.jsp
    # and stores it as docinfo['numPages'] (0 when no size is given).
    # docinfo is returned unchanged when nothing could be fetched.
    # NOTE(review): documented with comments only - the method had no
    # docstring and adding one would presumably make it publishable
    # through Zope.
    infoUrl = ''.join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    numPages = 0
    if size:
        numPages = int(size)

    docinfo['numPages'] = numPages

    # TODO: produce and keep list of image names and numbers
    return docinfo
659
660
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml given by docinfo['presentationUrl'] and stores the
    text of its first author, title and date elements as 'creator',
    'title' and 'date'. docinfo is returned unchanged when there is no
    URL or nothing could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: read the file through digilib's Texter servlet
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    # an empty answer cannot be parsed either (same check as in
    # getDocinfoFromDigilib)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
689
690
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail batch; derived from current when empty
    @param rows: thumbnail rows; defaults to self.thumbrows
    @param cols: thumbnail columns; defaults to self.thumbcols
    @param docinfo: docinfo dict of the document
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: comma-separated string or list of layers
    @param tocMode: stored as pageinfo['tocMode']
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers, keeping the order
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # numPages unknown - maybe we can get it from text page
        logging.warn("getPageInfo: numPages=0 trying getTextPage!")
        if docinfo.get('textURLPath', None):
            # cache text page as well
            pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
            np = int(docinfo.get('numPages', 0))

    request = self.REQUEST
    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    # number of groups, rounded up
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
778
779
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    @param start: index of the first page of this batch
    @param rows: number of thumbnail rows
    @param cols: number of thumbnail columns
    @param pageFlowLtr: fill rows left-to-right; otherwise right-to-left
    @param pageZero: batches start at index 0 instead of 1
    @param minIdx: smallest valid page index
    @param maxIdx: largest valid page index (inclusive); 0 means unknown,
        then start + rows*cols is used
    @return: dict with 'batches' (list of {'start','end'}), 'pages' (rows
        of {'idx': n}; idx is None outside minIdx..maxIdx), 'prevStart'
        and 'nextStart' (None when there is no such batch)
    """
    batch = {}
    grpsize = rows * cols
    sizeKnown = (maxIdx != 0)
    if not sizeKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    batch['batches'] = batches

    if pageZero and start == 1:
        # correct beginning
        idx = 0
    else:
        idx = start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx':idx}
            else:
                # placeholder for a position without page
                page = {'idx':None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    batch['pages'] = pages

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # maxIdx is a valid page index, so a following batch exists as soon
    # as its first page is not beyond maxIdx. With unknown size no next
    # batch is offered (as before).
    if sizeKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    return batch
837
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    @param start: start index of this batch
    @param size: number of elements per batch
    @param end: end index; 0 means start + size
    @param data: optional sequence with the elements
    @param fullData: data is indexed with the absolute index; otherwise
        with the position inside this batch
    @return: dict with 'batches' (list of {'start','end'}), 'this'
        (elements of this batch; i+1 for every index i when there is no
        data), 'prevStart' and 'nextStart'
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    batchCount = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)} for n in range(batchCount)]

    # list of elements in this batch
    stop = min(start + size, end)
    if not data:
        this = [i + 1 for i in range(start, stop)]
    elif fullData:
        this = [data[i] for i in range(start, stop)]
    else:
        this = [data[j] for j in range(stop - start)]

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    return {'batches': batches, 'this': this, 'prevStart': prevStart, 'nextStart': nextStart}
881
882
883	security.declareProtected('View management screens','changeDocumentViewerForm')
884	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
885
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    @param title: new title
    @param digilibBaseUrl: new base URL of digilib (stored even when None)
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma-separated names of authorized groups
    @param RESPONSE: when given, redirects to manage_main
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
901
def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    # wrap the form template in the context of self before rendering it
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
906
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer"""
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    # called from the management interface: go back to the contents view
    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: