Context Navigation

source: documentViewer/documentViewer.py @ 503:030251fe9dbc

elementtree

Last change on this file since 503:030251fe9dbc was 503:030251fe9dbc, checked in by casties, 12 years ago
more cleanup. made viewType into viewLayer and viewType=xml into viewMode=xml.
File size: 33.9 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19
20	from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
21
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of an ElementTree node.

    `encoding` is kept in the signature for existing callers but is not
    used: the node is serialized with ElementTree's default settings.
    """
    return ET.tostring(node)
32
def browserCheck(self):
    """Classify the browser from the request's User-Agent header.

    Returns a dict with the keys 'ua' (the raw header value), 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' and the
    version strings 'versIE', 'versFirefox', 'versSafariChrome' and
    'versOpera' (empty string when not detected).

    A missing User-Agent header is treated like an empty one.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # get_header() may return None when the header is absent
    uaStr = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in uaStr:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in uaStr

    # Slices of the UA string starting at the first '(' and the first ')'.
    # find() returns -1 for a missing delimiter, which leaves at most one
    # character; the split()[n] lookups below then fail with IndexError,
    # meaning "not detected".
    nav = uaStr[uaStr.find('('):]
    nav1 = uaStr[uaStr.find(')'):]

    # Safari or Chrome: "...) Chrome/x.y Safari/z" after the second (...) group
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE: "(compatible; MSIE x.y; ...)"
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox: third token after the first ')' is "Firefox/x.y".
    # Computed here instead of reusing a variable from the Safari block.
    try:
        token = nav1.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: version is the text after the second '/' following the (...) group
    try:
        if "Opera" in uaStr:
            bt['versOpera'] = nav[nav.find(')'):].split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in uaStr
    bt['isWin'] = 'Windows' in uaStr
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
93
def getParentPath(path, cnt=1):
    """Return `path` shortened by its last `cnt` '/'-separated segments.

    Trailing slashes are removed first. A `cnt` of zero or less returns the
    path (without trailing slashes) instead of an empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be an empty slice and discard the whole path
        return path
    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
100
101	##
102	## documentViewer class
103	##
class documentViewer(Folder):
    """document viewer"""
    # Zope meta type of this product
    meta_type="Document viewer"

    # collects the security declarations made further down in the class body
    security=ClassSecurityInfo()
    # the Folder tabs plus one tab for the 'changeDocumentViewerForm' action
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # class-level default; __init__ and changeDocumentViewer try to replace it
    # with the object found under the name 'metadata'
    metadataService = None
    """MetaDataFolder instance"""

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    # static resources served from the product directory
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
131
132
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses `authgroups` (a
    comma-separated string) into a list of lower-cased group names and
    creates the 'template' sub-folder with a 'fulltextclient' and a
    'zogilib' object. Failure to create either helper, or to find an
    object named 'metadata', is logged and otherwise ignored.
    `digilibBaseUrl` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # create template folder so we can always use template.something
    tmpl = Folder('template')
    self['template'] = tmpl # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        tmpl['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmpl['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
170
171
172	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
176
def getToc(self, **args):
    """Return the full table of contents in internal format (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getToc(**args)
180
def getTocPage(self, **args):
    """Return one page of the table of contents (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
184
185	#WTF?
def getQuery(self, **args):
    """Return the search query result (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
189
190	#WTF?
def getSearch(self, **args):
    """Return the search result (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
194
195	#WTF?
def getGisPlaces(self, **args):
    """Return the GIS places (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)
199
200	#WTF?
def getAllGisPlaces(self, **args):
    """Return all GIS places (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)
204
205	#WTF?
def getWordInfo(self, **args):
    """Return the word info / translation (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getWordInfo(**args)
209
210	#WTF?
def getLemma(self, **args):
    """Return the lemma (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
214
215	#WTF?
def getLemmaQuery(self, **args):
    """Return the lemma query result (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getLemmaQuery(**args)
219
220	#WTF?
def getLex(self, **args):
    """Return the lexicon entry (delegates to template.fulltextclient)."""
    client = self.template.fulltextclient
    return client.getLex(**args)
224
225	#WTF?
226	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
227	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr() because the attribute may never have been set on this
    # instance (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text URL present: show the text, otherwise the images
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
259
260
261	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: output of template 'viewer_<viewMode>', or an error string when
        the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewLayer = "text", "dict"
        else:
            viewMode = "images"

    # stringify viewLayer
    if isinstance(viewLayer, list):
        logging.debug("index_html: viewLayer is list:%s"%viewLayer)
        viewLayer = ','.join([layer for layer in viewLayer if layer])

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
317
def generateMarks(self,mk):
    """Return the marks in `mk` as URL query parameters.

    `mk` may be None (gives ""), a single value or a list of values. Each
    value becomes "mk=<value>"; several values are joined with "&" so that
    the result stays a valid query string fragment.
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
327
328
def getBrowser(self):
    """Return the browser info dict from browserCheck() for the current request."""
    info = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(info))
    return info
334
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
339
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with page and size parameters.

    If `docinfo` carries an 'imageURL' it is used as the base (and `fn` is
    not added). Otherwise the URL is built from self.digilibBaseUrl and
    `fn`, which falls back to docinfo's 'imagePath' when not given.
    `pn` is only appended when it is truthy; `dw` and `dh` always are.
    """
    haveDocinfo = docinfo is not None
    base = docinfo.get('imageURL', None) if haveDocinfo else None

    if base is None:
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')
        base += "fn=%s"%fn

    pageParam = "&pn=%s"%pn if pn else ""
    return base + pageParam + "&dw=%s&dh=%s"%(dw,dh)
358
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
362
def getStyle(self, idx, selected, style=""):
    """Return `style`, with 'sel' appended when `idx` equals `selected`."""
    return style + 'sel' if idx == selected else style
370
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters based on the current request.

    Starts from a copy of REQUEST.form, then applies the single change
    `param`=`val` (stored as str) and the changes in dict `params` (stored
    as given). A value of None deletes the key.

    `duplicates` controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' keeps the first
    non-empty entry (or '' when there is none); any other value leaves
    lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue
            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; an all-empty list gives ''
                # instead of raising IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
409
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with changed parameters.

    The parameters come from getParams(param, val, params, duplicates).
    Values are URL-quoted (leaving '/' unescaped) and joined with
    `paramSep`. `baseUrl` defaults to getDocumentViewerURL().

    Unicode values are encoded as UTF-8 before quoting and byte strings are
    quoted as they are, so non-ASCII values no longer raise.
    """
    try:
        from urllib import quote_plus  # Python 2
    except ImportError:
        from urllib.parse import quote_plus
    textType = type(u'')

    def quoteValue(v):
        # quote_plus needs bytes to handle non-ASCII characters reliably
        if not isinstance(v, textType):
            v = str(v)
        if isinstance(v, textType):
            v = v.encode('utf-8')
        return quote_plus(v, '/')

    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k, quoteValue(v)) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
420
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments with '&' as parameter separator."""
    linkArgs = dict(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
    return self.getLink(**linkArgs)
424
425
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    Renders template 'info_xml' with the docinfo for (`mode`, `url`).
    Sets self.digilibBaseUrl first if it is missing or empty.
    """
    # getattr() because the attribute may never have been set on this
    # instance (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
434
def isAccessible(self, docinfo):
    """Return True if access to the resource described by `docinfo` is granted.

    Access type 'free' is always granted. A missing access type or one
    listed in self.authgroups is granted to any user whose name is not
    "Anonymous User". Every other access type is logged as an error and
    refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
455
456
457
def getDocinfo(self, mode, url):
    """Return the docinfo dict for the document behind `url`.

    `mode` must be 'texttool' (url points to the document dir or its
    index.meta, which is required), 'imagepath' (url points to an image
    folder) or 'filepath' (url points to an image file); any other mode
    raises ValueError. A docinfo stored in the session under 'docinfo' is
    returned as is when its 'mode' and 'url' match. A newly built docinfo
    is stored in the session before it is returned.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        service = self.metadataService

        # document directory name and path
        resource = service.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = service.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info; without it fall back to the presentation info.xml
        bib = service.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = service.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = service.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        rights = service.getCopyrightData(dom=metaDom)
        if rights:
            logging.debug("getDocinfo: copyright=%s"%repr(rights))
            docinfo['copyright'] = rights

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
555
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName' (resource 'name') and 'documentPath'. The path is
    taken from resource 'archive-path' (made absolute and ending in the
    document name); without an archive path the docinfo's document URL is
    used unless it starts with 'http:'. A leading '/mpiwg/online' is
    removed from the path. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl',
        # the former spelling 'documentURL' is still honoured
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
580
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' and 'presentationUrl' (each only when the
    texttool entry and docinfo's 'documentPath' are both present),
    'textURLPath' (when present), and always 'pageFlow', 'oddPage' and
    'titlePage'. Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        # no URL scheme: treat as path below the document
        if urlparse.urlparse(oldTextUrl)[0] == "":
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow / odd pages are left / number of title page (0: not defined)
    for (key, attr, default) in (('pageFlow', 'page-flow', 'ltr'),
                                 ('oddPage', 'odd-scan-position', 'left'),
                                 ('titlePage', 'title-scan-no', 0)):
        docinfo[key] = texttool.get(attr, default)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
624
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict under 'bib', its '@type' under 'bibType' and
    the 'creator', 'title' and 'date' values of the DC mapping returned by
    metadataService.getDCMappedData (None when missing). Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
638
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the non-empty value of
    acc['@attr']['type']; for the types 'group' and 'institution' the
    lower-cased acc['name'] is used instead. Missing or malformed access
    data leaves docinfo unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # incomplete access element: keep docinfo as it is
        logging.debug("getDocinfoFromAccess: no usable access info: %s"%repr(e))

    return docinfo
656
def getDocinfoFromDigilib(self, docinfo, path):
    """Set docinfo['numPages'] from digilib's dirInfo-xml.jsp for `path`.

    The number is read from the <size> element of the response (0 when the
    element has no text). When no data can be fetched an error is logged
    and docinfo is returned unchanged.
    """
    infoUrl = ''.join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
675
676
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml referenced by docinfo['presentationUrl'] (fetched
    directly for "http://" URLs, otherwise through digilib's Texter
    servlet) and sets 'creator', 'title' and 'date' from its first author,
    title and date elements. Without a URL, or when nothing could be
    fetched, an error is logged and docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    # an empty response cannot be parsed either (same check as in
    # getDocinfoFromDigilib)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
705
706
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    Holds the view settings, current page ('current' and 'pn'), thumbnail
    grid ('rows', 'cols', 'groupsize', 'start', 'numgroups', 'pageZero',
    'pageBatch') and the query/toc parameters read from the REQUEST.
    `rows` and `cols` default to self.thumbrows / self.thumbcols. When
    docinfo has no page count but a 'textURLPath', the text page is fetched
    (and kept as 'textPage') and the page count is read again.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)

    pageinfo = {
        'viewMode': viewMode,
        'viewLayer': viewLayer,
        'tocMode': tocMode,
        'current': current,
        'pn': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
    }

    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # cache text page as well
        pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        numPages = int(docinfo.get('numPages', 0))

    fullGroups, leftover = divmod(numPages, grpsize)
    if leftover > 0:
        fullGroups += 1
    pageinfo['numgroups'] = fullGroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

    # TODO: do we need this here?
    request = self.REQUEST
    for (key, default) in (('characterNormalization', 'reg'),
                           ('query', ''),
                           ('queryType', ''),
                           ('querySearch', 'fulltext'),
                           ('highlightQuery', '')):
        pageinfo[key] = request.get(key, default)

    for (key, default) in (('tocPageSize', 30),
                           ('queryPageSize', 10),
                           ('tocPN', '1'),
                           ('searchPN', '1')):
        pageinfo[key] = getInt(request.get(key, default))

    # limit tocPN to the number of pages of the cached toc
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        tocSize = docinfo[tocSizeKey]
        tocPageSize = pageinfo['tocPageSize']
        if tocSize%tocPageSize>0:
            tocPages=tocSize/tocPageSize+1
        else:
            tocPages=tocSize/tocPageSize

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
772
773
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    Keys: 'batches' (list of {'start', 'end'} for all screens), 'pages'
    (`rows` lists of `cols` dicts {'idx': n}, with None for indexes outside
    minIdx..maxIdx; rows are reversed when `pageFlowLtr` is false),
    'prevStart' and 'nextStart' (None when there is no such screen).

    `maxIdx` is the last valid page index. 0 means unknown; then one more
    screen is assumed for 'batches' and 'pages', but 'nextStart' is None.
    NOTE(review): with `pageZero` the 'batches' start at 0, 10, ... while
    prevStart/nextStart count from 1 -- confirm which one the templates use.
    """
    grpsize = rows * cols
    sizeKnown = (maxIdx != 0)
    if not sizeKnown:
        maxIdx = start + grpsize

    nb = int(math.ceil(maxIdx / float(grpsize)))
    # list of all batch start and end points
    ofs = 0 if pageZero else 1
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start':s, 'end':e})

    # correct beginning: the zeroth page is shown before page 1
    idx = 0 if (pageZero and start == 1) else start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                page = {'idx':None}
            else:
                page = {'idx':idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

        pages.append(row)

    batch = {'batches': batches, 'pages': pages}
    batch['prevStart'] = max(start - grpsize, 1) if start > 1 else None

    # maxIdx is itself a valid page, so a next screen exists as soon as
    # its first index does not exceed maxIdx ("<" lost a single last page)
    if sizeKnown and start + grpsize <= maxIdx:
        batch['nextStart'] = start + grpsize
    else:
        batch['nextStart'] = None

    return batch
831
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    Keys: 'batches' (list of {'start', 'end'} for all screens), 'this'
    (the elements for indexes start .. min(start+size, end)-1), 'prevStart'
    and 'nextStart' (None when there is no such screen). An `end` of 0 is
    replaced by start + size. With `data`, elements are data[i] when
    `fullData` is true and data[0], data[1], ... otherwise; without `data`
    they are the numbers i+1.
    """
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = [{'start': i * size + 1, 'end': min((i + 1) * size, end)} for i in range(nb)]

    # list of elements in this batch
    indexes = range(start, min(start+size, end))
    if not data:
        this = [i+1 for i in indexes]
    elif fullData:
        this = [data[i] for i in indexes]
    else:
        this = [data[j] for j in range(len(indexes))]

    prevStart = max(start - size, 1) if start > 1 else None
    nextStart = start + size if start + size < end else None

    return {
        'batches': batches,
        'this': this,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
875
876
877	security.declareProtected('View management screens','changeDocumentViewerForm')
878	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
879
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl and the thumbnail grid size, parses
    `authgroups` (comma-separated) into a list of lower-cased names and
    looks up the object named 'metadata' again (failure is only logged).
    Redirects to 'manage_main' when a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
895
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
900
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: