Context Navigation

source: documentViewer/documentViewer.py @ 482:7ca8ac7db06e

elementtree

Last change on this file since 482:7ca8ac7db06e was 482:7ca8ac7db06e, checked in by casties, 13 years ago
more new template stuff. more batching methods in documentViewer.
File size: 35.3 KB

Line
1	from OFS.Folder import Folder
2	from OFS.Image import File
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	#from Ft.Xml import EMPTY_NAMESPACE, Parse
10	#import Ft.Xml.Domlette
11
12	import xml.etree.ElementTree as ET
13
14	import os.path
15	import sys
16	import urllib
17	import logging
18	import math
19	import urlparse
20	import re
21	import string
22
23	from SrvTxtUtils import getInt, getText, getHttpData
24
def logger(txt, method, txt2):
    """Log ``txt`` immediately followed by ``txt2`` at INFO level.

    ``method`` is accepted so existing three-argument calls keep working,
    but it is not used.
    """
    message = txt + txt2
    logging.info(message)
28
29
def serializeNode(node, encoding="utf-8"):
    """Return ``node`` serialized as XML by ``ET.tostring``.

    ``encoding`` is part of the signature but is not forwarded to
    ElementTree, so the serializer's default encoding applies.
    """
    return ET.tostring(node)
40
def browserCheck(self):
    """Classify the requesting browser from the User-Agent header.

    Reads ``HTTP_USER_AGENT`` from ``self.REQUEST`` and returns a dict with
    the keys ``ua``, ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``,
    ``isIEMac``, ``staticHTML`` and the version strings ``versFirefox``,
    ``versIE``, ``versSafariChrome`` and ``versOpera`` (empty string when the
    respective browser was not recognised).

    A missing header is treated as an empty string instead of raising, and
    each detection step is independent of the others.
    """
    # get_header may return None when the client sends no User-Agent
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Slices shared by the detection steps.  str.find returns -1 when the
    # character is absent, in which case the slice is just the last character
    # and the splits below fail with IndexError (= "not this browser").
    fromOpen = ua[ua.find('('):]
    fromClose = ua[ua.find(')'):]

    # Safari or Chrome: "... (<platform>) <engine> (<comment>) <Name>/<version> Safari/<build>"
    try:
        secondGroup = fromClose[fromClose.find('('):]
        afterSecond = secondGroup[secondGroup.find(')'):]
        tokens = afterSecond.split(" ")
        product = tokens[1]
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = product.split("/")[1]
    except IndexError:
        pass

    # IE: "(compatible; MSIE <version>; ...)"
    try:
        ieToken = fromOpen.split("; ")[1]
        if "MSIE" in ieToken:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox: "... (<platform>) <engine> Firefox/<version>"
    try:
        ffToken = fromClose.split(" ")[2]
        if "Firefox" in ffToken:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera
    if "Opera" in ua:
        try:
            afterPlatform = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = afterPlatform.split("/")[2]
        except IndexError:
            pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
101
def getParentPath(path, cnt=1):
    """Return ``path`` shortened by ``cnt`` trailing segments.

    Trailing slashes are removed first.  A ``cnt`` of zero (or less) returns
    the slash-stripped path unchanged; previously ``cnt=0`` produced an empty
    string because the slice ``[0:-0]`` is empty.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        return path

    # split by /, drop the last cnt segments, and reassemble
    return '/'.join(path.split('/')[:-cnt])
108
109
110	##
111	## documentViewer class
112	##
113	class documentViewer(Folder):
114	"""document viewer"""
115	meta_type="Document viewer"
116
117	security=ClassSecurityInfo()
118	manage_options=Folder.manage_options+(
119	{'label':'main config','action':'changeDocumentViewerForm'},
120	)
121
122	metadataService = None
123	"""MetaDataFolder instance"""
124
125	# templates and forms
126	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
127	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
128	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
129	toc_text = PageTemplateFile('zpt/toc_text', globals())
130	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
131	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
132	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
133	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
134	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
135	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
136	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
137	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
138	head_main = PageTemplateFile('zpt/head_main', globals())
139	info_xml = PageTemplateFile('zpt/info_xml', globals())
140	# TODO: can this be nicer?
141	docuviewer_css = File('docuviewer_css','',open(os.path.join(package_home(globals()),'css/docuviewer.css')), content_type='text/css')
142
143
144	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
145
146
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and thumbnail grid size, parses ``authgroups`` (a
    comma-separated string) into a list of lower-cased group names, and
    creates a ``template`` sub-folder that receives a ``fulltextclient``
    (MpdlXmlTextServer) and a ``zogilib`` (zogiLib) object.  Failure to
    create either helper, or to find a ``metadata`` attribute, is logged and
    otherwise ignored.  ``digilibBaseUrl`` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    tmplFolder = Folder('template')
    self._setObject('template', tmplFolder)

    # full text client
    try:
        import MpdlXmlTextServer
        server = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        tmplFolder._setObject('fulltextclient', server)
    except Exception:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))

    # image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        viewer = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tmplFolder._setObject('zogilib', viewer)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(sys.exc_info()[1]))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
184
185
186	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """Proxy for ``getTextPage`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getTextPage(**kwargs)

def getOrigPages(self, **kwargs):
    """Proxy for ``getOrigPages`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getOrigPages(**kwargs)

def getOrigPagesNorm(self, **kwargs):
    """Proxy for ``getOrigPagesNorm`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getOrigPagesNorm(**kwargs)

def getQuery(self, **kwargs):
    """Proxy for ``getQuery`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getQuery(**kwargs)

def getSearch(self, **kwargs):
    """Proxy for ``getSearch`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getSearch(**kwargs)

def getGisPlaces(self, **kwargs):
    """Proxy for ``getGisPlaces`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**kwargs)

def getAllGisPlaces(self, **kwargs):
    """Proxy for ``getAllGisPlaces`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**kwargs)

def getWordInfo(self, **kwargs):
    """Proxy for ``getWordInfo`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getWordInfo(**kwargs)

def getLemma(self, **kwargs):
    """Proxy for ``getLemma`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getLemma(**kwargs)

def getLemmaQuery(self, **kwargs):
    """Proxy for ``getLemmaQuery`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getLemmaQuery(**kwargs)

def getLex(self, **kwargs):
    """Proxy for ``getLex`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getLex(**kwargs)

def getToc(self, **kwargs):
    """Proxy for ``getToc`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getToc(**kwargs)

def getTocPage(self, **kwargs):
    """Proxy for ``getTocPage`` of the fulltextclient in the template folder."""
    client = self.template.fulltextclient
    return client.getTocPage(**kwargs)
238
239
240	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail overview through the ``thumbs_main_rss`` template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: first page of the thumbnail batch (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)

    The digilib base URL is looked up with getattr so that an instance on
    which it was never set falls back to zogilib / the default URL instead of
    raising AttributeError.
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ leaves digilibBaseUrl undefined when none was given
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is configured, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
272
273
274	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
    """
    Render one page of the document with the template ``viewer_<viewMode>``.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail batch (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)

    Returns the rendered template, or a plain error string when the template
    folder or the template for the view mode is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # a table of contents is only fetched for non-thumbnail navigation
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy name for text view with dictionary sub-type
        viewMode, viewType = "text", "dict"
    elif viewMode == "auto":
        # text if the document has a text, images otherwise
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"

    # a repeated request parameter arrives as list: join the non-empty entries
    if isinstance(viewType, list):
        logging.debug("index_html: viewType is list:%s"%viewType)
        nonEmpty = [t for t in viewType if t]
        viewType = ','.join(nonEmpty)

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
330
def generateMarks(self,mk):
    # Builds "mk=<value>" for every mark and concatenates the pieces without
    # any separator; None yields the empty string, a single value is treated
    # as a one-element list.
    # (Deliberately a comment, not a docstring: the original has none.)
    if mk is None:
        return ""

    marks = mk
    if not isinstance(marks, list):
        marks = [marks]

    return "".join(["mk=%s"%m for m in marks])
340
341
def getBrowser(self):
    """Return the browser description dict produced by ``browserCheck``.

    The dict is also written to the debug log.
    """
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
347
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
352
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with page and size parameters.

    If ``docinfo`` carries an ``imageURL`` that URL is used as the start (and
    ``fn`` is not applied); otherwise the URL is built from
    ``self.digilibBaseUrl`` and ``fn``, where a missing ``fn`` is taken from
    ``docinfo['imagePath']``.  ``pn`` is appended only when it is true,
    ``dw`` and ``dh`` are always appended.
    """
    haveDocinfo = docinfo is not None

    url = None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)

    if url is None:
        # no ready-made image URL: build one from base URL and file name
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')
        url = base + "fn=%s"%fn

    pieces = [url]
    if pn:
        pieces.append("&pn=%s"%pn)
    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return "".join(pieces)
371
def getDocumentViewerURL(self):
    """Return the result of ``absolute_url()`` for this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
375
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
383
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict of URL parameters based on the current request.

    Starts from a copy of ``self.REQUEST.form`` and applies the changes:

    - ``param``/``val``: sets ``param`` to ``str(val)``, or removes it when
      ``val`` is None.
    - ``params``: dict of further changes; a value of None removes the key.
    - ``duplicates``: how to flatten list values (coming from duplicate keys):
      'comma' joins the non-empty entries with ',', 'first' takes the first
      non-empty entry (empty string when there is none).  Any other true
      value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys); iterate over a
        # snapshot because values are replaced inside the loop
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; all-empty lists used to raise
                # IndexError here
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
422
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the document viewer with changed request parameters.

    The parameters come from ``getParams`` (see there for ``param``, ``val``,
    ``params`` and ``duplicates``).  Values are quoted with ``quote_plus``
    leaving '/' unescaped and joined with ``paramSep``.  ``baseUrl`` defaults
    to the URL of this viewer.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        quoted = urllib.quote_plus(unicode(v),'/')
        pairs.append("%s=%s"%(k,quoted))
    query = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
433
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return ``getLink`` for the given arguments with '&' as parameter separator."""
    # NOTE(review): the name suggests an escaped ampersand separator, but the
    # separator passed here is a plain '&' -- confirm against the templates.
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
437
438
def getInfo_xml(self,url,mode):
    """Return info about the document as XML, rendered by the ``info_xml`` template.

    ``url`` and ``mode`` are passed on to ``getDocinfo``.  The digilib base
    URL is looked up with getattr so that an instance on which it was never
    set falls back to zogilib / the default URL instead of raising
    AttributeError.
    """
    # __init__ leaves digilibBaseUrl undefined when none was given
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
447
def isAccessible(self, docinfo):
    """Return True if access to the resource described by ``docinfo`` is granted.

    ``docinfo['accessType']`` decides: 'free' is always accessible; a missing
    type or one listed in ``self.authgroups`` requires a user whose name is
    not "Anonymous User"; any other type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
468
469
470
def getDocinfo(self, mode, url):
    """Return the docinfo dict for the document at ``url``.

    A docinfo cached in the session is reused when its ``mode`` and ``url``
    match.  Otherwise a new one is built: depending on ``mode`` ('texttool',
    'imagepath' or 'filepath') the index.meta DOM is requested from the
    metadata service for the url itself, its parent, or the path two segments
    up; resource, texttool, bib (or presentation info.xml), access,
    attribution and copyright data are merged in, image path and image URL
    are added, and the result is stored in the session.

    Raises IOError when mode is 'texttool' and no index.meta is found, and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))

    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        isCurrent = (cached is not None
                     and cached.get('mode', None) == mode
                     and cached.get('url', None) == url)
        if isCurrent:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional;
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional;
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        meta = self.metadataService

        # document directory name and path
        resourceData = meta.getResourceData(dom=metaDom)
        if resourceData:
            docinfo = self.getDocinfoFromResource(docinfo, resourceData)

        # texttool info
        texttoolData = meta.getTexttoolData(dom=metaDom)
        if texttoolData:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)

        # bib info; without bib try the presentation info.xml
        bibData = meta.getBibData(dom=metaDom)
        if bibData:
            docinfo = self.getDocinfoFromBib(docinfo, bibData)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        accessData = meta.getAccessData(dom=metaDom)
        if accessData:
            docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # attribution info
        attributionData = meta.getAttributionData(dom=metaDom)
        if attributionData:
            logging.debug("getDocinfo: attribution=%s"%repr(attributionData))
            docinfo['attribution'] = attributionData

        # copyright info
        copyrightData = meta.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
568
def getDocinfoFromResource(self, docinfo, resource):
    """Copy document name and document path from the resource data into docinfo.

    ``resource`` is read with the keys 'name' and 'archive-path'.  Sets
    ``docinfo['documentName']`` and ``docinfo['documentPath']``:

    - with an archive-path: the path, made absolute and extended by the
      document name unless it already ends with it;
    - without: the document URL from docinfo, unless that starts with 'http:'.

    A leading '/mpiwg/online' is removed from the resulting path.  The
    document URL is read from the key 'documentUrl' (the key written by
    getDocinfo); the former spelling 'documentURL' is still accepted.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; reading only 'documentURL' raised KeyError
        # because getDocinfo stores the value under 'documentUrl'
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
593
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Copy the texttool settings into docinfo and return docinfo.

    Reads the texttool keys 'image', 'text', 'text-url-path', 'page-flow',
    'odd-scan-position', 'title-scan-no' and 'presentation'.  Image dir, old
    style text URL and presentation are resolved against
    ``docinfo['documentPath']`` and are only stored when that path is set.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL: anything without URL scheme is a relative path
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
637
def getDocinfoFromBib(self, docinfo, bib):
    """Copy the bib data into docinfo and return docinfo.

    Stores the raw ``bib`` dict, its '@type' entry as 'bibType', and the
    'creator', 'title' and 'date' fields of the DC mapping supplied by the
    metadata service (None where the mapping has no value).
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)

    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, None)

    return docinfo
651
def getDocinfoFromAccess(self, docinfo, acc):
    """Copy the access type from the access data into docinfo and return docinfo.

    The type is read from ``acc['@attr']['type']``; for the types 'group' and
    'institution' the lower-cased ``acc['name']`` is stored instead.
    ``docinfo['accessType']`` is only set when a non-empty type was found.
    Access data that does not have the expected structure leaves docinfo
    unchanged; the reason is written to the debug log.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError):
        # missing or malformed entries: keep best-effort behaviour, but no
        # longer hide unrelated errors behind a bare except
        logging.debug("getDocinfoFromAccess: unable to read access type", exc_info=True)

    return docinfo
669
def getDocinfoFromDigilib(self, docinfo, path):
    # Requests dirInfo-xml.jsp for the directory `path` from digilib and
    # stores the integer value of its <size> element in docinfo['numPages']
    # (0 when the element has no text).  Without a response the problem is
    # logged and docinfo is returned unchanged.
    # (Deliberately a comment, not a docstring: the original has none.)
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    # fetch data
    response = getHttpData(infoUrl)
    if not response:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    infoDom = ET.fromstring(response)
    size = getText(infoDom.find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)

    numPages = 0
    if size:
        numPages = int(size)
    docinfo['numPages'] = numPages

    # TODO: produce and keep list of image names and numbers
    return docinfo
688
689
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Fill creator, title and date of docinfo from the presentation info.xml.

    The file location is ``docinfo['presentationUrl']``: a value starting
    with "http://" is fetched directly, anything else through the digilib
    Texter servlet.  The text of the first author, title and date elements
    is stored.  Without URL, or when nothing can be read, the problem is
    logged and docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: let digilib deliver the file
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn=" + url

    content = getHttpData(metaUrl)
    if content is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    infoDom = ET.fromstring(content)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(infoDom.find(xpath))

    return docinfo
718
719
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """Return the pageinfo dict for the current request.

    Contains the view settings (viewMode, viewType, tocMode), the current
    page (current, pn), the thumbnail grid (rows, cols, groupsize, start,
    numgroups, pageZero, pageBatch) and the search/toc settings taken from
    the request.  ``rows`` and ``cols`` default to the viewer's thumbrows and
    thumbcols; an empty ``start`` becomes the first page of the group that
    contains ``current``.  When docinfo has no page count but a
    'textURLPath', the text page is fetched (and kept as 'textPage') before
    the count is read again.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)

    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    pageinfo['viewType'] = viewType
    pageinfo['tocMode'] = tocMode
    pageinfo['current'] = current
    pageinfo['pn'] = current
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    pageinfo['start'] = start

    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from the text page
        # (cache text page as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo)
        numPages = int(docinfo.get('numPages', 0))

    numGroups = int(numPages / grpsize)
    if numPages % grpsize > 0:
        numGroups += 1
    pageinfo['numgroups'] = numGroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)

    # TODO: do we need this here?
    request = self.REQUEST
    for (key, default) in (('characterNormalization', 'reg'),
                           ('query', ''),
                           ('queryType', ''),
                           ('querySearch', 'fulltext'),
                           ('highlightQuery', '')):
        pageinfo[key] = request.get(key, default)

    for (key, default) in (('tocPageSize', 30),
                           ('queryPageSize', 10),
                           ('tocPN', '1'),
                           ('searchPN', '1')):
        pageinfo[key] = getInt(request.get(key, default))

    # limit tocPN to the number of pages of the cached toc
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        tocSize = docinfo[tocSizeKey]
        tocPageSize = pageinfo['tocPageSize']
        tocPages = tocSize/tocPageSize
        if tocSize%tocPageSize>0:
            tocPages = tocSize/tocPageSize+1

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
785
786
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    Keys of the result:
    'batches'   -- list of {'start', 'end'} for all groups up to ``maxIdx``
                   (numbering starts at 0 instead of 1 when ``pageZero``)
    'pages'     -- ``rows`` lists of ``cols`` dicts {'idx': n}; n is None for
                   positions outside ``minIdx``..``maxIdx``; rows are filled
                   right-to-left unless ``pageFlowLtr``
    'prevStart' -- start of the previous group or None
    'nextStart' -- start of the next group or None

    A ``maxIdx`` of 0 is replaced by ``start`` plus one group size.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    ofs = 1
    if pageZero:
        ofs = 0
    batches = [{'start': i * grpsize + ofs,
                'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
               for i in range(numBatches)]

    # with a zeroth page the first screen begins at index 0
    idx = start
    if pageZero and start == 1:
        idx = 0

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            inRange = not (idx < minIdx or idx > maxIdx)
            if inRange:
                page = {'idx':idx}
            else:
                page = {'idx':None}

            if pageFlowLtr:
                row.append(page)
            else:
                row.insert(0, page)

            idx += 1

        pages.append(row)

    prevStart = None
    if start > 1:
        prevStart = max(start - grpsize, 1)

    nextStart = None
    if start + grpsize < maxIdx:
        nextStart = start + grpsize

    batch = {}
    batch['batches'] = batches
    batch['prevStart'] = prevStart
    batch['nextStart'] = nextStart
    batch['pages'] = pages
    return batch
844
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    Keys of the result:
    'batches'   -- list of {'start', 'end'} for all batches up to ``end``
    'this'      -- elements for the indexes ``start`` up to (excluding)
                   ``min(start+size, end)``: ``data[i]`` when ``fullData``,
                   the entries of ``data`` counted from 0 otherwise, or
                   ``i+1`` when there is no data
    'prevStart' -- start of the previous batch or None
    'nextStart' -- start of the next batch or None

    An ``end`` of 0 is replaced by ``start + size``.
    """
    if end == 0:
        end = start + size

    # list of all batch start and end points
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
               for i in range(numBatches)]

    # list of elements in this batch
    elements = []
    stop = min(start+size, end)
    for (pos, i) in enumerate(range(start, stop)):
        if not data:
            element = i+1
        elif fullData:
            # data covers the whole range: use the absolute index
            element = data[i]
        else:
            # data covers only this batch: use the position in the batch
            element = data[pos]

        elements.append(element)

    prevStart = None
    if start > 1:
        prevStart = max(start - size, 1)

    nextStart = None
    if start + size < end:
        nextStart = start + size

    batch = {}
    batch['batches'] = batches
    batch['this'] = elements
    batch['prevStart'] = prevStart
    batch['nextStart'] = nextStart
    return batch
888
889
890	security.declareProtected('View management screens','changeDocumentViewerForm')
891	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
892
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of this document viewer.

    Stores title, digilib base URL and thumbnail grid size, parses
    ``authgroups`` (a comma-separated string) into a list of lower-cased
    group names and looks up the ``metadata`` attribute as metadata service
    (failure is logged).  Redirects to 'manage_main' when a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(sys.exc_info()[1]))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
908
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
913
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
921
922	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta type for document viewer templates."""

    meta_type = "DocumentViewer Template"
926
927
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template (zpt/addDocumentViewerTemplate)."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
932
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged zpt/viewer_main.zpt.

    ``id`` is the id of the new template and ``title`` its optional title.
    ``text`` and ``submit`` are accepted but not used.  When a ``REQUEST`` is
    given, the browser is redirected to the management screen of the new
    template; without one (programmatic use) no redirect is attempted, where
    the code used to fail on the missing request.  Returns an empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template text and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')

    return ''
952
953
954

Note: See TracBrowser for help on using the repository browser.

Download in other formats: