Context Navigation

source: documentViewer/documentViewer.py @ 84:a6e4f9b6729a

Last change on this file since 84:a6e4f9b6729a was 84:a6e4f9b6729a, checked in by casties, 14 years ago
first version with new full-text infrastructure and slightly changed templates
File size: 29.3 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml.Domlette import NonvalidatingReader
10	from Ft.Xml.Domlette import PrettyPrint, Print
11	from Ft.Xml import EMPTY_NAMESPACE, Parse
12
13
14	import Ft.Xml.XPath
15	import cStringIO
16	import xmlrpclib
17	import os.path
18	import sys
19	import cgi
20	import urllib
21	import logging
22	import math
23
24	import urlparse
25	from types import *
26
def logger(txt, method, txt2):
    """Log txt + txt2 on the root logger.

    txt and txt2 are concatenated without a separator.
    method is the logging level the callers pass in (e.g. logging.INFO,
    logging.WARNING, logging.ERROR).  Anything that is not an integer
    level falls back to logging.INFO.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    logging.log(level, txt + txt2)
30
31
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only conversion errors are handled; other exceptions (for example
    KeyboardInterrupt) are no longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
38
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None.  Only children whose nodeType is
    TEXT_NODE contribute; text inside child elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
49
def serializeNode(node, encoding='utf-8'):
    """Print node into an in-memory buffer and return the buffer contents.

    encoding is handed to Print unchanged.
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    serialized = out.getvalue()
    out.close()
    return serialized
57
58
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path containing no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
62
63
64	import socket
65
def urlopen(url,timeout=2):
    """Open url with urllib.urlopen under a temporary socket timeout.

    The process-wide default socket timeout is set to timeout (seconds)
    for the call and set to 5 afterwards.  The reset now also happens
    when urllib.urlopen raises, so a failed request no longer leaves the
    short timeout in place.

    Returns whatever urllib.urlopen returns.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # same fixed value the function has always reset to
        socket.setdefaulttimeout(5)
72
73
74	##
75	## documentViewer class
76	##
class documentViewer(Folder):
    """Folder subclass providing the document viewer.

    The class attributes below set the meta type, add a 'main config'
    management tab and bind the page templates from the zpt/ and css/
    directories of this package.
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # extra management tab pointing at changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete!
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
101
102
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
    """Initialise the document viewer.

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to XMLRpcServerProxy as serverUrl
    @param title: title of the object
    @param digilibBaseUrl: base URL of digilib; may be None, in which
        case the request methods look it up lazily
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups
    """
    self.id=id
    self.title=title
    # store the configured URL so that methods reading
    # self.digilibBaseUrl find the attribute even when it is unset
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style

    # both helper objects are optional: a missing product is logged
    # and the viewer is created without that helper
    try:
        from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
        xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient',xmlRpcClient)
    except Exception:
        logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
130
131
132	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images (text,images or auto)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default (as index_html does) so that an instance
    # without the attribute does not fail here
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # text mode needs a textURL and a configured text viewer;
        # textViewerUrl is only a commented-out class attribute in this
        # file, so a missing value falls back to image mode
        if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
162
163	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None):
    '''
    Render the viewer_main template for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: text, images or auto; auto becomes text when the
        docinfo has a textURL and images otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @param mk: mark or list of marks, run through generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    # determine the digilib base URL on first use
    configuredUrl = getattr(self, 'digilibBaseUrl', None)
    if not configuredUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    template = getattr(self.template, 'viewer_main')

    if viewMode == "auto":
        # a document with a text URL is shown as text, anything else as images
        if docinfo.get("textURL",''):
            viewMode = "text"
        else:
            viewMode = "images"

    marks = self.generateMarks(mk)
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
194
def generateMarks(self,mk):
    """Return the marks in mk as a query-string fragment.

    mk may be None (returns ""), a single value or a list/tuple of
    values.  Each value becomes "mk=<value>".  Several marks are joined
    with "&" so that the result stays a valid sequence of parameters;
    previously they were concatenated without any separator.
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
205
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
210
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
    if not (idx == selected):
        return style
    return style + 'sel'
218
def getLink(self,param=None,val=None):
    """Return a link to this viewer built from the current request form.

    If param is given it is set to str(val), or removed from the
    parameters when val is None.  A mode of "filepath" is rewritten to
    "imagepath" with url shortened by one path component.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # if the first call used filepath, change it to imagepath now
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
237
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer built from the current request form.

    If param is given it is set to str(val), or removed from the
    parameters when val is None.  The parameters are joined with a
    plain "&".
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.info("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
253
def getInfo_xml(self,url,mode):
    """Return info about the document as XML, rendered by the info_xml template.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # getattr with a default (as index_html does) so that an instance
    # without the attribute does not fail here
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
263
264
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted.  A missing access type, or one
    listed in self.authgroups, is granted to any user whose name is not
    "Anonymous User".  Every other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)

    if access == 'free':
        logger("documentViewer (accessOK)", logging.INFO, "access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        #print "user: ", user
        return (user.getUserName() != "Anonymous User")

    logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
    return False
283
284
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Read the number of pages for path from digilib's dirInfo-xml.jsp.

    @param path: image directory path, appended to the dirInfo URL
    @param docinfo: dict to update; a new dict is created when None
    @param cut: number of trailing path components to drop first
    @return: docinfo with 'numPages' set (0 when no size is reported)
    @raise IOError: when the info could not be read after three tries
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # dom = NonvalidatingReader.parseUri(imageUrl)
            txt=urllib.urlopen(infoUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            # retry on any read or parse error, but let
            # KeyboardInterrupt/SystemExit through
            logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # the loop ended without a successful read
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    # the format string used to lack a conversion, so the sizes never
    # appeared in the log
    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%sizes)

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
321
322
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at url.

    A url starting with "http://" or "https://" is fetched as given.
    Anything else is treated as an online path: "/mpiwg/online" is
    removed, the path is requested through digilib's Texter servlet,
    and "/index.meta" is appended when missing.

    @raise IOError: when the document could not be read and parsed
        after three tries
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
    logging.debug("METAURL: %s"%metaUrl)
    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; per the
            # original note this avoids encoding errors
            # dom = NonvalidatingReader.parseUri(metaUrl)
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            # retry on any read or parse error, but let
            # KeyboardInterrupt/SystemExit through
            logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
352
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the info.xml document at url.

    A url starting with "http://" or "https://" is fetched as given.
    Anything else is treated as an online path: "/mpiwg/online" is
    removed and the path is requested through digilib's Texter servlet.

    @raise IOError: when the document could not be read and parsed
        after three tries
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; per the
            # original note this avoids encoding errors
            # dom = NonvalidatingReader.parseUri(metaUrl)
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            # retry on any read or parse error, but let
            # KeyboardInterrupt/SystemExit through
            logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return dom
381
382
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get authorization info from the index.meta file at path or given by dom.

    @param path: path of the index.meta, used only when dom is None
    @param docinfo: dict to update; a new dict is created when None
    @param dom: already parsed index.meta, if available
    @param cut: number of trailing path components to drop before
        loading the index.meta
    @return: docinfo with 'accessType' set: None when no access type is
        present, the lowercased access name for 'group' and
        'institution' types, the type value otherwise
    """
    logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # without a name element keep the bare type instead of
                # failing with an IndexError
                logger("documentViewer (getauthinfofromindexmeta)", logging.WARNING,"access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
405
406
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get bibliographical info from the index.meta file at path or given by dom.

    @param path: path of the index.meta, used only when dom is None
    @param docinfo: dict to update; a new dict is created when None
    @param dom: already parsed index.meta, if available
    @param cut: number of trailing path components to drop before
        loading the index.meta
    @return: docinfo with 'bib' (all raw bib fields, when present) and
        'bib_type'; when a mapping with an author field exists for the
        type, also 'author', 'title', 'year' as far as they can be read,
        and 'lang' ('' when missing)
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            # best effort: a field that cannot be read is skipped
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s: %s"%(field,sys.exc_info()[1]))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

    return docinfo
457
458
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    @param url: URL or path of the index.meta; used to load dom when it
        is not given and to derive archive and presentation paths
    @param dom: already parsed index.meta, if available
    @param docinfo: dict to update; a new dict is created when None
    @return: docinfo; depending on the tags found it gets 'lang',
        'imagePath', 'imageURL', 'viewerURL', 'textURL', 'textURLPath'
        plus whatever the bib, presentation and auth helpers add
    @raise IOError: when no archive path can be determined
    """
    logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
    if docinfo is None:
        docinfo = {}

    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        # image directory is relative to the archive path
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL (no scheme)
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

    if presentationUrls and (len(presentationUrls) > 0): # override these with the presentation information
        # the presentation URL is obtained by replacing index.meta in the
        # metadata URL with the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        # original note: for now simply set to one, navigation via the thumbs does not work of course
        # NOTE(review): getNumPages in this file counts "<pb " occurrences, so the note above looks outdated
        docinfo = self.getNumPages(docinfo)
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
556
557
558
559
560
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Get bibliographical information from the presentation entry in texttools.

    @param url: URL or path of the presentation info document
    @param docinfo: dict to update; a new dict is created when None
    @param dom: ignored; the presentation document is always loaded from url
    @return: docinfo with 'author', 'title' and 'year' overwritten for
        each of //author, //title and //date that could be read
    """
    if docinfo is None:
        # previously a None docinfo made every assignment fail silently
        # and None was returned
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        # best effort: a missing element leaves the existing value alone
        try:
            docinfo[field]=getTextFromNode(dom.xpath(xpath)[0])
        except Exception:
            logging.debug("documentViewer (getbibinfofromtexttoolpresentation) no %s: %s"%(field,sys.exc_info()[1]))
    return docinfo
578
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    The index.meta file is assumed to be one level above the images,
    so the bib and auth helpers are called with cut+1.

    @param path: path to the images ("/mpiwg/online" is removed)
    @param docinfo: dict to update; a new dict is created when None
    @param cut: number of trailing path components to drop to reach
        the image directory
    """
    logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # keep the uncut path for the index.meta lookups below
    fullPath = path
    imageDir = path
    for x in range(cut):
        imageDir = getParentDir(imageDir)
    logging.error("PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(fullPath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(fullPath,docinfo=docinfo,cut=metaCut)
    return docinfo
599
600
def getDocinfo(self, mode, url):
    """Return the docinfo for url, built according to mode.

    A docinfo stored in the session is reused when its mode and url
    match.  Otherwise a new one is built and stored in the session.

    @param mode: "texttool", "imagepath" or "filepath"
    @raise ValueError: for any other mode
    """
    logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s"%(mode))

    logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
626
627
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
    """Return a pageinfo dict computed from the given parameters.

    @param current: current page number (converted with getInt)
    @param start: first page of the thumbnail group; when it cannot be
        converted, the first page of the group containing current
    @param rows: thumbnail rows, self.thumbrows when not given
    @param cols: thumbnail columns, self.thumbcols when not given
    @param docinfo: when given, its 'numPages' limits 'end' and
        determines 'numgroups'; a missing 'numPages' counts as 0
    @return: dict with 'current', 'rows', 'cols', 'groupsize', 'start',
        'end' and, with docinfo, 'numgroups'
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if docinfo is not None:
        # texttool documents without image or presentation tag carry
        # no numPages entry; treat that as zero pages
        np = int(docinfo.get('numPages') or 0)
        pageinfo['end'] = min(pageinfo['end'], np)
        (groups, rest) = divmod(np, grpsize)
        if rest > 0:
            groups += 1
        pageinfo['numgroups'] = groups

    return pageinfo
651
652
653
def getNumPages(self,docinfo=None):
    """Query the fulltext client for //pb and store the count in docinfo.

    docinfo['numPages'] is set to the number of "<pb " occurrences in
    the text returned for docinfo['textURLPath'].
    """
    xquery = '//pb'
    params = "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)
    result = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", params)
    # TODO: better processing of the page list. do we need the info somewhere else also?
    docinfo['numPages'] = result.count("<pb ")
    return docinfo
661
def getTextPage(self, mode="text", pn=1, docinfo=None):
    """Return a single page from the fulltext as serialized XML.

    @param mode: "text" returns the first top-level div as is;
        "textPollux" additionally rewrites links starting with
        'lt/lex.xql' to point at head_main_voc of this viewer and to
        open in a new window
    @param pn: page number, passed to the fulltext client
    @param docinfo: dict providing 'textURLPath'
    @return: serialized div, or "no text here" when there is no div or
        the mode is not one of the two above
    """
    params = "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn)
    pagexml = self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", params, outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    # plain text mode
    if mode == "text":
        # first div contains text
        divs = pagedom.xpath("/div")
        if len(divs) > 0:
            return serializeNode(divs[0])

    # text-with-links mode
    if mode == "textPollux":
        # first div contains text
        divs = pagedom.xpath("/div")
        if len(divs) > 0:
            pagenode = divs[0]
            # check all a-tags
            for link in pagenode.xpath("//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                # is link with href
                href = hrefNode.nodeValue
                if not href.startswith('lt/lex.xql'):
                    continue
                # is pollux link
                selfurl = self.absolute_url()
                # change href
                hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
                # add target
                link.setAttributeNS(None, 'target', '_blank')
            return serializeNode(pagenode)

    return "no text here"
698
699
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on the viewer.

    authgroups is a comma-separated string; it is stored as a list of
    stripped, lowercased names.  When RESPONSE is given it is
    redirected to 'manage_main'.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
709
710
711
def manage_AddDocumentViewerForm(self):
    """Render the zpt/addDocumentViewer page template in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
716
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to self under id.

    When RESPONSE is given it is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
724
725
726	##
727	## DocumentViewerTemplate class
728	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer.

    A ZopePageTemplate subclass that only sets its own meta type.
    """
    # meta type string for this class
    meta_type="DocumentViewer Template"
732
733
def manage_addDocumentViewerTemplateForm(self):
    """Render the zpt/addDocumentViewerTemplate page template in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
738
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    @param id: id of the new template object
    @param title: optional title set on the template
    @param text: not used; the content always comes from zpt/viewer_main.zpt
    @param REQUEST: when given, the response is redirected to the
        manage_main page of the new template
    @param submit: not used
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # read the default template and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code): nothing to redirect then
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
758
759
760

Note: See TracBrowser for help on using the repository browser.

Download in other formats: