Context Navigation

source: documentViewer/documentViewer.py @ 90:6a4a72033d58

Last change on this file since 90:6a4a72033d58 was 90:6a4a72033d58, checked in by casties, 14 years ago
new version with new full-text infrastructure and some more changed templates
File size: 32.3 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml.Domlette import NonvalidatingReader
10	from Ft.Xml.Domlette import PrettyPrint, Print
11	from Ft.Xml import EMPTY_NAMESPACE, Parse
12
13
14	import Ft.Xml.XPath
15	import cStringIO
16	import xmlrpclib
17	import os.path
18	import sys
19	import cgi
20	import urllib
21	import logging
22	import math
23
24	import urlparse
25	from types import *
26
def logger(txt, method, txt2):
    """Log ``txt + txt2`` through the root logger at the requested level.

    txt    -- message prefix (callers pass a component / method label)
    method -- logging level constant such as ``logging.INFO`` or
              ``logging.ERROR``
    txt2   -- message text, appended to ``txt`` without a separator

    Previously ``method`` was ignored and every message was emitted at INFO,
    so records flagged as warnings or errors by the callers could not be
    told apart.  A ``method`` that is not an integer level falls back to
    INFO.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    logging.log(level, txt + txt2)
30
31
def getInt(number, default=0):
    """Convert ``number`` to an int, falling back to ``int(default)``.

    number  -- any value accepted by ``int()``
    default -- value converted and returned when ``number`` cannot be
               converted (0 if not given)

    Only conversion failures are caught; the former bare ``except`` also
    swallowed ``KeyboardInterrupt`` and ``SystemExit``.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError, AttributeError):
        # AttributeError: Python 2 classic instances without __int__
        return int(default)
38
def getTextFromNode(nodename):
    """Return the joined ``data`` of the direct text-node children.

    ``nodename`` is a DOM node (or None).  Only immediate children whose
    ``nodeType`` equals ``TEXT_NODE`` contribute; None yields "".
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
49
def serializeNode(node, encoding='utf-8'):
    """Serialize ``node`` to an XML string using ``Print``.

    The node is printed into an in-memory buffer with the given
    ``encoding`` and the buffer content is returned.
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
57
58
def getParentDir(path):
    """Return ``path`` with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    parts = path.split('/')
    del parts[-1]
    return '/'.join(parts)
62
63
64	import socket
65
def urlopen(url, timeout=2):
    """Open ``url`` with ``urllib.urlopen`` under a temporary socket timeout.

    url     -- URL to open
    timeout -- process-wide default socket timeout (seconds) in effect
               while the connection is opened

    Returns whatever ``urllib.urlopen`` returns and propagates its
    exceptions.  The default socket timeout is set back to 5 seconds
    afterwards, as before, but now also when the open fails; previously an
    exception left the short ``timeout`` installed for the whole process.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # NOTE(review): resets to the hard-coded 5s rather than to the
        # previous default -- kept because other code may rely on it.
        socket.setdefaulttimeout(5)
72
73
74	##
75	## documentViewer class
76	##
class documentViewer(Folder):
    """document viewer"""
    # textViewerUrl = "http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # extra management tab pointing at the configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates shipped with the product
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # configuration form, restricted to managers
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
103
104
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
    """Initialise the viewer and populate its ``template`` sub-folder.

    id             -- Zope id of the new object
    imageScalerUrl -- passed as ``dlServerURL`` to the zogiLib instance
    textServerName -- passed as ``serverUrl`` to the full-text client
    title          -- object title
    digilibBaseUrl -- base URL of the digilib server (may be None)
    thumbcols, thumbrows -- thumbnail grid dimensions
    authgroups     -- comma separated list of authorised group names

    ``digilibBaseUrl`` used to be accepted but never stored, so methods
    reading ``self.digilibBaseUrl`` raised AttributeError on a freshly
    created viewer; it is stored now.  Failure to create either helper
    object is logged and otherwise ignored, as before.
    """
    self.id=id
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
        xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient',xmlRpcClient)
    except Exception:
        logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
132
133
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the ``thumbs_main_rss`` template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" becomes "text" when
        the document has a textURL and a textViewerUrl is configured,
        otherwise "images"
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number

    digilibBaseUrl and textViewerUrl are read with getattr() defaults:
    neither attribute is guaranteed to exist on the instance, and the plain
    attribute reads used before raised AttributeError.
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # text URL set and text viewer configured
        if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
164
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the main viewer page (template ``viewer_main``).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "text", "images" or "auto"; "auto" resolves to "text"
        when the document has a textURL, else to "images"
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number
    @param mk: mark(s), turned into a string by generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # without the template folder nothing can be rendered
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    # pageinfo records the viewMode as requested, before "auto" is resolved
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    # any TOC other than thumbnails has to be loaded
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        viewMode = "images"
        if docinfo.get("textURL",''):
            viewMode = "text"

    pt = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
199
def generateMarks(self,mk):
    """Turn one mark or a list of marks into a string of ``mk=<value>`` items.

    mk -- None, a single value, or a list of values

    Returns "" for None.  List subclasses are now treated as lists (the old
    exact ``type(mk) is ListType`` test wrapped them as a single value).
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    # NOTE(review): the items are concatenated without any separator
    # ("mk=amk=b"), exactly as before; confirm whether the consuming
    # template expects a '&' between them.
    return "".join(["mk=%s" % m for m in mk])
209
210
def findDigilibUrl(self):
    """Return the digilib base URL reported by ``template.zogilib``."""
    return self.template.zogilib.getDLBaseUrl()
215
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if idx != selected:
        return style
    return style + 'sel'
223
def getLink(self,param=None,val=None):
    """Return a link to this viewer with request parameter ``param`` set to ``val``.

    The current ``REQUEST.form`` is copied; ``param`` is removed when
    ``val`` is None, otherwise set to ``str(val)``.  If the request was made
    with ``mode=filepath`` the link switches to ``mode=imagepath`` and the
    ``url`` parameter is shortened by one path component.

    Compared to the previous version: list/tuple form values (repeated
    parameters) are emitted as repeated ``key=value`` pairs and non-string
    values are converted with ``str()`` instead of failing inside
    ``urllib.quote``; a ``mode=filepath`` request without ``url`` no longer
    raises KeyError.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    # if filepath was given on the first call, switch to imagepath now
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        if "url" in params:
            params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    pairs = []
    for (key, value) in params.items():
        if isinstance(value, (list, tuple)):
            items = value
        else:
            items = [value]
        for item in items:
            if not isinstance(item, (str, type(u""))):
                item = str(item)
            pairs.append("%s=%s" % (key, urllib.quote(item)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
242
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer with request parameter ``param`` set to ``val``.

    Works on a copy of ``REQUEST.form``; ``param`` is removed when ``val``
    is None, otherwise set to ``str(val)``.  Unlike getLink no
    filepath/imagepath rewriting is done.

    Compared to the previous version: list/tuple form values are emitted as
    repeated ``key=value`` pairs, non-string values are converted with
    ``str()`` before quoting, and the parameter dump is logged at DEBUG
    instead of INFO.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    logging.debug("XYXXXXX: %s"%repr(params.items()))

    # quote values and assemble into query string
    pairs = []
    for (key, value) in params.items():
        if isinstance(value, (list, tuple)):
            items = value
        else:
            items = [value]
        for item in items:
            if not isinstance(item, (str, type(u""))):
                item = str(item)
            pairs.append("%s=%s" % (key, urllib.quote(item)))
    # NOTE(review): the name suggests an "&amp;" separator for XML output,
    # but the code has always joined with a plain "&" -- confirm intent.
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
258
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template ``info_xml``).

    url  -- url which contains display information
    mode -- defines how to access the document behind url

    The digilib base URL is looked up with a getattr() default, as
    index_html does, because the attribute may not exist on the instance.
    """
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
268
269
def isAccessible(self, docinfo):
    """Tell whether the current user may access the resource in ``docinfo``.

    Decision, based on ``docinfo['accessType']``:
    'free' -> True;
    missing, or one of ``self.authgroups`` -> True for any user whose name
    is not "Anonymous User";
    anything else -> False.
    """
    access = docinfo.get('accessType', None)
    logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)

    if access == 'free':
        logger("documentViewer (accessOK)", logging.INFO, "access is free")
        return True

    if access is not None and access not in self.authgroups:
        logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
288
289
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp for a directory and store the page count.

    path    -- image directory path handed to digilib as ``fn``
    docinfo -- dict to update (a new one is created when None)
    cut     -- number of trailing path components to strip from ``path``

    Sets ``docinfo['numPages']`` from the first ``//dir/size`` element
    (0 if there is none) and returns docinfo.  The request is tried up to
    three times; IOError is raised when every attempt fails.

    Fixes: the size log line had no placeholder, so the result never showed
    up in the log; the bare ``except`` is narrowed to ``Exception``.
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # dom = NonvalidatingReader.parseUri(imageUrl)
            txt=urllib.urlopen(infoUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # loop ended without break: all attempts failed
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
326
327
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at ``url``.

    url -- either a full "http://" URL, used as is, or an online path; a
           path is fetched through digilib's Texter servlet after removing
           "/mpiwg/online", with "/index.meta" appended when missing

    The download is tried up to three times; IOError is raised when no
    attempt produced a DOM.  The bare ``except`` is narrowed to
    ``Exception`` and failures are reported with the ERROR level constant.
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
    logging.debug("METAURL: %s"%metaUrl)
    for cnt in range(num_retries):
        try:
            # reading the text first and parsing it avoids the encoding
            # errors seen with NonvalidatingReader.parseUri(metaUrl)
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("ERROR documentViewer (getIndexMeta)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
357
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the presentation info.xml document at ``url``.

    url -- either a full "http://" URL, used as is, or an online path that
           is fetched through digilib's Texter servlet after removing
           "/mpiwg/online"

    The download is tried up to three times; IOError is raised when no
    attempt produced a DOM.  The bare ``except`` is narrowed to
    ``Exception`` and failures are reported with the ERROR level constant.
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # reading the text first and parsing it avoids the encoding
            # errors seen with NonvalidatingReader.parseUri(metaUrl)
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("ERROR documentViewer (getPresentationInfoXML)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return dom
385
386
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the access type from an index.meta and store it in docinfo.

    path    -- location of the index.meta (only used when ``dom`` is None)
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta; loaded via getIndexMeta if None
    cut     -- trailing path components stripped from ``path`` before loading

    ``docinfo['accessType']`` becomes the ``type`` attribute of
    ``access-conditions/access`` (None if absent).  For the types 'group'
    and 'institution' the lower-cased text of the ``name`` child replaces
    it; when that child is missing the type itself is kept instead of
    raising IndexError as before.
    """
    logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()

    docinfo['accessType'] = access
    return docinfo
409
410
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read bibliographical data from an index.meta and store it in docinfo.

    path    -- location of the index.meta (only used when ``dom`` is None)
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta; loaded via getIndexMeta if None
    cut     -- trailing path components stripped from ``path`` before loading

    Stored keys: 'bib' (all children of ``bib`` by local name, when there
    are any), 'bib_type', and -- only when a field mapping for the type
    exists -- 'author', 'title', 'year' and 'lang'.  Field extraction stays
    best effort; the bare ``except`` clauses are narrowed to ``Exception``.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # index.meta writes types with "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # field not mapped or not present: leave docinfo untouched
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

    return docinfo
461
462
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the ``resource``/``texttool`` data of an index.meta.

    url     -- location of the index.meta
    dom     -- already parsed index.meta; loaded via getIndexMeta if None
    docinfo -- dict to update (a new one is created when None)

    Depending on what the document contains, sets 'imagePath', 'imageURL'
    and 'numPages' (via getDirinfoFromDigilib), 'viewerURL', 'textURL',
    'textURLPath', the bibliographical keys and 'accessType'.  Raises
    IOError when no archive path can be determined.

    Fix: an empty ``archive-path`` element raised IndexError on
    ``archivePath[0]``; it is now handled like a path lacking the leading
    slash.
    """
    logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is no longer an error: text mode is standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme: not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

    if presentationUrls:
        # presentation info overrides the bib data; its URL is the
        # index.meta URL with "index.meta" replaced by the relative
        # presentation path
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        # NOTE(review): the original comment says the page count is "simply
        # set to one for now", but getNumPages is what is called here.
        docinfo = self.getNumPages(docinfo)
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
559
560
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Copy author, title and year from the presentation info XML into docinfo.

    url     -- location of the presentation info document
    docinfo -- dict to update (a new one is created when None)
    dom     -- ignored; the presentation document is always loaded from
               ``url`` via getPresentationInfoXML

    'author', 'title' and 'year' are taken from the first ``author``,
    ``title`` and ``date`` elements; a missing element leaves the existing
    value alone.  Fix: with ``docinfo=None`` every assignment used to fail
    silently and None was returned.
    """
    if docinfo is None:
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for (key, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[key]=getTextFromNode(dom.xpath(xpath)[0])
        except Exception:
            # element not present: keep whatever docinfo already holds
            pass
    return docinfo
578
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document addressed by the path to its images.

    path    -- path to the images; the index.meta is assumed to be one
               level higher
    docinfo -- dict to update (a new one is created when None)
    cut     -- trailing components to strip from ``path`` to reach the
               image directory

    Sets 'imagePath' (the path without "/mpiwg/online", before cutting),
    'numPages' (via getDirinfoFromDigilib), 'imageURL', and the bib and
    access information read from the index.meta.

    Fix: the "PATH:" trace was emitted with logging.error; it is debug
    output and is now logged at DEBUG.
    """
    logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path=path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    pathorig=path
    for x in range(cut):
        path=getParentDir(path)
    logging.debug("PATH:"+path)
    imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    docinfo['imageURL'] = imageUrl

    # the index.meta sits one level above the images, hence cut+1
    docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    return docinfo
599
600
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, reusing the session copy if current.

    mode -- 'texttool', 'imagepath' or 'filepath'
    url  -- document location, interpreted according to ``mode``

    A docinfo stored in the session under 'docinfo' is returned when its
    'mode' and 'url' match; otherwise a new one is built, stored in the
    session and returned.  Raises ValueError for an unknown mode.
    """
    logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
626
627
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Assemble the pageinfo dict used for paging and thumbnails.

    current    -- current page number (anything getInt accepts)
    start      -- first page of the thumbnail group; defaults to the first
                  page of the group that contains ``current``
    rows, cols -- grid size; default to self.thumbrows / self.thumbcols
    docinfo    -- if it has 'numPages', 'end' is capped at that value and
                  'numgroups' is added
    viewMode, tocMode -- stored unchanged

    'tocPageSize' and 'tocPN' are read from the REQUEST (defaults '10'
    and '1').
    """
    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # first page of the group holding the current page
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)

    pageinfo = {
        'current': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
        'end': start + grpsize,
    }

    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        numgroups = int(np / grpsize)
        if np % grpsize > 0:
            numgroups += 1
        pageinfo['numgroups'] = numgroups

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')

    return pageinfo
656
657
658
def getNumPages(self,docinfo=None):
    """Count the page breaks of the full text and store them as 'numPages'.

    Sends the xquery '//pb' for ``docinfo['textURLPath']`` to the full-text
    client and counts the occurrences of "<pb " in the answer.  Returns
    the updated docinfo.
    """
    query = "document=%s&xquery=%s"%(docinfo['textURLPath'],'//pb')
    answer = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", query)
    # TODO: better processing of the page list. do we need the info somewhere else also?
    docinfo['numPages'] = answer.count("<pb ")
    return docinfo
666
def getTextPage(self, mode="text", pn=1, docinfo=None):
    """Fetch one page of the full text and return it as serialized XML.

    mode    -- "text" for plain text or "text_dict" for text with
               dictionary links (requested from the server as "textPollux")
    pn      -- page number
    docinfo -- must contain 'textURLPath'

    Returns the first top-level ``div`` of the answer; in "text_dict" mode
    links starting with 'lt/lex.xql' are redirected to this object's
    ``head_main_voc`` and opened in a new window.  Returns "no text here"
    for any other mode or when there is no such div.
    """
    docpath = docinfo['textURLPath']
    textmode = mode
    if mode == "text_dict":
        textmode = "textPollux"

    query = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
    pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", query, outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    if mode not in ("text", "text_dict"):
        return "no text here"

    # first div contains text
    pagedivs = pagedom.xpath("/div")
    if len(pagedivs) == 0:
        return "no text here"
    pagenode = pagedivs[0]

    if mode == "text_dict":
        # check all a-tags
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if not hrefNode:
                continue
            href = hrefNode.nodeValue
            if href.startswith('lt/lex.xql'):
                # pollux link: point it at this viewer and add a target
                selfurl = self.absolute_url()
                hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
                link.setAttributeNS(None, 'target', '_blank')

    return serializeNode(pagenode)
709
def getToc(self, mode="text", docinfo=None):
    """Query the size of a table of contents and cache it in docinfo.

    mode    -- TOC type; "text" is sent to the server as query type "toc",
               any other value is sent unchanged
    docinfo -- must contain 'textURLPath'

    Stores the number from the ``div`` with class 'queryResultHits' (0 if
    absent) under 'tocSize_<mode>' and returns docinfo.  If that key is
    already present docinfo is returned without a query.
    """
    logging.debug("documentViewer (gettoc) mode: %s"%(mode))
    sizeKey = 'tocSize_%s'%mode
    if sizeKey in docinfo:
        # cached toc
        return docinfo

    docpath = docinfo['textURLPath']
    queryType = mode
    if mode == "text":
        queryType = "toc"

    # we need to set a result set size
    query = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,1000,1)
    pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", query, outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    # get number of entries
    tocSize = 0
    hits = pagedom.xpath("//div[@class='queryResultHits']")
    if len(hits) > 0:
        tocSize = int(getTextFromNode(hits[0]))

    docinfo[sizeKey] = tocSize
    return docinfo
743
def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None):
    """Return one page of the table of contents as serialized XML.

    mode     -- TOC type; "text" is sent to the server as query type "toc"
    pn       -- overwritten by ``pageinfo['tocPN']`` before use
    pageinfo -- supplies 'tocPageSize' and 'tocPN'
    docinfo  -- must contain 'textURLPath'

    Returns the first ``div`` with class 'queryResultPage', or the string
    "No TOC!" when the answer has none.
    """
    # TODO: this should use the cached TOC
    queryType = mode
    if mode == "text":
        queryType = "toc"

    docpath = docinfo['textURLPath']
    pagesize = pageinfo['tocPageSize']
    pn = pageinfo['tocPN']
    query = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn)
    pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", query, outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    # div contains text
    pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
    if len(pagedivs) == 0:
        return "No TOC!"
    return serializeNode(pagedivs[0])
764
765
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on the viewer.

    title, digilibBaseUrl, thumbrows and thumbcols are stored as given;
    ``authgroups`` is split at ',' into stripped, lower-cased names.
    Redirects to 'manage_main' when a RESPONSE is supplied.
    """
    # NOTE(review): the defaults here (rows=2, cols=10) are the reverse of
    # the constructor's (cols=2, rows=10) -- confirm which is intended.
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
775
776
777
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
782
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    Redirects to 'manage_main' when a RESPONSE is supplied.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
790
791
792	##
793	## DocumentViewerTemplate class
794	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""

    meta_type = "DocumentViewer Template"
798
799
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
804
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title set on the template
    text    -- unused; the content always comes from zpt/viewer_main.zpt
    REQUEST -- when given, the browser is redirected to the new object's
               manage_main
    submit  -- unused

    Returns ''.  Fixes: the template file is closed after reading, and a
    call without REQUEST (the declared default) no longer fails on
    ``REQUEST.RESPONSE``.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
824
825
826

Note: See TracBrowser for help on using the repository browser.

Download in other formats: