Context Navigation

source: documentViewer/documentViewer.py @ 95:db6d594aa4d9

Last change on this file since 95:db6d594aa4d9 was 95:db6d594aa4d9, checked in by abukhman, 14 years ago
Last update with search function (getSearch)
File size: 34.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml.Domlette import NonvalidatingReader
10	from Ft.Xml.Domlette import PrettyPrint, Print
11	from Ft.Xml import EMPTY_NAMESPACE, Parse
12
13
14	import Ft.Xml.XPath
15	import cStringIO
16	import xmlrpclib
17	import os.path
18	import sys
19	import cgi
20	import urllib
21	import logging
22	import math
23
24	import urlparse
25	from types import *
26
def logger(txt, method, txt2):
    """Log ``txt + txt2`` on the root logger at the level given by ``method``.

    txt    -- message prefix (usually the calling context)
    method -- numeric logging level such as logging.INFO or logging.ERROR;
              anything that is not an int falls back to logging.INFO
    txt2   -- message text; it is appended to txt without a separator
    """
    # The level argument used to be ignored, so errors and warnings were
    # all recorded as INFO.
    level = method if isinstance(method, int) else logging.INFO
    logging.log(level, txt + txt2)
30
31
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    number  -- value to convert (string, number, None, ...)
    default -- fallback used when number cannot be converted; it is passed
               through int() itself, so a float default is truncated
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems select the fallback; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed
        return int(default)
38
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a DOM node.

    nodename -- DOM node or None; None yields the empty string.
    Only immediate children whose nodeType is TEXT_NODE contribute;
    text inside nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
49
def serializeNode(node, encoding='utf-8'):
    """Serialize a DOM node to an XML string.

    node     -- node handed to Print()
    encoding -- output encoding passed on to Print()
    Returns the serialized text collected from an in-memory buffer.
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
57
58
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
62
63
64	import socket
65
def urlopen(url, timeout=2):
    """Open url with urllib.urlopen under a temporary socket timeout.

    url     -- URL to open
    timeout -- seconds used as the global default socket timeout while
               the connection is being opened
    Returns the object returned by urllib.urlopen.

    The global default socket timeout is set back to 5 seconds afterwards.
    This now also happens when opening fails; previously an exception
    left the temporary timeout in place for the whole process.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(5)
72
73
74	##
75	## documentViewer class
76	##
class documentViewer(Folder):
    """Zope folder object that acts as a document viewer."""
    # textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # extra management tab pointing at the configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates and forms, loaded from files in this package
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is restricted to managers
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
103
104
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """Initialise the document viewer and its 'template' sub-folder.

    id             -- Zope id of the new object
    imageScalerUrl -- passed to zogiLib as dlServerURL
    textServerName -- passed to the XML-RPC full-text client as serverUrl
    title          -- object title
    digilibBaseUrl -- base URL of the digilib server (may be None)
    thumbcols, thumbrows -- size of the thumbnail grid
    authgroups     -- comma separated list of authorized group names
    """
    self.id = id
    self.title = title
    # Store the parameter (it used to be dropped) so that later reads of
    # self.digilibBaseUrl cannot raise AttributeError.
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder)  # old style

    # Both helper objects are optional: a missing product is logged and
    # the viewer is still created.
    try:
        from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
        xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient', xmlRpcClient)
    except Exception as e:
        logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib', zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
132
133
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    '''
    render the thumbnail overview with the 'thumbs_main_rss' template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group (optional)
    @param pn: current page number
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr() with a default: the attribute may never have been set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text mode needs a text URL and a configured text viewer;
        # textViewerUrl is not defined on the class, so it is looked up
        # defensively instead of raising AttributeError
        if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
164
security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", tocMode="thumbs", start=None, pn=1, mk=None, query=None, querySearch=None):
    '''
    main view, rendered with the 'viewer_main' template
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, search)
    @param mk: mark or list of marks, handed to generateMarks
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    # every toc mode except thumbnails needs the table of contents loaded
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # automatic mode: text when a text URL is known, images otherwise
    if viewMode == "auto":
        viewMode = "text" if docinfo.get("textURL", '') else "images"

    pt = getattr(self.template, 'viewer_main')
    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
200
def generateMarks(self, mk):
    """Build the 'mk=...' query-string fragment for one or several marks.

    mk -- None, a single mark, or a list/tuple of marks
    Returns "" for None, otherwise "mk=<m>" entries joined with "&".
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    # join with "&": several marks used to be glued together without a
    # separator ("mk=amk=b"), which is not a usable query string
    return "&".join(["mk=%s" % m for m in mk])
210
211
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
216
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
    suffix = 'sel' if idx == selected else ''
    return style + suffix
224
def getLink(self, param=None, val=None):
    """Return a link to this viewer built from the current request form.

    param -- name of a form parameter to change (None: change nothing)
    val   -- new value for param; None removes param from the link
    Returns REQUEST['URL1'] + "?" + the URL-quoted query string.

    A request made with mode=filepath is rewritten to mode=imagepath with
    the parent directory of the original url.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    # if the first call used filepath, switch to imagepath from now on
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string; a multi-valued form
    # field (e.g. several mk marks) arrives as a list and is expanded
    # into repeated key=value pairs instead of crashing quote()
    pairs = []
    for key, value in params.items():
        if isinstance(value, (list, tuple)):
            values = value
        else:
            values = [value]
        for item in values:
            pairs.append("%s=%s" % (key, quote(str(item))))
    url = self.REQUEST['URL1'] + "?" + "&".join(pairs)
    return url
243
def getLinkAmp(self, param=None, val=None):
    """Return a link to this viewer built from the current request form.

    param -- name of a form parameter to change (None: change nothing)
    val   -- new value for param; None removes param from the link
    Returns REQUEST['URL1'] + "?" + the URL-quoted query string.

    NOTE(review): despite the name, parameters are joined with a plain
    "&", not "&amp;" -- confirm what the calling templates expect.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    logging.info("XYXXXXX: %s"%repr(params.items()))

    # quote values and assemble into query string; list-valued form
    # fields are expanded into repeated key=value pairs instead of
    # crashing quote()
    pairs = []
    for key, value in params.items():
        if isinstance(value, (list, tuple)):
            values = value
        else:
            values = [value]
        for item in values:
            pairs.append("%s=%s" % (key, quote(str(item))))
    url = self.REQUEST['URL1'] + "?" + "&".join(pairs)
    return url
259
def getInfo_xml(self, url, mode):
    """Return info about the document as XML (rendered by the 'info_xml' template).

    url  -- url which contains display information
    mode -- defines how to access the document behind url
    """
    # getattr() with a default: the attribute may never have been set,
    # in which case a plain attribute read would raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
269
270
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    'free' access is always granted. A missing access type, or one listed
    in self.authgroups, is granted to any user other than
    "Anonymous User". Every other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)

    if access == 'free':
        logger("documentViewer (accessOK)", logging.INFO, "access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
    return False
289
290
def getDirinfoFromDigilib(self, path, docinfo=None, cut=0):
    """Ask digilib for directory info and store the page count in docinfo.

    path    -- image directory path handed to dirInfo-xml.jsp
    docinfo -- dict to update (a new one is created if None)
    cut     -- number of trailing path components to strip first
    Returns docinfo with 'numPages' set (0 if digilib reports no size).
    Raises IOError if the info cannot be read and parsed in 3 attempts.
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # dom = NonvalidatingReader.parseUri(imageUrl)
            txt = urllib.urlopen(infoUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # the loop ended without a successful read
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes = dom.xpath("//dir/size")
    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # the format string used to lack a placeholder, so the size was
    # never actually written to the log
    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%docinfo['numPages'])

    # TODO: produce and keep list of image names and numbers

    return docinfo
327
328
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at url.

    url -- either a full http(s) URL, or an online path that is fetched
           through digilib's Texter servlet ("/index.meta" is appended to
           an online path that does not already end in index.meta)
    Raises IOError if the document cannot be read and parsed in 3 attempts.
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith(("http://", "https://")):
        # real URL (https is accepted too; it used to be mistaken for
        # an online path)
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
    logging.debug("METAURL: %s"%metaUrl)
    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; this avoids
            # the encoding errors seen with NonvalidatingReader.parseUri
            txt = urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
358
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the info.xml document at url.

    url -- either a full http(s) URL, or an online path that is fetched
           through digilib's Texter servlet
    Raises IOError if the document cannot be read and parsed in 3 attempts.
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith(("http://", "https://")):
        # real URL (https is accepted too; it used to be mistaken for
        # an online path)
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; this avoids
            # the encoding errors seen with NonvalidatingReader.parseUri
            txt = urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXML from %s"%(url))

    return dom
386
387
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Read the access type from an index.meta file into docinfo['accessType'].

    path    -- location of the index.meta file (used only if dom is None)
    docinfo -- dict to update (a new one is created if None)
    dom     -- already parsed index.meta; loaded from path if None
    cut     -- number of trailing path components to strip before loading
    Returns docinfo. 'accessType' is None when no access type is given;
    for 'group' and 'institution' it is the lower-cased access name.
    """
    logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access = acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            # a group/institution entry without a name element used to
            # raise IndexError; the plain type is kept in that case
            if names:
                access = getTextFromNode(names[0]).lower()

    docinfo['accessType'] = access
    return docinfo
410
411
def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Read bibliographical info from an index.meta file into docinfo.

    path    -- location of the index.meta file (used only if dom is None)
    docinfo -- dict to update (a new one is created if None)
    dom     -- already parsed index.meta; loaded from path if None
    cut     -- number of trailing path components to strip before loading
    Returns docinfo with 'bib' (all raw bib fields, if any), 'bib_type'
    and, when a field mapping exists for the type, 'author', 'title',
    'year' and 'lang'.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getIndexMeta(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = dom.xpath("//bib/@type")
    if bibtype:
        bibtype = bibtype[0].value
    else:
        bibtype = "generic"

    # index.meta may spell types with "-" where the mapping uses " "
    bibtype = bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field] = getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # best effort: a missing mapping entry or element leaves
                # the field unset, but the reason is now traceable
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s: %s"%(field, sys.exc_info()[1]))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang'] = getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang'] = ''

    return docinfo
462
463
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    url     -- location of the index.meta file
    dom     -- already parsed index.meta; loaded from url if None
    docinfo -- dict to update (a new one is created if None)
    Returns docinfo with image, text, bibliographical and access info.
    Raises IOError if no archive path can be determined.
    """
    logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = ''  # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path; startswith() rather than [0] so that an
        # empty archive-path element cannot raise IndexError
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is not an error any more because text mode
        # is the standard now
        imageDir = ""
        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: treat as a path relative to the archive
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])

    presentationUrls = dom.xpath("//texttool/presentation")
    # get info from the bib tag
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # presentation info overrides the bib info; its URL is the
        # index.meta URL with index.meta replaced by the relative path of
        # the presentation file
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        # page count is taken from the full text here
        docinfo = self.getNumPages(docinfo)
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get access info
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    return docinfo
560
561
def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
    """Read author, title and year from the presentation info.xml into docinfo.

    url     -- location of the presentation info file
    docinfo -- dict to update (a new one is created if None)
    dom     -- accepted for interface compatibility; the presentation
               document is always loaded from url
    Returns docinfo; fields missing from the document are left untouched.
    """
    if docinfo is None:
        # used to fall through and return None after swallowing the
        # resulting TypeErrors
        docinfo = {}
    dom = self.getPresentationInfoXML(url)
    for field, xpath in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[field] = getTextFromNode(dom.xpath(xpath)[0])
        except IndexError:
            # element not present in the presentation info
            pass
    return docinfo
579
def getDocinfoFromImagePath(self, path, docinfo=None, cut=0):
    """Fill docinfo for a document addressed by the path to its images.

    path    -- path to the images; the index.meta file is assumed to be
               one level above the image directory
    docinfo -- dict to update (a new one is created if None)
    cut     -- number of trailing path components to strip to reach the
               image directory (1 when path points at a single file)
    Returns docinfo with image, bibliographical and access info.
    """
    logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path = path.replace("/mpiwg/online","")
    # NOTE(review): 'imagePath' keeps the uncut path while 'imageURL'
    # below uses the cut one -- confirm this is intended for cut > 0.
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    pathorig = path
    for x in range(cut):
        path = getParentDir(path)
    # plain trace output; it used to be written at ERROR level
    logging.debug("PATH:"+path)
    imageUrl = self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    docinfo['imageURL'] = imageUrl

    # index.meta lives one level above the image directory, hence cut+1
    docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    return docinfo
600
601
def getDocinfo(self, mode, url):
    """Return the docinfo dict for (mode, url), using the session as cache.

    mode -- 'texttool', 'imagepath' or 'filepath'
    url  -- url which contains display information
    Raises ValueError for any other mode.
    """
    logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))

    # reuse the docinfo stored in the session if it is for the same request
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        still_current = (cached is not None
                         and cached.get('mode') == mode
                         and cached.get('url') == url)
        if still_current:
            logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
            return cached

    # build a new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
627
628
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict describing the current page and paging state.

    current    -- current page number
    start      -- first page of the thumbnail group; defaults to the
                  first page of the group that contains current
    rows, cols -- thumbnail grid size; default to self.thumbrows/thumbcols
    docinfo    -- if it has 'numPages', 'end' is capped and 'numgroups' set
    viewMode, tocMode -- stored unchanged
    Query and paging parameters are read from self.REQUEST with defaults.
    """
    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # first page of the group containing the current page
    group_start = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=group_start)

    pageinfo = {
        'current': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
        'end': start + grpsize,
    }

    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        groups, remainder = divmod(np, grpsize)
        if remainder > 0:
            groups += 1
        pageinfo['numgroups'] = groups

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    # request parameters and the defaults used when they are absent
    request_defaults = (
        ('query', ' '),
        ('queryType', ' '),
        ('querySearch', 'fulltext'),
        ('tocPageSize', '30'),
        ('queryPageSize', '20'),
        ('tocPN', '1'),
        ('searchPN', '1'),
        ('sn', '1'),
    )
    for key, fallback in request_defaults:
        pageinfo[key] = self.REQUEST.get(key, fallback)

    return pageinfo
663
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None):
    """Run a search on the full-text server and return the result XML.

    pageinfo -- supplies 'queryPageSize', 'searchPN', 'sn', 'query' and
                'queryType'
    docinfo  -- supplies 'textURLPath'
    The pn, query and queryType arguments are not used; the values from
    pageinfo take their place.
    """
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    sn = pageinfo['sn']
    query = pageinfo['query']
    queryType = pageinfo['queryType']

    arguments = "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn)
    pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", arguments, outputUnicode=False)
    # the parsed document is not used further; the raw response is returned
    Parse(pagexml)
    return pagexml
683
def getNumPages(self, docinfo=None):
    """Count the page breaks of the full text and store them in docinfo['numPages'].

    docinfo -- must contain 'textURLPath'
    Returns docinfo. The count is the number of occurrences of "<pb " in
    the result of the '//pb' query.
    """
    arguments = "document=%s&xquery=%s"%(docinfo['textURLPath'], '//pb')
    result = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", arguments)
    # TODO: better processing of the page list. do we need the info somewhere else also?
    docinfo['numPages'] = result.count("<pb ")
    return docinfo
691
def getTextPage(self, mode="text", pn=1, docinfo=None):
    """Return a single page of the full text as serialized XML.

    mode    -- "text" for plain text, "text_dict" for text with
               dictionary links (requested from the server as "textPollux")
    pn      -- page number
    docinfo -- must contain 'textURLPath'
    Returns "no text here" when the response has no top-level div or the
    mode is neither "text" nor "text_dict".
    """
    docpath = docinfo['textURLPath']
    textmode = "textPollux" if mode == "text_dict" else mode

    pagexml = self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    if mode in ("text", "text_dict"):
        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            if mode == "text_dict":
                # point dictionary links at this viewer and open them in
                # a new window
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    if href.startswith('lt/lex.xql'):
                        selfurl = self.absolute_url()
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
                        link.setAttributeNS(None, 'target', '_blank')
            return serializeNode(pagenode)

    return "no text here"
739
def getToc(self, mode="text", docinfo=None):
    """Load the size of the table of contents and store it in docinfo.

    mode    -- toc type; "text" is queried as "toc", other values are
               used as the query type directly
    docinfo -- must contain 'textURLPath'; gets 'tocSize_<mode>' set
    Returns docinfo, unchanged if 'tocSize_<mode>' is already present.
    """
    logging.debug("documentViewer (gettoc) mode: %s"%(mode))
    size_key = 'tocSize_%s'%mode
    if size_key in docinfo:
        # cached toc
        return docinfo

    docpath = docinfo['textURLPath']
    # we need to set a result set size
    pagesize = 1000
    pn = 1
    queryType = "toc" if mode == "text" else mode

    pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    # number of entries in toc, 0 if the response does not report any
    tocSize = 0
    numdivs = pagedom.xpath("//div[@class='queryResultHits']")
    if len(numdivs) > 0:
        tocSize = int(getTextFromNode(numdivs[0]))

    docinfo[size_key] = tocSize
    return docinfo
773
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
    """Return a single page of the table of contents as serialized XML.

    mode     -- toc type; "text" is queried as "toc"
    pageinfo -- supplies 'tocPageSize' and 'tocPN' (the pn argument is
                replaced by pageinfo['tocPN'])
    docinfo  -- must contain 'textURLPath'
    Returns "No TOC!" if the response has no queryResultPage div.
    """
    # TODO: this should use the cached TOC
    queryType = "toc" if mode == "text" else mode
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['tocPageSize']
    pn = pageinfo['tocPN']

    arguments = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)
    pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", arguments, outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)

    # the result page div contains the toc entries
    pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
    if len(pagedivs) == 0:
        return "No TOC!"
    return serializeNode(pagedivs[0])
794
795
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Store changed configuration values on the viewer.

    authgroups is a comma separated list; the entries are stored stripped
    and lower-cased. If RESPONSE is given the browser is redirected to
    'manage_main'.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
805
806
807
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return form()
812
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """Create a document viewer with the given id and add it to this container.

    If RESPONSE is given the browser is redirected to 'manage_main'.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
820
821
822	##
823	## DocumentViewerTemplate class
824	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used by the document viewer."""

    meta_type = "DocumentViewer Template"
828
829
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return form()
834
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main template.

    id      -- id of the new template object
    title   -- optional title set on the template
    text, submit -- accepted for interface compatibility; not used
    REQUEST -- if given, the browser is redirected to the new template's
               'manage_main'; if None (programmatic call) no redirect is
               attempted
    Returns the empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template text; the file handle used to be left open
    template_path = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    with open(template_path, 'r') as template_file:
        txt = template_file.read()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # no request to redirect; this used to fail on REQUEST['URL1']
        # or REQUEST.RESPONSE
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
854
855
856

Note: See TracBrowser for help on using the repository browser.

Download in other formats: