Context Navigation

source: documentViewer/documentViewer.py @ 100:137459e3b378

Last change on this file since 100:137459e3b378 was 100:137459e3b378, checked in by abukhman, 14 years ago
Last update
File size: 42.4 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml.Domlette import NonvalidatingReader
10	from Ft.Xml.Domlette import PrettyPrint, Print
11	from Ft.Xml import EMPTY_NAMESPACE, Parse
12
13	from xml.dom.minidom import parse, parseString
14
15
16
17	import Ft.Xml.XPath
18	import cStringIO
19	import xmlrpclib
20	import os.path
21	import sys
22	import cgi
23	import urllib
24	import logging
25	import math
26
27	import urlparse
28	from types import *
29
def logger(txt, method, txt2):
    """Log txt followed by txt2 through the root logger.

    txt    -- message prefix (callers pass a "module (function)" tag)
    method -- logging level such as logging.INFO or logging.ERROR;
              anything that is not an integer falls back to logging.INFO
    txt2   -- message text, appended to txt without a separator
    """
    # The level used to be ignored, so warnings and errors were all recorded
    # at INFO; honour it so that they can be filtered by severity.
    if not isinstance(method, int):
        method = logging.INFO
    logging.log(method, txt + txt2)
33
34
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    number  -- value to convert (for instance a request parameter)
    default -- value used when number cannot be converted (0 if not given)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # missing or unparsable value: use the default instead; a bare
        # except here would also have swallowed KeyboardInterrupt/SystemExit
        return int(default)
41
def getTextFromNode(nodename):
    """Return the concatenated data of the text nodes directly below nodename.

    Only immediate children of type TEXT_NODE contribute; an empty string is
    returned when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
52
def serializeNode(node, encoding='utf-8'):
    """Serialize node with Print() and return the resulting XML string.

    node     -- DOM node handed to Print()
    encoding -- encoding passed on to Print() (default 'utf-8')
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
60
61
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
65
66
67	import socket
68
def urlopen(url, timeout=2):
    """Open url with urllib while a temporary global socket timeout is set.

    url     -- URL to open
    timeout -- default socket timeout in seconds used while opening

    Returns whatever urllib.urlopen returns.  The process-wide default socket
    timeout is set to 5 seconds afterwards, also when opening fails.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # This reset used to be skipped when urlopen raised, which left the
        # short timeout in force for every socket created afterwards.
        socket.setdefaulttimeout(5)
75
76
77	##
78	## documentViewer class
79	##
class documentViewer(Folder):
    """document viewer: a Zope Folder that renders documents through the
    page templates declared below"""
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds a 'main config' entry, pointing at changeDocumentViewerForm,
    # to the options inherited from Folder
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms, each loaded from a file named relative to this module
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
107
108
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up the viewer and its 'template' sub-folder.

    id             -- object id
    imageScalerUrl -- passed to zogiLib as dlServerURL
    textServerName -- passed to XMLRpcServerProxy as serverUrl
    title          -- object title
    digilibBaseUrl -- base URL of the digilib server; may be None, in which
                      case the request handlers look it up on first use
    thumbcols      -- number of thumbnail columns
    thumbrows      -- number of thumbnail rows
    authgroups     -- comma separated list of authorized group names

    The 'fulltextclient' and 'zogilib' helper objects are created on a
    best-effort basis: a failure is logged and construction continues.
    """
    self.id=id
    self.title=title
    # this argument used to be accepted but never stored
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
        xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient',xmlRpcClient)
    except Exception as e:
        logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
136
137
138	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the document with the 'thumbs_main_rss' template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default, as in index_html: the attribute may never have been set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # text only if a text URL is set and a text viewer is configured;
        # textViewerUrl is not defined on the class, so it is read with a default
        if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
168
169	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
    '''
    render the document with the 'viewer_main' template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    knownBaseUrl = getattr(self, 'digilibBaseUrl', None)
    if not knownBaseUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    needsToc = (tocMode != "thumbs")
    if needsToc:
        # a table of contents is only loaded for the non-thumbnail modes
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # pageinfo receives the view mode as requested, before "auto" is resolved
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if viewMode == "auto":
        # auto mode: text when the document has a text URL, images otherwise
        viewMode = "text" if docinfo.get("textURL",'') else "images"

    template = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
207
def generateMarks(self,mk):
    """Return the marks in mk as concatenated "mk=<value>" fragments.

    mk -- None, a single mark or a list of marks

    Returns "" for None.
    """
    if mk is None:
        return ""
    # isinstance also accepts list subclasses, which the former
    # "type(mk) is not ListType" test wrapped into a one-element list
    if not isinstance(mk, list):
        mk = [mk]
    # NOTE(review): the fragments are joined without a "&" separator, exactly
    # as before -- confirm against the templates that consume this value
    return "".join(["mk=%s" % m for m in mk])
217
218
def findDigilibUrl(self):
    """Return what getDLBaseUrl() of the zogilib object in the template folder reports."""
    return self.template.zogilib.getDLBaseUrl()
223
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
231
def getLink(self,param=None,val=None):
    """Return a link to the viewer built from the current request form.

    param -- name of a form parameter to change (None leaves the form as is)
    val   -- new value for param; None removes param from the link

    A request in mode "filepath" is rewritten to mode "imagepath" with the
    parent directory of its url.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # if the first request used filepath mode, switch to imagepath from now on
    if query.get("mode", None) == "filepath":
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s"%(key,urllib.quote(value)))
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
250
def getLinkAmp(self,param=None,val=None):
    """Return a link to the viewer built from the current request form.

    param -- name of a form parameter to change (None leaves the form as is)
    val   -- new value for param; None removes param from the link

    Unlike getLink, the request mode and url are not rewritten.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # quote values and assemble into query string
    logging.info("XYXXXXX: %s"%repr(query.items()))
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s"%(key,urllib.quote(value)))
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
266
def getInfo_xml(self,url,mode):
    """Return info about the document, rendered by the 'info_xml' template.

    url  -- url of the document (passed to getDocinfo)
    mode -- access mode of the document (passed to getDocinfo)
    """
    # getattr with a default, as in index_html: the attribute may never have been set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
276
277
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    'free' resources are always accessible.  Resources without an access type,
    or with one listed in self.authgroups, require a user other than
    "Anonymous User".  Any other access type is refused.
    """
    accessType = docinfo.get('accessType', None)
    logger("documentViewer (accessOK)", logging.INFO, "access type %s"%accessType)

    if accessType == 'free':
        logger("documentViewer (accessOK)", logging.INFO, "access is free")
        return True

    if accessType is not None and accessType not in self.authgroups:
        logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%accessType)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
296
297
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Read the directory info for path from digilib and store the page count.

    path    -- image directory path (appended to the dirInfo-xml.jsp URL)
    docinfo -- dict to update; a new dict is created when None
    cut     -- number of trailing path components to drop first

    Sets docinfo['numPages'] from the first //dir/size element (0 if there is
    none) and returns docinfo.  Raises IOError when the info could not be
    read and parsed in three attempts.
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            conn = urllib.urlopen(infoUrl)
            try:
                txt = conn.read()
            finally:
                # do not leave the connection open between retries
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # best effort: log and retry (a bare except would also have
            # swallowed KeyboardInterrupt and SystemExit)
            logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # the loop ended without break: every attempt failed
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    # the format string had no placeholder, so the value was never logged
    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
334
335
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at url.

    url -- either a complete "http://" URL, which is used as it is, or an
           online path, which is fetched through the digilib Texter servlet
           (with "/index.meta" appended when missing)

    Raises IOError when the document could not be read and parsed in three
    attempts.
    """
    num_retries = 3
    dom = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        # NOTE(review): nesting reconstructed from a listing without
        # indentation -- confirm that this check belongs to the else branch
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
    logging.debug("METAURL: %s"%metaUrl)

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; the original
            # comment says this avoids encoding errors
            conn = urllib.urlopen(metaUrl)
            try:
                txt = conn.read()
            finally:
                # do not leave the connection open between retries
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # best effort: log and retry (a bare except would also have
            # swallowed KeyboardInterrupt and SystemExit)
            logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
365
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the info.xml document at url.

    url -- either a complete "http://" URL, which is used as it is, or an
           online path, which is fetched through the digilib Texter servlet

    Raises IOError when the document could not be read and parsed in three
    attempts.
    """
    num_retries = 3
    dom = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards; the original
            # comment says this avoids encoding errors
            conn = urllib.urlopen(metaUrl)
            try:
                txt = conn.read()
            finally:
                # do not leave the connection open between retries
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # best effort: log and retry (a bare except would also have
            # swallowed KeyboardInterrupt and SystemExit)
            logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return dom
393
394
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type found in an index.meta document in docinfo.

    path    -- path of the index.meta file (only read when dom is None)
    docinfo -- dict to update; a new dict is created when None
    dom     -- already parsed index.meta, if available
    cut     -- number of trailing path components to drop before reading

    docinfo['accessType'] becomes the type attribute of
    access-conditions/access, or for the types 'group' and 'institution' the
    lower-cased text of its name element, or None when there is no such
    attribute.  Returns docinfo.
    """
    logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        metaPath = path
        for x in range(cut):
            metaPath = getParentDir(metaPath)
        dom = self.getIndexMeta(metaPath)

    accessType = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs and (len(typeAttrs)>0):
        accessType = typeAttrs[0].value
        if accessType in ('group', 'institution'):
            # for these types the name element identifies who has access
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            accessType = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = accessType
    return docinfo
417
418
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    path    -- path of the index.meta file (only read when dom is None)
    docinfo -- dict to update; a new dict is created when None
    dom     -- already parsed index.meta, if available
    cut     -- number of trailing path components to drop before reading

    Fills docinfo['bib'] (all children of the bib element), docinfo['bib_type']
    and, when a mapping for the type exists, 'author', 'title', 'year' and
    'lang'.  Returns docinfo.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation -- confirm in particular that the 'lang' lookup belongs
    inside the mapping branch.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        # no type attribute: use the generic mapping
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # index.meta uses "-" where the mapping uses " " (an earlier note disputes that this is an error)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is optional: a failing lookup leaves docinfo unchanged
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            # no lang element: store an empty language
            docinfo['lang']=''

    return docinfo
469
470
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    url     -- url or path of the index.meta document
    dom     -- already parsed index.meta, if available
    docinfo -- dict to update; a new dict is created when None

    Collects the archive path, image directory, viewer URL, text URLs,
    bibliographical and access information into docinfo and returns it.
    Raises IOError when no archive path can be determined.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation -- confirm against the repository version.
    """
    logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        # getDirinfoFromDigilib adds the number of pages
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme, so not a URL: treat as path below the archive
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag

    if presentationUrls and (len(presentationUrls) > 0): # overwrite it with the presentation information
        # the presentation url is obtained by replacing index.meta in the
        # metadata url with the relative path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        docinfo = self.getNumPages(docinfo) # original note: for now simply set to one; navigation via thumbs does not work then
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
567
568
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Copy author, title and year from the presentation info at url into docinfo.

    url     -- url of the presentation info document
    docinfo -- dict to update; a new dict is created when None
    dom     -- accepted for compatibility; the document is always read from url

    A field that is missing in the document leaves docinfo unchanged for
    that key.  Returns docinfo.
    """
    # with docinfo=None every assignment used to fail inside a bare except
    # and None was returned
    if docinfo is None:
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    fields = (('author', "//author"), ('title', "//title"), ('year', "//date"))
    for (key, xpath) in fields:
        try:
            docinfo[key]=getTextFromNode(dom.xpath(xpath)[0])
        except Exception:
            # field not present: keep whatever docinfo already holds
            pass
    return docinfo
586
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for the images at path.

    path    -- path to the images; the index.meta file is assumed to be one
               level higher
    docinfo -- dict to update; a new dict is created when None
    cut     -- number of trailing path components to drop from the image path

    Returns docinfo with image path, image URL, directory, bibliographical
    and access information.
    """
    logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the scaler URL points at the shortened path
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.error("PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is expected one level above the images, hence cut+1
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
607
608
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, read according to mode.

    mode -- one of 'texttool', 'imagepath', 'filepath'
    url  -- url or path of the document

    A docinfo stored in the session for the same mode and url is reused;
    otherwise a new one is built and stored in the session.  Raises
    ValueError for an unknown mode.
    """
    logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))

    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        isCurrent = cached is not None and cached.get('mode') == mode and cached.get('url') == url
        if isCurrent:
            logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
634
635
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current  -- current page number
    start    -- first page of the thumbnail group; by default the first page
                of the group that contains current
    rows     -- thumbnail rows (default self.thumbrows)
    cols     -- thumbnail columns (default self.thumbcols)
    docinfo  -- docinfo dict; supplies 'numPages' and 'tocSize_<tocMode>'
                when available, may be None
    viewMode -- stored as pageinfo['viewMode']
    tocMode  -- stored as pageinfo['tocMode']

    The query and paging parameters are read from self.REQUEST.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of thumbnail groups, rounded up; // keeps this an integer
        # division on every Python version
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    request = self.REQUEST
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['query'] = request.get('query',' ')
    pageinfo['queryType'] = request.get('queryType',' ')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')

    pageinfo['textPN'] = request.get('textPN','1')

    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '20')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # docinfo defaults to None, which the membership test used to choke on
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        # do not point beyond the last toc page
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','1')

    return pageinfo
688
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None):
    """get search list

    Runs doc-query.xql on the fulltext client with the query parameters
    taken from pageinfo and returns the result div as an XML string, with
    the links inside it rewritten to point back to this viewer.  Returns
    "xexe" when no branch produced a result.

    The pn, query and queryType arguments are overwritten with the values
    from pageinfo.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation -- confirm the position of the setAttributeNS and return
    statements against the repository version.
    """
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    sn = pageinfo['sn']
    query =pageinfo['query']
    queryType =pageinfo['queryType']
    viewMode= pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()
    page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode) ,outputUnicode=False)
    # NOTE(review): this replacement is hard-coded for one specific document
    pagexml = page.replace('?document=/echo/la/Benedetti_1585.xml','?url=/mpiwg/online/permanent/library/163127KK')
    #hrefNode.nodeValue =pagexml.replace('mode=text','%s&mode=texttool'%selfurl)
    pagedom = Parse(pagexml)
    #logging.debug("documentViewer (gettoc) pagedom: %s"%(pagedom))
    if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        # point page links at this viewer and carry the search state along
                        selfurl = self.absolute_url()
                        #l.setAttributeNS(None, "span class = 'hit highlight'", "background-color: #77DD77;")
                        pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN))
                        hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
                        l.setAttributeNS(None, "span class = 'hit'", "background-color: #77DD77;")
            return serializeNode(pagenode)

    if (queryType=="fulltextMorph"):
        pagedivs = pagedom.xpath("//div[@class='queryResult']")

        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        # point page links at this viewer and carry the search state along
                        selfurl = self.absolute_url()
                        pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN))
                        hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
                    if href.startswith('../lt/lemma.xql'):
                        # lemma links open the head_main_lemma template in a popup window
                        selfurl = self.absolute_url()
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
            return serializeNode(pagenode)

    if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
        pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn))

                    if href.startswith('../lt/lex.xql'):
                        # dictionary links open the head_main_voc template in a popup window
                        selfurl = self.absolute_url()
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
                    if href.startswith('../lt/lemma.xql'):
                        # lemma links open the head_main_lemma template in a popup window
                        selfurl = self.absolute_url()
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
            return serializeNode(pagenode)
    return "xexe"
770
def getNumPages(self,docinfo=None):
    """Count the page breaks of the fulltext and store the count in docinfo.

    Queries the fulltext client for '//pb' in docinfo['textURLPath'], sets
    docinfo['numPages'] to the number of "<pb " occurrences in the answer and
    returns docinfo.
    """
    pageBreakQuery = '//pb'
    arguments = "document=%s&xquery=%s"%(docinfo['textURLPath'],pageBreakQuery)
    answer = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", arguments)
    # TODO: better processing of the page list. do we need the info somewhere else also?
    docinfo['numPages'] = answer.count("<pb ")
    return docinfo
778
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None,):
    """returns single page from fulltext

    mode     -- "text", "xml" or "text_dict" (text with dictionary links);
                "text_dict" is requested from the server as "textPollux"
    pn       -- page number
    docinfo  -- docinfo dict; 'textURLPath' names the document
    pageinfo -- not used

    Returns the first top-level div of the page as an XML string, or
    "no text here" when there is none.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation -- confirm against the repository version.
    """
    docpath = docinfo['textURLPath']
    if mode == "text_dict":
        textmode = "textPollux"
    else:
        textmode = mode

    #selfurl = self.absolute_url()
    #viewMode= pageinfo['viewMode']
    #tocMode = pageinfo['tocMode']
    #tocPN = pageinfo['tocPN']

    pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False)
    # post-processing downloaded xml
    pagedom = Parse(pagexml)
    # plain text mode
    if mode == "text":
        # first div contains text
        pagedivs = pagedom.xpath("/div")
        #queryResultPage
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            return serializeNode(pagenode)
    if mode == "xml":
        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            return serializeNode(pagenode)
    # text-with-links mode
    if mode == "text_dict":
        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            # check all a-tags
            links = pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    # is link with href
                    href = hrefNode.nodeValue
                    if href.startswith('lt/lex.xql'):
                        # is pollux link
                        selfurl = self.absolute_url()
                        # change href
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        # add target
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

                    if href.startswith('lt/lemma.xql'):
                        # lemma links open the head_main_lemma template in a popup window
                        selfurl = self.absolute_url()
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
            return serializeNode(pagenode)

    return "no text here"
841
def getTranslate(self, query=None, language=None):
    """Return the answer of lt/lex.xql on the fulltext client for query and language."""
    arguments = "query=%s&language=%s"%(query,language)
    return self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql",arguments,outputUnicode=False)
846
def getLemma(self, lemma=None, language=None):
    """Request words similar to `lemma` from the full-text server.

    Sends `lemma` and `language` to the server's lt/lemma.xql interface and
    returns the response exactly as the client delivers it (no parsing).
    """
    client = self.template.fulltextclient
    params = "lemma=%s&language=%s" % (lemma, language)
    return client.eval("/mpdl/interface/lt/lemma.xql", params, outputUnicode=False)
851
def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
    """Run a full-text search and return the number of result pages.

    The search parameters are read from `pageinfo` (keys 'query',
    'queryType', 'queryPageSize' and 'searchPN') and the document path from
    docinfo['textURLPath'].  The `query`, `queryType` and `pn` arguments
    are kept for interface compatibility only; their values are always
    replaced by the ones found in `pageinfo`.

    Returns hits // 20 + 1, where hits is the number found in the
    response's div[@class='queryResultHits'].  If the response contains no
    such element the hit count is taken to be 0 (same guard as getToc)
    instead of raising IndexError.
    """
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    query = pageinfo['query']
    queryType = pageinfo['queryType']

    pagexml = self.template.fulltextclient.eval(
        "/mpdl/interface/doc-query.xql",
        "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"
        % (docpath, 'text', queryType, query, pagesize, pn),
        outputUnicode=False)

    pagedom = Parse(pagexml)
    numdivs = pagedom.xpath("//div[@class='queryResultHits']")
    hits = 0
    if len(numdivs) > 0:
        hits = int(getTextFromNode(numdivs[0]))
    # NOTE(review): the divisor 20 is hard-coded and independent of
    # pageinfo['queryPageSize']; a hit count that is an exact multiple of 20
    # also yields one page more than needed -- confirm what callers expect
    # before changing it.
    tc = hits // 20 + 1
    logging.debug("documentViewer (getquery) tc: %s" % (tc))
    return tc
870
def getToc(self, mode="text", docinfo=None):
    """Determine the size of the table of contents and store it in docinfo.

    The size is stored under the key 'tocSize_<mode>'.  When that key is
    already present, docinfo is returned untouched and the server is not
    contacted.  Otherwise doc-query.xql is asked for the document at
    docinfo['textURLPath'] (queryType "toc" for mode "text", else the mode
    itself) and the number in div[@class='queryResultHits'] is recorded;
    0 is recorded when the response has no such element.  The TOC markup
    itself is not stored.

    Returns the (possibly updated) docinfo.
    """
    logging.debug("documentViewer (gettoc) mode: %s" % mode)
    sizeKey = 'tocSize_%s' % mode
    if sizeKey in docinfo:
        # already looked up earlier
        return docinfo

    docpath = docinfo['textURLPath']
    queryType = mode
    if mode == "text":
        queryType = "toc"

    # the query interface needs a result set size and a page number
    params = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (
        docpath, queryType, 1000, 1)
    response = self.template.fulltextclient.eval(
        "/mpdl/interface/doc-query.xql", params, outputUnicode=False)

    # pull the number of entries out of the returned XML
    hitNodes = Parse(response).xpath("//div[@class='queryResultHits']")
    entryCount = 0
    if hitNodes:
        entryCount = int(getTextFromNode(hitNodes[0]))

    docinfo[sizeKey] = entryCount
    return docinfo
903
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
    """Return one page of the table of contents as text.

    Asks doc-query.xql for page pageinfo['tocPN'] (page size
    pageinfo['tocPageSize']) of the document at docinfo['textURLPath'];
    queryType is "toc" for mode "text", otherwise the mode itself.  The
    `pn` argument is ignored in favour of pageinfo['tocPN'].

    In the response, links to page-fragment.xql are rewritten to point at
    this viewer (carrying viewMode, tocMode and tocPN from `pageinfo`) and
    'mode=image' is replaced by 'mode=texttool'.  The rewritten response
    string is returned without being parsed.
    """
    # TODO: this should use the cached TOC
    queryType = mode
    if mode == "text":
        queryType = "toc"

    docpath = docinfo['textURLPath']
    pagesize = pageinfo['tocPageSize']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()
    viewMode = pageinfo['viewMode']
    tocMode = pageinfo['tocMode']

    params = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (
        docpath, queryType, pagesize, tocPN)
    response = self.template.fulltextclient.eval(
        "/mpdl/interface/doc-query.xql", params, outputUnicode=False)

    # NOTE(review): both the searched link prefix and the replacement URL
    # name one specific document (Benedetti_1585 / 163127KK) -- presumably
    # these should be derived from docinfo; confirm before generalising.
    viewerLink = '%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s' % (
        selfurl, viewMode, tocMode, tocPN)
    rewritten = response.replace(
        'page-fragment.xql?document=/echo/la/Benedetti_1585.xml', viewerLink)
    return rewritten.replace('mode=image', 'mode=texttool')
933
934
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Store the viewer's configuration on this object.

    `authgroups` is a comma-separated string; it is stored as a list of
    the individual names, stripped of surrounding whitespace and
    lower-cased.  When a RESPONSE is supplied the browser is redirected to
    'manage_main' afterwards.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
944
945
946
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
951
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    When a RESPONSE is supplied the browser is redirected to 'manage_main'
    afterwards.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
959
960
961	##
962	## DocumentViewerTemplate class
963	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class used by the document viewer."""

    # type name under which instances of this class are registered
    meta_type = 'DocumentViewer Template'
967
968
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a viewer template (zpt/addDocumentViewerTemplate)."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
973
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged default template.

    A new DocumentViewerTemplate is stored in this container under `id`
    and its content is set to the text of zpt/viewer_main.zpt from this
    package.  `title` is applied when given; `text` and `submit` are
    accepted but not used.

    When called with a REQUEST the browser is redirected to the new
    object's 'manage_main' page.  Without a REQUEST (programmatic use) no
    redirect is attempted.  Returns '' in both cases.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template; close the file even if reading fails
    zptPath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    zptFile = open(zptPath, 'r')
    try:
        txt = zptFile.read()
    finally:
        zptFile.close()
    logging.info("txt %s:"%txt)

    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # nothing to redirect when not called through the web
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u + '/manage_main')
    return ''
993
994
995

Note: See TracBrowser for help on using the repository browser.

Download in other formats: