Context Navigation

source: documentViewer/documentViewer.py @ 51:c5d3aabbf61b

Last change on this file since 51:c5d3aabbf61b was 51:c5d3aabbf61b, checked in by dwinter, 17 years ago
textviewer now integrated, new modus auto introduced as standard for viewing
File size: 21.9 KB

Line
1
2
3	from OFS.Folder import Folder
4	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
5	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6	from AccessControl import ClassSecurityInfo
7	from AccessControl import getSecurityManager
8	from Globals import package_home
9
10	from Ft.Xml.Domlette import NonvalidatingReader
11	from Ft.Xml.Domlette import PrettyPrint, Print
12	from Ft.Xml import EMPTY_NAMESPACE, Parse
13
14	import Ft.Xml.XPath
15
16	import os.path
17	import sys
18	import cgi
19	import urllib
20	import logging
21	import zLOG
22	import urlparse
23
def getInt(number, default=0):
    """Convert ``number`` to an int, falling back to ``default``.

    number  -- any value accepted by int() (string, float, int, ...)
    default -- value returned when the conversion fails (0 if not given)

    Only conversion failures are swallowed; unrelated exceptions such as
    KeyboardInterrupt propagate to the caller.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None or other non-numeric objects
        # ValueError: strings that are not numbers
        # OverflowError: infinite floats
        return default
30
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    ``None`` yields an empty string; children that are not text nodes are
    skipped (their own text is not collected).
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
41
42
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path that contains no '/' has no parent and yields ''.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
46
47
48	import socket
49
def urlopen(url,timeout=2):
    """Open ``url`` with urllib while a short socket timeout is in force.

    url     -- URL to open
    timeout -- socket timeout in seconds used for this call

    Returns the file-like object from urllib.urlopen. Whatever
    urllib.urlopen raises is passed on to the caller.

    The timeout is set through socket.setdefaulttimeout, i.e. it is a
    process-wide setting. It is put back to 5 seconds afterwards, also
    when opening the URL fails.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # reset even on errors so a failed open does not leave the short
        # timeout active for every other socket in the process
        socket.setdefaulttimeout(5)
56
57
58	##
59	## documentViewer class
60	##
class documentViewer(Folder):
    """Folder-based viewer for documents described by index.meta files.

    Document information is read from index.meta files and from a digilib
    server; the pages are rendered through the page templates below.
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # the folder's management tabs plus one tab for the viewer configuration
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
81
82
def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
    """Store the viewer configuration and create the 'template' subfolder.

    id              -- id of the new object
    imageViewerUrl  -- URL of the image viewer
    textViewerUrl   -- URL of the text viewer (optional)
    title           -- title of the object
    digilibBaseUrl  -- digilib base URL; looked up via findDigilibUrl()
                       when empty
    thumbcols, thumbrows -- size of the thumbnail grid
    authgroups      -- comma-separated names of the authorized groups
    """
    self.id = id
    self.title = title
    self.imageViewerUrl = imageViewerUrl
    self.textViewerUrl = textViewerUrl

    # an empty base URL means "ask the image viewer"
    self.digilibBaseUrl = digilibBaseUrl or self.findDigilibUrl()

    self.thumbcols = thumbcols
    self.thumbrows = thumbrows

    # keep the group names as a list of trimmed, lower-case strings
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # the template folder lets pages refer to template.<something>
    self.manage_addFolder('template')
100
101
102	security.declareProtected('View','index_html')
def index_html(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the viewer page.

    @param mode: how url is to be interpreted; passed on to getDocinfo
    @param url: url (or path) that identifies the document
    @param viewMode: "images", "text" or "auto" (the default); "auto"
        becomes "text" when the document has a textURL and a text viewer
        is configured, otherwise "images"
    @param start: first page of the thumbnail group; passed to getPageinfo
    @param pn: current page number
    '''
    zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # make sure the template folder exists
    if not hasattr(self, 'template'):
        self.manage_addFolder('template')

    # look the digilib URL up again if it is still unknown
    if not self.digilibBaseUrl:
        found = self.findDigilibUrl()
        self.digilibBaseUrl = found or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = self.template.viewer_main

    if viewMode == "auto":
        # text needs both a text URL in the document and a configured viewer
        if docinfo.get("textURL",'') and self.textViewerUrl:
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
132
133
def getLink(self,param=None,val=None):
    """Return a link to this viewer with parameter ``param`` set to ``val``.

    The link is built from REQUEST['URL1'] and a copy of the current
    request form, so all other parameters are kept.

    param -- name of the parameter to change (None: change nothing)
    val   -- new value; None removes ``param`` from the link
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            # removing a parameter that is not there is not an error
            params.pop(param, None)
        else:
            params[param] = str(val)

    # quote values and assemble into query string
    pairs = []
    for (k, v) in params.items():
        if not isinstance(v, basestring):
            # urllib.quote only accepts strings; form values are not
            # guaranteed to be strings
            v = str(v)
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
148
149
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if not (idx == selected):
        return style
    return style + 'sel'
157
158
def isAccessible(self, docinfo):
    """Tell whether the current user may see the document.

    docinfo -- dict whose 'accessType' entry is evaluated

    'free' is open to everybody. A missing access type, or one listed in
    self.authgroups, requires a user other than "Anonymous User". Any
    other access type is refused.
    """
    access = docinfo.get('accessType', None)
    zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s"%access)

    if access == 'free':
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")
        return True

    if access is None or access in self.authgroups:
        # restricted document: only logged-in users get through
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)
    return False
177
178
def getDirinfoFromDigilib(self,path,docinfo=None):
    """Ask digilib for directory information and store the page count.

    path    -- directory path, appended to the dirInfo-xml.jsp request
    docinfo -- dict to update (a new one is created when None)

    Returns docinfo with 'numPages' set to the first //dir/size value of
    the answer, or 0 if the answer has no such element.
    Raises IOError if the answer could not be read and parsed in
    num_retries attempts.
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards
            txt=urllib.urlopen(infoUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # the loop ended without a successful read
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # the format string used to lack its placeholder, so nothing was logged
    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size %s"%docinfo['numPages'])

    return docinfo
209
210
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at ``url``.

    url -- either a complete http(s) URL, which is used as it is, or an
           online path, which is fetched through the digilib Texter
           servlet; "/index.meta" is appended to an online path that does
           not already end in "index.meta"

    Raises IOError if the document could not be read and parsed in
    num_retries attempts.
    """
    num_retries = 3
    dom = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    # goes to the log instead of the server's stdout
    zLOG.LOG("documentViewer (getIndexMeta)", zLOG.INFO, "reading %s"%(metaUrl))

    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the
            # encoding errors seen with NonvalidatingReader.parseUri
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getIndexMeta)", zLOG.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
240
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the presentation info.xml at ``url``.

    url -- either a complete http(s) URL, which is used as it is, or an
           online path, which is fetched through the digilib Texter servlet

    Raises IOError if the document could not be read and parsed in
    num_retries attempts.
    """
    num_retries = 3
    dom = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the
            # encoding errors seen with NonvalidatingReader.parseUri
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getPresentationInfoXML)", zLOG.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXML from %s"%(url))

    return dom
269
270
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
    """Read the access type from an index.meta document.

    path    -- path of the resource; the index.meta is looked up in its
               parent directory when no dom is given
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta (optional)

    Returns docinfo with 'accessType' set to the type attribute of
    //access-conditions/access, or None if there is none. For the types
    'group' and 'institution' the lower-cased text of the access/name
    element is stored instead; if that element is missing the bare type
    is kept.
    """
    zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(getParentDir(path))

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # no name given: keep the bare type instead of failing
                zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.WARNING,"access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
291
292
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
    """Read author, title and year from the bib section of an index.meta.

    path    -- path of the resource; the index.meta is looked up in its
               parent directory when no dom is given
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta (optional)

    The element names are taken from the mapping that
    self.metadata.main.meta.bib generates for the bib type ("generic" if
    the document gives none). Returns docinfo; 'author', 'title' and
    'year' are only set if the mapping has an author field, and are ''
    for elements missing from the document.
    """
    zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(getParentDir(path))

    def firstText(field):
        # text of the first //bib/<field> element, '' if unmapped or absent
        if not field:
            return ""
        nodes = dom.xpath("//bib/%s"%field)
        if nodes:
            return getTextFromNode(nodes[0])
        return ""

    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"
    # index.meta writes types with "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ")
    bibmap=metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        docinfo['author']=firstText(bibmap['author'][0])
        docinfo['title']=firstText(bibmap['title'][0])
        docinfo['year']=firstText(bibmap['year'][0])

    return docinfo
319
320
def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
    """Collect document information from the texttool section of an index.meta.

    url     -- URL or path of the index.meta document
    dom     -- already parsed index.meta (optional, read from url if None)
    docinfo -- dict to update (a new one is created when None)

    Returns docinfo. Depending on what the document contains it gets the
    entries 'numPages', 'imagePath', 'imageURL', 'viewerURL' and
    'textURL', plus the bibliographical and access entries added by the
    helper methods called at the end.
    Raises IOError if no archive path can be determined or if there is
    no texttool/image element.
    """
    zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames=dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames)>0):
        archiveName=getTextFromNode(archiveNames[0])
    else:
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/name missing in: %s"%(url))

    archivePaths=dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths)>0):
        archivePath=getTextFromNode(archivePaths[0])
        # clean up archive path: make it absolute and let it end with the
        # resource name
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/archive-path missing in: %s"%(url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s"%(url))

    imageDirs=dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs)>0):
        imageDir=getTextFromNode(imageDirs[0])
    else:
        # we balk with no image tag
        raise IOError("No text-tool info in %s"%(url))

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        # the image directory is given relative to the archive path
        imageDir=os.path.join(archivePath,imageDir)
        imageDir=imageDir.replace("/mpiwg/online",'')
        docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    viewerUrls=dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls)>0):
        viewerUrl=getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    textUrls=dom.xpath("//texttool/text")
    if textUrls and (len(textUrls)>0):
        textUrl=getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0]=="": # not a URL (no scheme)
            textUrl=os.path.join(archivePath,textUrl)

        docinfo['textURL'] = textUrl


    presentationUrls=dom.xpath("//texttool/presentation")
    if presentationUrls and (len(presentationUrls)>0):
        # the presentation URL is obtained by replacing index.meta in the
        # URL of the metadata with the relative path of the presentation
        # info
        presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)
    else:
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
    docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
    return docinfo
397
398
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Read author, title and year from the texttool presentation info.

    url     -- URL or path of the presentation info document
    docinfo -- dict to update (a new one is created when None)
    dom     -- accepted for symmetry with the other getBibinfo method but
               not used; the presentation document is always read from url

    Returns docinfo with 'author', 'title' and 'year' taken from the first
    //author, //title and //date element; an entry is '' when the document
    has no such element.
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)

    def firstText(xpath):
        # text of the first matching element, '' if there is none
        nodes = dom.xpath(xpath)
        if nodes:
            return getTextFromNode(nodes[0])
        return ""

    docinfo['author']=firstText("//author")
    docinfo['title']=firstText("//title")
    docinfo['year']=firstText("//date")
    return docinfo
407
def getDocinfoFromImagePath(self,path,docinfo=None):
    """Collect document information for a directory of page images.

    path    -- path to the images; the index.meta file is expected one
               level above it
    docinfo -- dict to update (a new one is created when None)

    Returns docinfo with 'imagePath', 'imageURL' and the entries added by
    the digilib, bibliographical and access lookups.
    """
    zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

    # bibliographical data first, access information second
    for lookup in (self.getBibinfoFromIndexMeta, self.getAuthinfoFromIndexMeta):
        docinfo = lookup(path,docinfo=docinfo)
    return docinfo
422
423
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, using the session as a cache.

    mode -- "texttool" (url is an index.meta with texttool information)
            or "imagepath" (url is a path to images)
    url  -- location of the document

    A docinfo stored in the session is reused when its mode and url match;
    otherwise a new one is built and stored in the session.
    Raises ValueError for any other mode.
    """
    zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))

    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # only reuse it when it describes the same document
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%cached)
            return cached

    if mode != "texttool" and mode != "imagepath":
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s"%(mode))

    fresh = {'mode': mode, 'url': url}
    if mode == "texttool":
        fresh = self.getDocinfoFromTextTool(url, docinfo=fresh)
    else:
        fresh = self.getDocinfoFromImagePath(url, docinfo=fresh)

    zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%fresh)
    session['docinfo'] = fresh
    return fresh
447
448
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
    """Return a dict describing the current page and its thumbnail group.

    current -- current page number (0 if it is not a number)
    start   -- first page of the group to show; by default the first page
               of the group that contains ``current``
    rows, cols -- thumbnail grid size (self.thumbrows / self.thumbcols
               when empty)
    docinfo -- dict with 'numPages'; when given, 'end' is limited to the
               number of pages and 'numgroups' is added

    Keys of the result: 'current', 'rows', 'cols', 'groupsize', 'start',
    'end' and, with docinfo, 'numgroups'.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize

    # Groups start at 1, grpsize+1, 2*grpsize+1, ... so the last page of a
    # group is an exact multiple of grpsize. Subtracting 1 before dividing
    # keeps that page in its own group instead of pushing it to the next.
    # Page numbers below 1 are treated like page 1.
    groupStart = ((max(current, 1) - 1) // grpsize) * grpsize + 1
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if docinfo is not None:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up for a partly filled last group
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    return pageinfo
471
def text(self,mode,url,pn):
    """Collect the nodes between page break ``pn`` and the following one.

    mode -- only "texttool" sets the path of the text document
    url  -- passed to parseUrlTextTool to find the text document
    pn   -- page number (1-based index into the //pb elements)

    Returns None in every case: the node list that is collected is never
    returned (the code that did so is commented out below).

    NOTE(review): parseUrlTextTool is not defined in the visible part of
    this file -- confirm where it comes from. For any other mode textpath
    is never assigned; the resulting error is swallowed by the bare
    except below and None is returned.
    """
    if mode=="texttool": #index.meta with texttool information
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

    #print textpath
    try:
        dom = NonvalidatingReader.parseUri(textpath)
    except:
        return None

    list=[]
    nodes=dom.xpath("//pb")

    # page break that starts the requested page
    node=nodes[int(pn)-1]

    p=node

    # walk up to the enclosing p element
    while p.tagName!="p":
        p=p.parentNode


    # page break that starts the following page
    endNode=nodes[int(pn)]


    e=endNode

    # walk up to the enclosing p element
    while e.tagName!="p":
        e=e.parentNode


    next=node.parentNode

    # collect the siblings from the parent of the first page break up to
    # the parent of the second one
    while next and (next!=endNode.parentNode):
        list.append(next)
        next=next.nextSibling
    list.append(endNode.parentNode)

    if p==e:# both in the same paragraph
        pass
#    else:
#            next=p
#            while next!=e:
#                print next,e
#                list.append(next)
#                next=next.nextSibling
#
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#
525
def findDigilibUrl(self):
    """Try to get the digilib base URL from the image viewer (zogilib).

    The image viewer's getScalerUrl is read (self.imageViewerUrl without
    its last character, plus "/getScalerUrl"; a relative URL is resolved
    against this object's URL) and "/servlet/Scaler?" is removed from the
    answer.

    Returns the resulting URL, or None if the lookup fails for any reason;
    the failure is written to the log.
    """
    url = self.imageViewerUrl[:-1] + "/getScalerUrl"

    try:
        if urlparse.urlparse(url)[0]=='': #relative path
            url=urlparse.urljoin(self.absolute_url()+"/",url)

        # log the URL that is actually fetched
        logging.info("finddigiliburl: %s"%url)
        scaler = urlopen(url).read()
        return scaler.replace("/servlet/Scaler?", "")
    except Exception:
        # best effort: callers treat None as "unknown"
        logging.warning("finddigiliburl: unable to read %s"%url)
        return None
542
def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the viewer.

    The parameters have the same meaning as in __init__; authgroups is a
    comma-separated list of group names. If RESPONSE is given the browser
    is redirected to 'manage_main'.

    NOTE(review): the defaults for thumbrows and thumbcols (2 and 10) are
    swapped compared to __init__ (10 and 2) -- confirm which is intended.
    """
    settings = (
        ('title', title),
        ('imageViewerUrl', imageViewerUrl),
        ('textViewerUrl', textViewerUrl),
        ('digilibBaseUrl', digilibBaseUrl),
        ('thumbrows', thumbrows),
        ('thumbcols', thumbcols),
    )
    for name, value in settings:
        setattr(self, name, value)

    # group names are kept trimmed and in lower case
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
554
555
556
557
558	# security.declareProtected('View management screens','renameImageForm')
559
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
564
def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """Create a document viewer with the given id inside ``self``.

    If RESPONSE is given the browser is redirected to 'manage_main'.
    """
    viewer = documentViewer(id, imageViewerUrl, textViewerUrl=textViewerUrl, title=title)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
572
573
574	##
575	## DocumentViewerTemplate class
576	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class for the document viewer.

    Adds nothing to ZopePageTemplate except its own meta_type.
    """
    meta_type="DocumentViewer Template"
580
581
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
586
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main.zpt.

    id      -- id of the new template
    title   -- title to set on the template (optional)
    text, submit -- accepted but not used
    REQUEST -- when given, the browser is redirected to the manage_main
               page of the new template; when None (call from code)
               no redirect takes place

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'))
    try:
        ob.pt_edit(templateFile.read(),None)
    finally:
        templateFile.close()

    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # nothing to redirect without a request
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
604
605
606

Note: See TracBrowser for help on using the repository browser.

Download in other formats: