Context Navigation

source: documentViewer/documentViewer.py @ 50:6c0f20cecc60

Last change on this file since 50:6c0f20cecc60 was 50:6c0f20cecc60, checked in by dwinter, 17 years ago
added evaluation of the presentation/info.xml in texttools
File size: 21.7 KB

Line
1
2
3	from OFS.Folder import Folder
4	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
5	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6	from AccessControl import ClassSecurityInfo
7	from AccessControl import getSecurityManager
8	from Globals import package_home
9
10	from Ft.Xml.Domlette import NonvalidatingReader
11	from Ft.Xml.Domlette import PrettyPrint, Print
12	from Ft.Xml import EMPTY_NAMESPACE, Parse
13
14	import Ft.Xml.XPath
15
16	import os.path
17	import sys
18	import cgi
19	import urllib
20	import logging
21	import zLOG
22	import urlparse
23
def getInt(number, default=0):
    """Convert number to an int, falling back to default.

    @param number: value to convert with int()
    @param default: value returned when the conversion fails (0 if omitted)
    @return: int(number), or default when number cannot be converted
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are absorbed; unrelated exceptions are
        # no longer hidden the way a bare except would hide them.
        return default
30
def getTextFromNode(nodename):
    """Return the character data of the direct text children of a node.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    other children are skipped.  None yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
41
42
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path containing no '/' yields the empty string.
    """
    # rfind returns -1 when there is no slash; clamping to 0 makes the
    # slice empty in that case.
    cut = max(path.rfind('/'), 0)
    return path[:cut]
46
47
48	import socket
49
def urlopen(url,timeout=2):
    """Open url with urllib while a temporary socket timeout is in force.

    The process-wide default socket timeout is set to timeout for the
    call and set to 5 afterwards, whether the request succeeds or raises.

    @param url: URL to open
    @param timeout: default socket timeout used while opening
    @return: the object returned by urllib.urlopen
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # Reset on failure as well, so a failed request cannot leave the
        # short timeout installed for every other socket in the process.
        socket.setdefaulttimeout(5)
56
57
58	##
59	## documentViewer class
60	##
class documentViewer(Folder):
    """Folder that presents documents through page templates.

    Document and page information (docinfo, pageinfo) is assembled by the
    methods of this class from index.meta documents and from a digilib
    server, and handed to the viewer_main template.
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    # type name of this object
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # the management tabs of Folder plus one tab for the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
81
82
def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
    """Set up a new document viewer.

    @param id: id of the new object
    @param imageViewerUrl: URL of the image viewer (used by findDigilibUrl)
    @param textViewerUrl: URL of the text viewer, stored as given
    @param title: title of the object
    @param digilibBaseUrl: digilib base URL; looked up with findDigilibUrl when empty
    @param thumbcols: default number of thumbnail columns
    @param thumbrows: default number of thumbnail rows
    @param authgroups: comma separated names of the authorized groups
    """
    self.id=id
    self.title=title
    self.imageViewerUrl=imageViewerUrl
    self.textViewerUrl=textViewerUrl

    # ask the image viewer for the base URL when none was supplied
    self.digilibBaseUrl = digilibBaseUrl or self.findDigilibUrl()
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows

    # keep the authorized groups as a list of trimmed, lower-cased names
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # add template folder so we can always use template.something
    self.manage_addFolder('template')
100
101
security.declareProtected('View','index_html')
def index_html(self,mode,url,viewMode="images",start=None,pn=1):
    '''
    Render the viewer page for a document.

    @param mode: defines which type of document is behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is images
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: the rendered viewer_main template of the template folder
    '''
    zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # create template folder if it doesn't exist
    if not hasattr(self, 'template'):
        self.manage_addFolder('template')

    # resolve the digilib base URL on demand, with a fixed fallback
    if not self.digilibBaseUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    doc = self.getDocinfo(mode=mode,url=url)
    page = self.getPageinfo(start=start,current=pn,docinfo=doc)
    return self.template.viewer_main(docinfo=doc,pageinfo=page,viewMode=viewMode)
125
126
def getLink(self,param=None,val=None):
    """Return a link to this viewer built from the current form parameters.

    When param is given it is set to str(val), or removed when val is None.
    """
    query=self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # quote values and assemble into query string
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s"%(key,urllib.quote(value)))
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
141
142
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
150
151
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted.  A missing access type, or one
    listed in self.authgroups, is granted to any user whose name is not
    "Anonymous User".  Any other access type is denied.
    """
    access = docinfo.get('accessType', None)
    zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s"%access)

    if access == 'free':
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")
        return True

    if access is not None and access not in self.authgroups:
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
170
171
def getDirinfoFromDigilib(self,path,docinfo=None):
    """Read the number of pages of an image directory from digilib.

    Fetches dirInfo-xml.jsp below self.digilibBaseUrl (up to three
    attempts) and stores the integer value of its first //dir/size element
    in docinfo['numPages'], or 0 when there is no such element.

    @param path: directory path, appended as the fn parameter
    @param docinfo: dict to update; a new dict is created when None
    @return: the updated docinfo
    @raise IOError: when the info document could not be read and parsed
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(infoUrl))

    dom = None
    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards instead of
            # letting the parser fetch the URI itself
            txt=urllib.urlopen(infoUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))

    if dom is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    # the format string previously had no placeholder, so the value was
    # never written to the log
    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    return docinfo
202
203
def getIndexMeta(self, url):
    """Return the parsed dom of the index.meta document at url.

    A url starting with "http://" is fetched as is.  Any other value is
    treated as an online path: "/mpiwg/online" is removed, the path is
    requested through the Texter servlet below self.digilibBaseUrl, and
    "/index.meta" is appended unless the URL already ends with
    "index.meta".  Up to three attempts are made.

    @param url: URL or online path of the index.meta document
    @return: dom produced by Parse
    @raise IOError: when the document could not be read and parsed
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    # goes to the log instead of being printed to stdout
    zLOG.LOG("documentViewer (getIndexMeta)", zLOG.INFO, "reading %s"%(metaUrl))

    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the
            # encoding errors seen with NonvalidatingReader.parseUri
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getIndexMeta)", zLOG.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
233
def getPresentationInfoXML(self, url):
    """Return the parsed dom of the info.xml document at url.

    A url starting with "http://" is fetched as is.  Any other value is
    treated as an online path: "/mpiwg/online" is removed and the path is
    requested through the Texter servlet below self.digilibBaseUrl.  Up
    to three attempts are made.

    @param url: URL or online path of the info.xml document
    @return: dom produced by Parse
    @raise IOError: when the document could not be read and parsed
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the
            # encoding errors seen with NonvalidatingReader.parseUri
            txt=urllib.urlopen(metaUrl).read()
            dom = Parse(txt)
            break
        except Exception:
            # failures are reported with ERROR severity
            zLOG.LOG("documentViewer (getPresentationInfoXML)", zLOG.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXML from %s"%(url))

    return dom
262
263
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
    """Store the access type found in an index.meta document in docinfo.

    docinfo['accessType'] is set to the type attribute of
    //access-conditions/access, or to None when there is none.  For the
    types 'group' and 'institution' the lower-cased text of the access
    name element is used instead, when that element exists.

    @param path: path whose parent directory holds index.meta (used only when dom is None)
    @param docinfo: dict to update; a new dict is created when None
    @param dom: already parsed index.meta; fetched with getIndexMeta when None
    @return: the updated docinfo
    """
    zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(getParentDir(path))

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # keep the bare type instead of failing with IndexError
                zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.WARNING,"access name missing for type %s in: %s"%(access,path))

    docinfo['accessType'] = access
    return docinfo
284
285
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
    """Store author, title and year from an index.meta document in docinfo.

    The element names to read below //bib are taken from the mapping that
    self.metadata.main.meta.bib generates for the bib type of the document
    ("generic" when no type is given).  Nothing is stored when the mapping
    is empty or has no author field.  A field without a mapping or without
    a matching element is stored as the empty string.

    @param path: path whose parent directory holds index.meta (used only when dom is None)
    @param docinfo: dict to update; a new dict is created when None
    @param dom: already parsed index.meta; fetched with getIndexMeta when None
    @return: the updated docinfo
    """
    zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(getParentDir(path))

    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"
    # index.meta writes types with "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ")
    bibmap=metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for key in ('author', 'title', 'year'):
            field = bibmap[key][0]
            nodes = []
            if field:
                # an empty field name would give an invalid xpath
                nodes = dom.xpath("//bib/%s"%field)
            if nodes:
                docinfo[key] = getTextFromNode(nodes[0])
            else:
                # missing element: store "" instead of raising IndexError
                docinfo[key] = ""

    return docinfo
312
313
def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
    """Build docinfo from the texttool section of an index.meta document.

    Sets imagePath, imageURL and numPages from //texttool/image, and
    viewerURL and textURL when the corresponding elements exist.  The
    bibliographical fields come from the presentation info when
    //texttool/presentation exists, otherwise from the bib section of
    index.meta.  The access type is always read from index.meta.

    @param url: URL or path of the index.meta document
    @param dom: already parsed index.meta; fetched with getIndexMeta when None
    @param docinfo: dict to update; a new dict is created when None
    @return: the updated docinfo
    @raise IOError: when no archive path can be determined or there is no //texttool/image element
    """
    zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames=dom.xpath("//resource/name")
    if archiveNames:
        archiveName=getTextFromNode(archiveNames[0])
    else:
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/name missing in: %s"%(url))

    archivePaths=dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath=getTextFromNode(archivePaths[0])
        # clean up archive path; startswith also copes with an empty
        # element, where indexing [0] raised IndexError
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/archive-path missing in: %s"%(url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s"%(url))

    imageDirs=dom.xpath("//texttool/image")
    if imageDirs:
        imageDir=getTextFromNode(imageDirs[0])
    else:
        # we balk with no image tag
        raise IOError("No text-tool info in %s"%(url))

    if imageDir and archivePath:
        imageDir=os.path.join(archivePath,imageDir)
        imageDir=imageDir.replace("/mpiwg/online",'')
        docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    viewerUrls=dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        viewerUrl=getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    textUrls=dom.xpath("//texttool/text")
    if textUrls:
        textUrl=getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0]=="":
            # no scheme, so not a URL: treat it as relative to the archive path
            textUrl=os.path.join(archivePath,textUrl)

        docinfo['textURL'] = textUrl

    presentationUrls=dom.xpath("//texttool/presentation")
    if presentationUrls:
        # the presentation URL is the index.meta URL with "index.meta"
        # replaced by the relative path of the presentation info
        presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)
    else:
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
    docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
    return docinfo
390
391
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Store author, title and year from the presentation info in docinfo.

    The info document is always fetched with getPresentationInfoXML(url);
    the dom argument is accepted but not used.  A field whose element
    (//author, //title, //date) is missing is stored as the empty string.

    @param url: URL or path of the presentation info document
    @param docinfo: dict to update; a new dict is created when None
    @param dom: ignored
    @return: the updated docinfo
    """
    if docinfo is None:
        # the default None used to be subscripted directly
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for (key, expr) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
        else:
            # missing element: store "" instead of raising IndexError
            docinfo[key] = ""
    return docinfo
400
def getDocinfoFromImagePath(self,path,docinfo=None):
    """Build docinfo for a directory of images.

    path is the path to the images; index.meta is read from the directory
    one level above it.

    @param path: image directory path ("/mpiwg/online" is removed)
    @param docinfo: dict to update; a new dict is created when None
    @return: the updated docinfo
    """
    zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath=path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo=self.getDirinfoFromDigilib(imagePath,docinfo=docinfo)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imagePath

    # bibliographical data first, access information second
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo)
    return self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo)
415
416
def getDocinfo(self, mode, url):
    """Return the docinfo for url, built according to mode.

    @param mode: "texttool" or "imagepath"
    @param url: location of the document
    @return: docinfo dict; it is also stored in the session under 'docinfo'
    @raise ValueError: for any other mode
    """
    zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    # session cache switched off: the lookup key below is 'docinfo_XX'
    if session.has_key('docinfo_XX'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath"):
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s"%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    else:
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)

    zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
441
442
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
    """Return pageinfo with the given parameters.

    @param current: number of the current page (0 when not convertible)
    @param start: first page of the thumbnail group; when not convertible
        to int, the first page of the group containing current is used
    @param rows: thumbnail rows, defaults to self.thumbrows
    @param cols: thumbnail columns, defaults to self.thumbcols
    @param docinfo: when given, its 'numPages' limits 'end' and is used
        to compute 'numgroups'
    @return: dict with the keys current, rows, cols, groupsize, start,
        end and, when docinfo is given, numgroups
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # Groups start at pages 1, grpsize+1, 2*grpsize+1, ...  Counting from
    # current-1 keeps the last page of a group (e.g. page 20 of 1..20) in
    # its own group instead of moving it to the next one.
    groupStart = (max(current - 1, 0) // grpsize) * grpsize + 1
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if docinfo is not None:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    return pageinfo
465
def text(self,mode,url,pn):
    """Collect the nodes that make up text page pn (appears unfinished).

    Parses the text document, locates the pn-th and the following //pb
    element and gathers the sibling nodes between their parents in a
    local list.

    NOTE(review): the method only returns None -- explicitly when parsing
    fails, implicitly otherwise; the statement returning the collected
    list is commented out below.
    NOTE(review): parseUrlTextTool is not defined in the visible part of
    this file -- confirm where it comes from.
    NOTE(review): textpath is only bound when mode == "texttool"; for other
    modes the resulting NameError is swallowed by the bare except and None
    is returned.
    """
    if mode=="texttool": #index.meta with texttool information
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

    #print textpath
    try:
        dom = NonvalidatingReader.parseUri(textpath)
    except:
        return None

    # NOTE(review): list and next shadow the builtins of the same name
    list=[]
    nodes=dom.xpath("//pb")

    # pb element that opens page pn (pn counts from 1)
    node=nodes[int(pn)-1]

    p=node

    # climb to the enclosing element with tagName "p"
    while p.tagName!="p":
        p=p.parentNode


    # pb element that opens the following page
    # NOTE(review): raises IndexError when pn is the last page
    endNode=nodes[int(pn)]


    e=endNode

    while e.tagName!="p":
        e=e.parentNode


    next=node.parentNode

    # collect the siblings from the parent of node up to the parent of endNode
    while next and (next!=endNode.parentNode):
        list.append(next)
        next=next.nextSibling
    list.append(endNode.parentNode)

    if p==e:# both in the same paragraph
        pass
#    else:
#            next=p
#            while next!=e:
#                print next,e
#                list.append(next)
#                next=next.nextSibling
#
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#
519
def findDigilibUrl(self):
    """Try to get the digilib base URL from the image viewer (zogilib).

    Requests "getScalerUrl" below self.imageViewerUrl (with its last
    character removed); a URL without scheme is resolved against
    absolute_url() of this object.

    @return: the response with "/servlet/Scaler?" removed, or None when
        the lookup fails for any reason
    """
    try:
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        # The diagnostics call absolute_url(); they are inside the try so
        # that they cannot make this best-effort lookup raise.
        logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
        logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))

        if urlparse.urlparse(url)[0]=='': #relative path
            url=urlparse.urljoin(self.absolute_url()+"/",url)

        scaler = urlopen(url).read()
        return scaler.replace("/servlet/Scaler?", "")
    except Exception:
        # report the reason instead of failing silently
        logging.error("finddigiliburl: lookup failed: %s (%s)"%sys.exc_info()[0:2])
        return None
536
def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on the viewer.

    NOTE(review): the defaults here are thumbrows=2, thumbcols=10 while
    __init__ uses thumbcols=2, thumbrows=10 -- confirm which is intended.

    @param authgroups: comma separated names of the authorized groups
    @param RESPONSE: when given, redirected to 'manage_main'
    """
    self.title=title
    self.imageViewerUrl=imageViewerUrl
    self.textViewerUrl=textViewerUrl
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    # keep the authorized groups as a list of trimmed, lower-cased names
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
548
549
550
551
552	# security.declareProtected('View management screens','renameImageForm')
553
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
558
def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """Create a documentViewer and store it in this container under id.

    @param RESPONSE: when given, redirected to 'manage_main'
    """
    self._setObject(id,documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
566
567
568	##
569	## DocumentViewerTemplate class
570	##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer.

    A ZopePageTemplate subclass that only sets its own meta_type.
    """
    # type name of this object
    meta_type="DocumentViewer Template"
574
575
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
580
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: title to set on the template, if any
    @param text: accepted but not used
    @param REQUEST: when given, the response is redirected to the
        manage_main page of the new template
    @param submit: accepted but not used
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'))
    try:
        content = templateFile.read()
    finally:
        templateFile.close()
    ob.pt_edit(content,None)

    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code): nothing to redirect then
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
598
599
600

Note: See TracBrowser for help on using the repository browser.

Download in other formats: