Context Navigation

source: documentViewer/documentViewer.py @ 174:5780092e4989

Last change on this file since 174:5780092e4989 was 174:5780092e4989, checked in by dwinter, 14 years ago
* empty log message *
File size: 31.6 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml import EMPTY_NAMESPACE, Parse
10	import Ft.Xml.Domlette
11	import os.path
12	import sys
13	import urllib
14	import urllib2
15	import logging
16	import math
17	import urlparse
18	import cStringIO
19	import re
20
def logger(txt,method,txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    The ``method`` argument is accepted but not used.
    """
    message = txt + txt2
    logging.info(message)
24
25
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` on failure.

    Only conversion failures are caught (wrong type, unparsable string,
    infinite float), so KeyboardInterrupt / SystemExit are no longer
    swallowed as they were by the former bare ``except``.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
32
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when ``nodename`` is None. Only immediate children whose
    nodeType is TEXT_NODE contribute; nested elements are skipped.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
43
def serializeNode(node, encoding='utf-8'):
    """Return ``node`` printed as XML into a string.

    The node is written with Ft.Xml.Domlette.Print into an in-memory
    buffer using the given encoding; the buffer content is returned.
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
51
52
def getParentDir(path):
    """Return ``path`` with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    components = path.split('/')
    return '/'.join(components[:-1])
56
57
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Return the body of an HTTP GET request for url (+ data).

    @param url: URL to fetch
    @param data: optional query; a string is appended as-is, a dict/list/tuple
        is urlencoded first. It is appended with '?' or, when url already
        contains a query string, with '&'.
    @param num_tries: maximum number of attempts; only URLError (e.g. timeout,
        connection problems) is retried, an HTTPError stops immediately
    @param timeout: timeout in seconds per attempt
    @raise IOError: if no response could be obtained
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, (str, unicode)):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # don't produce a second '?' if url already carries a query string
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s"%(url,separator,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter before Python 2.6, so the
                # process-wide socket default has to be set instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e" so the
            # same source parses on every Python version this file supports
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no break: fall through to the next attempt

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even when read() fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
100
101
102
103	##
104	## documentViewer class
105	##
class documentViewer(Folder):
    """Zope folder that displays a document as page images and/or text.

    Document metadata ("docinfo") is collected from an index.meta file or
    from a digilib image directory; paging state ("pageinfo") is derived
    from the request. Both dicts are passed to page templates looked up in
    the ``template`` sub-folder.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        Creates the ``template`` sub-folder and tries to put a
        ``fulltextclient`` (MpdlXmlTextServer) and a ``zogilib`` object into
        it. Failure to create either one is logged and otherwise ignored.
        """
        self.id=id
        self.title=title
        if digilibBaseUrl is not None:
            # the argument used to be silently dropped; when it is not given
            # the attribute stays unset and is resolved lazily on first view
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search"""
        return self.template.fulltextclient.getSearch(**args)

    def getNumPages(self, docinfo):
        """get numpages"""
        return self.template.fulltextclient.getNumPages(docinfo)

    def getTranslate(self, **args):
        """get translate"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage"""
        return self.template.fulltextclient.getTocPage(**args)


    def _ensureDigilibBaseUrl(self, fallback):
        """Make sure self.digilibBaseUrl is set and return it.

        If the attribute is missing or empty it is taken from zogilib, or
        from ``fallback`` when zogilib yields nothing.
        """
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback
        return self.digilibBaseUrl

    def _autoViewMode(self, viewMode, docinfo, textMode):
        """Resolve viewMode "auto": textMode if docinfo has a text URL, else "images"."""
        if viewMode != "auto":
            return viewMode
        if ('textURL' in docinfo) or ('textURLPath' in docinfo):
            # text URL is set, so text can be displayed
            return textMode
        return "images"


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        viewMode = self._autoViewMode(viewMode, docinfo, "text")

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        viewMode = self._autoViewMode(viewMode, docinfo, "text_dict")

        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Builds the "mk=..." query fragment for one mark or a list of marks;
        # returns "" when mk is None. Several marks are joined with "&"
        # (they used to be glued together without any separator).
        # Deliberately documented with a comment: the original method has no
        # docstring, and adding one would make it publishable by Zope.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "&".join(["mk=%s"%m for m in mk])


    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        Returns None when the template folder or its zogilib object is
        missing, so that callers can fall back to a default URL.
        """
        template = getattr(self, 'template', None)
        zogilib = getattr(template, 'zogilib', None)
        if zogilib is None:
            return None
        return zogilib.getDLBaseUrl()

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def _getLinkParams(self, param, val):
        """Return a copy of the request form with param set to val (removed if val is None)."""
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)
        return params

    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val"""
        params = self._getLinkParams(param, val)

        if params.get("mode", None) == "filepath":
            # if the first call used filepath, change it to imagepath now
            params["mode"] = "imagepath"
            params["url"] = getParentDir(params["url"])

        # quote values and assemble into query string
        ps = urllib.urlencode(params)
        return self.REQUEST['URL1']+"?"+ps

    def getLinkAmp(self,param=None,val=None):
        """link to documentviewer with parameter param set to val"""
        params = self._getLinkParams(param, val)

        # quote values and assemble into query string
        logging.debug("XYXXXXX: %s"%repr(params.items()))
        pairs = []
        for (k, v) in params.items():
            if not isinstance(v, (str, unicode)):
                # urllib.quote only accepts strings
                v = str(v)
            pairs.append("%s=%s"%(k,urllib.quote(v)))
        return self.REQUEST['URL1']+"?"+"&".join(pairs)

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' is always accessible. A missing access type or one listed in
        self.authgroups requires a user that is not "Anonymous User".
        Everything else is denied.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """gets directory info from digilib's dirInfo-xml and stores numPages in docinfo

        @param path: image directory path
        @param cut: number of trailing path components to drop first
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string had no placeholder, so sizes were never logged
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%repr(sizes))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: /mpiwg/online/(experimental|permanent)/... or "" if url doesn't match"""
        # NOTE(review): the pattern in the reviewed source read
        # ".(experimental\|permanent)/(.)", which can never match a real path
        # and looks like a mangled ".*(experimental|permanent)/(.*)" -- confirm
        # against the repository.
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""

        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))

    def _isRealUrl(self, url):
        """True if url is an absolute http(s) URL rather than an online path."""
        return url.startswith("http://") or url.startswith("https://")

    def _getTexterUrl(self, path):
        """Return the digilib Texter servlet URL for an online path."""
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        return server+path.replace("/mpiwg/online","")

    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url"""
        if self._isRealUrl(url):
            # real URL
            return url

        # online path
        metaUrl = self._getTexterUrl(url)
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url"""
        if self._isRealUrl(url):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self._getTexterUrl(url)

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Stores the result as docinfo['accessType'] (None if the index.meta
        has no access type).
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    # keep the bare type; isAccessible() denies access types
                    # that are not in authgroups, so this fails closed
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets docinfo['indexMetaPath'], 'bib' (all raw bib fields), 'bib_type',
        'lang' and, where the metadata mapping provides them, 'author',
        'title' and 'year'.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # index.meta uses "-" where the mapping uses " "
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    # best effort: a field that is unmapped or absent from
                    # the document is simply left out of docinfo
                    logging.debug("documentViewer (getbibinfofromindexmeta) no %s: %s"%(field,sys.exc_info()[1]))
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)

        # NOTE(review): the language is read from a fixed element and does not
        # use the mapping, so it is extracted regardless of bibmap; the
        # original nesting could not be determined from the reviewed source.
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

        return docinfo


    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        @param url: URL or online path of the index.meta
        @raise IOError: if no archive path can be determined
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        # a missing image tag is not an error because text mode is standard
        imageDir = ""
        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
        else:
            # no images; always set the keys so later lookups can't fail
            docinfo['imagePath'] = ""
            docinfo['imageURL'] = ""

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "":
                # no URL scheme: path relative to the archive path
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls:
            docinfo['textURLPath'] = getTextFromNode(textUrls[0])
            if not docinfo.get('imagePath'):
                # text-only, no page images
                docinfo = self.getNumPages(docinfo)

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

        if presentationUrls:
            # presentation info overrides the bib info; its URL is the
            # index.meta URL with "index.meta" replaced by the relative
            # presentation path
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The ``dom`` argument is ignored; the presentation document is always
        loaded from ``url``. Fields missing there leave docinfo untouched.
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            nodes = dom.xpath(xpath)
            if nodes:
                docinfo[field]=getTextFromNode(nodes[0])
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

        # the index.meta file is assumed one level above the images: cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        A docinfo cached in the session is reused when its mode and url match.
        @raise ValueError: if mode is not 'texttool', 'imagepath' or 'filepath'
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        session = self.REQUEST.SESSION
        # look for cached docinfo in session
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        session['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
        """returns pageinfo with the given parameters

        Thumbnail paging values are computed from current/start/rows/cols and
        docinfo['numPages']; search and toc settings are read from
        self.REQUEST (the characterNormalization argument is not used).
        """
        request = self.REQUEST
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # without explicit start use the first page of the group holding current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        pageinfo['characterNormalization'] = request.get('characterNormalization',' ')
        pageinfo['query'] = request.get('query',' ')
        pageinfo['queryType'] = request.get('queryType',' ')
        pageinfo['querySearch'] =request.get('querySearch', 'fulltext')
        pageinfo['textPN'] = request.get('textPN','1')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =request.get('queryPageSize', '10')
        pageinfo['tocPN'] = request.get('tocPN', '1')
        toc = int(pageinfo['tocPN'])
        pageinfo['textPages'] = toc

        tocKey = 'tocSize_%s'%tocMode
        # docinfo defaults to None, so guard the membership test
        if (docinfo is not None) and (tocKey in docinfo):
            # cached toc: clamp the requested toc page to the last one
            tocSize = int(docinfo[tocKey])
            tocPageSize = int(pageinfo['tocPageSize'])
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min(tocPages,toc)
        pageinfo['searchPN'] =request.get('searchPN','1')
        pageinfo['sn'] =request.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        Redirects to manage_main when RESPONSE is given.
        """
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
759
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    bound = form.__of__(self)
    return bound()
764
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
772
773	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass that only sets its own meta_type."""

    meta_type = "DocumentViewer Template"
777
778
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    bound = form.__of__(self)
    return bound()
783
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    The template text is always read from 'zpt/viewer_main.zpt' in this
    package; the ``text`` and ``submit`` arguments are not used.
    When REQUEST is given the browser is redirected to the new object's
    manage_main. Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # the file handle used to be left open
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
803
804
805

Note: See TracBrowser for help on using the repository browser.

Download in other formats: