Context Navigation

source: documentViewer/documentViewer.py @ 160:db9bcbbd5868

Last change on this file since 160:db9bcbbd5868 was 160:db9bcbbd5868, checked in by abukhman, 14 years ago
characterNormalization
File size: 30.7 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml import EMPTY_NAMESPACE, Parse
10	import Ft.Xml.Domlette
11	import os.path
12	import sys
13	import urllib
14	import urllib2
15	import logging
16	import math
17	import urlparse
18	import cStringIO
19
def logger(txt, method, txt2):
    """Write ``txt`` immediately followed by ``txt2`` to the root logger at INFO level.

    ``method`` is accepted so existing call sites keep working; it is not used.
    """
    message = txt + txt2
    logging.info(message)
23
24
def getInt(number, default=0):
    """Return ``number`` converted to ``int``, falling back to ``int(default)``.

    Any ordinary conversion failure (wrong type, unparsable string, infinite
    float, ...) selects the fallback.  ``default`` itself must be convertible
    by ``int()``; if it is not, that error propagates to the caller.
    """
    try:
        return int(number)
    except Exception:
        # Deliberately broad best-effort, but no longer a bare ``except`` so
        # KeyboardInterrupt / SystemExit are not swallowed.
        return int(default)
31
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    ``nodename`` is the node itself (despite the parameter name); ``None``
    yields an empty string.  Only immediate children whose ``nodeType`` equals
    ``TEXT_NODE`` contribute; nested elements are not descended into.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
42
def serializeNode(node, encoding='utf-8'):
    """Return ``node`` serialized as an XML string in the given ``encoding``.

    The node is printed with ``Ft.Xml.Domlette.Print`` into an in-memory
    buffer whose contents are returned.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xml_text = stream.getvalue()
    stream.close()
    return xml_text
50
51
def getParentDir(path):
    """Return ``path`` with its last '/'-separated component removed.

    A path that contains no '/' at all yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
55
56
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch ``url`` with an HTTP GET request and return the response body.

    ``data`` is appended to the URL as the query string: a string is appended
    verbatim, a dict / list / tuple is passed through ``urllib.urlencode``.
    The request is attempted up to ``num_tries`` times with ``timeout``
    seconds each.  An ``HTTPError`` stops immediately, while a ``URLError``
    is retried.

    Raises ``IOError`` when no response could be obtained.
    """
    # we do GET (by appending data to url)
    if isinstance(data, (str, unicode)):
        # if data is string then append
        url = "%s?%s" % (url, data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s" % (url, urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument before 2.6, so the
                # process-wide socket default is set instead -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url, timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # sys.exc_info() keeps this valid on pre-2.6 interpreters too,
            # which the version check above shows are still supported.
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection-level problem: fall through and retry

    if response is not None:
        try:
            return response.read()
        finally:
            # release the connection even if reading the body fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
99
100
101
102	##
103	## documentViewer class
104	##
class documentViewer(Folder):
    """document viewer"""

    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # extra tab in the management interface for the viewer configuration
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
132
133
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="",
             digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """Initialise the viewer and create its 'template' sub-folder.

    ``authgroups`` is a comma-separated list of authorized group names; it is
    stored as a list of stripped, lower-cased strings.  Inside the 'template'
    folder a text server client ('fulltextclient', built from
    ``textServerName``) and a zogiLib instance ('zogilib', built from
    ``imageScalerUrl``) are created; failure to create either one is logged
    and otherwise ignored.
    """
    self.id = id
    self.title = title
    # Store the configured digilib URL (possibly None) so that methods reading
    # self.digilibBaseUrl never hit a missing attribute.
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder)  # old style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception:
        # sys.exc_info() keeps this valid on older Python 2 interpreters
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
161
162
163	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getTextPage``."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
167
def getQuery(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getQuery``."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
171
def getSearch(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getSearch``."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
175
def getNumPages(self, docinfo):
    """Forward ``docinfo`` to ``template.fulltextclient.getNumPages`` and return its result."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)
179
def getTranslate(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getTranslate``."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
183
def getLemma(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getLemma``."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
187
def getToc(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getToc``."""
    client = self.template.fulltextclient
    return client.getToc(**args)
191
def getTocPage(self, **args):
    """Forward the keyword arguments to ``template.fulltextclient.getTocPage``."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
195
196
197	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    '''
    Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" resolves to "text"
        when docinfo has a 'textURL' or 'textURLPath' entry, else "images"
    @param start: passed to getPageinfo as the first page of the page group
    @param pn: current page number, passed to getPageinfo
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    # same message the module-level logger() helper would emit
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard: instances that never stored a digilib URL must fall back
    # to the lookup instead of raising AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":  # automatic mode selected
        # a text URL in docinfo means text can be shown
        if "textURL" in docinfo or 'textURLPath' in docinfo:
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
227
228	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", tocMode="thumbs", start=None, pn=1, mk=None, query=None, querySearch=None):
    '''
    Render the main viewer page ('viewer_main' template) for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "auto" resolves to "text_dict" when docinfo has a
        'textURL' or 'textURLPath' entry and to "images" otherwise; any other
        value is passed through unchanged
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as the first page of the page group
    @param pn: current page number, passed to getPageinfo
    @param mk: mark or list of marks, rendered through generateMarks
    @param query: accepted but not used by this method
    @param querySearch: accepted but not used by this method
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)

    # every tocMode except plain thumbnails needs a table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        has_text = docinfo.has_key('textURL') or docinfo.has_key('textURLPath')
        if has_text:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    template = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return template(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=marks)
266
def generateMarks(self, mk):
    """Return the mark(s) ``mk`` as URL query parameters.

    ``mk`` may be None (result: empty string), a single value, or a list of
    values.  Each value becomes ``mk=<value>``; several marks are joined with
    '&' so they stay separate parameters instead of running together.
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s" % m for m in mk])
276
277
def findDigilibUrl(self):
    """Return the digilib base URL reported by ``template.zogilib.getDLBaseUrl()``."""
    return self.template.zogilib.getDLBaseUrl()
282
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance, as given by ``absolute_url()``."""
    viewer_url = self.absolute_url()
    return viewer_url
286
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if idx != selected:
        return style
    return style + 'sel'
294
def getLink(self, param=None, val=None):
    """Return a link to this viewer with request parameter ``param`` set to ``val``.

    Starts from a copy of the current request's form parameters.  When
    ``param`` is given, ``val=None`` removes it and any other value stores
    ``str(val)``.  A 'filepath' mode is rewritten to 'imagepath', with 'url'
    shortened to its parent directory.  The result is the request's 'URL1'
    plus the urlencoded parameters.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif query.has_key(param):
            del query[param]

    # a first call made with mode=filepath is continued as imagepath
    if query.get("mode", None) == "filepath":
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    return self.REQUEST['URL1'] + "?" + urllib.urlencode(query)
314
def getLinkAmp(self, param=None, val=None):
    """Return a link to this viewer with request parameter ``param`` set to ``val``.

    Works on a copy of the current request's form parameters: ``val=None``
    removes ``param``, any other value stores ``str(val)``.  Unlike getLink,
    each value is quoted with ``urllib.quote`` and the pairs are joined with
    '&' by hand.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif query.has_key(param):
            del query[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(query.items()))
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s" % (key, urllib.quote(value)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
330
def getInfo_xml(self, url, mode):
    """Return info about the document as XML, rendered by the 'info_xml' template.

    ``url`` and ``mode`` are handed to getDocinfo unchanged.
    """
    # getattr guard: instances that never stored a digilib URL must fall back
    # to the lookup instead of raising AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
340
341
def isAccessible(self, docinfo):
    """Return whether access to the resource described by ``docinfo`` is granted.

    Decided from ``docinfo['accessType']``: 'free' always grants access; a
    missing type or one listed in ``self.authgroups`` grants access only to a
    user whose name is not "Anonymous User"; any other type is logged as
    unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
361
362
def getDirinfoFromDigilib(self, path, docinfo=None, cut=0):
    """Store the page count of the digilib directory ``path`` in ``docinfo``.

    ``path`` is first shortened by ``cut`` trailing components.  The digilib
    'dirInfo-xml.jsp' service is queried for that directory, and
    ``docinfo['numPages']`` is set from the first //dir/size element (0 when
    there is none).  Returns ``docinfo``; a new dict is created when None is
    passed.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes = dom.xpath("//dir/size")
    # The original format string had no placeholder, so the sizes were never
    # written to the log.
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
392
393
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at ``url``.

    A ``url`` starting with "http://" is fetched as is.  Anything else is
    treated as an online path and fetched through the digilib Texter servlet,
    with "/mpiwg/online" removed and "/index.meta" appended when missing.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        texter = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = texter+url.replace("/mpiwg/online","")
        # NOTE(review): assumed to apply to online paths only; the original
        # nesting of this check is not certain -- confirm.
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
    content = getHttpData(metaUrl)
    if content is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(content)
415
def getPresentationInfoXML(self, url):
    """Return the parsed DOM of the info.xml document at ``url``.

    A ``url`` starting with "http://" is fetched as is.  Anything else is
    treated as an online path and fetched through the digilib Texter servlet
    with "/mpiwg/online" removed.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        texter = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = texter+url.replace("/mpiwg/online","")

    content = getHttpData(metaUrl)
    if content is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(content)
434
435
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Set ``docinfo['accessType']`` from the access conditions in an index.meta.

    When ``dom`` is not supplied it is loaded from ``path`` shortened by
    ``cut`` components.  The value is the type attribute of
    //access-conditions/access; for the types 'group' and 'institution' it is
    replaced by the lower-cased text of the access <name> element.  It is
    None when no access type is present.  Returns ``docinfo``.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getIndexMeta(path)

    accessType = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs and (len(typeAttrs) > 0):
        accessType = typeAttrs[0].value
        if accessType in ['group', 'institution']:
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            accessType = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = accessType
    return docinfo
458
459
def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Add bibliographical info from an index.meta to ``docinfo`` and return it.

    When ``dom`` is not supplied it is loaded from ``path`` shortened by
    ``cut`` components.  Entries written:

    * 'bib': dict of all child elements of <bib> (local name -> text), only
      when <bib> has children
    * 'bib_type': the type attribute of <bib> with '-' replaced by ' ', or
      "generic" when there is none
    * 'author', 'title', 'year', 'lang': only when the metadata mapping for
      the bib type is non-empty and has a non-empty author field; a field
      that cannot be read is skipped ('lang' then becomes '')
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getIndexMeta(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib) > 0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = dom.xpath("//bib/@type")
    if bibtype and (len(bibtype) > 0):
        bibtype = bibtype[0].value
    else:
        bibtype = "generic"

    # index.meta spells some types with "-" where the mapping uses " "
    bibtype = bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    # NOTE(review): the 'lang' lookup is assumed to sit inside this branch;
    # the original nesting is not certain -- confirm.
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field] = getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # best effort: a missing mapping entry or element just leaves
                # the field unset (but Ctrl-C / SystemExit still propagate)
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang'] = getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang'] = ''

    return docinfo
510
511
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill ``docinfo`` from the <texttool> section of an index.meta and return it.

    ``dom`` is loaded from ``url`` when not supplied.  Depending on what the
    document contains, these entries are written: 'lang' (defaults to ''),
    'imagePath', 'imageURL', 'numPages' (through getDirinfoFromDigilib or
    getNumPages), 'viewerURL', 'textURL', 'textURLPath', plus whatever the
    bibliographic, presentation and access helpers add.

    Raises ``IOError`` when no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = ''  # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path; startswith() also copes with an empty
        # <archive-path>, where indexing [0] would raise IndexError
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is not an error: text mode is the standard now
        imageDir = ""
        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":  # no scheme: relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # .get(): 'imagePath' is unset when an <image> element exists but is
        # empty, which made a plain lookup raise KeyError
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)  # info from the bib tag

    if presentationUrls and (len(presentationUrls) > 0):
        # Presentation info overrides the bib info.  Its URL is the metadata
        # URL with index.meta replaced by the relative presentation path.
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)  # get access info

    return docinfo
613
614
def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
    """Override author/title/year in ``docinfo`` from a texttool presentation file.

    The presentation XML is always loaded from ``url``; the ``dom`` argument
    is accepted for call compatibility but not used.  'author', 'title' and
    'year' are taken from the first //author, //title and //date element; a
    field that cannot be read is left untouched.  Returns ``docinfo`` (a new
    dict when None was passed).
    """
    if docinfo is None:
        # without this the default made every assignment fail silently and
        # the method returned None
        docinfo = {}

    dom = self.getPresentationInfoXML(url)
    for field, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[field] = getTextFromNode(dom.xpath(expr)[0])
        except Exception:
            # best effort: a missing element leaves the existing value alone
            pass
    return docinfo
632
def getDocinfoFromImagePath(self, path, docinfo=None, cut=0):
    """Build ``docinfo`` for a document addressed by its image directory.

    ``path`` is the path to the images; the index.meta file is assumed to be
    one level above it.  "/mpiwg/online" is removed from ``path``, which is
    stored as 'imagePath'.  'imageURL' points at the digilib Scaler servlet
    for the path shortened by ``cut`` components.  Directory, bibliographic
    and access info are added by the respective helpers.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # keep the uncut path for the index.meta lookups below
    imagePath = path
    for _ in range(cut):
        path = getParentDir(path)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

    # index.meta lives one level above the images, hence cut+1
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
653
654
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, built according to ``mode``.

    A docinfo cached in the session is reused when its 'mode' and 'url' match
    the arguments.  Otherwise a new one is built ('texttool', 'imagepath' or
    'filepath' mode), stored in the session and returned.

    Raises ``ValueError`` for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url names a file inside the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
680
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return a pageinfo dict describing the current page and its thumbnail group.

    ``rows`` / ``cols`` default to ``self.thumbrows`` / ``self.thumbcols``;
    their product is the group size.  ``start`` defaults to the first page of
    the group that contains ``current``.  With a 'numPages' entry in
    ``docinfo``, 'end' is capped at it and 'numgroups' is computed.  The
    remaining entries are copied from the request, with defaults.  With a
    'tocSize_<tocMode>' entry in ``docinfo``, 'tocPN' is limited to the
    number of toc pages.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group containing the current page
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        pageinfo['numgroups'] = int(np // grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    request = self.REQUEST
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','')
    pageinfo['query'] = request.get('query',' ')
    pageinfo['queryType'] = request.get('queryType',' ')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo is optional (default None), so guard before the membership test
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up (explicit floor division)
        if tocSize % tocPageSize > 0:
            tocPages = tocSize // tocPageSize + 1
        else:
            tocPages = tocSize // tocPageSize
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
728
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Store new configuration values on the viewer.

    ``authgroups`` is a comma-separated string, stored as a list of stripped,
    lower-cased names.  When ``RESPONSE`` is given, it is redirected to
    'manage_main' afterwards.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
738
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
743
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """Create a documentViewer named ``id`` and add it to this container.

    When ``RESPONSE`` is given, it is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
751
752	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""

    # type name for these template objects
    meta_type = "DocumentViewer Template"
756
757
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
762
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    ``text`` and ``submit`` are accepted but not used.  With a ``REQUEST``,
    the response is redirected to the new object's 'manage_main'; without
    one (a programmatic call) the redirect is skipped.  Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        # close the handle explicitly instead of waiting for garbage collection
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None; only redirect when there is a request at all
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
782
783
784

Note: See TracBrowser for help on using the repository browser.

Download in other formats: