Context Navigation

source: documentViewer/documentViewer.py @ 234:0813585df9f4

Last change on this file since 234:0813585df9f4 was 234:0813585df9f4, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.2 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt,method,txt2):
    """Log txt directly followed by txt2 at INFO level on the root logger.

    The method argument is accepted for the benefit of existing callers
    but is not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    number  -- any value accepted by int(); typically a request parameter
    default -- fallback that is itself converted with int()

    Only ordinary conversion errors are handled; KeyboardInterrupt and
    SystemExit are no longer swallowed.
    """
    try:
        return int(number)
    except Exception:
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize node with Ft.Xml.Domlette.Print and return the XML string.

    node     -- node to print
    encoding -- output encoding handed to Print
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def getBrowserType(self):
    """Return the browser type object, caching it in the session.

    The value stored under 'browserType' in REQUEST.SESSION is reused when
    present; otherwise browserCheck(self) is called and its result stored.
    """
    session = self.REQUEST.SESSION
    if not session.has_key('browserType'):
        detected = browserCheck(self)
        session.set('browserType', detected)
        logging.debug("documentViewer (BROWSER TYPE) bt %s"%detected)
        return detected
    return session['browserType']
63
64
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    if '/' not in path:
        return ''
    return path.rsplit('/', 1)[0]
68
69
70	def getHttpData(url, data=None, num_tries=3, timeout=10):
71	"""returns result from url+data HTTP request"""
72	# we do GET (by appending data to url)
73	if isinstance(data, str) or isinstance(data, unicode):
74	# if data is string then append
75	url = "%s?%s"%(url,data)
76	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
77	# urlencode
78	url = "%s?%s"%(url,urllib.urlencode(data))
79
80	response = None
81	errmsg = None
82	for cnt in range(num_tries):
83	try:
84	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
85	if sys.version_info < (2, 6):
86	# set timeout on socket -- ugly :-(
87	import socket
88	socket.setdefaulttimeout(float(timeout))
89	response = urllib2.urlopen(url)
90	else:
91	response = urllib2.urlopen(url,timeout=float(timeout))
92	# check result?
93	break
94	except urllib2.HTTPError, e:
95	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
96	errmsg = str(e)
97	# stop trying
98	break
99	except urllib2.URLError, e:
100	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
101	errmsg = str(e)
102	# stop trying
103	#break
104
105	if response is not None:
106	data = response.read()
107	response.close()
108	return data
109
110	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
111	#return None
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """Folder-based document viewer."""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # add a 'main config' tab that opens changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # page templates and forms, loaded from files next to this module
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
145
146
147	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
148	"""init document viewer"""
149	self.id=id
150	self.title=title
151	self.thumbcols = thumbcols
152	self.thumbrows = thumbrows
153	# authgroups is list of authorized groups (delimited by ,)
154	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
155	# create template folder so we can always use template.something
156
157	templateFolder = Folder('template')
158	#self['template'] = templateFolder # Zope-2.12 style
159	self._setObject('template',templateFolder) # old style
160	try:
161	import MpdlXmlTextServer
162	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
163	#templateFolder['fulltextclient'] = xmlRpcClient
164	templateFolder._setObject('fulltextclient',textServer)
165	except Exception, e:
166	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
167	try:
168	from Products.zogiLib.zogiLib import zogiLib
169	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
170	#templateFolder['zogilib'] = zogilib
171	templateFolder._setObject('zogilib',zogilib)
172	except Exception, e:
173	logging.error("Unable to create zogiLib for zogilib: "+str(e))
174
175
176	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward to getTextPage of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
180
def getQuery(self, **args):
    """Forward to getQuery of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
184
def getSearch(self, **args):
    """Forward to getSearch of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
188
def getNumPages(self, docinfo):
    """Forward docinfo to getNumPages of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)
192
def getNumTextPages(self, docinfo):
    """Forward docinfo to getNumTextPages of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)
196
def getTranslate(self, **args):
    """Forward to getTranslate of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
200
def getLemma(self, **args):
    """Forward to getLemma of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
204
def getToc(self, **args):
    """Forward to getToc of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getToc(**args)
208
def getTocPage(self, **args):
    """Forward to getTocPage of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
212
213
214	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes text when the
                     docinfo has a textURL or textURLPath, else images
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default: the attribute may never have been set on
    # this instance (same guard as index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # a text URL is set, so show text
        if ("textURL" in docinfo) or ('textURLPath' in docinfo):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
244
245	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    '''
    Render the viewer_main template for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: text, images or auto; auto becomes text_dict when the
                     docinfo has a textURL or textURLPath, else images
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @param mk: mark or list of marks, rendered through generateMarks
    @param characterNormalization: type of text display (reg, norm, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)

    Returns an error string instead when the template folder is missing.
    '''

    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        fallback = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = self.findDigilibUrl() or fallback

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # auto mode: prefer text when the document has a text URL
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    marks = self.generateMarks(mk)
    pt = getattr(self.template, 'viewer_main')
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
283
def generateMarks(self,mk):
    """Return 'mk=<value>' for every mark, concatenated without separator.

    mk may be None (gives ""), a single value or a list of values.
    """
    if mk is None:
        return ""
    if isinstance(mk, list):
        marks = mk
    else:
        marks = [mk]
    return "".join(["mk=%s"%m for m in marks])
293
294
def findDigilibUrl(self):
    """Return the result of getDLBaseUrl() on the template's zogilib object."""
    return self.template.zogilib.getDLBaseUrl()
299
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    here = self.absolute_url()
    return here
303
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
311
def getLink(self,param=None,val=None):
    """Return a link to this viewer built from the current request parameters.

    param -- name of a parameter to change; None leaves all as they are
    val   -- new value for param (stored as str); None removes param

    A mode of 'filepath' is turned into 'imagepath' and the url parameter
    is replaced by its parent directory.
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    if params.get("mode", None) == "filepath":
        # filepath was set on the first call: switch to imagepath now
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    query = urllib.urlencode(params)
    return self.REQUEST['URL1']+"?"+query
331
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer built from the current request parameters.

    param -- name of a parameter to change; None leaves all as they are
    val   -- new value for param (stored as str); None removes param

    Values are quoted with urllib.quote and the pairs are joined with '&'.
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    pairs = ["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
347
def getInfo_xml(self,url,mode):
    """Return info about the document as XML, rendered by the info_xml template.

    url  -- url which contains display information
    mode -- defines how to access the document behind url
    """

    # getattr with a default: the attribute may never have been set on
    # this instance (same guard as index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
357
358
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    An accessType of 'free' is always granted.  A missing accessType, or
    one listed in self.authgroups, is granted only to a user whose name is
    not "Anonymous User".  Any other accessType is logged and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
378
379
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the size reported by digilib's dirInfo-xml.jsp as docinfo['numPages'].

    path    -- directory path sent to digilib as fn
    docinfo -- dict to update; a new one is created when None
    cut     -- number of trailing path components to drop first

    numPages is 0 when the reply contains no //dir/size element.
    Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string used to lack a placeholder, so sizes was never shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
409
def getIndexMetaPath(self,url):
    """Return only the path part of url, rooted at /mpiwg/online.

    Everything up to 'experimental' or 'permanent' is replaced by
    '/mpiwg/online/'.  Returns "" when url contains neither
    'experimental/' nor 'permanent/'.
    """
    # group 1: the archive section, group 2: everything after it
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
418
419
420
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned unchanged.  Anything else is
    treated as an online path: "/mpiwg/online" is removed, the digilib
    Texter servlet prefix is put in front and "/index.meta" is appended
    unless the result already ends with "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
436
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as a DOM.

    Raises IOError when the document cannot be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
449
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as a DOM.

    A url starting with "http://" is fetched as is; anything else is
    treated as an online path and fetched through the digilib Texter
    servlet with "/mpiwg/online" removed.

    Raises IOError when the document cannot be read.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
468
469
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from index.meta as docinfo['accessType'].

    path    -- location of the index.meta file (used when dom is None)
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; loaded from path when None
    cut     -- number of trailing path components dropped before loading

    The access type is the type attribute of access-conditions/access.
    For 'group' and 'institution' the lower-cased text of the access name
    element is used instead.  It is None when there is no such attribute.
    Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ('group', 'institution'):
            nameNodes = dom.xpath("//access-conditions/access/name")
            access = getTextFromNode(nameNodes[0]).lower()

    docinfo['accessType'] = access
    return docinfo
492
493
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Add bibliographical info from index.meta to docinfo.

    path    -- location of the index.meta file (used when dom is None)
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; loaded from path when None
    cut     -- number of trailing path components dropped before loading

    Sets indexMetaPath, bib (all raw bib fields) when bib has children,
    and bib_type.  When a mapping with a non-empty author field exists
    for the bib type, author, title, year and lang are filled in as far
    as they can be read.
    Returns docinfo.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # Each field is best effort: a missing mapping entry, an empty
        # field name or a missing node simply leaves the field unset.
        # Catch Exception rather than everything so that interpreter
        # exit requests are not swallowed.
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

    return docinfo
548
549
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from index.meta as docinfo['name'].

    path    -- location of the index.meta file (used when dom is None)
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; loaded from path when None
    cut     -- number of trailing path components dropped before loading

    The name is "" when index.meta has no /resource/name element.
    Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        # a missing name used to raise IndexError here
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])

    return docinfo
571
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    url     -- location of the index.meta file
    dom     -- already parsed index.meta; loaded from url when None
    docinfo -- dict to update; a new one is created when None

    Sets imagePath and imageURL, optionally viewerURL, textURL and
    textURLPath, then adds bibliographical, name and access information
    through the other index.meta helpers.

    Raises IOError when no archive path can be determined.
    Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        # startswith instead of [0]: an empty element must not raise IndexError
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # no image tag is not an error any more because text mode is now standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # .get: imagePath is not set at all when the image element is
        # present but empty
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls and (len(presentationUrls) > 0): # overwrite with the presentation information
        # the presentation URL is the metadata URL with index.meta
        # replaced by the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
674
675
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Overwrite author, title and year with values from the presentation info.

    url     -- location of the presentation info XML
    docinfo -- dict to update; a new one is created when None
    dom     -- ignored; the presentation document is always loaded from url

    A field is only set when the document has a matching element
    (//author, //title, //date).
    Returns docinfo.
    """
    if docinfo is None:
        # previously a None docinfo made every assignment fail silently
        # and None was returned
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for key, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
693
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a directory of page images.

    path    -- path to the images; "/mpiwg/online" is removed from it
    docinfo -- dict to update; a new one is created when None
    cut     -- number of trailing path components dropped to reach the
               image directory

    The index.meta file is assumed to be one level above the image
    directory, so the bib and auth helpers are called with cut+1.
    Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo=self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    shortened = imagePath
    for x in range(cut):
        shortened = getParentDir(shortened)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+shortened)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+shortened

    # index.meta lives one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
714
715
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    mode -- one of 'texttool', 'imagepath', 'filepath'
    url  -- url which contains display information

    The docinfo cached in the session is returned when it was built for
    the same mode and url; otherwise a new one is built and cached.

    Raises ValueError for an unknown mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
741
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """Return a pageinfo dict for the given parameters.

    current  -- current page number
    start    -- first page of the thumbnail group; when missing it is
                derived from current and the group size
    rows     -- thumbnail rows (default self.thumbrows)
    cols     -- thumbnail columns (default self.thumbcols)
    docinfo  -- document info dict; may be None
    viewMode -- stored as pageinfo['viewMode']
    tocMode  -- stored as pageinfo['tocMode']; also selects the
                'tocSize_<tocMode>' entry of docinfo
    characterNormalization -- unused; the value is read from the request

    The remaining entries are read from self.REQUEST.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization',' ')
    pageinfo['query'] = request.get('query',' ')
    pageinfo['queryType'] = request.get('queryType',' ')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo defaults to None, so it has to be checked before using "in"
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        # never point past the last toc page
        pageinfo['tocPN'] = min(tocPages,toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
790
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on this viewer.

    authgroups is a comma-separated string; it is stored as a list of
    stripped, lower-cased names.  When RESPONSE is given the browser is
    redirected to manage_main.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
800
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    bound = form.__of__(self)
    return bound()
805
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to this container under id.

    When RESPONSE is given the browser is redirected to manage_main.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
813
814	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used by the document viewer."""

    meta_type = "DocumentViewer Template"
818
819
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    bound = form.__of__(self)
    return bound()
824
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id      -- id of the new template
    title   -- optional title for the template
    text    -- unused
    REQUEST -- when given, the browser is redirected to the new
               template's manage_main
    submit  -- unused

    Returns ''.
    """

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to garbage collection
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
844
845
846

Note: See TracBrowser for help on using the repository browser.

Download in other formats: