Context Navigation

source: documentViewer/documentViewer.py @ 327:1c4e63d22283

Last change on this file since 327:1c4e63d22283 was 327:1c4e63d22283, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.6 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt, method, txt2):
    """Log ``txt + txt2`` through the root logger.

    @param txt: first part of the message
    @param method: numeric logging level (e.g. ``logging.INFO``); anything
        that is not a positive integer falls back to ``logging.INFO``
    @param txt2: second part of the message, appended to txt
    """
    level = logging.INFO
    # honour the level the caller passed instead of silently ignoring it
    if isinstance(method, int) and method > 0:
        level = method
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only conversion failures (TypeError, ValueError, OverflowError) trigger
    the fallback; other exceptions such as KeyboardInterrupt propagate.

    @param number: value to convert
    @param default: value used when number cannot be converted (default 0)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns the empty string when nodename is None. Only direct children
    whose nodeType equals TEXT_NODE contribute.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize node to an XML string using Ft.Xml.Domlette.Print.

    @param node: node to print
    @param encoding: encoding handed to Domlette.Print (default 'utf-8')
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def getBrowserType(self):
    """Return the browser type object, cached in the session.

    If the session has no 'browserType' entry yet, the result of
    browserCheck(self) is stored there and returned.
    """
    session = self.REQUEST.SESSION
    if session.has_key('browserType'):
        return session['browserType']
    detected = browserCheck(self)
    session.set('browserType', detected)
    logging.debug("documentViewer (BROWSER TYPE) bt %s"%detected)
    return detected
63
64
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    components = path.split('/')
    return '/'.join(components[:-1])
68
69
70	def getHttpData(url, data=None, num_tries=3, timeout=10):
71	"""returns result from url+data HTTP request"""
72	# we do GET (by appending data to url)
73	if isinstance(data, str) or isinstance(data, unicode):
74	# if data is string then append
75	url = "%s?%s"%(url,data)
76	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
77	# urlencode
78	url = "%s?%s"%(url,urllib.urlencode(data))
79
80	response = None
81	errmsg = None
82	for cnt in range(num_tries):
83	try:
84	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
85	if sys.version_info < (2, 6):
86	# set timeout on socket -- ugly :-(
87	import socket
88	socket.setdefaulttimeout(float(timeout))
89	response = urllib2.urlopen(url)
90	else:
91	response = urllib2.urlopen(url,timeout=float(timeout))
92	# check result?
93	break
94	except urllib2.HTTPError, e:
95	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
96	errmsg = str(e)
97	# stop trying
98	break
99	except urllib2.URLError, e:
100	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
101	errmsg = str(e)
102	# stop trying
103	#break
104
105	if response is not None:
106	data = response.read()
107	response.close()
108	return data
109
110	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
111	#return None
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """Document viewer: a Folder subclass bundling the viewer page templates."""
    # NOTE(review): indentation reconstructed from a flattened listing
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds a 'main config' tab that opens changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
145
146
147	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
148	"""init document viewer"""
149	self.id=id
150	self.title=title
151	self.thumbcols = thumbcols
152	self.thumbrows = thumbrows
153	# authgroups is list of authorized groups (delimited by ,)
154	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
155	# create template folder so we can always use template.something
156
157	templateFolder = Folder('template')
158	#self['template'] = templateFolder # Zope-2.12 style
159	self._setObject('template',templateFolder) # old style
160	try:
161	import MpdlXmlTextServer
162	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
163	#templateFolder['fulltextclient'] = xmlRpcClient
164	templateFolder._setObject('fulltextclient',textServer)
165	except Exception, e:
166	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
167	try:
168	from Products.zogiLib.zogiLib import zogiLib
169	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
170	#templateFolder['zogilib'] = zogilib
171	templateFolder._setObject('zogilib',zogilib)
172	except Exception, e:
173	logging.error("Unable to create zogiLib for zogilib: "+str(e))
174
175
176	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getTextPage."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
180
def getQuery(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getQuery."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
184
def getSearch(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getSearch."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
188
def getGisPlaces(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getGisPlaces."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
192
def getAllGisPlaces(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getAllGisPlaces."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
196
def getOrigPages(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getOrigPages."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
200
def getNumPages(self, docinfo):
    """Pass docinfo to template.fulltextclient.getNumPages and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getNumPages(docinfo)
204
def getNumTextPages(self, docinfo):
    """Pass docinfo to template.fulltextclient.getNumTextPages and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getNumTextPages(docinfo)
208
def getTranslate(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getTranslate."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
212
def getLemma(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getLemma."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
216
def getToc(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getToc."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
220
def getTocPage(self, **args):
    """Forward all keyword arguments to template.fulltextclient.getTocPage."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
224
225
226	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbnail overview with the thumbs_main_rss template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard: the attribute may not exist on the instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # a text URL in docinfo means there is text to show
        if ("textURL" in docinfo) or ('textURLPath' in docinfo):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
256
257	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    '''
    render the main viewer page with the viewer_main template
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks (passed through generateMarks)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    @param characterNormalization type of text display (reg, norm, none)
    '''
    # query, querySearch and characterNormalization are not used in this body
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # every other toc mode needs the table of contents in docinfo
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # a text URL in docinfo selects the text view, otherwise images
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    mainTemplate = getattr(self.template, 'viewer_main')
    return mainTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
295
def generateMarks(self,mk):
    """Return the marks as a string of 'mk=<value>' pieces.

    @param mk: None, a single mark or a list of marks
    @return: "" for None, otherwise all 'mk=<value>' pieces concatenated
    """
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    # NOTE(review): the pieces are joined without any separator -- confirm
    # that the consuming template expects exactly this
    return "".join(["mk=%s"%m for m in marks])
305
306
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
311
def getDocumentViewerURL(self):
    """Return the result of absolute_url() for this instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
315
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if not (idx == selected):
        return style
    return style + 'sel'
323
def getLink(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    Works on a copy of REQUEST.form. A val of None removes param from the
    query. The query string is built with urllib.urlencode and appended to
    REQUEST['URL1'].
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # if the first call used mode 'filepath', switch to 'imagepath' and
    # point url at the parent directory
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    query = urllib.urlencode(params)
    return self.REQUEST['URL1']+"?"+query
343
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    Works on a copy of REQUEST.form. A val of None removes param from the
    query. Each value is passed through urllib.quote; keys are used as is.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    logging.debug("XYXXXXX: %s"%repr(params.items()))
    # quote values and assemble into query string
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
359
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of the info_xml template rendered with the docinfo
    """
    # getattr guard: the attribute may not exist on the instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
369
370
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type, or one
    listed in self.authgroups, is granted to every user whose name is not
    "Anonymous User". Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
390
391
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages of a digilib directory in docinfo.

    Fetches dirInfo-xml.jsp from self.digilibBaseUrl for path and reads the
    first //dir/size element.

    @param path: directory path
    @param docinfo: dict to update (a new dict is created if None)
    @param cut: number of trailing path components to drop before the query
    @return: docinfo with 'numPages' set (0 if no size element was found)
    @raise IOError: if the directory information cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string now has a placeholder, so sizes really is logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
421
def getIndexMetaPath(self,url):
    """Return only the path below /mpiwg/online for url.

    @param url: URL or path containing 'experimental/' or 'permanent/'
    @return: "/mpiwg/online/<experimental|permanent>/<rest>", or "" when
        url contains neither
    """
    # unescaped alternation with greedy wildcards: the previous pattern
    # escaped the bar and matched single characters only, so it never
    # matched a real path
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
430
431
432
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned as is. Any other value is
    treated as an online path: "/mpiwg/online" is removed, the result is
    appended to the Texter servlet URL below self.digilibBaseUrl, and
    "/index.meta" is added unless already present.
    """
    # NOTE(review): nesting reconstructed from a flattened listing; the
    # index.meta suffix is assumed to apply to online paths only
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    texter = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = texter+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
448
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as DOM.

    @raise IOError: if no data could be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    content = getHttpData(metaUrl)
    if content is None:
        raise IOError("Unable to read index meta from %s"%(url))
    return Parse(content)
461
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as DOM.

    A url starting with "http://" is fetched directly. Any other value is
    treated as an online path and fetched through the Texter servlet below
    self.digilibBaseUrl with "/mpiwg/online" removed.

    @raise IOError: if no data could be read
    """
    if url.startswith("http://"):
        # real URL
        infoUrl = url
    else:
        # online path
        infoUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")

    content = getHttpData(infoUrl)
    if content is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))
    return Parse(content)
480
481
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from index.meta as docinfo['accessType'].

    @param path: path of the index.meta (only used when dom is None)
    @param docinfo: dict to update (a new dict is created if None)
    @param dom: already parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: docinfo; 'accessType' is None when no access type is present
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs:
        access = typeAttrs[0].value
        if access in ['group', 'institution']:
            # for these types the lower-cased access name is stored instead
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            access = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = access
    return docinfo
504
505
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Keys written to docinfo: 'indexMetaPath', 'bib_type', 'bib' (when
    //bib has child elements) and, when a mapping for the bib type exists,
    'author', 'title', 'year' (each only if it can be read) and 'lang'.

    @param path: path of the index.meta
    @param docinfo: dict to update (a new dict is created if None)
    @param dom: already parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: the updated docinfo
    """
    # NOTE(review): indentation reconstructed from a flattened listing;
    # confirm the nesting of the 'lang' lookup against the repository
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # index meta types use "-" where the mapping uses " " (original note: not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is best effort: any failure leaves the key unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # language falls back to the empty string when it cannot be read
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            docinfo['lang']=''

    return docinfo
560
561
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from index.meta as docinfo['name'].

    @param path: path of the index.meta (only used when dom is None)
    @param docinfo: dict to update (a new dict is created if None)
    @param dom: already parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: the updated docinfo
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNode = dom.xpath("/resource/name")[0]
    docinfo['name'] = getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
583
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Reads resource/name, resource/archive-path and the texttool elements
    image, digiliburlprefix, text, text-url-path and presentation, then
    adds bibliographic, name and access information.

    @param url: URL or path of the index.meta
    @param dom: already parsed index.meta; fetched from url if None
    @param docinfo: dict to update (a new dict is created if None)
    @return: the updated docinfo
    @raise IOError: if no archive path can be determined
    """
    # NOTE(review): indentation reconstructed from a flattened listing
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # a missing image tag is no longer an error because text mode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme, so not a URL: resolve against the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        if not docinfo['imagePath']:
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls and (len(presentationUrls) > 0): # overwrite these with the presentation information
        # the presentation url is the metadata url with index.meta replaced
        # by the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
686
687
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Reads //author, //title and //date from the presentation info document
    at url and stores them as 'author', 'title' and 'year'. Each field is
    best effort: a field that cannot be read leaves docinfo unchanged.

    @param url: URL or path of the presentation info document
    @param docinfo: dict to update (a new dict is created if None)
    @param dom: ignored; the presentation document is always fetched from url
    @return: the updated docinfo
    """
    if docinfo is None:
        # without this, every assignment below failed silently and None was returned
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for field, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[field]=getTextFromNode(dom.xpath(expr)[0])
        except Exception:
            # deliberate best effort: keep whatever docinfo already holds
            logging.debug("documentViewer (getbibinfofromtexttoolpresentation) no %s in %s"%(field,url))
    return docinfo
705
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document that is given by its image path.

    The index.meta file is looked up one directory level above the image
    directory (hence cut+1 for the metadata calls).

    @param path: path to the images; "/mpiwg/online" is removed
    @param docinfo: dict to update (a new dict is created if None)
    @param cut: number of trailing path components that are not part of the
        image directory
    @return: the updated docinfo
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the scaler URL points at the directory, i.e. the path shortened by cut
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
726
727
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match. Otherwise a new one is built and stored in the session.

    @param mode: 'texttool', 'imagepath' or 'filepath'
    @param url: url which contains display information
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
753
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail group; derived from current
        and the group size when it cannot be converted to int
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo dict; 'numPages' and 'tocSize_<tocMode>' are
        used when present (may be None)
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']
    @param characterNormalization: unused here; the value is read from REQUEST
    @return: dict with paging information and the request's query settings
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start is the first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        pageinfo['numgroups'] = int(np / grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
    pageinfo['query'] = self.REQUEST.get('query',' ')
    pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    # docinfo defaults to None, so guard before looking up the cached toc size
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up (explicit floor division)
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        # never point beyond the last toc page
        pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
805
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Update the viewer configuration.

    @param title: new title
    @param digilibBaseUrl: new digilib base URL
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma separated list of authorized groups
    @param RESPONSE: if given, redirected to 'manage_main' afterwards
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # group names are stored stripped and lower-cased
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
815
def manage_AddDocumentViewerForm(self):
    """Render the zpt/addDocumentViewer form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
820
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to self under id.

    @param RESPONSE: if given, redirected to 'manage_main' afterwards
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
828
829	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta_type, used as template for the document viewer"""
    meta_type="DocumentViewer Template"
833
834
def manage_addDocumentViewerTemplateForm(self):
    """Render the zpt/addDocumentViewerTemplate form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
839
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the template
    @param text: not used by this body
    @param REQUEST: if given, redirected to the new template's manage_main
    @param submit: not used by this body
    @return: empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to garbage collection
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called without a request: there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
859
860
861

Note: See TracBrowser for help on using the repository browser.

Download in other formats: