Context Navigation

source: documentViewer/documentViewer.py @ 307:ec5e920a61e6

Last change on this file since 307:ec5e920a61e6 was 307:ec5e920a61e6, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.4 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 at INFO level on the root logger.

    method is accepted for call compatibility only; it is not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only conversion failures (TypeError, ValueError, OverflowError) trigger
    the fallback, so KeyboardInterrupt and SystemExit are not swallowed.
    int(default) itself may still raise for an unconvertible default.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text-node children of nodename.

    Returns "" when nodename is None; children of other node types are skipped.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string in the given encoding."""
    out = cStringIO.StringIO()
    # Domlette.Print writes the serialized node into the stream
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def getBrowserType(self):
    """Return the browser type object, caching it in the session.

    The value stored under 'browserType' in the session is reused; otherwise
    browserCheck(self) is called once and its result stored there.
    """
    session = self.REQUEST.SESSION
    if session.has_key('browserType'):
        return session['browserType']
    browserType = browserCheck(self)
    session.set('browserType', browserType)
    logging.debug("documentViewer (BROWSER TYPE) bt %s"%browserType)
    return browserType
63
64
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    parent = path.rpartition('/')[0]
    return parent
68
69
70	def getHttpData(url, data=None, num_tries=3, timeout=10):
71	"""returns result from url+data HTTP request"""
72	# we do GET (by appending data to url)
73	if isinstance(data, str) or isinstance(data, unicode):
74	# if data is string then append
75	url = "%s?%s"%(url,data)
76	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
77	# urlencode
78	url = "%s?%s"%(url,urllib.urlencode(data))
79
80	response = None
81	errmsg = None
82	for cnt in range(num_tries):
83	try:
84	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
85	if sys.version_info < (2, 6):
86	# set timeout on socket -- ugly :-(
87	import socket
88	socket.setdefaulttimeout(float(timeout))
89	response = urllib2.urlopen(url)
90	else:
91	response = urllib2.urlopen(url,timeout=float(timeout))
92	# check result?
93	break
94	except urllib2.HTTPError, e:
95	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
96	errmsg = str(e)
97	# stop trying
98	break
99	except urllib2.URLError, e:
100	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
101	errmsg = str(e)
102	# stop trying
103	#break
104
105	if response is not None:
106	data = response.read()
107	response.close()
108	return data
109
110	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
111	#return None
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """document viewer: a Folder carrying the viewer page templates and configuration"""
    meta_type="Document viewer"

    # security declarations for this class (used by the declareProtected calls)
    security=ClassSecurityInfo()
    # the Folder management tabs plus a 'main config' tab for changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
145
146
147	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
148	"""init document viewer"""
149	self.id=id
150	self.title=title
151	self.thumbcols = thumbcols
152	self.thumbrows = thumbrows
153	# authgroups is list of authorized groups (delimited by ,)
154	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
155	# create template folder so we can always use template.something
156
157	templateFolder = Folder('template')
158	#self['template'] = templateFolder # Zope-2.12 style
159	self._setObject('template',templateFolder) # old style
160	try:
161	import MpdlXmlTextServer
162	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
163	#templateFolder['fulltextclient'] = xmlRpcClient
164	templateFolder._setObject('fulltextclient',textServer)
165	except Exception, e:
166	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
167	try:
168	from Products.zogiLib.zogiLib import zogiLib
169	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
170	#templateFolder['zogilib'] = zogilib
171	templateFolder._setObject('zogilib',zogilib)
172	except Exception, e:
173	logging.error("Unable to create zogiLib for zogilib: "+str(e))
174
175
176	# proxy text server methods to fulltextclient
177	def getTextPage(self, **args):
178	"""get page"""
179	return self.template.fulltextclient.getTextPage(**args)
180
181	def getQuery(self, **args):
182	"""get query"""
183	return self.template.fulltextclient.getQuery(**args)
184
185	def getSearch(self, **args):
186	"""get search"""
187	return self.template.fulltextclient.getSearch(**args)
188
189	def getGisPlaces(self, **args):
190	"""get gis places"""
191	return self.template.fulltextclient.getGisPlaces(**args)
192
193	def getAllGisPlaces(self, **args):
194	"""get all gis places"""
195	return self.template.fulltextclient.getGisPlaces(**args)
196
197	def getNumPages(self, docinfo):
198	"""get numpages"""
199	return self.template.fulltextclient.getNumPages(docinfo)
200
201	def getNumTextPages(self, docinfo):
202	"""get numpages text"""
203	return self.template.fulltextclient.getNumTextPages(docinfo)
204
205	def getTranslate(self, **args):
206	"""get translate"""
207	return self.template.fulltextclient.getTranslate(**args)
208
209	def getLemma(self, **args):
210	"""get lemma"""
211	return self.template.fulltextclient.getLemma(**args)
212
213	def getToc(self, **args):
214	"""get toc"""
215	return self.template.fulltextclient.getToc(**args)
216
217	def getTocPage(self, **args):
218	"""get tocpage"""
219	return self.template.fulltextclient.getTocPage(**args)
220
221
222	security.declareProtected('View','thumbs_rss')
223	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
224	'''
225	view it
226	@param mode: defines how to access the document behind url
227	@param url: url which contains display information
228	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
229
230	'''
231	logging.debug("HHHHHHHHHHHHHH:load the rss")
232	logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
233
234	if not hasattr(self, 'template'):
235	# create template folder if it doesn't exist
236	self.manage_addFolder('template')
237
238	if not self.digilibBaseUrl:
239	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
240
241	docinfo = self.getDocinfo(mode=mode,url=url)
242	pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
243	pt = getattr(self.template, 'thumbs_main_rss')
244
245	if viewMode=="auto": # automodus gewaehlt
246	if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
247	viewMode="text"
248	else:
249	viewMode="images"
250
251	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
252
253	security.declareProtected('View','index_html')
254	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
255	'''
256	view it
257	@param mode: defines how to access the document behind url
258	@param url: url which contains display information
259	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
260	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
261	@param characterNormalization type of text display (reg, norm, none)
262	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
263	'''
264
265	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
266
267	if not hasattr(self, 'template'):
268	# this won't work
269	logging.error("template folder missing!")
270	return "ERROR: template folder missing!"
271
272	if not getattr(self, 'digilibBaseUrl', None):
273	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
274
275	docinfo = self.getDocinfo(mode=mode,url=url)
276
277	if tocMode != "thumbs":
278	# get table of contents
279	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
280
281	if viewMode=="auto": # automodus gewaehlt
282	if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
283	viewMode="text_dict"
284	else:
285	viewMode="images"
286
287	pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
288
289	pt = getattr(self.template, 'viewer_main')
290	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
291
292	def generateMarks(self,mk):
293	ret=""
294	if mk is None:
295	return ""
296	if not isinstance(mk, list):
297	mk=[mk]
298	for m in mk:
299	ret+="mk=%s"%m
300	return ret
301
302
303	def findDigilibUrl(self):
304	"""try to get the digilib URL from zogilib"""
305	url = self.template.zogilib.getDLBaseUrl()
306	return url
307
308	def getDocumentViewerURL(self):
309	"""returns the URL of this instance"""
310	return self.absolute_url()
311
312	def getStyle(self, idx, selected, style=""):
313	"""returns a string with the given style and append 'sel' if path == selected."""
314	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
315	if idx == selected:
316	return style + 'sel'
317	else:
318	return style
319
320	def getLink(self,param=None,val=None):
321	"""link to documentviewer with parameter param set to val"""
322	params=self.REQUEST.form.copy()
323	if param is not None:
324	if val is None:
325	if params.has_key(param):
326	del params[param]
327	else:
328	params[param] = str(val)
329
330	if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
331	params["mode"] = "imagepath"
332	params["url"] = getParentDir(params["url"])
333
334	# quote values and assemble into query string
335	#ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
336	ps = urllib.urlencode(params)
337	url=self.REQUEST['URL1']+"?"+ps
338	return url
339
340	def getLinkAmp(self,param=None,val=None):
341	"""link to documentviewer with parameter param set to val"""
342	params=self.REQUEST.form.copy()
343	if param is not None:
344	if val is None:
345	if params.has_key(param):
346	del params[param]
347	else:
348	params[param] = str(val)
349
350	# quote values and assemble into query string
351	logging.debug("XYXXXXX: %s"%repr(params.items()))
352	ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
353	url=self.REQUEST['URL1']+"?"+ps
354	return url
355
356	def getInfo_xml(self,url,mode):
357	"""returns info about the document as XML"""
358
359	if not self.digilibBaseUrl:
360	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
361
362	docinfo = self.getDocinfo(mode=mode,url=url)
363	pt = getattr(self.template, 'info_xml')
364	return pt(docinfo=docinfo)
365
366
367	def isAccessible(self, docinfo):
368	"""returns if access to the resource is granted"""
369	access = docinfo.get('accessType', None)
370	logging.debug("documentViewer (accessOK) access type %s"%access)
371	if access is not None and access == 'free':
372	logging.debug("documentViewer (accessOK) access is free")
373	return True
374	elif access is None or access in self.authgroups:
375	# only local access -- only logged in users
376	user = getSecurityManager().getUser()
377	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
378	if user is not None:
379	#print "user: ", user
380	return (user.getUserName() != "Anonymous User")
381	else:
382	return False
383
384	logging.error("documentViewer (accessOK) unknown access type %s"%access)
385	return False
386
387
388	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
389	"""gibt param von dlInfo aus"""
390	if docinfo is None:
391	docinfo = {}
392
393	for x in range(cut):
394
395	path=getParentDir(path)
396
397	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
398
399	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
400
401	txt = getHttpData(infoUrl)
402	if txt is None:
403	raise IOError("Unable to get dir-info from %s"%(infoUrl))
404
405	dom = Parse(txt)
406	sizes=dom.xpath("//dir/size")
407	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
408
409	if sizes:
410	docinfo['numPages'] = int(getTextFromNode(sizes[0]))
411	else:
412	docinfo['numPages'] = 0
413
414	# TODO: produce and keep list of image names and numbers
415
416	return docinfo
417
418	def getIndexMetaPath(self,url):
419	"""gib nur den Pfad zurueck"""
420	regexp = re.compile(r".(experimental\|permanent)/(.)")
421	regpath = regexp.match(url)
422	if (regpath==None):
423	return ""
424	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
425	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
426
427
428
429	def getIndexMetaUrl(self,url):
430	"""returns utr of index.meta document at url"""
431
432	metaUrl = None
433	if url.startswith("http://"):
434	# real URL
435	metaUrl = url
436	else:
437	# online path
438	server=self.digilibBaseUrl+"/servlet/Texter?fn="
439	metaUrl=server+url.replace("/mpiwg/online","")
440	if not metaUrl.endswith("index.meta"):
441	metaUrl += "/index.meta"
442
443	return metaUrl
444
445	def getDomFromIndexMeta(self, url):
446	"""get dom from index meta"""
447	dom = None
448	metaUrl = self.getIndexMetaUrl(url)
449
450	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
451	txt=getHttpData(metaUrl)
452	if txt is None:
453	raise IOError("Unable to read index meta from %s"%(url))
454
455	dom = Parse(txt)
456	return dom
457
458	def getPresentationInfoXML(self, url):
459	"""returns dom of info.xml document at url"""
460	dom = None
461	metaUrl = None
462	if url.startswith("http://"):
463	# real URL
464	metaUrl = url
465	else:
466	# online path
467	server=self.digilibBaseUrl+"/servlet/Texter?fn="
468	metaUrl=server+url.replace("/mpiwg/online","")
469
470	txt=getHttpData(metaUrl)
471	if txt is None:
472	raise IOError("Unable to read infoXMLfrom %s"%(url))
473
474	dom = Parse(txt)
475	return dom
476
477
478	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
479	"""gets authorization info from the index.meta file at path or given by dom"""
480	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
481
482	access = None
483
484	if docinfo is None:
485	docinfo = {}
486
487	if dom is None:
488	for x in range(cut):
489	path=getParentDir(path)
490	dom = self.getDomFromIndexMeta(path)
491
492	acctype = dom.xpath("//access-conditions/access/@type")
493	if acctype and (len(acctype)>0):
494	access=acctype[0].value
495	if access in ['group', 'institution']:
496	access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
497
498	docinfo['accessType'] = access
499	return docinfo
500
501
    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Fills docinfo with 'indexMetaPath', 'bib' (all raw //bib/* fields by
        local name), 'bib_type' and -- when a mapping with a non-empty author
        field exists for the type -- 'author', 'title', 'year' and 'lang'.
        Returns docinfo (a new dict when docinfo is None).
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            # load the index.meta from path shortened by cut components
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path);

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib and len(bib)>0:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype and (len(bibtype)>0):
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # types in index.meta use "-" where the mapping uses " "
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # each field is best effort: a missing element or mapping entry is skipped
            try:
                docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
            except: pass
            try:
                docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
            except: pass
            try:
                docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
            except: pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            # NOTE(review): lang is only read when a mapping exists -- confirm this is intended
            try:
                docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
            except:
                docinfo['lang']=''

        return docinfo
556
557
558	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
559	"""gets name info from the index.meta file at path or given by dom"""
560	if docinfo is None:
561	docinfo = {}
562
563	if dom is None:
564	for x in range(cut):
565	path=getParentDir(path)
566	dom = self.getDomFromIndexMeta(path)
567
568	#docinfo['indexMetaPath']=self.getIndexMetaPath(path);
569
570	#result= dom.xpath("//result/resultPage")
571	#docinfo['numPages']=int(getTextFromNode(result[0]))
572
573	#result =dom.xpath("//name")
574	docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
575	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
576
577	#logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
578	return docinfo
579
580	def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
581	"""parse texttool tag in index meta"""
582	logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
583	if docinfo is None:
584	docinfo = {}
585	if docinfo.get('lang', None) is None:
586	docinfo['lang'] = '' # default keine Sprache gesetzt
587	if dom is None:
588	dom = self.getDomFromIndexMeta(url)
589
590	archivePath = None
591	archiveName = None
592
593	archiveNames = dom.xpath("//resource/name")
594	if archiveNames and (len(archiveNames) > 0):
595	archiveName = getTextFromNode(archiveNames[0])
596	else:
597	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
598
599	archivePaths = dom.xpath("//resource/archive-path")
600	if archivePaths and (len(archivePaths) > 0):
601	archivePath = getTextFromNode(archivePaths[0])
602	# clean up archive path
603	if archivePath[0] != '/':
604	archivePath = '/' + archivePath
605	if archiveName and (not archivePath.endswith(archiveName)):
606	archivePath += "/" + archiveName
607	else:
608	# try to get archive-path from url
609	logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
610	if (not url.startswith('http')):
611	archivePath = url.replace('index.meta', '')
612
613	if archivePath is None:
614	# we balk without archive-path
615	raise IOError("Missing archive-path (for text-tool) in %s" % (url))
616
617	imageDirs = dom.xpath("//texttool/image")
618	if imageDirs and (len(imageDirs) > 0):
619	imageDir = getTextFromNode(imageDirs[0])
620
621	else:
622	# we balk with no image tag / not necessary anymore because textmode is now standard
623	#raise IOError("No text-tool info in %s"%(url))
624	imageDir = ""
625	#xquery="//pb"
626	docinfo['imagePath'] = "" # keine Bilder
627	docinfo['imageURL'] = ""
628
629	if imageDir and archivePath:
630	#print "image: ", imageDir, " archivepath: ", archivePath
631	imageDir = os.path.join(archivePath, imageDir)
632	imageDir = imageDir.replace("/mpiwg/online", '')
633	docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
634	docinfo['imagePath'] = imageDir
635
636	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
637
638	viewerUrls = dom.xpath("//texttool/digiliburlprefix")
639	if viewerUrls and (len(viewerUrls) > 0):
640	viewerUrl = getTextFromNode(viewerUrls[0])
641	docinfo['viewerURL'] = viewerUrl
642
643	# old style text URL
644	textUrls = dom.xpath("//texttool/text")
645	if textUrls and (len(textUrls) > 0):
646	textUrl = getTextFromNode(textUrls[0])
647	if urlparse.urlparse(textUrl)[0] == "": #keine url
648	textUrl = os.path.join(archivePath, textUrl)
649	# fix URLs starting with /mpiwg/online
650	if textUrl.startswith("/mpiwg/online"):
651	textUrl = textUrl.replace("/mpiwg/online", '', 1)
652
653	docinfo['textURL'] = textUrl
654
655	# new style text-url-path
656	textUrls = dom.xpath("//texttool/text-url-path")
657	if textUrls and (len(textUrls) > 0):
658	textUrl = getTextFromNode(textUrls[0])
659	docinfo['textURLPath'] = textUrl
660	if not docinfo['imagePath']:
661	# text-only, no page images
662	docinfo = self.getNumTextPages(docinfo)
663
664	presentationUrls = dom.xpath("//texttool/presentation")
665	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
666	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
667
668	if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen
669	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
670	# durch den relativen Pfad auf die presentation infos
671	presentationPath = getTextFromNode(presentationUrls[0])
672	if url.endswith("index.meta"):
673	presentationUrl = url.replace('index.meta', presentationPath)
674	else:
675	presentationUrl = url + "/" + presentationPath
676
677	docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
678
679	docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
680
681	return docinfo
682
683
684	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
685	"""gets the bibliographical information from the preseantion entry in texttools
686	"""
687	dom=self.getPresentationInfoXML(url)
688	try:
689	docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
690	except:
691	pass
692	try:
693	docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
694	except:
695	pass
696	try:
697	docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
698	except:
699	pass
700	return docinfo
701
702	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
703	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
704	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
705	if docinfo is None:
706	docinfo = {}
707	path=path.replace("/mpiwg/online","")
708	docinfo['imagePath'] = path
709	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
710
711	pathorig=path
712	for x in range(cut):
713	path=getParentDir(path)
714	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
715	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
716	docinfo['imageURL'] = imageUrl
717
718	#path ist the path to the images it assumes that the index.meta file is one level higher.
719	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
720	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
721	return docinfo
722
723
724	def getDocinfo(self, mode, url):
725	"""returns docinfo depending on mode"""
726	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
727	# look for cached docinfo in session
728	if self.REQUEST.SESSION.has_key('docinfo'):
729	docinfo = self.REQUEST.SESSION['docinfo']
730	# check if its still current
731	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
732	logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
733	return docinfo
734	# new docinfo
735	docinfo = {'mode': mode, 'url': url}
736	if mode=="texttool": #index.meta with texttool information
737	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
738	elif mode=="imagepath":
739	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
740	elif mode=="filepath":
741	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
742	else:
743	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
744	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
745
746	logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
747	self.REQUEST.SESSION['docinfo'] = docinfo
748	return docinfo
749
750	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
751	"""returns pageinfo with the given parameters"""
752	pageinfo = {}
753	current = getInt(current)
754	pageinfo['current'] = current
755	rows = int(rows or self.thumbrows)
756	pageinfo['rows'] = rows
757	cols = int(cols or self.thumbcols)
758	pageinfo['cols'] = cols
759	grpsize = cols * rows
760	pageinfo['groupsize'] = grpsize
761	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
762	# int(current / grpsize) * grpsize +1))
763	pageinfo['start'] = start
764	pageinfo['end'] = start + grpsize
765	if (docinfo is not None) and ('numPages' in docinfo):
766	np = int(docinfo['numPages'])
767	pageinfo['end'] = min(pageinfo['end'], np)
768	pageinfo['numgroups'] = int(np / grpsize)
769	if np % grpsize > 0:
770	pageinfo['numgroups'] += 1
771	pageinfo['viewMode'] = viewMode
772	pageinfo['tocMode'] = tocMode
773	#pageinfo['characterNormalization'] =characterNormalization
774	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
775	pageinfo['query'] = self.REQUEST.get('query',' ')
776	pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
777	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
778	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
779	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
780	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
781	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
782	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
783	toc = int (pageinfo['tocPN'])
784	pageinfo['textPages'] =int (toc)
785
786	if 'tocSize_%s'%tocMode in docinfo:
787	tocSize = int(docinfo['tocSize_%s'%tocMode])
788	tocPageSize = int(pageinfo['tocPageSize'])
789	# cached toc
790	if tocSize%tocPageSize>0:
791	tocPages=tocSize/tocPageSize+1
792	else:
793	tocPages=tocSize/tocPageSize
794	pageinfo['tocPN'] = min (tocPages,toc)
795	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
796	pageinfo['sn'] =self.REQUEST.get('sn','')
797	return pageinfo
798
799	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
800	"""init document viewer"""
801	self.title=title
802	self.digilibBaseUrl = digilibBaseUrl
803	self.thumbrows = thumbrows
804	self.thumbcols = thumbcols
805	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
806	if RESPONSE is not None:
807	RESPONSE.redirect('manage_main')
808
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before rendering
    return formTemplate.__of__(self)()
813
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id inside self.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
821
822	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer: a ZopePageTemplate with its own meta_type"""
    meta_type="DocumentViewer Template"
826
827
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    formTemplate = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # wrap the template in the context of self before rendering
    return formTemplate.__of__(self)()
832
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    @param id: id of the new template object
    @param title: optional title for the template
    @param text: unused
    @param REQUEST: current request; when given, the response is redirected
        to the manage_main page of the new template
    @param submit: unused

    Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        # close the file explicitly instead of leaving it to the garbage collector
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called without a request: there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
852
853
854

Note: See TracBrowser for help on using the repository browser.

Download in other formats: