Context Navigation

source: documentViewer/documentViewer.py @ 231:a88ece9a5e74

Last change on this file since 231:a88ece9a5e74 was 231:a88ece9a5e74, checked in by abukhman, 14 years ago
* empty log message *
File size: 32.8 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt, method, txt2):
    """Log ``txt + txt2`` on the root logger.

    ``method`` is taken as a numeric logging level (the callers in this file
    pass ``logging.INFO``).  It used to be ignored; anything that is not an
    integer still falls back to INFO, the level that was always used before.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only conversion failures are absorbed (wrong type, unparsable string,
    infinite float); the former bare ``except`` also hid unrelated errors
    such as KeyboardInterrupt.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    ``None`` yields an empty string; text inside child elements is not
    included.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Print ``node`` with Ft.Xml.Domlette into a buffer and return its content."""
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
54
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path that contains no '/' at all has no parent and gives ''.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
58
59
60	def getHttpData(url, data=None, num_tries=3, timeout=10):
61	"""returns result from url+data HTTP request"""
62	# we do GET (by appending data to url)
63	if isinstance(data, str) or isinstance(data, unicode):
64	# if data is string then append
65	url = "%s?%s"%(url,data)
66	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
67	# urlencode
68	url = "%s?%s"%(url,urllib.urlencode(data))
69
70	response = None
71	errmsg = None
72	for cnt in range(num_tries):
73	try:
74	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
75	if sys.version_info < (2, 6):
76	# set timeout on socket -- ugly :-(
77	import socket
78	socket.setdefaulttimeout(float(timeout))
79	response = urllib2.urlopen(url)
80	else:
81	response = urllib2.urlopen(url,timeout=float(timeout))
82	# check result?
83	break
84	except urllib2.HTTPError, e:
85	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
86	errmsg = str(e)
87	# stop trying
88	break
89	except urllib2.URLError, e:
90	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
91	errmsg = str(e)
92	# stop trying
93	#break
94
95	if response is not None:
96	data = response.read()
97	response.close()
98	return data
99
100	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
101	#return None
102
103
104
105	##
106	## documentViewer class
107	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # the Folder tabs plus one tab for the viewer configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates loaded from the product directory
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form needs the 'View management screens' permission
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
135
136
137	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
138	"""init document viewer"""
139	self.id=id
140	self.title=title
141	self.thumbcols = thumbcols
142	self.thumbrows = thumbrows
143	# authgroups is list of authorized groups (delimited by ,)
144	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
145	# create template folder so we can always use template.something
146
147	templateFolder = Folder('template')
148	#self['template'] = templateFolder # Zope-2.12 style
149	self._setObject('template',templateFolder) # old style
150	try:
151	import MpdlXmlTextServer
152	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
153	#templateFolder['fulltextclient'] = xmlRpcClient
154	templateFolder._setObject('fulltextclient',textServer)
155	except Exception, e:
156	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
157	try:
158	from Products.zogiLib.zogiLib import zogiLib
159	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
160	#templateFolder['zogilib'] = zogilib
161	templateFolder._setObject('zogilib',zogilib)
162	except Exception, e:
163	logging.error("Unable to create zogiLib for zogilib: "+str(e))
164
165
166	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """pass getTextPage on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """pass getQuery on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """pass getSearch on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getNumPages(self, docinfo):
    """pass getNumPages on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """pass getNumTextPages on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)

def getTranslate(self, **args):
    """pass getTranslate on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """pass getLemma on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """pass getToc on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """pass getTocPage on to template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
202
203
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbs_main_rss template for a document
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute does not exist on instances that were never
    # configured, a plain self.digilibBaseUrl would raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if the document has a text url, images otherwise
        if "textURL" in docinfo or 'textURLPath' in docinfo:
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
234
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    '''
    render the viewer_main template for a document
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param characterNormalization type of text display (reg, norm, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every toc mode except thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: text view if the document has a text url, images otherwise
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    mainTemplate = getattr(self.template, 'viewer_main')
    return mainTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
273
def generateMarks(self,mk):
    # Builds the string of "mk=..." entries for the given mark(s).
    # mk may be None (gives ""), a list of marks or a single mark.
    # NOTE(review): the entries are joined without any separator
    # ("mk=amk=b" for two marks) -- confirm that this is what the
    # template expects.
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    return "".join(["mk=%s"%m for m in marks])
283
284
def findDigilibUrl(self):
    """returns what getDLBaseUrl() of template.zogilib gives as digilib URL"""
    return self.template.zogilib.getDLBaseUrl()

def getDocumentViewerURL(self):
    """returns the absolute_url() of this instance"""
    viewerUrl = self.absolute_url()
    return viewerUrl
293
def getStyle(self, idx, selected, style=""):
    """returns style with 'sel' appended if idx equals selected, else style unchanged"""
    isSelected = (idx == selected)
    if isSelected:
        return style + 'sel'
    return style
301
def getLink(self,param=None,val=None):
    """link to documentviewer with parameter param set to val

    The link is built from the form parameters of the current request.
    val=None removes param from the link.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # a first call with mode filepath is turned into imagepath on the
    # parent directory of the url
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    query = urllib.urlencode(params)
    return self.REQUEST['URL1'] + "?" + query
321
def getLinkAmp(self,param=None,val=None):
    """link to documentviewer with parameter param set to val

    Like getLink, but the query string is put together by hand from the
    quoted values and the mode/url parameters are left as they are.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
337
def getInfo_xml(self,url,mode):
    """returns info about the document as XML (rendered by the info_xml template)"""
    # getattr: the attribute does not exist on instances that were never
    # configured, a plain self.digilibBaseUrl would raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
347
348
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    Access type 'free' is open to everybody.  No access type, or one of
    self.authgroups, requires a user other than "Anonymous User".  Every
    other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.error("documentViewer (accessOK) unknown access type %s"%access)
    return False
368
369
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp for the directory and store the page count.

    path is shortened by cut components first.  docinfo['numPages'] is set to
    the number in the first //dir/size element of the answer, or to 0 if
    there is no such element.  Returns docinfo (a new dict if none is given).
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string had no placeholder, so the value was never logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
399
def getIndexMetaPath(self,url):
    """Return only the path part of url, as "/mpiwg/online/..." path.

    The path starts at the "experimental" or "permanent" component of url.
    An url without such a component gives "".
    """
    # the former pattern r".(experimental\|permanent)/(.)" asked for a
    # literal "|" and so never matched a real path
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return "/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)
408
409
410
def getIndexMetaUrl(self,url):
    """returns url of index.meta document at url

    An url starting with "http://" is returned as it is.  Everything else is
    taken as online path and turned into a request to the Texter servlet
    under self.digilibBaseUrl.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = server+url.replace("/mpiwg/online","")
    # NOTE(review): the indentation of this file was reconstructed; confirm
    # that "/index.meta" is only added to online paths and not to real URLs
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
426
def getDomFromIndexMeta(self, url):
    """fetch the index.meta document for url and return it parsed as dom"""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
439
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    An url starting with "http://" is fetched as it is, everything else is
    taken as online path and read through the Texter servlet.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
458
459
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Sets docinfo['accessType'] to the type attribute of
    access-conditions/access, or to None if there is none.  For the types
    'group' and 'institution' the lower-cased text of the name element is
    used instead of the type.  Returns docinfo (a new dict if none is given).
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # used to fail with IndexError; the bare type is kept, which
                # is no group name
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
482
483
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Sets in docinfo:
    - 'indexMetaPath': result of getIndexMetaPath(path)
    - 'bib': dict of all children of the bib element (if there are any)
    - 'bib_type': type attribute of bib with "-" replaced by " " ("generic" if missing)
    - 'author', 'title', 'year', 'lang': only if there is a mapping for the
      type; fields that cannot be read are left out ('lang' becomes '')
    Returns docinfo (a new dict if none is given).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # index meta has "-" where the mapping has " "
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # still best effort, but no bare except and no silence
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s for %s"%(field,bibtype))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # NOTE(review): the indentation of this file was reconstructed;
        # confirm that lang is only read when a mapping exists
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except IndexError:
            docinfo['lang']=''

    return docinfo
538
539
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets name info from the index.meta file at path or given by dom

    Sets docinfo['name'] to the text of /resource/name, or to "" if the
    element is missing.  Returns docinfo (a new dict if none is given).
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        # used to fail with IndexError although getDocinfoFromTextTool only
        # warns about a missing name
        docinfo['name']=""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])

    return docinfo
561
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Reads resource/name and resource/archive-path and the texttool elements
    image, digiliburlprefix, text, text-url-path and presentation from the
    index.meta document at url (or from dom) and fills docinfo with the
    image and text locations.  Bibliographical, name and access info are
    added by the other get...FromIndexMeta methods.

    Raises IOError if no archive path can be found.
    Returns docinfo (a new dict if none is given).
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
    if archivePath:
        # clean up archive path
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # an empty archive-path is handled like a missing one (indexing the
        # empty string used to fail): try to get archive-path from url
        archivePath = None
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    # no image tag is no error because text mode is standard now
    imageDir = ""
    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
    else:
        # no images; both keys are always set so that later lookups work
        docinfo['imagePath'] = ""
        docinfo['imageURL'] = ""

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no url but a path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # the presentation info overrides the bib info.  Its url is the url
        # of the index.meta with index.meta replaced by the relative path of
        # the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
664
665
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Sets docinfo['author'], ['title'] and ['year'] from the first author,
    title and date element of the presentation document at url.  Elements
    that do not exist leave docinfo as it is.
    Returns docinfo (a new dict if none is given).
    """
    # without this a call without docinfo failed on every field, the error
    # was swallowed and None was returned
    if docinfo is None:
        docinfo = {}
    # the presentation document is always read from url; the dom argument is
    # only kept for the callers and is not used
    dom = self.getPresentationInfoXML(url)
    for (key, expr) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
683
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """path is the path to the images; it assumes that the index.meta file is one level higher.

    Sets docinfo['imagePath'] and docinfo['imageURL'] and adds directory,
    bibliographical and access info through the other helper methods.
    Returns docinfo (a new dict if none is given).
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the Scaler url points to the path shortened by cut components
    shortPath = imagePath
    for x in range(cut):
        shortPath = getParentDir(shortPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+shortPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+shortPath

    # the index.meta file is one level above the images, hence cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    return docinfo
704
705
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo in the session is used again if it was made for the same mode
    and url.  A new one is stored in the session.
    Raises ValueError for a mode other than texttool, imagepath or filepath.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url is a file inside the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
731
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """returns pageinfo with the given parameters

    pageinfo holds the thumbnail grid (rows, cols, groupsize, start, end,
    numgroups), viewMode and tocMode and the query, text and toc parameters
    that are read from the request.  rows and cols default to
    self.thumbrows and self.thumbcols.  The characterNormalization argument
    is not used, the value is read from the request.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # without start use the first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up (// is integer division everywhere)
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
    pageinfo['query'] = self.REQUEST.get('query',' ')
    pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    # docinfo may be None (it is the default), as in the numPages check above
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: limit tocPN to the last toc page.  A page size of 0
        # from the request would divide by zero, so tocPN is left alone then
        if tocPageSize > 0:
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
780
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    authgroups is a comma separated list of group names.  With a RESPONSE
    the browser is sent back to manage_main.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
790
def manage_AddDocumentViewerForm(self):
    """show the form for adding a viewer"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
795
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add a new viewer with the given id; with a RESPONSE go back to manage_main"""
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
803
804	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer"""
    # name of the type in the management interface
    meta_type = "DocumentViewer Template"
808
809
def manage_addDocumentViewerTemplateForm(self):
    """show the form for adding a viewer template"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
814
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    "Add a Page Template with the content of zpt/viewer_main.zpt."
    # The text and submit arguments are not used; the content always comes
    # from the viewer_main.zpt file of the product.
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # close the file in any case (it used to be left open)
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nothing to redirect (this used to fail
        # on REQUEST.RESPONSE)
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
834
835
836

Note: See TracBrowser for help on using the repository browser.

Download in other formats: