Context Navigation

source: documentViewer/documentViewer.py @ 224:58c5338749c7

Last change on this file since 224:58c5338749c7 was 224:58c5338749c7, checked in by abukhman, 14 years ago
* empty log message *
File size: 32.8 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml import EMPTY_NAMESPACE, Parse
10	import Ft.Xml.Domlette
11	import os.path
12	import sys
13	import urllib
14	import urllib2
15	import logging
16	import math
17	import urlparse
18	import cStringIO
19	import re
20
def logger(txt, method, txt2):
    """Log txt directly followed by txt2 on the root logger at INFO level.

    @param txt: first part of the message
    @param method: accepted for compatibility with existing callers (they pass
        a logging level such as logging.INFO); it is not used
    @param txt2: second part of the message, appended without a separator
    """
    message = txt + txt2
    logging.info(message)
24
25
def getInt(number, default=0):
    """Return number converted to int, falling back to int(default).

    @param number: value to convert (string, number, None, ...)
    @param default: value used when number cannot be converted; it is passed
        through int() as well, so it may be a float or a numeric string
    @return: an int
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None or other non-numeric objects, ValueError: malformed
        # strings, OverflowError: infinite floats. Anything else is a real bug
        # and is no longer swallowed.
        return int(default)
32
def getTextFromNode(nodename):
    """Return the concatenated data of the text nodes directly below a node.

    Only direct children whose nodeType equals TEXT_NODE contribute; other
    children are skipped. Returns "" when nodename is None.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
43
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory buffer
    using the given encoding, and the buffer content is returned.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xml = stream.getvalue()
    stream.close()
    return xml
51
52
def getParentDir(path):
    """Return path without its last '/'-separated component.

    Everything before the last '/' is returned; a path without any '/'
    yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
56
57
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET request and return the response body.

    @param url: URL to request
    @param data: optional query; a string is appended to url after a '?' as
        is, a dict, list or tuple is urlencoded first; other values are ignored
    @param num_tries: maximum number of attempts; only URLError is retried,
        an HTTPError ends the loop immediately
    @param timeout: timeout in seconds for each attempt
    @return: the body of the response as returned by read()
    @raise IOError: if no attempt produced a response
    """
    # we do GET (by appending data to url)
    if isinstance(data, basestring):
        url = "%s?%s" % (url, data)
    elif isinstance(data, (dict, list, tuple)):
        url = "%s?%s" % (url, urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s" % (cnt + 1, timeout, url))
            if sys.version_info < (2, 6):
                # urlopen() has no timeout argument before Python 2.6, so the
                # module-wide socket default is changed instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url, timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e" keeps
            # this parseable on every Python version the block supports
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s" % (e.code, e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s" % (e.reason, e))
            errmsg = str(e)
            # no break: this kind of failure is retried

    if response is not None:
        # close the response even when read() fails
        try:
            return response.read()
        finally:
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s" % (url, errmsg))
100
101
102
103	##
104	## documentViewer class
105	##
106	class documentViewer(Folder):
107	"""document viewer"""
108	meta_type="Document viewer"
109
110	security=ClassSecurityInfo()
111	manage_options=Folder.manage_options+(
112	{'label':'main config','action':'changeDocumentViewerForm'},
113	)
114
115	# templates and forms
116	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
117	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
118	toc_text = PageTemplateFile('zpt/toc_text', globals())
119	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
120	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
121	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
122	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
123	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
124	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
125	head_main = PageTemplateFile('zpt/head_main', globals())
126	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
127	info_xml = PageTemplateFile('zpt/info_xml', globals())
128
129
130	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
131	security.declareProtected('View management screens','changeDocumentViewerForm')
132	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
133
134
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer and its 'template' sub-folder.

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the object
    @param digilibBaseUrl: base URL of the digilib server; may be None, the
        view methods then determine it on first use
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma separated list of authorized group names
    """
    self.id = id
    self.title = title
    # Store the parameter: it used to be dropped, which left the attribute
    # undefined for methods that read self.digilibBaseUrl directly.
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self._setObject('template', templateFolder) # old style (pre Zope-2.12)

    # Both helper objects are optional: a failure is logged and the viewer
    # is created without them.
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
162
163
164	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy for getTextPage() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
168
def getQuery(self, **args):
    """Proxy for getQuery() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
172
def getSearch(self, **args):
    """Proxy for getSearch() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
176
def getNumPages(self, docinfo):
    """Proxy for getNumPages() of the text server at template.fulltextclient.

    docinfo is handed over unchanged; the text server's result is returned
    as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getNumPages(docinfo)
180
def getNumTextPages(self, docinfo):
    """Proxy for getNumTextPages() of the text server at template.fulltextclient.

    docinfo is handed over unchanged; the text server's result is returned
    as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getNumTextPages(docinfo)
184
def getTranslate(self, **args):
    """Proxy for getTranslate() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
188
def getLemma(self, **args):
    """Proxy for getLemma() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
192
def getToc(self, **args):
    """Proxy for getToc() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
196
def getTocPage(self, **args):
    """Proxy for getTocPage() of the text server at template.fulltextclient.

    All keyword arguments are passed on unchanged; the text server's result
    is returned as is.
    """
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
200
201
202	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbs_main_rss template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto' (the default); 'auto' becomes
        'text' when docinfo has a textURL or textURLPath entry and 'images'
        otherwise
    @param start: first page of the thumbnail group, passed to getPageinfo
    @param pn: current page number, passed to getPageinfo
    @return: the result of calling the template with docinfo, pageinfo and
        viewMode
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr() with a default, as index_html does: the attribute may be
    # missing and a plain attribute access would raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto": # auto mode selected
        if "textURL" in docinfo or 'textURLPath' in docinfo:
            # a text URL is set, so show the text
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
232
233	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    """Render the main viewer page (template viewer_main) for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'text', 'images' or 'auto' (the default); 'auto' becomes
        'text_dict' when docinfo has a textURL or textURLPath entry and
        'images' otherwise
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group, passed to getPageinfo
    @param pn: current page number, passed to getPageinfo
    @param mk: mark or list of marks, rendered with generateMarks
    @param query: not used in this method
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma); not used in this method
    @param characterNormalization: type of text display (reg, norm, none); not used in this method
    @return: the rendered page, or an error string if the template folder is missing
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    templateMissing = not hasattr(self, 'template')
    if templateMissing:
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        fallbackUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = self.findDigilibUrl() or fallbackUrl

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: show the text if a text URL is set, images otherwise
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    template = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
271
def generateMarks(self,mk):
    """Return the marks in mk as a string of 'mk=<mark>' items.

    @param mk: None, a single mark, or a list of marks
    @return: "" for None, otherwise one 'mk=<mark>' item per mark

    NOTE(review): the items are concatenated without any separator, so two
    marks give 'mk=amk=b' -- confirm that the consuming template expects this.
    """
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        # a single mark is treated like a one-element list
        marks = [marks]
    return "".join(["mk=%s" % m for m in marks])
281
282
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
287
def getDocumentViewerURL(self):
    """Return the URL of this instance, as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
291
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected.

    @param idx: index of the item being rendered
    @param selected: index of the currently selected item
    @param style: base style name
    """
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
299
def getLink(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    Works on a copy of REQUEST.form. If param is given, it is set to
    str(val), or removed when val is None. A 'filepath' mode is rewritten to
    'imagepath' with the url parameter shortened by one path component. The
    parameters are urlencoded and appended to REQUEST['URL1'].
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    if query.get("mode", None) == "filepath":
        # filepath is only valid for the first request, continue with imagepath
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    ps = urllib.urlencode(query)
    return self.REQUEST['URL1'] + "?" + ps
319
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    Works on a copy of REQUEST.form. If param is given, it is set to
    str(val), or removed when val is None. Every value is quoted with
    urllib.quote and the key=value pairs are joined and appended to
    REQUEST['URL1'].

    NOTE(review): the method name suggests an '&amp;' separator while the
    pairs are joined with a plain '&' -- confirm against the original file.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(query.items()))
    pairs = []
    for (k, v) in query.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    ps = "&".join(pairs)
    return self.REQUEST['URL1'] + "?" + ps
335
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template info_xml).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: the result of calling the info_xml template with the docinfo
    """
    # getattr() with a default, as index_html does: the attribute may be
    # missing and a plain attribute access would raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
345
346
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    docinfo['accessType'] decides: 'free' is always accessible; a missing
    type or one listed in self.authgroups is accessible for every user
    except "Anonymous User"; any other type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
366
367
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages of an image directory in docinfo.

    Requests dirInfo-xml.jsp from the digilib server for path and reads the
    first //dir/size element of the answer.

    @param path: image directory, appended to the request as the fn parameter
    @param docinfo: dict to fill; a new dict is created if None
    @param cut: number of trailing path components removed from path first
    @return: docinfo with 'numPages' set (0 if no size element was found)
    @raise IOError: if the directory info cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes = dom.xpath("//dir/size")
    # The format string had no placeholder, so the sizes never reached the
    # log; the one-element tuple keeps a list argument from being unpacked.
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
397
def getIndexMetaPath(self,url):
    """Return only the /mpiwg/online/... path contained in url.

    @param url: path or URL containing 'experimental/' or 'permanent/'
    @return: '/mpiwg/online/' + 'experimental' or 'permanent' + '/' + the rest
        of url after that component, or "" if url contains neither
    """
    # The previous pattern escaped the '|' (matching a literal pipe) and
    # allowed only one character before and after the directory name, so it
    # never matched a real path. match() anchors at the start of url, hence
    # the leading '.*'.
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return "/mpiwg/online/" + regpath.group(1) + "/" + regpath.group(2)
406
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with 'http://' is returned unchanged. Any other value is
    taken as an online path: '/mpiwg/online' is removed, the digilib Texter
    servlet URL is prepended and '/index.meta' is appended unless the result
    already ends with 'index.meta'.

    NOTE(review): the nesting was reconstructed from a listing without
    indentation -- confirm that '/index.meta' is only appended to online paths.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    texter = self.digilibBaseUrl + "/servlet/Texter?fn="
    metaUrl = texter + url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl = metaUrl + "/index.meta"
    return metaUrl
422
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed.

    The document location comes from getIndexMetaUrl(url); the content is
    fetched with getHttpData and parsed with Parse.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    content = getHttpData(metaUrl)
    if content is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(content)
435
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed.

    A url starting with 'http://' is requested as is. Any other value is
    taken as an online path: '/mpiwg/online' is removed and the digilib
    Texter servlet URL is prepended.
    """
    if url.startswith("http://"):
        # real URL
        infoUrl = url
    else:
        # online path
        texter = self.digilibBaseUrl + "/servlet/Texter?fn="
        infoUrl = texter + url.replace("/mpiwg/online","")

    content = getHttpData(infoUrl)
    if content is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(content)
454
455
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    @param path: path of the index.meta document; only used when dom is None
    @param docinfo: dict to fill; a new dict is created if None
    @param dom: parsed index.meta; fetched for path when None
    @param cut: number of trailing path components removed from path before
        the document is fetched
    @return: docinfo with 'accessType' set to the type attribute of
        access-conditions/access, to the lower-cased access name for the
        types 'group' and 'institution', or to None when there is no type
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs:
        access = typeAttrs[0].value
        if access in ('group', 'institution'):
            # for these types the access name is what gets stored
            nameNodes = dom.xpath("//access-conditions/access/name")
            access = getTextFromNode(nameNodes[0]).lower()

    docinfo['accessType'] = access
    return docinfo
478
479
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store bibliographical info from an index.meta document in docinfo.

    @param path: path of the index.meta document
    @param docinfo: dict to fill; a new dict is created if None
    @param dom: parsed index.meta; fetched for path when None
    @param cut: number of trailing path components removed from path before
        the document is fetched
    @return: docinfo with 'indexMetaPath' and 'bib_type' set; 'bib' holds all
        raw bib fields when there are any; 'author', 'title', 'year' and
        'lang' are added when the metadata mapping for the bib type is usable
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = dom.xpath("//bib/@type")
    if bibtype:
        bibtype = bibtype[0].value
    else:
        bibtype = "generic"

    # index.meta writes types with "-" where the mapping uses " "
    bibtype = bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            # Best effort: a field that is not mapped or not present in the
            # document is skipped, but the reason is now logged instead of
            # being swallowed by a bare except.
            try:
                docinfo[field] = getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s: %s"%(field, sys.exc_info()[1]))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang'] = getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang'] = ''

    return docinfo
534
535
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of the first name element of an index.meta in docinfo.

    @param path: path of the index.meta document
    @param docinfo: dict to fill; a new dict is created if None
    @param dom: parsed index.meta; fetched for path when None
    @param cut: number of trailing path components removed from path before
        the document is fetched
    @return: docinfo with 'indexMetaPath' set, and 'name' set when the
        document contains a name element
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    # The previous code tested len(name) and logged docinfo[name] with an
    # undefined variable "name", so every call failed with a NameError.
    names = dom.xpath("//name")
    if names:
        docinfo['name'] = getTextFromNode(names[0])
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
559
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    Reads resource/name and resource/archive-path and the texttool elements
    image, digiliburlprefix, text, text-url-path and presentation, then adds
    bibliographical and access information.

    @param url: path or URL of the index.meta document
    @param dom: parsed index.meta; fetched for url when None
    @param docinfo: dict to fill; a new dict is created if None
    @return: the filled docinfo
    @raise IOError: if no archive path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path; startswith() instead of archivePath[0] so
        # that an empty archive-path element does not raise IndexError
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is not an error because text mode is standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, so not a URL: relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])
        # get() instead of ['imagePath']: the key is missing when the image
        # element exists but is empty
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

    if presentationUrls:
        # presentation info overrides the bib info; its location is the
        # presentation path relative to the index.meta document
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
662
663
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Store bibliographical info from a texttool presentation entry in docinfo.

    @param url: location of the presentation info document, fetched with
        getPresentationInfoXML
    @param docinfo: dict to fill; a new dict is created if None (previously
        None made every assignment fail silently and None was returned)
    @param dom: ignored; the presentation document is always fetched from url
    @return: docinfo with 'author', 'title' and 'year' set from the first
        author, title and date element, where present
    """
    if docinfo is None:
        docinfo = {}
    dom = self.getPresentationInfoXML(url)
    for key, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        # Best effort per field: a failure is logged and the field skipped.
        try:
            nodes = dom.xpath(expr)
            if nodes:
                docinfo[key] = getTextFromNode(nodes[0])
        except Exception:
            logging.debug("documentViewer (getbibinfofromtexttoolpresentation) no %s: %s"%(key, sys.exc_info()[1]))
    return docinfo
681
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its page images.

    The index.meta file is assumed to be one level above the images.

    @param path: path to the images; '/mpiwg/online' is removed from it
    @param docinfo: dict to fill; a new dict is created if None
    @param cut: number of trailing path components removed for the directory
        info and the image URL; the index.meta lookups use cut+1
    @return: docinfo with 'imagePath' and 'imageURL' set and the directory,
        bibliographical and access information added
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # the index.meta lookups start from the uncut image path
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    return docinfo
702
703
def getDocinfo(self, mode, url):
    """Return the docinfo for url, built according to mode.

    A docinfo stored in the session under 'docinfo' is reused when its mode
    and url match; otherwise a new one is built and stored in the session.

    @param mode: 'texttool', 'imagepath' or 'filepath'
    @param url: url which contains display information
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it's still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url points into the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
729
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """Return the pageinfo dict for the given parameters.

    @param current: current page number
    @param start: first page of the current thumbnail group; derived from
        current and the group size when it cannot be converted to an int
    @param rows: thumbnail rows, default self.thumbrows
    @param cols: thumbnail columns, default self.thumbcols
    @param docinfo: document info; 'numPages' and 'tocSize_<tocMode>' are
        read when present. May be None.
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']
    @param characterNormalization: not used; the value is read from the REQUEST
    @return: dict with the paging values and the query parameters read from
        the REQUEST
    """
    if docinfo is None:
        # The default used to crash in the tocSize lookup below, because
        # "in" cannot be applied to None.
        docinfo = {}

    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if 'numPages' in docinfo:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up; // keeps this an integer division
        numgroups = np // grpsize
        if np % grpsize > 0:
            numgroups += 1
        pageinfo['numgroups'] = numgroups

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization',' ')
    pageinfo['query'] = request.get('query',' ')
    pageinfo['queryType'] = request.get('queryType',' ')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    tocKey = 'tocSize_%s'%tocMode
    if tocKey in docinfo:
        # cached toc: limit tocPN to the number of toc pages (rounded up)
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
778
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of this document viewer.

    @param title: new title
    @param digilibBaseUrl: new base URL of the digilib server
    @param thumbrows: new number of thumbnail rows
    @param thumbcols: new number of thumbnail columns
    @param authgroups: comma separated list of authorized group names; each
        name is stored stripped and lower-cased
    @param RESPONSE: if given, it is redirected to manage_main
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
788
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundForm = form.__of__(self)
    return boundForm()
793
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to this container under id.

    @param id: id of the new viewer
    @param imageScalerUrl: passed on to the documentViewer constructor
    @param textServerName: passed on to the documentViewer constructor
    @param title: title of the new viewer
    @param RESPONSE: if given, it is redirected to manage_main
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
801
802	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class used for document viewer templates.

    Adds nothing to ZopePageTemplate except its own meta_type.
    """

    meta_type = "DocumentViewer Template"
806
807
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template (zpt/addDocumentViewerTemplate)."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    boundForm = form.__of__(self)
    return boundForm()
812
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the new template
    @param text: not used; the content always comes from zpt/viewer_main.zpt
    @param REQUEST: if given, its RESPONSE is redirected to the manage_main
        page of the new template; without it no redirect happens
    @param submit: not used
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly; try/finally rather than "with" keeps the
    # code usable on old Python 2 versions
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # Called from code rather than through the web: nothing to redirect.
        # This used to fail on the None default.
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
832
833
834

Note: See TracBrowser for help on using the repository browser.

Download in other formats: