Context Navigation

source: documentViewer/documentViewer.py @ 256:ad1458aff8a6

Last change on this file since 256:ad1458aff8a6 was 256:ad1458aff8a6, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.3 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 through the root logger.

    txt, txt2: strings that are concatenated (no separator) to form the message
    method: logging level for the message (callers in this file pass
        logging.INFO).  A value that is not an integer level falls back to
        logging.INFO, the level this helper used unconditionally before.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that is not possible.

    Only conversion failures are handled (wrong type, unparsable string,
    infinite float); other exceptions such as KeyboardInterrupt are no
    longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    nodename: a DOM node (or None).  Only direct children whose nodeType
    equals TEXT_NODE contribute; None yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string in the given encoding.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory
    buffer whose content is returned.
    """
    out = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
        return out.getvalue()
    finally:
        out.close()
53
def getBrowserType(self):
    """Return the browser type object, caching it in the session.

    The value stored under 'browserType' in self.REQUEST.SESSION is
    returned if present; otherwise browserCheck(self) is called and its
    result is stored in the session before being returned.
    """
    session = self.REQUEST.SESSION
    if not session.has_key('browserType'):
        detected = browserCheck(self)
        session.set('browserType', detected)
        logging.debug("documentViewer (BROWSER TYPE) bt %s"%detected)
        return detected
    return session['browserType']
63
64
def getParentDir(path):
    """Return path without its last '/'-separated component.

    Everything from the last '/' on is dropped; a path without any '/'
    yields the empty string.
    """
    lastSlash = path.rfind('/')
    if lastSlash < 0:
        return ''
    return path[:lastSlash]
68
69
70	def getHttpData(url, data=None, num_tries=3, timeout=10):
71	"""returns result from url+data HTTP request"""
72	# we do GET (by appending data to url)
73	if isinstance(data, str) or isinstance(data, unicode):
74	# if data is string then append
75	url = "%s?%s"%(url,data)
76	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
77	# urlencode
78	url = "%s?%s"%(url,urllib.urlencode(data))
79
80	response = None
81	errmsg = None
82	for cnt in range(num_tries):
83	try:
84	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
85	if sys.version_info < (2, 6):
86	# set timeout on socket -- ugly :-(
87	import socket
88	socket.setdefaulttimeout(float(timeout))
89	response = urllib2.urlopen(url)
90	else:
91	response = urllib2.urlopen(url,timeout=float(timeout))
92	# check result?
93	break
94	except urllib2.HTTPError, e:
95	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
96	errmsg = str(e)
97	# stop trying
98	break
99	except urllib2.URLError, e:
100	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
101	errmsg = str(e)
102	# stop trying
103	#break
104
105	if response is not None:
106	data = response.read()
107	response.close()
108	return data
109
110	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
111	#return None
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """Folder subclass implementing the document viewer."""

    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # management tabs of Folder plus one tab for the viewer configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates loaded from files relative to this module
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # configuration form, protected by the 'View management screens' permission
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
145
146
147	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
148	"""init document viewer"""
149	self.id=id
150	self.title=title
151	self.thumbcols = thumbcols
152	self.thumbrows = thumbrows
153	# authgroups is list of authorized groups (delimited by ,)
154	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
155	# create template folder so we can always use template.something
156
157	templateFolder = Folder('template')
158	#self['template'] = templateFolder # Zope-2.12 style
159	self._setObject('template',templateFolder) # old style
160	try:
161	import MpdlXmlTextServer
162	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
163	#templateFolder['fulltextclient'] = xmlRpcClient
164	templateFolder._setObject('fulltextclient',textServer)
165	except Exception, e:
166	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
167	try:
168	from Products.zogiLib.zogiLib import zogiLib
169	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
170	#templateFolder['zogilib'] = zogilib
171	templateFolder._setObject('zogilib',zogilib)
172	except Exception, e:
173	logging.error("Unable to create zogiLib for zogilib: "+str(e))
174
175
# The following methods only forward to the object stored as
# template.fulltextclient and return whatever it returns.

def getTextPage(self, **args):
    """Forward to fulltextclient.getTextPage with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """Forward to fulltextclient.getQuery with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """Forward to fulltextclient.getSearch with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getGisPlaces(self, **args):
    """Forward to fulltextclient.getGisPlaces with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)

def getNumPages(self, docinfo):
    """Forward to fulltextclient.getNumPages for the given docinfo."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """Forward to fulltextclient.getNumTextPages for the given docinfo."""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)

def getTranslate(self, **args):
    """Forward to fulltextclient.getTranslate with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """Forward to fulltextclient.getLemma with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """Forward to fulltextclient.getToc with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """Forward to fulltextclient.getTocPage with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
216
217
218	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the thumbs_main_rss template for the document behind url.

    mode: defines how to access the document behind url
    url: url which contains display information
    viewMode: "text", "images" or "auto"; "auto" becomes "text" when the
        docinfo has a textURL or textURLPath entry and "images" otherwise
    start, pn: passed to getPageinfo as start and current
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard (as in index_html): the attribute may never have been set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if ("textURL" in docinfo) or ('textURLPath' in docinfo): # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
248
249	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    """Render the main viewer page (template viewer_main) for a document.

    url: url which contains display information
    mode: defines how to access the document behind url
    viewMode: "auto" is resolved to "text_dict" when the docinfo has a
        textURL or textURLPath entry and to "images" otherwise; any other
        value is passed on unchanged
    tocMode: type of 'table of contents' for navigation (thumbs, text,
        figures, none); for anything but "thumbs" the table of contents is
        fetched with getToc
    start, pn: passed to getPageinfo as start and current
    mk: mark(s), converted with generateMarks and handed to the template
    query, querySearch, characterNormalization: not used in this method body

    Returns the rendered template, or an error string when the template
    folder is missing.
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: prefer text when a text URL is known
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    template = self.template.viewer_main
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
287
def generateMarks(self,mk):
    """Return the marks in mk as a query string fragment.

    mk: None, a single mark, or a list/tuple of marks.
    Returns "" for None, otherwise "mk=<mark>" for every mark, joined with
    "&" so that several marks remain separate parameters.
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk=[mk]
    return "&".join(["mk=%s"%m for m in mk])
297
298
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
303
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
307
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
315
def getLink(self,param=None,val=None):
    """Return a link to the viewer with parameter param set to val.

    The link is built from a copy of the current request's form
    parameters.  With val=None the parameter is removed instead.  A mode
    of "filepath" is rewritten to "imagepath" and the url parameter is
    replaced by its parent directory.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    if query.get("mode", None) == "filepath":
        # the first call used filepath: continue with imagepath from here on
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    return self.REQUEST['URL1'] + "?" + urllib.urlencode(query)
335
def getLinkAmp(self,param=None,val=None):
    """Return a link to the viewer with parameter param set to val.

    Works on a copy of the current request's form parameters; with
    val=None the parameter is removed.  The values are quoted with
    urllib.quote and the pairs are joined with "&".
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(query.items()))
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s"%(key,urllib.quote(value)))
    return self.REQUEST['URL1'] + "?" + "&".join(pairs)
351
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered template info_xml).

    url, mode: passed to getDocinfo to obtain the document information
    """
    # getattr guard (as in index_html): the attribute may never have been set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
361
362
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always accessible.  A missing access type or one
    listed in self.authgroups requires a user whose name is not
    "Anonymous User".  Any other access type is logged as an error and
    denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
382
383
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages reported by digilib's dirInfo in docinfo.

    path: directory path passed as fn parameter to dirInfo-xml.jsp
    docinfo: dict to update (a new one is created when None)
    cut: number of trailing path components to remove from path first

    Sets docinfo['numPages'] to the content of the first //dir/size
    element of the answer, or to 0 when there is none, and returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string needs a placeholder, otherwise the sizes are never shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
413
def getIndexMetaPath(self,url):
    """Return only the archive path contained in url.

    The part of url starting at "experimental/" or "permanent/" is
    returned with "/mpiwg/online/" prepended; "" is returned when url
    contains neither.
    """
    # group 1: the collection (experimental or permanent), group 2: the rest
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
422
423
424
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    A url starting with "http://" is returned as is.  Anything else is
    treated as an online path: "/mpiwg/online" is removed, the result is
    appended to the Texter servlet URL of digilib and "/index.meta" is
    added when the URL does not already end in "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    # NOTE(review): the index.meta suffix is only added in this branch;
    # confirm against the original indentation
    metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl = metaUrl + "/index.meta"
    return metaUrl
440
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as DOM.

    The location is determined with getIndexMetaUrl.  IOError is raised
    when no data could be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is not None:
        return Parse(txt)

    raise IOError("Unable to read index meta from %s"%(url))
453
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as DOM.

    A url starting with "http://" is fetched directly; anything else is
    treated as an online path and read through the Texter servlet of
    digilib with "/mpiwg/online" removed.  IOError is raised when no data
    could be read.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is not None:
        return Parse(txt)

    raise IOError("Unable to read infoXMLfrom %s"%(url))
472
473
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    path: path of the index.meta file, only used when dom is None (after
        removing cut trailing components)
    docinfo: dict to update (a new one is created when None)
    dom: already parsed index.meta document

    docinfo['accessType'] becomes the type attribute of
    access-conditions/access; for the types 'group' and 'institution' it
    becomes the lower-cased text of the access name element instead.
    It is None when no type attribute is found.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs:
        access = typeAttrs[0].value
        if access == 'group' or access == 'institution':
            # the group or institution name is the effective access type
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            access = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = access
    return docinfo
496
497
498	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
499	"""gets bibliographical info from the index.meta file at path or given by dom"""
500	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
501
502	if docinfo is None:
503	docinfo = {}
504
505	if dom is None:
506	for x in range(cut):
507	path=getParentDir(path)
508	dom = self.getDomFromIndexMeta(path)
509
510	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
511
512	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
513	# put in all raw bib fields as dict "bib"
514	bib = dom.xpath("//bib/*")
515	if bib and len(bib)>0:
516	bibinfo = {}
517	for e in bib:
518	bibinfo[e.localName] = getTextFromNode(e)
519	docinfo['bib'] = bibinfo
520
521	# extract some fields (author, title, year) according to their mapping
522	metaData=self.metadata.main.meta.bib
523	bibtype=dom.xpath("//bib/@type")
524	if bibtype and (len(bibtype)>0):
525	bibtype=bibtype[0].value
526	else:
527	bibtype="generic"
528
529	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
530	docinfo['bib_type'] = bibtype
531	bibmap=metaData.generateMappingForType(bibtype)
532	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
533	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
534	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
535	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
536	try:
537	docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
538	except: pass
539	try:
540	docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
541	except: pass
542	try:
543	docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
544	except: pass
545	logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
546	try:
547	docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
548	except:
549	docinfo['lang']=''
550
551	return docinfo
552
553
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the resource name from an index.meta document in docinfo.

    path: path of the index.meta file; when dom is None the document is
        loaded from it after removing cut trailing components
    docinfo: dict to update (a new one is created when None)
    dom: already parsed index.meta document

    docinfo['name'] becomes the text of /resource/name, or '' (with a
    warning in the log) when the document has no such element.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        # a missing name is only worth a warning, not a failed request
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=''
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])

    return docinfo
575
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    url: location of the index.meta document
    dom: already parsed index.meta document (loaded from url when None)
    docinfo: dict to update (a new one is created when None)

    Determines the archive path, image directory and image URL, viewer URL
    and text URLs, then adds the bibliographical, name and access
    information.  Raises IOError when no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error anymore because text mode is now standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL but a path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # 'imagePath' is not set when the image element exists but is
        # empty, so it must not be accessed with []
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls and (len(presentationUrls) > 0):
        # overwrite the bib info with the presentation information; the
        # presentation URL is the index.meta URL with "index.meta" replaced
        # by the relative path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
678
679
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Store author, title and year from a presentation info document in docinfo.

    url: location of the presentation info document, which is always
        loaded with getPresentationInfoXML
    docinfo: dict to update (a new one is created when None)
    dom: ignored; the document loaded from url is used instead

    'author', 'title' and 'year' are taken from the first //author,
    //title and //date element; fields without a matching element are
    left untouched.
    """
    if docinfo is None:
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for key, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
697
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    path: path to the images; "/mpiwg/online" is removed from it
    docinfo: dict to update (a new one is created when None)
    cut: number of trailing path components to remove to get the image
        directory

    The index.meta file is assumed to be one level above the image
    directory, so bib and access info are read with cut+1.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # shorten a copy of the path; the callees below get the full path
    scalerPath = imagePath
    remaining = cut
    while remaining > 0:
        scalerPath = getParentDir(scalerPath)
        remaining -= 1
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is one level above the images
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
718
719
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, depending on mode.

    mode: 'texttool' (index.meta with texttool information), 'imagepath'
        or 'filepath'; anything else raises ValueError
    url: location of the document

    The docinfo is cached in the session under 'docinfo' and reused as
    long as mode and url stay the same.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url points to a file inside the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
745
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """Return the pageinfo dict for the given parameters.

    current: current page number
    start: first page of the thumbnail group; defaults to the first page
        of the group that contains current
    rows, cols: size of the thumbnail table (default self.thumbrows and
        self.thumbcols)
    docinfo: docinfo of the document; 'numPages' and 'tocSize_<tocMode>'
        are used when present, it may be None
    viewMode, tocMode: stored in pageinfo as given
    characterNormalization: not used, the value is read from the request

    The remaining entries are read from the request with their defaults.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group of grpsize pages holding current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
    pageinfo['query'] = self.REQUEST.get('query',' ')
    pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    # docinfo defaults to None, so it has to be checked before the lookup
    if (docinfo is not None) and ('tocSize_%s'%tocMode in docinfo):
        tocSize = int(docinfo['tocSize_%s'%tocMode])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: limit the toc page number to the last existing page
        if tocSize%tocPageSize>0:
            tocPages=tocSize//tocPageSize+1
        else:
            tocPages=tocSize//tocPageSize
        pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
794
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    title, digilibBaseUrl, thumbrows, thumbcols: stored as given
    authgroups: comma separated list of authorized groups, stored as a
        list of stripped, lower-cased names
    RESPONSE: when given, it is redirected to 'manage_main'
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
804
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
809
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id and add it to self.

    imageScalerUrl, textServerName, title: passed to the documentViewer
        constructor
    RESPONSE: when given, it is redirected to 'manage_main'
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
817
818	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass used as template for the document viewer."""

    meta_type = "DocumentViewer Template"
822
823
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a viewer template (zpt/addDocumentViewerTemplate)."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
828
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    id: id of the new template object
    title: optional title to set on the template
    text, submit: not used
    REQUEST: when given, the response is redirected to the manage_main
        page of the new template; without it nothing is redirected

    Returns the empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # read the default template and make sure the file is closed again
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # REQUEST is optional, so there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
848
849
850

Note: See TracBrowser for help on using the repository browser.

Download in other formats: