Context Navigation

source: documentViewer/documentViewer.py @ 225:57c2f5b6ada5

Last change on this file since 225:57c2f5b6ada5 was 225:57c2f5b6ada5, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	from Ft.Xml import EMPTY_NAMESPACE, Parse
10	import Ft.Xml.Domlette
11	import os.path
12	import sys
13	import urllib
14	import urllib2
15	import logging
16	import math
17	import urlparse
18	import cStringIO
19	import re
20
def logger(txt, method, txt2):
    """Write ``txt + txt2`` to the root logger.

    ``method`` is the numeric logging level; the callers in this file pass
    ``logging.INFO``.  Anything that is not a positive integer level falls
    back to ``logging.INFO``, the level this helper always used before.
    """
    if isinstance(method, int) and method > 0:
        level = method
    else:
        level = logging.INFO
    logging.log(level, txt + txt2)
24
25
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only real conversion failures (``TypeError`` for e.g. ``None``,
    ``ValueError`` for non-numeric strings, ``OverflowError`` for infinite
    floats) trigger the fallback, so unrelated errors are no longer hidden
    by a bare ``except``.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
32
def getTextFromNode(nodename):
    """Return the concatenated data of a node's direct text children.

    ``nodename`` is a DOM node (or None).  Only children whose ``nodeType``
    equals ``TEXT_NODE`` contribute; None yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
43
def serializeNode(node, encoding='utf-8'):
    """Return a string containing ``node`` serialized as XML.

    The node is printed with ``Ft.Xml.Domlette.Print`` into an in-memory
    buffer using the given ``encoding``.  The buffer is closed even when
    printing fails.
    """
    buf = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        buf.close()
51
52
def getParentDir(path):
    """Return ``path`` with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
56
57
58	def getHttpData(url, data=None, num_tries=3, timeout=10):
59	"""returns result from url+data HTTP request"""
60	# we do GET (by appending data to url)
61	if isinstance(data, str) or isinstance(data, unicode):
62	# if data is string then append
63	url = "%s?%s"%(url,data)
64	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
65	# urlencode
66	url = "%s?%s"%(url,urllib.urlencode(data))
67
68	response = None
69	errmsg = None
70	for cnt in range(num_tries):
71	try:
72	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
73	if sys.version_info < (2, 6):
74	# set timeout on socket -- ugly :-(
75	import socket
76	socket.setdefaulttimeout(float(timeout))
77	response = urllib2.urlopen(url)
78	else:
79	response = urllib2.urlopen(url,timeout=float(timeout))
80	# check result?
81	break
82	except urllib2.HTTPError, e:
83	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
84	errmsg = str(e)
85	# stop trying
86	break
87	except urllib2.URLError, e:
88	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
89	errmsg = str(e)
90	# stop trying
91	#break
92
93	if response is not None:
94	data = response.read()
95	response.close()
96	return data
97
98	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
99	#return None
100
101
102
103	##
104	## documentViewer class
105	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # extra tab in the management interface, next to the Folder tabs
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
133
134
135	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
136	"""init document viewer"""
137	self.id=id
138	self.title=title
139	self.thumbcols = thumbcols
140	self.thumbrows = thumbrows
141	# authgroups is list of authorized groups (delimited by ,)
142	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
143	# create template folder so we can always use template.something
144
145	templateFolder = Folder('template')
146	#self['template'] = templateFolder # Zope-2.12 style
147	self._setObject('template',templateFolder) # old style
148	try:
149	import MpdlXmlTextServer
150	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
151	#templateFolder['fulltextclient'] = xmlRpcClient
152	templateFolder._setObject('fulltextclient',textServer)
153	except Exception, e:
154	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
155	try:
156	from Products.zogiLib.zogiLib import zogiLib
157	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
158	#templateFolder['zogilib'] = zogilib
159	templateFolder._setObject('zogilib',zogilib)
160	except Exception, e:
161	logging.error("Unable to create zogiLib for zogilib: "+str(e))
162
163
# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """get page (delegates to template.fulltextclient.getTextPage)"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """get query (delegates to template.fulltextclient.getQuery)"""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """get search (delegates to template.fulltextclient.getSearch)"""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getNumPages(self, docinfo):
    """get numpages (delegates to template.fulltextclient.getNumPages)"""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """get numpages text (delegates to template.fulltextclient.getNumTextPages)"""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)

def getTranslate(self, **args):
    """get translate (delegates to template.fulltextclient.getTranslate)"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """get lemma (delegates to template.fulltextclient.getLemma)"""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """get toc (delegates to template.fulltextclient.getToc)"""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """get tocpage (delegates to template.fulltextclient.getTocPage)"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
200
201
security.declareProtected('View', 'thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    '''
    Render the ``thumbs_main_rss`` template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: passed to getPageinfo as ``start``
    @param pn: passed to getPageinfo as ``current``
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s" % (mode, url, start, pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default, as index_html does: the attribute may not
    # exist on this instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":  # auto mode selected
        # a text URL in docinfo switches to text display
        if ('textURL' in docinfo) or ('textURLPath' in docinfo):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)
232
security.declareProtected('View', 'index_html')
def index_html(self, url, mode="texttool", viewMode="auto", tocMode="thumbs", start=None, pn=1, mk=None, query=None, querySearch=None, characterNormalization=""):
    '''
    Main view: render the ``viewer_main`` template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param characterNormalization type of text display (reg, norm, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s" % (mode, url, start, pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":  # auto mode selected
        # a text URL in docinfo switches to text display
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    pt = getattr(self.template, 'viewer_main')
    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
271
def generateMarks(self, mk):
    # Builds the mark string handed to the viewer template: one "mk=<value>"
    # chunk per mark, concatenated without any separator.  A single value is
    # treated like a one-element list; None gives "".
    # (Deliberately a comment and not a docstring: Zope publishes methods
    # that have a docstring, and this one had none.)
    # NOTE(review): the missing separator between chunks looks odd --
    # confirm against the template that consumes this value.
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "".join(["mk=%s" % m for m in mk])
281
282
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib (template.zogilib.getDLBaseUrl)"""
    return self.template.zogilib.getDLBaseUrl()
287
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url())"""
    viewerUrl = self.absolute_url()
    return viewerUrl
291
def getStyle(self, idx, selected, style=""):
    """returns ``style``, with 'sel' appended if idx == selected."""
    if idx != selected:
        return style
    return style + 'sel'
299
def getLink(self, param=None, val=None):
    """link to documentviewer with parameter param set to val

    Starts from a copy of the current request's form parameters.  With
    ``val`` None the parameter is removed, otherwise it is set to
    ``str(val)``.  The result is URL1 of the request plus the url-encoded
    parameters.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    # if the first call used mode "filepath", switch to "imagepath" now
    # and point url at the parent directory
    if query.get("mode", None) == "filepath":
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    encoded = urllib.urlencode(query)
    return self.REQUEST['URL1'] + "?" + encoded
319
def getLinkAmp(self, param=None, val=None):
    """link to documentviewer with parameter param set to val

    Works on a copy of the current request's form parameters.  With ``val``
    None the parameter is removed, otherwise it is set to ``str(val)``.
    Values are quoted with ``urllib.quote`` and joined with '&'.
    """
    # NOTE(review): the method name suggests the separator was meant to be
    # "&amp;" (for embedding in XML); the code joins with a plain "&" --
    # confirm against the templates that call this.
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            if param in params:
                del params[param]
        else:
            params[param] = str(val)

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s" % repr(params.items()))
    # str(v): form values are not guaranteed to be strings (e.g. a list for
    # a repeated parameter) and urllib.quote only accepts strings
    ps = "&".join(["%s=%s" % (k, urllib.quote(str(v))) for (k, v) in params.items()])
    url = self.REQUEST['URL1'] + "?" + ps
    return url
335
def getInfo_xml(self, url, mode):
    """returns info about the document as XML

    Renders the ``info_xml`` template with the docinfo for (mode, url).
    """
    # getattr with a default, as index_html does: the attribute may not
    # exist on this instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
345
346
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    Access type 'free' is always granted.  A missing access type, or one
    listed in ``self.authgroups``, is granted to any user whose name is not
    "Anonymous User".  Every other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s" % access)

    if access is not None and access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s" % (user, self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.error("documentViewer (accessOK) unknown access type %s" % access)
    return False
366
367
def getDirinfoFromDigilib(self, path, docinfo=None, cut=0):
    """Fetch directory info for ``path`` from digilib and store the page count.

    ``path`` is shortened by ``cut`` trailing components, then
    ``dirInfo-xml.jsp`` below ``self.digilibBaseUrl`` is queried.  The text
    of the first ``//dir/size`` element becomes ``docinfo['numPages']``
    (0 if there is no such element).  Returns ``docinfo`` (a new dict if
    none was passed in).
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s" % (infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s" % (infoUrl))

    dom = Parse(txt)
    sizes = dom.xpath("//dir/size")
    # the format string had no placeholder, so the value was never logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s" % (sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
397
def getIndexMetaPath(self, url):
    """Return only the archive path contained in ``url``.

    Looks for ".../experimental/<rest>" or ".../permanent/<rest>" and
    returns "/mpiwg/online/<experimental|permanent>/<rest>"; returns ""
    when ``url`` contains neither.
    """
    # The previous pattern r".(experimental\|permanent)/(.)" escaped the
    # alternation bar and had lost its repetitions, so it could only match
    # a literal "experimental|permanent" and captured a single character.
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s" % regpath.group(2))
    return ("/mpiwg/online/" + regpath.group(1) + "/" + regpath.group(2))
406
def getIndexMetaName(self, url):
    """Match ``url`` against the experimental/permanent archive pattern.

    Returns "" when ``url`` does not match, otherwise the regular
    expression match object (group 1: "experimental" or "permanent",
    group 2: the rest of the path).
    """
    # The pattern used to be commented out, which made every call fail
    # with a NameError on ``regexp``.
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getIndexMetaName): XAXA: %s" % regpath.group(2))
    # NOTE(review): returning the match object (rather than a string such
    # as group(2)) is what the code always said; confirm what callers expect.
    return regpath
415
416
def getIndexMetaUrl(self, url):
    """returns url of index.meta document at url

    A real URL (http:// or https://) is returned unchanged.  Anything else
    is taken as an online archive path: "/mpiwg/online" is removed, the
    result is appended to digilib's Texter servlet URL and "/index.meta"
    is added unless already present.
    """
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl + "/servlet/Texter?fn="
        metaUrl = server + url.replace("/mpiwg/online", "")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    return metaUrl
432
def getDomFromIndexMeta(self, url):
    """get dom from index meta

    Resolves ``url`` with getIndexMetaUrl, fetches it with getHttpData and
    returns the parsed document.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s" % metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s" % (url))

    return Parse(txt)
445
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    A real URL (http:// or https://) is fetched as is.  Anything else is
    taken as an online archive path: "/mpiwg/online" is removed and the
    result is requested through digilib's Texter servlet.
    """
    dom = None
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl + "/servlet/Texter?fn="
        metaUrl = server + url.replace("/mpiwg/online", "")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s" % (url))

    dom = Parse(txt)
    return dom
464
465
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Sets ``docinfo['accessType']`` to the type attribute of
    //access-conditions/access (None if absent).  For the types 'group'
    and 'institution' the lower-cased text of the access/name element is
    stored instead.  Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s" % (path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        # no dom given: load index.meta from the path shortened by ``cut``
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs and (len(typeAttrs) > 0):
        access = typeAttrs[0].value
        if access in ['group', 'institution']:
            nameNodes = dom.xpath("//access-conditions/access/name")
            access = getTextFromNode(nameNodes[0]).lower()

    docinfo['accessType'] = access
    return docinfo
488
489
def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Fills docinfo with:
      - 'indexMetaPath': result of getIndexMetaPath(path)
      - 'bib': dict of all child elements of //bib (local name -> text)
      - 'bib_type': the type attribute of //bib ('-' replaced by ' '),
        "generic" if missing
      - 'author', 'title', 'year', 'lang': only when a mapping for the
        bib type exists; each lookup is best effort
    Returns docinfo.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s" % (path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s" % (path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib) > 0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = dom.xpath("//bib/@type")
    if bibtype and (len(bibtype) > 0):
        bibtype = bibtype[0].value
    else:
        bibtype = "generic"

    bibtype = bibtype.replace("-", " ")  # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:" + repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:" + repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # The lookups stay best effort (a missing field is simply skipped),
        # but "except Exception" no longer swallows SystemExit and
        # KeyboardInterrupt the way the bare "except" did.
        try:
            docinfo['author'] = getTextFromNode(dom.xpath("//bib/%s" % bibmap['author'][0])[0])
        except Exception:
            pass
        try:
            docinfo['title'] = getTextFromNode(dom.xpath("//bib/%s" % bibmap['title'][0])[0])
        except Exception:
            pass
        try:
            docinfo['year'] = getTextFromNode(dom.xpath("//bib/%s" % bibmap['year'][0])[0])
        except Exception:
            pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s" % bibtype)
        try:
            docinfo['lang'] = getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang'] = ''

    return docinfo
544
545
546	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
547	"""gets name info from the index.meta file at path or given by dom"""
548	if docinfo is None:
549	docinfo = {}
550
551	if dom is None:
552	for x in range(cut):
553	path=getParentDir(path)
554	dom = self.getDomFromIndexMeta(path)
555
556	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
557
558	#result= dom.xpath("//result/resultPage")
559	#docinfo['numPages']=int(getTextFromNode(result[0]))
560
561	if len(name) > 0:
562	try:
563	result =dom.xpath("//name")
564	docinfo['name']=getTextFromNode(result[0])
565	logging.debug("documentViewer docinfo[name] %s"%docinfo[name])
566	except: pass
567	#logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
568	return docinfo
569
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Reads resource/name, resource/archive-path and the texttool children
    (image, digiliburlprefix, text, text-url-path, presentation) from the
    index.meta at ``url`` (or from ``dom``) and fills docinfo with
    imagePath/imageURL, viewerURL, textURL, textURLPath plus the bib and
    access info of the called helpers.  Returns docinfo.

    Raises IOError when no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = ''  # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        # (startswith instead of [0]: an empty element must not raise IndexError)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error any more because text mode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":  # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # .get(): 'imagePath' is not set when an image tag exists but is empty
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)  # get info from bib tag
    #docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls and (len(presentationUrls) > 0):
        # presentation info overrides the bib info: its URL is the
        # index.meta URL with "index.meta" replaced by the relative path
        # to the presentation file
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)  # get access info

    return docinfo
672
673
def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Loads the presentation XML at ``url`` and copies the text of the first
    //author, //title and //date element into docinfo['author'],
    docinfo['title'] and docinfo['year'].  Missing elements are skipped.
    The ``dom`` argument is not used; the presentation document is always
    loaded from ``url``.  Returns docinfo.
    """
    if docinfo is None:
        # without this every assignment below failed silently and None
        # was returned
        docinfo = {}
    dom = self.getPresentationInfoXML(url)
    # best effort per field; "except Exception" instead of a bare except
    # so SystemExit/KeyboardInterrupt are not swallowed
    try:
        docinfo['author'] = getTextFromNode(dom.xpath("//author")[0])
    except Exception:
        pass
    try:
        docinfo['title'] = getTextFromNode(dom.xpath("//title")[0])
    except Exception:
        pass
    try:
        docinfo['year'] = getTextFromNode(dom.xpath("//date")[0])
    except Exception:
        pass
    return docinfo
691
def getDocinfoFromImagePath(self, path, docinfo=None, cut=0):
    """Build docinfo for a directory of page images.

    ``path`` is the path to the images; the index.meta file is assumed to
    be one level higher.  Sets imagePath, the digilib directory info and
    imageURL, then adds bib and access info from that index.meta.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s" % (path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online", "")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path, docinfo=docinfo, cut=cut)

    # keep the uncut path for the index.meta lookups below
    pathorig = path
    for x in range(cut):
        path = getParentDir(path)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:" + path)
    docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + path

    # index.meta is one level above the images, hence cut+1
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(pathorig, docinfo=docinfo, cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig, docinfo=docinfo, cut=metaCut)
    return docinfo
712
713
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match.  Otherwise a new one is built ('texttool', 'imagepath' or
    'filepath'), stored in the session and returned.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s" % (mode, url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s" % cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo, cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!" % mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'." % (mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s" % docinfo)
    session['docinfo'] = docinfo
    return docinfo
739
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None, characterNormalization=""):
    """returns pageinfo with the given parameters

    The thumbnail grid is ``rows`` x ``cols`` (defaults: self.thumbrows,
    self.thumbcols).  Without a usable ``start`` the first page of the
    group that contains ``current`` is used.  When docinfo has 'numPages',
    'end' is capped and 'numgroups' is set.  Query/TOC related values are
    read from self.REQUEST; the ``characterNormalization`` argument is not
    used, the request value is taken instead.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains ``current``
    start = getInt(start, default=(math.ceil(float(current) / float(grpsize)) * grpsize - (grpsize - 1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    #pageinfo['characterNormalization'] =characterNormalization
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization', ' ')
    pageinfo['query'] = self.REQUEST.get('query', ' ')
    pageinfo['queryType'] = self.REQUEST.get('queryType', ' ')
    pageinfo['querySearch'] = self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN', '1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery', '')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo defaults to None; the membership test used to raise a
    # TypeError in that case
    tocKey = 'tocSize_%s' % tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of TOC pages, rounded up
        if tocSize % tocPageSize > 0:
            tocPages = tocSize // tocPageSize + 1
        else:
            tocPages = tocSize // tocPageSize
        # never point beyond the last TOC page
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = self.REQUEST.get('searchPN', '1')
    pageinfo['sn'] = self.REQUEST.get('sn', '')
    return pageinfo
788
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Update the viewer configuration.

    Stores title, digilibBaseUrl, thumbrows and thumbcols as given and
    ``authgroups`` (comma separated) as a list of stripped, lower-cased
    names.  If RESPONSE is given it is redirected to 'manage_main'.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for entry in authgroups.split(','):
        groups.append(entry.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
798
def manage_AddDocumentViewerForm(self):
    """add the viewer form (renders zpt/addDocumentViewer in this context)"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
803
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and settings, stores it in
    this container and, if RESPONSE is given, redirects to 'manage_main'.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
811
812	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""

    # name under which this class is listed in the Zope management interface
    meta_type = "DocumentViewer Template"
816
817
def manage_addDocumentViewerTemplateForm(self):
    """Form for adding (renders zpt/addDocumentViewerTemplate in this context)"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
822
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id, fills it with the
    content of zpt/viewer_main.zpt from this package and sets the title if
    one is given.  With a REQUEST the browser is redirected to the new
    object's manage_main.  The ``text`` and ``submit`` arguments are not
    used.  Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # read the default template; close the file even if reading fails
    # (try/finally rather than "with": older Python 2 versions are supported)
    f = open(os.path.join(package_home(globals()), 'zpt/viewer_main.zpt'), 'r')
    try:
        txt = f.read()
    finally:
        f.close()
    logging.info("txt %s:" % txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code): nothing to redirect then
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u + '/manage_main')
    return ''
842
843
844

Note: See TracBrowser for help on using the repository browser.

Download in other formats: