Context Navigation

source: documentViewer/documentViewer.py @ 353:622acf99ccec

Last change on this file since 353:622acf99ccec was 353:622acf99ccec, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.6 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15
16	import urllib2
17	import logging
18	import math
19	import urlparse
20	import cStringIO
21	import re
22
23
def logger(txt,method,txt2):
    """Log ``txt`` directly followed by ``txt2`` at INFO level.

    ``method`` is accepted for the callers' sake but is not used: the
    message always goes to ``logging.info``.
    """
    message = txt + txt2
    logging.info(message)
27
28
def getInt(number, default=0):
    """Return ``number`` converted to an int, falling back to ``default``.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when ``number`` cannot be converted; it is
        itself passed through ``int()``
    @return: an int
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems trigger the fallback; anything else
        # (e.g. KeyboardInterrupt) must propagate
        return int(default)
35
def getTextFromNode(nodename):
    """Return the concatenated data of all direct text children of a node.

    ``None`` yields the empty string; children that are not text nodes are
    skipped (their own children are not visited).
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
46
def serializeNode(node, encoding='utf-8'):
    """Serialize ``node`` to XML and return it as a string.

    The node is printed with ``Ft.Xml.Domlette.Print`` into an in-memory
    buffer using the given ``encoding``.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xmlText = stream.getvalue()
    stream.close()
    return xmlText
54
def getBrowserType(self):
    """Return the browser type object, caching it in the session.

    The result of ``browserCheck(self)`` is stored under the session key
    'browserType' on first use and read from there afterwards.
    """
    session = self.REQUEST.SESSION
    if not session.has_key('browserType'):
        detected = browserCheck(self)
        session.set('browserType', detected)
        logging.debug("documentViewer (BROWSER TYPE) bt %s"%detected)
        return detected
    return session['browserType']
64
65
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path without any '/' is reduced to the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        # single component: nothing remains after dropping it
        return ''
    return path[:cut]
69
70
71	def getHttpData(url, data=None, num_tries=3, timeout=10):
72	"""returns result from url+data HTTP request"""
73	# we do GET (by appending data to url)
74	if isinstance(data, str) or isinstance(data, unicode):
75	# if data is string then append
76	url = "%s?%s"%(url,data)
77	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
78	# urlencode
79	url = "%s?%s"%(url,urllib.urlencode(data))
80
81	response = None
82	errmsg = None
83	for cnt in range(num_tries):
84	try:
85	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
86	if sys.version_info < (2, 6):
87	# set timeout on socket -- ugly :-(
88	import socket
89	socket.setdefaulttimeout(float(timeout))
90	response = urllib2.urlopen(url)
91	else:
92	response = urllib2.urlopen(url,timeout=float(timeout))
93	# check result?
94	break
95	except urllib2.HTTPError, e:
96	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
97	errmsg = str(e)
98	# stop trying
99	break
100	except urllib2.URLError, e:
101	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
102	errmsg = str(e)
103	# stop trying
104	#break
105
106	if response is not None:
107	data = response.read()
108	response.close()
109	return data
110
111	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
112	#return None
113
114
115
116	##
117	## documentViewer class
118	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # add a 'main config' tab pointing at the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    # table-of-contents variants
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    # main page area variants
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form is restricted to managers
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
146
147
148	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
149	"""init document viewer"""
150	self.id=id
151	self.title=title
152	self.thumbcols = thumbcols
153	self.thumbrows = thumbrows
154	# authgroups is list of authorized groups (delimited by ,)
155	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
156	# create template folder so we can always use template.something
157
158	templateFolder = Folder('template')
159	#self['template'] = templateFolder # Zope-2.12 style
160	self._setObject('template',templateFolder) # old style
161	try:
162	import MpdlXmlTextServer
163	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
164	#templateFolder['fulltextclient'] = xmlRpcClient
165	templateFolder._setObject('fulltextclient',textServer)
166	except Exception, e:
167	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
168	try:
169	from Products.zogiLib.zogiLib import zogiLib
170	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
171	#templateFolder['zogilib'] = zogilib
172	templateFolder._setObject('zogilib',zogilib)
173	except Exception, e:
174	logging.error("Unable to create zogiLib for zogilib: "+str(e))
175
176
177	# proxy text server methods to fulltextclient
178	def getTextPage(self, **args):
179	"""get page"""
180	return self.template.fulltextclient.getTextPage(**args)
181
182	def getQuery(self, **args):
183	"""get query"""
184	return self.template.fulltextclient.getQuery(**args)
185
186	def getSearch(self, **args):
187	"""get search"""
188	return self.template.fulltextclient.getSearch(**args)
189
190	def getGisPlaces(self, **args):
191	"""get gis places"""
192	return self.template.fulltextclient.getGisPlaces(**args)
193
194	def getAllGisPlaces(self, **args):
195	"""get all gis places """
196	return self.template.fulltextclient.getAllGisPlaces(**args)
197
198	def getOrigPages(self, **args):
199	"""get original page number """
200	return self.template.fulltextclient.getOrigPages(**args)
201
202	def getNumPages(self, docinfo):
203	"""get numpages"""
204	return self.template.fulltextclient.getNumPages(docinfo)
205
206	def getNumTextPages(self, docinfo):
207	"""get numpages text"""
208	return self.template.fulltextclient.getNumTextPages(docinfo)
209
210	def getTranslate(self, **args):
211	"""get translate"""
212	return self.template.fulltextclient.getTranslate(**args)
213
214	def getLemma(self, **args):
215	"""get lemma"""
216	return self.template.fulltextclient.getLemma(**args)
217
218	def getToc(self, **args):
219	"""get toc"""
220	return self.template.fulltextclient.getToc(**args)
221
222	def getTocPage(self, **args):
223	"""get tocpage"""
224	return self.template.fulltextclient.getTocPage(**args)
225
226
227	security.declareProtected('View','thumbs_rss')
228	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
229	'''
230	view it
231	@param mode: defines how to access the document behind url
232	@param url: url which contains display information
233	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
234
235	'''
236	logging.debug("HHHHHHHHHHHHHH:load the rss")
237	logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
238
239	if not hasattr(self, 'template'):
240	# create template folder if it doesn't exist
241	self.manage_addFolder('template')
242
243	if not self.digilibBaseUrl:
244	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
245
246	docinfo = self.getDocinfo(mode=mode,url=url)
247	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
248	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
249	''' ZDES '''
250	pt = getattr(self.template, 'thumbs_main_rss')
251
252	if viewMode=="auto": # automodus gewaehlt
253	if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
254	viewMode="text"
255	else:
256	viewMode="images"
257
258	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
259
260	security.declareProtected('View','index_html')
261	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
262	'''
263	view it
264	@param mode: defines how to access the document behind url
265	@param url: url which contains display information
266	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
267	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
268	@param characterNormalization type of text display (reg, norm, none)
269	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
270	'''
271
272	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
273
274	if not hasattr(self, 'template'):
275	# this won't work
276	logging.error("template folder missing!")
277	return "ERROR: template folder missing!"
278
279	if not getattr(self, 'digilibBaseUrl', None):
280	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
281
282	docinfo = self.getDocinfo(mode=mode,url=url)
283
284	if tocMode != "thumbs":
285	# get table of contents
286	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
287
288	if viewMode=="auto": # automodus gewaehlt
289	if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
290	viewMode="text_dict"
291	else:
292	viewMode="images"
293
294	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
295
296	pt = getattr(self.template, 'viewer_main')
297	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
298
299	def generateMarks(self,mk):
300	ret=""
301	if mk is None:
302	return ""
303	if not isinstance(mk, list):
304	mk=[mk]
305	for m in mk:
306	ret+="mk=%s"%m
307	return ret
308
309
310	def findDigilibUrl(self):
311	"""try to get the digilib URL from zogilib"""
312	url = self.template.zogilib.getDLBaseUrl()
313	return url
314
315	def getDocumentViewerURL(self):
316	"""returns the URL of this instance"""
317	return self.absolute_url()
318
319	def getStyle(self, idx, selected, style=""):
320	"""returns a string with the given style and append 'sel' if path == selected."""
321	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
322	if idx == selected:
323	return style + 'sel'
324	else:
325	return style
326
327	def getLink(self,param=None,val=None):
328	"""link to documentviewer with parameter param set to val"""
329	params=self.REQUEST.form.copy()
330	if param is not None:
331	if val is None:
332	if params.has_key(param):
333	del params[param]
334	else:
335	params[param] = str(val)
336
337	if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
338	params["mode"] = "imagepath"
339	params["url"] = getParentDir(params["url"])
340
341	# quote values and assemble into query string
342	#ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
343	ps = urllib.urlencode(params)
344	url=self.REQUEST['URL1']+"?"+ps
345	return url
346
347	def getLinkAmp(self,param=None,val=None):
348	"""link to documentviewer with parameter param set to val"""
349	params=self.REQUEST.form.copy()
350	if param is not None:
351	if val is None:
352	if params.has_key(param):
353	del params[param]
354	else:
355	params[param] = str(val)
356
357	# quote values and assemble into query string
358	logging.debug("XYXXXXX: %s"%repr(params.items()))
359	ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
360	url=self.REQUEST['URL1']+"?"+ps
361	return url
362
363	def getInfo_xml(self,url,mode):
364	"""returns info about the document as XML"""
365
366	if not self.digilibBaseUrl:
367	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
368
369	docinfo = self.getDocinfo(mode=mode,url=url)
370	pt = getattr(self.template, 'info_xml')
371	return pt(docinfo=docinfo)
372
373
374	def isAccessible(self, docinfo):
375	"""returns if access to the resource is granted"""
376	access = docinfo.get('accessType', None)
377	logging.debug("documentViewer (accessOK) access type %s"%access)
378	if access is not None and access == 'free':
379	logging.debug("documentViewer (accessOK) access is free")
380	return True
381	elif access is None or access in self.authgroups:
382	# only local access -- only logged in users
383	user = getSecurityManager().getUser()
384	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
385	if user is not None:
386	#print "user: ", user
387	return (user.getUserName() != "Anonymous User")
388	else:
389	return False
390
391	logging.error("documentViewer (accessOK) unknown access type %s"%access)
392	return False
393
394
395	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
396	"""gibt param von dlInfo aus"""
397	if docinfo is None:
398	docinfo = {}
399
400	for x in range(cut):
401
402	path=getParentDir(path)
403
404	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
405
406	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
407
408	txt = getHttpData(infoUrl)
409	if txt is None:
410	raise IOError("Unable to get dir-info from %s"%(infoUrl))
411
412	dom = Parse(txt)
413	sizes=dom.xpath("//dir/size")
414	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
415
416	if sizes:
417	docinfo['numPages'] = int(getTextFromNode(sizes[0]))
418	else:
419	docinfo['numPages'] = 0
420
421	# TODO: produce and keep list of image names and numbers
422
423	return docinfo
424
425	def getIndexMetaPath(self,url):
426	"""gib nur den Pfad zurueck"""
427	regexp = re.compile(r".(experimental\|permanent)/(.)")
428	regpath = regexp.match(url)
429	if (regpath==None):
430	return ""
431	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
432	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
433
434
435
436	def getIndexMetaUrl(self,url):
437	"""returns utr of index.meta document at url"""
438
439	metaUrl = None
440	if url.startswith("http://"):
441	# real URL
442	metaUrl = url
443	else:
444	# online path
445	server=self.digilibBaseUrl+"/servlet/Texter?fn="
446	metaUrl=server+url.replace("/mpiwg/online","")
447	if not metaUrl.endswith("index.meta"):
448	metaUrl += "/index.meta"
449
450	return metaUrl
451
452	def getDomFromIndexMeta(self, url):
453	"""get dom from index meta"""
454	dom = None
455	metaUrl = self.getIndexMetaUrl(url)
456
457	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
458	txt=getHttpData(metaUrl)
459	if txt is None:
460	raise IOError("Unable to read index meta from %s"%(url))
461
462	dom = Parse(txt)
463	return dom
464
465	def getPresentationInfoXML(self, url):
466	"""returns dom of info.xml document at url"""
467	dom = None
468	metaUrl = None
469	if url.startswith("http://"):
470	# real URL
471	metaUrl = url
472	else:
473	# online path
474	server=self.digilibBaseUrl+"/servlet/Texter?fn="
475	metaUrl=server+url.replace("/mpiwg/online","")
476
477	txt=getHttpData(metaUrl)
478	if txt is None:
479	raise IOError("Unable to read infoXMLfrom %s"%(url))
480
481	dom = Parse(txt)
482	return dom
483
484
485	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
486	"""gets authorization info from the index.meta file at path or given by dom"""
487	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
488
489	access = None
490
491	if docinfo is None:
492	docinfo = {}
493
494	if dom is None:
495	for x in range(cut):
496	path=getParentDir(path)
497	dom = self.getDomFromIndexMeta(path)
498
499	acctype = dom.xpath("//access-conditions/access/@type")
500	if acctype and (len(acctype)>0):
501	access=acctype[0].value
502	if access in ['group', 'institution']:
503	access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
504
505	docinfo['accessType'] = access
506	return docinfo
507
508
509	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
510	"""gets bibliographical info from the index.meta file at path or given by dom"""
511	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
512
513	if docinfo is None:
514	docinfo = {}
515
516	if dom is None:
517	for x in range(cut):
518	path=getParentDir(path)
519	dom = self.getDomFromIndexMeta(path)
520
521	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
522
523	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
524	# put in all raw bib fields as dict "bib"
525	bib = dom.xpath("//bib/*")
526	if bib and len(bib)>0:
527	bibinfo = {}
528	for e in bib:
529	bibinfo[e.localName] = getTextFromNode(e)
530	docinfo['bib'] = bibinfo
531
532	# extract some fields (author, title, year) according to their mapping
533	metaData=self.metadata.main.meta.bib
534	bibtype=dom.xpath("//bib/@type")
535	if bibtype and (len(bibtype)>0):
536	bibtype=bibtype[0].value
537	else:
538	bibtype="generic"
539
540	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
541	docinfo['bib_type'] = bibtype
542	bibmap=metaData.generateMappingForType(bibtype)
543	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
544	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
545	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
546	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
547	try:
548	docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
549	except: pass
550	try:
551	docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
552	except: pass
553	try:
554	docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
555	except: pass
556	logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
557	try:
558	docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
559	except:
560	docinfo['lang']=''
561
562	return docinfo
563
564
565	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
566	"""gets name info from the index.meta file at path or given by dom"""
567	if docinfo is None:
568	docinfo = {}
569
570	if dom is None:
571	for x in range(cut):
572	path=getParentDir(path)
573	dom = self.getDomFromIndexMeta(path)
574
575	docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
576	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
577	return docinfo
578
579	def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
580	"""parse texttool tag in index meta"""
581	logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
582	if docinfo is None:
583	docinfo = {}
584	if docinfo.get('lang', None) is None:
585	docinfo['lang'] = '' # default keine Sprache gesetzt
586	if dom is None:
587	dom = self.getDomFromIndexMeta(url)
588
589	archivePath = None
590	archiveName = None
591
592	archiveNames = dom.xpath("//resource/name")
593	if archiveNames and (len(archiveNames) > 0):
594	archiveName = getTextFromNode(archiveNames[0])
595	else:
596	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
597
598	archivePaths = dom.xpath("//resource/archive-path")
599	if archivePaths and (len(archivePaths) > 0):
600	archivePath = getTextFromNode(archivePaths[0])
601	# clean up archive path
602	if archivePath[0] != '/':
603	archivePath = '/' + archivePath
604	if archiveName and (not archivePath.endswith(archiveName)):
605	archivePath += "/" + archiveName
606	else:
607	# try to get archive-path from url
608	logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
609	if (not url.startswith('http')):
610	archivePath = url.replace('index.meta', '')
611
612	if archivePath is None:
613	# we balk without archive-path
614	raise IOError("Missing archive-path (for text-tool) in %s" % (url))
615
616	imageDirs = dom.xpath("//texttool/image")
617	if imageDirs and (len(imageDirs) > 0):
618	imageDir = getTextFromNode(imageDirs[0])
619
620	else:
621	# we balk with no image tag / not necessary anymore because textmode is now standard
622	#raise IOError("No text-tool info in %s"%(url))
623	imageDir = ""
624	#xquery="//pb"
625	docinfo['imagePath'] = "" # keine Bilder
626	docinfo['imageURL'] = ""
627
628	if imageDir and archivePath:
629	#print "image: ", imageDir, " archivepath: ", archivePath
630	imageDir = os.path.join(archivePath, imageDir)
631	imageDir = imageDir.replace("/mpiwg/online", '')
632	docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
633	docinfo['imagePath'] = imageDir
634
635	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
636
637	viewerUrls = dom.xpath("//texttool/digiliburlprefix")
638	if viewerUrls and (len(viewerUrls) > 0):
639	viewerUrl = getTextFromNode(viewerUrls[0])
640	docinfo['viewerURL'] = viewerUrl
641
642	# old style text URL
643	textUrls = dom.xpath("//texttool/text")
644	if textUrls and (len(textUrls) > 0):
645	textUrl = getTextFromNode(textUrls[0])
646	if urlparse.urlparse(textUrl)[0] == "": #keine url
647	textUrl = os.path.join(archivePath, textUrl)
648	# fix URLs starting with /mpiwg/online
649	if textUrl.startswith("/mpiwg/online"):
650	textUrl = textUrl.replace("/mpiwg/online", '', 1)
651
652	docinfo['textURL'] = textUrl
653
654	# new style text-url-path
655	textUrls = dom.xpath("//texttool/text-url-path")
656	if textUrls and (len(textUrls) > 0):
657	textUrl = getTextFromNode(textUrls[0])
658	docinfo['textURLPath'] = textUrl
659	if not docinfo['imagePath']:
660	# text-only, no page images
661	docinfo = self.getNumTextPages(docinfo)
662
663	presentationUrls = dom.xpath("//texttool/presentation")
664	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
665	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
666	docinfo =self.getOrigPages(docinfo=docinfo)
667
668	if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen
669	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
670	# durch den relativen Pfad auf die presentation infos
671	presentationPath = getTextFromNode(presentationUrls[0])
672	if url.endswith("index.meta"):
673	presentationUrl = url.replace('index.meta', presentationPath)
674	else:
675	presentationUrl = url + "/" + presentationPath
676
677	docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
678
679	docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
680
681	return docinfo
682
683
684	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
685	"""gets the bibliographical information from the preseantion entry in texttools
686	"""
687	dom=self.getPresentationInfoXML(url)
688	try:
689	docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
690	except:
691	pass
692	try:
693	docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
694	except:
695	pass
696	try:
697	docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
698	except:
699	pass
700	return docinfo
701
702	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
703	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
704	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
705	if docinfo is None:
706	docinfo = {}
707	path=path.replace("/mpiwg/online","")
708	docinfo['imagePath'] = path
709	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
710
711	pathorig=path
712	for x in range(cut):
713	path=getParentDir(path)
714	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
715	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
716	docinfo['imageURL'] = imageUrl
717
718	#path ist the path to the images it assumes that the index.meta file is one level higher.
719	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
720	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
721	return docinfo
722
723
724	def getDocinfo(self, mode, url):
725	"""returns docinfo depending on mode"""
726	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
727	# look for cached docinfo in session
728	if self.REQUEST.SESSION.has_key('docinfo'):
729	docinfo = self.REQUEST.SESSION['docinfo']
730	# check if its still current
731	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
732	logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
733	return docinfo
734	# new docinfo
735	docinfo = {'mode': mode, 'url': url}
736	if mode=="texttool": #index.meta with texttool information
737	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
738	elif mode=="imagepath":
739	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
740	elif mode=="filepath":
741	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
742	else:
743	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
744	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
745
746	logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
747
748	self.REQUEST.SESSION['docinfo'] = docinfo
749	return docinfo
750
751	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization="",originalPage=None):
752	"""returns pageinfo with the given parameters"""
753	pageinfo = {}
754	current = getInt(current)
755
756	pageinfo ['originalPage'] = self.getOrigPages(docinfo=docinfo,pageinfo=pageinfo)
757	pageinfo['current'] = current
758	rows = int(rows or self.thumbrows)
759	pageinfo['rows'] = rows
760	cols = int(cols or self.thumbcols)
761	pageinfo['cols'] = cols
762	grpsize = cols * rows
763	pageinfo['groupsize'] = grpsize
764	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
765	# int(current / grpsize) * grpsize +1))
766	pageinfo['start'] = start
767	pageinfo['end'] = start + grpsize
768	if (docinfo is not None) and ('numPages' in docinfo):
769	np = int(docinfo['numPages'])
770	pageinfo['end'] = min(pageinfo['end'], np)
771	pageinfo['numgroups'] = int(np / grpsize)
772	if np % grpsize > 0:
773	pageinfo['numgroups'] += 1
774	pageinfo['viewMode'] = viewMode
775	pageinfo['tocMode'] = tocMode
776	#pageinfo['characterNormalization'] =characterNormalization
777	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
778	pageinfo['query'] = self.REQUEST.get('query',' ')
779	pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
780	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
781	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
782	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
783	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
784	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
785	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
786	toc = int (pageinfo['tocPN'])
787	pageinfo['textPages'] =int (toc)
788
789	logging.debug("originalPage: %s"%originalPage)
790
791	if 'tocSize_%s'%tocMode in docinfo:
792	tocSize = int(docinfo['tocSize_%s'%tocMode])
793	tocPageSize = int(pageinfo['tocPageSize'])
794	# cached toc
795	if tocSize%tocPageSize>0:
796	tocPages=tocSize/tocPageSize+1
797	else:
798	tocPages=tocSize/tocPageSize
799	pageinfo['tocPN'] = min (tocPages,toc)
800	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
801	pageinfo['sn'] =self.REQUEST.get('sn','')
802	return pageinfo
803
804	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
805	"""init document viewer"""
806	self.title=title
807	self.digilibBaseUrl = digilibBaseUrl
808	self.thumbrows = thumbrows
809	self.thumbcols = thumbcols
810	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
811	if RESPONSE is not None:
812	RESPONSE.redirect('manage_main')
813
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    form = template.__of__(self)
    return form()
818
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id inside this container.

    When RESPONSE is given it is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
826
827	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template with its own meta type for use with the document viewer."""
    meta_type="DocumentViewer Template"
831
832
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    template = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    form = template.__of__(self)
    return form()
837
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the new template
    @param text: unused
    @param REQUEST: when given, its RESPONSE is redirected to the
        manage_main page of the new template
    @param submit: unused
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    zptPath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to the garbage collector
    zptFile = open(zptPath,'r')
    try:
        txt = zptFile.read()
    finally:
        zptFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)
    if REQUEST is None:
        # called from code: there is nothing to redirect
        return ''
    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
857
858
859

Note: See TracBrowser for help on using the repository browser.

Download in other formats: