Context Navigation

source: documentViewer/documentViewer.py @ 386:48d27e100d19

Last change on this file since 386:48d27e100d19 was 386:48d27e100d19, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.6 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt,method,txt2):
    """Log txt directly followed by txt2 at INFO level on the root logger.

    The method argument is accepted but not used by this function.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, falling back to int(default).

    @param number: value to convert (string, number, None, ...)
    @param default: value used when number cannot be converted
    @return: int(number), or int(default) if the conversion fails
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems select the default; things like
        # KeyboardInterrupt are no longer swallowed
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text-node children of a node.

    Returns "" if nodename is None. Only direct children whose nodeType is
    TEXT_NODE contribute; text inside nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string in the given encoding.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory buffer
    and the buffer content is returned.
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def getBrowserType(self):
    """Return the browser type object, caching it in the session.

    If the session already has a 'browserType' entry it is returned;
    otherwise browserCheck(self) is called and its result is stored in the
    session under 'browserType' before being returned.
    """
    session = self.REQUEST.SESSION
    if session.has_key('browserType'):
        return session['browserType']

    bt = browserCheck(self)
    session.set('browserType', bt)
    logging.debug("documentViewer (BROWSER TYPE) bt %s"%bt)
    return bt
63
64
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
68
69
70	def getHttpData(url, data=None, num_tries=3, timeout=10):
71	"""returns result from url+data HTTP request"""
72	# we do GET (by appending data to url)
73	if isinstance(data, str) or isinstance(data, unicode):
74	# if data is string then append
75	url = "%s?%s"%(url,data)
76	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
77	# urlencode
78	url = "%s?%s"%(url,urllib.urlencode(data))
79
80	response = None
81	errmsg = None
82	for cnt in range(num_tries):
83	try:
84	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
85	if sys.version_info < (2, 6):
86	# set timeout on socket -- ugly :-(
87	import socket
88	socket.setdefaulttimeout(float(timeout))
89	response = urllib2.urlopen(url)
90	else:
91	response = urllib2.urlopen(url,timeout=float(timeout))
92	# check result?
93	break
94	except urllib2.HTTPError, e:
95	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
96	errmsg = str(e)
97	# stop trying
98	break
99	except urllib2.URLError, e:
100	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
101	errmsg = str(e)
102	# stop trying
103	#break
104
105	if response is not None:
106	data = response.read()
107	response.close()
108	return data
109
110	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
111	#return None
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """Folder subclass that displays documents through page templates."""
    meta_type="Document viewer"

    # security declarations for this class
    security=ClassSecurityInfo()
    # management tabs of Folder plus a 'main config' tab that opens
    # changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
145
146
147	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
148	"""init document viewer"""
149	self.id=id
150	self.title=title
151	self.thumbcols = thumbcols
152	self.thumbrows = thumbrows
153	# authgroups is list of authorized groups (delimited by ,)
154	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
155	# create template folder so we can always use template.something
156
157	templateFolder = Folder('template')
158	#self['template'] = templateFolder # Zope-2.12 style
159	self._setObject('template',templateFolder) # old style
160	try:
161	import MpdlXmlTextServer
162	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
163	#templateFolder['fulltextclient'] = xmlRpcClient
164	templateFolder._setObject('fulltextclient',textServer)
165	except Exception, e:
166	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
167	try:
168	from Products.zogiLib.zogiLib import zogiLib
169	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
170	#templateFolder['zogilib'] = zogilib
171	templateFolder._setObject('zogilib',zogilib)
172	except Exception, e:
173	logging.error("Unable to create zogiLib for zogilib: "+str(e))
174
175
# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward to template.fulltextclient.getTextPage with the given keyword arguments."""
    return self.template.fulltextclient.getTextPage(**args)

def getQuery(self, **args):
    """Forward to template.fulltextclient.getQuery with the given keyword arguments."""
    return self.template.fulltextclient.getQuery(**args)

def getPDF(self, **args):
    """Forward to template.fulltextclient.getPDF with the given keyword arguments."""
    return self.template.fulltextclient.getPDF(**args)

def getSearch(self, **args):
    """Forward to template.fulltextclient.getSearch with the given keyword arguments."""
    return self.template.fulltextclient.getSearch(**args)

def getGisPlaces(self, **args):
    """Forward to template.fulltextclient.getGisPlaces with the given keyword arguments."""
    return self.template.fulltextclient.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """Forward to template.fulltextclient.getAllGisPlaces with the given keyword arguments."""
    return self.template.fulltextclient.getAllGisPlaces(**args)

def getOrigPages(self, **args):
    """Forward to template.fulltextclient.getOrigPages with the given keyword arguments."""
    return self.template.fulltextclient.getOrigPages(**args)

def getNumPages(self, docinfo):
    """Forward to template.fulltextclient.getNumPages with docinfo."""
    return self.template.fulltextclient.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """Forward to template.fulltextclient.getNumTextPages with docinfo."""
    return self.template.fulltextclient.getNumTextPages(docinfo)

def getTranslate(self, **args):
    """Forward to template.fulltextclient.getTranslate with the given keyword arguments."""
    return self.template.fulltextclient.getTranslate(**args)

def getLemma(self, **args):
    """Forward to template.fulltextclient.getLemma with the given keyword arguments."""
    return self.template.fulltextclient.getLemma(**args)

def getToc(self, **args):
    """Forward to template.fulltextclient.getToc with the given keyword arguments."""
    return self.template.fulltextclient.getToc(**args)

def getTocPage(self, **args):
    """Forward to template.fulltextclient.getTocPage with the given keyword arguments."""
    return self.template.fulltextclient.getTocPage(**args)
228
229
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the document through the thumbs_main_rss template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes "text" if the
        docinfo has a textURL or textURLPath entry and "images" otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default so that an instance on which the attribute
    # was never set falls back to the lookup instead of failing
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        if 'textURL' in docinfo or 'textURLPath' in docinfo:
            # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
262
263	security.declareProtected('View','index_html')
264	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
265	'''
266	view it
267	@param mode: defines how to access the document behind url
268	@param url: url which contains display information
269	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
270	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
271	@param characterNormalization type of text display (reg, norm, none)
272	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
273	'''
274
275	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
276
277	if not hasattr(self, 'template'):
278	# this won't work
279	logging.error("template folder missing!")
280	return "ERROR: template folder missing!"
281
282	if not getattr(self, 'digilibBaseUrl', None):
283	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
284
285	docinfo = self.getDocinfo(mode=mode,url=url)
286
287	if tocMode != "thumbs":
288	# get table of contents
289	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
290
291	if viewMode=="auto": # automodus gewaehlt
292	if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
293	viewMode="text_dict"
294	else:
295	viewMode="images"
296
297	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
298
299	pt = getattr(self.template, 'viewer_main')
300	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
301
def generateMarks(self,mk):
    """Return the marks in mk as concatenated "mk=<value>" strings.

    @param mk: None, a single mark or a list of marks
    @return: "" for None, otherwise one "mk=..." per mark
    """
    if mk is None:
        return ""
    if isinstance(mk, list):
        marks = mk
    else:
        marks = [mk]
    # NOTE(review): the parts are joined without any separator, so two marks
    # give "mk=amk=b" -- confirm against the template that uses the value
    return "".join(["mk=%s"%m for m in marks])
311
312
def findDigilibUrl(self):
    """Return the result of getDLBaseUrl() of the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
317
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
321
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended if idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
329
def getLink(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    The link is built from a copy of the current request form. A val of
    None removes param from the link. If mode is "filepath" it is changed
    to "imagepath" and url is replaced by its parent directory.
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # if the first call used filepath, follow-up links use imagepath
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    query = urllib.urlencode(params)
    return self.REQUEST['URL1']+"?"+query
349
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer with request parameter param set to val.

    The link is built from a copy of the current request form. A val of
    None removes param from the link. Values are quoted with urllib.quote,
    keys are used as they are.
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    # NOTE(review): going by the method name the separator may have been
    # meant to be the HTML entity for '&' -- confirm against the repository
    pairs = ["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]
    return self.REQUEST['URL1']+"?"+"&".join(pairs)
365
def getInfo_xml(self,url,mode):
    """Return info about the document as XML, rendered by the info_xml template.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # getattr with a default so that an instance on which the attribute
    # was never set falls back to the lookup instead of failing
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
375
376
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    An accessType of 'free' is always granted. A missing accessType or one
    listed in self.authgroups is granted to every user whose name is not
    "Anonymous User". Any other accessType is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
396
397
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the size reported by digilib for a directory as docinfo['numPages'].

    Fetches dirInfo-xml.jsp below self.digilibBaseUrl for path (after
    dropping the last `cut` path components) and reads the first
    //dir/size element of the answer.

    @param path: directory path, sent to digilib as fn parameter
    @param docinfo: dict to update; a new dict is created if None
    @param cut: number of trailing path components to drop from path
    @return: docinfo with 'numPages' set (0 if there is no size element)
    @raise IOError: if the dir-info cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string needs a placeholder for its argument; the 1-tuple
    # makes sure the whole xpath result is formatted as a single value
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
427
def getIndexMetaPath(self,url):
    """Return only the path below /mpiwg/online for url.

    url must contain "experimental/" or "permanent/"; the result is
    "/mpiwg/online/" followed by that directory name and everything after
    it.

    @param url: URL or path of a document
    @return: the /mpiwg/online path, or "" if url contains neither directory
    """
    # '|' has to be an unescaped alternation and both wildcards need '*';
    # otherwise the pattern cannot match a real path
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
436
437
438
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    A url starting with "http://" is returned unchanged. Any other value
    is treated as an online path: "/mpiwg/online" is removed, the path is
    appended to the Texter servlet URL below self.digilibBaseUrl, and
    "/index.meta" is added unless the result already ends with
    "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    onlinePath = url.replace("/mpiwg/online","")
    metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+onlinePath
    if metaUrl.endswith("index.meta"):
        return metaUrl
    return metaUrl + "/index.meta"
454
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as a DOM.

    @raise IOError: if the document cannot be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
467
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as a DOM.

    A url starting with "http://" is fetched as it is. Any other value is
    treated as an online path and fetched through the Texter servlet below
    self.digilibBaseUrl, with "/mpiwg/online" removed.

    @raise IOError: if the document cannot be read
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
486
487
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta file as docinfo['accessType'].

    @param path: path of the index.meta file (only used if dom is None)
    @param docinfo: dict to update; a new dict is created if None
    @param dom: parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: docinfo with 'accessType' set. The value is the type attribute
        of access-conditions/access; for the types 'group' and 'institution'
        it is the lower-cased text of the access name element instead. It is
        None if there is no access type.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # without a name the bare type is kept instead of failing
                # with an IndexError
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
510
511
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Add bibliographical info from an index.meta file to docinfo.

    Sets 'indexMetaPath', 'bib' (all child elements of bib as a dict, if
    there are any) and 'bib_type'. If the metadata mapping for the bib type
    has a non-empty author entry, 'author', 'title' and 'year' are set
    where they can be read, and 'lang' is set ('' if it cannot be read).

    @param path: path of the index.meta file
    @param docinfo: dict to update; a new dict is created if None
    @param dom: parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: the updated docinfo
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # types in index.meta are written with "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # best effort: a field that cannot be read is left out
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

    return docinfo
566
567
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from an index.meta file as docinfo['name'].

    @param path: path of the index.meta file (only used if dom is None)
    @param docinfo: dict to update; a new dict is created if None
    @param dom: parsed index.meta; fetched from path if None
    @param cut: number of trailing path components to drop before fetching
    @return: docinfo with 'name' set ("" if there is no name element)
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        # a missing name gives an empty name instead of an IndexError
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
581
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool tag of an index.meta file.

    Reads archive path, image directory, viewer URL, text URLs and
    presentation info from the document and then adds bibliographical,
    name and access info through the other get...FromIndexMeta methods.

    @param url: URL or path of the index.meta file
    @param dom: parsed index.meta; fetched from url if None
    @param docinfo: dict to update; a new dict is created if None
    @return: the updated docinfo
    @raise IOError: if no archive path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path (startswith also copes with an empty element)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is no error: text mode is the standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])
        # 'imagePath' is not set when the image tag exists but gave no
        # usable path, hence get() instead of indexing
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # the presentation info overrides the bib info; its URL is the
        # index.meta URL with index.meta replaced by the relative
        # presentation path
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
685
686
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Add author, title and year from a texttool presentation document to docinfo.

    The presentation document is always fetched from url; the dom argument
    is not used. A field is only set if the document has the matching
    element (//author, //title, //date).

    @param url: URL or path of the presentation info document
    @param docinfo: dict to update; a new dict is created if None
    @return: the updated docinfo
    """
    if docinfo is None:
        # a usable dict is returned even if the caller passed none
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for key, xpath in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(xpath)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
704
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a directory of page images.

    path is the path to the images; the index.meta file is taken to be one
    level higher, so bib and access info are read with cut+1.

    @param path: path of the image directory
    @param docinfo: dict to update; a new dict is created if None
    @param cut: number of trailing path components to drop from path
    @return: the updated docinfo
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # shorten the path for the scaler URL
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # the index.meta file is one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
725
726
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, read according to mode.

    A docinfo stored in the session is reused if its mode and url match;
    otherwise a new one is built and stored in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url which contains display information
    @raise ValueError: if mode is not one of the known modes
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
753
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """Return a pageinfo dict for the current page.

    @param current: current page number
    @param start: first page of the thumbnail group; computed from current
        and the group size if it is not a valid number
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo dict; its 'numPages' and 'tocSize_<tocMode>'
        entries are used if present
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']
    @param characterNormalization: not used; the value is read from the request
    @return: the pageinfo dict
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start is the first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        pageinfo['numgroups'] = int(np // grpsize)
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    # docinfo defaults to None, so it is checked before the membership test
    tocSizeKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
805
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of this document viewer.

    Stores title, digilibBaseUrl, thumbrows and thumbcols as given and
    authgroups as a list of stripped, lower-cased group names. Redirects
    to manage_main if a RESPONSE is given.
    """
    # NOTE(review): the thumbrows/thumbcols defaults here (2, 5) are the
    # reverse of the __init__ defaults (thumbcols=2, thumbrows=5) -- confirm
    groups = [s.strip().lower() for s in authgroups.split(',')]
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
815
def manage_AddDocumentViewerForm(self):
    """Return the rendered form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
820
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to manage_main if a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
828
829	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta_type, used as template for the document viewer."""
    meta_type="DocumentViewer Template"
833
834
def manage_addDocumentViewerTemplateForm(self):
    """Return the rendered form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
839
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    The text and submit arguments are not used.

    @param id: id of the new template
    @param title: optional title of the new template
    @param REQUEST: if given, the response is redirected to the manage_main
        page of the new template
    @return: ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    f = open(templatePath,'r')
    try:
        txt = f.read()
    finally:
        # do not leave the file handle open
        f.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called without a request: there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
859
860
861

Note: See TracBrowser for help on using the repository browser.

Download in other formats: