Context Navigation

source: documentViewer/documentViewer.py @ 362:1ed87e862806

Last change on this file since 362:1ed87e862806 was 362:1ed87e862806, checked in by abukhman, 14 years ago
* empty log message *
File size: 33.5 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt,method,txt2):
    """Log txt followed by txt2 through the root logger at INFO level.

    method is accepted so existing three-argument calls keep working;
    its value is not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only conversion failures are handled (wrong type, unparsable text,
    non-finite float); anything else, e.g. KeyboardInterrupt, propagates
    instead of being swallowed.

    number  -- value to convert (request parameters arrive as strings or None)
    default -- fallback value; it is itself passed through int()
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    text inside nested elements is not collected.  None yields "".
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize node to XML with Domlette's Print and return it as a string.

    The output is collected in an in-memory buffer that is closed before
    returning.
    """
    out = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
        return out.getvalue()
    finally:
        out.close()
53
def getBrowserType(self):
    """Return the browser type object for the current session.

    The value is cached in the session under 'browserType'; on a miss it
    is computed with browserCheck(self), stored, logged and returned.
    """
    session = self.REQUEST.SESSION
    if not session.has_key('browserType'):
        bt = browserCheck(self)
        session.set('browserType', bt)
        logging.debug("documentViewer (BROWSER TYPE) bt %s"%bt)
        return bt
    return session['browserType']
63
64
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' at all yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
68
69
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET and return the response body.

    url       -- address to fetch
    data      -- optional query: a string is appended verbatim, a dict,
                 list or tuple is urlencoded first; anything else is ignored
    num_tries -- maximum number of attempts
    timeout   -- timeout in seconds for each attempt

    An HTTPError ends the attempts immediately; a URLError is retried
    until num_tries is used up.  Raises IOError when no response could
    be obtained.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        # if data is string then append as is
        query = data
    elif isinstance(data, (dict, list, tuple)):
        query = urllib.urlencode(data)
    if query is not None:
        # a second '?' would produce a malformed URL, so extend an
        # existing query string with '&' instead
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s"%(url,separator,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument here, so the timeout is
                # set on the socket module (note: process-wide default)
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e":
            # it needs no version-specific syntax
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no break: fall through to the next attempt

    if response is not None:
        # close the connection even if reading fails
        try:
            return response.read()
        finally:
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
112
113
114
115	##
116	## documentViewer class
117	##
class documentViewer(Folder):
    """Zope folder that displays a document as page images and/or text.

    Document information is read from index.meta files and from a
    digilib server; text functions are delegated to the 'fulltextclient'
    object inside the 'template' sub-folder.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # Class-level default: instances on which the attribute was never
    # assigned read as "not configured" instead of raising AttributeError.
    digilibBaseUrl = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        Stores the configuration and creates the 'template' sub-folder
        with a 'fulltextclient' (MpdlXmlTextServer) and a 'zogilib'
        object.  Failure to create either helper is logged, not raised.
        """
        self.id=id
        self.title=title
        # the argument used to be accepted but dropped; keep it so that
        # the configured digilib URL is actually used
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            e = sys.exc_info()[1]
            logging.error("Unable to create zogiLib for zogilib: "+str(e))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places """
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getOrigPages(self, **args):
        """get original page number """
        return self.template.fulltextclient.getOrigPages(**args)

    def getNumPages(self, docinfo):
        """get numpages"""
        return self.template.fulltextclient.getNumPages(docinfo)

    def getNumTextPages(self, docinfo):
        """get numpages text"""
        return self.template.fulltextclient.getNumTextPages(docinfo)

    def getTranslate(self, **args):
        """get translate"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage"""
        return self.template.fulltextclient.getTocPage(**args)


    def _ensureDigilibBaseUrl(self, fallback):
        """Set self.digilibBaseUrl if it is empty and return it.

        The value comes from zogilib (findDigilibUrl) or, failing that,
        from fallback.
        """
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback
        return self.digilibBaseUrl

    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # auto mode selected
            # text URL present -> show text, otherwise images
            if ('textURL' in docinfo) or ('textURLPath' in docinfo):
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # auto mode selected
            # text URL present -> show text, otherwise images
            if ('textURL' in docinfo) or ('textURLPath' in docinfo):
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Returns "mk=<value>" for every mark in mk (a single value or a
        # list), concatenated without a separator; None yields "".
        # Deliberately no docstring: this method had none before, and
        # adding one would make it publishable through the web.
        # NOTE(review): several marks run together ("mk=amk=b") --
        # confirm what the consuming template expects.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "".join(["mk=%s"%m for m in mk])


    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        url = self.template.zogilib.getDLBaseUrl()
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns style, with 'sel' appended if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def _linkParams(self, param, val):
        """Return a copy of the request form with param set to str(val).

        With val None the parameter is removed instead; with param None
        the copy is returned unchanged.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)
        return params

    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val"""
        params = self._linkParams(param, val)

        # if the first call used mode filepath, switch to imagepath now
        if params.get("mode", None) == "filepath":
            params["mode"] = "imagepath"
            params["url"] = getParentDir(params["url"])

        # quote values and assemble into query string
        ps = urllib.urlencode(params)
        url=self.REQUEST['URL1']+"?"+ps
        return url

    def getLinkAmp(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Unlike getLink the query string is assembled by hand and the
        filepath/imagepath rewrite is not applied.
        """
        params = self._linkParams(param, val)

        # quote values and assemble into query string
        logging.debug("XYXXXXX: %s"%repr(params.items()))
        pairs = []
        for (k, v) in params.items():
            if not isinstance(v, basestring):
                # urllib.quote only accepts strings; form values may be
                # lists or numbers
                v = str(v)
            pairs.append("%s=%s"%(k,urllib.quote(v)))
        ps = "&".join(pairs)
        url=self.REQUEST['URL1']+"?"+ps
        return url

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""
        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' is open to everybody; a missing access type or one listed
        in self.authgroups requires a user other than "Anonymous User";
        everything else is denied.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True
        elif access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is not None:
                return (user.getUserName() != "Anonymous User")
            else:
                return False

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """Store the page count reported by digilib's dirInfo in docinfo.

        path is shortened by cut components first.  docinfo['numPages']
        becomes the value of the first //dir/size element, or 0 when
        there is none.  Returns docinfo.
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string lacked a placeholder, so "%" raised
        # TypeError on Python 2 before the page count was ever read
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%(sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """Return the /mpiwg/online/... path for url.

        url must contain 'experimental/' or 'permanent/'; the result is
        '/mpiwg/online/' + that component + '/' + everything after it.
        Returns "" when neither component occurs.
        """
        # the pattern used to read r".(experimental\|permanent)/(.)":
        # the escaped pipe matched only a literal '|' and each '.' only
        # one character, so no real path could match
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))



    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url

        http:// URLs are returned unchanged; online paths are turned
        into a digilib Texter URL ending in index.meta.
        """
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url"""
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the access type, to the lower-cased
        access name for types 'group' and 'institution', or to None when
        no access element exists.  Returns docinfo.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype and (len(acctype)>0):
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                # without a name element the bare type is kept, which
                # isAccessible treats as unknown (access denied)
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Fills docinfo with 'indexMetaPath', 'bib' (all raw bib fields),
        'bib_type' and, where the metadata mapping provides them,
        'author', 'title', 'year' and 'lang'.  Returns docinfo.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib and len(bib)>0:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype and (len(bibtype)>0):
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # index.meta uses "-" where the mapping uses " "
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # best effort: a field that is unmapped or absent is skipped
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            try:
                docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
            except Exception:
                docinfo['lang']=''

        return docinfo


    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom

        Sets docinfo['name'] to the text of /resource/name, or to ""
        when that element is missing.  Returns docinfo.
        """
        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        names = dom.xpath("/resource/name")
        if names:
            docinfo['name']=getTextFromNode(names[0])
        else:
            # getDocinfoFromTextTool tolerates a missing name, so do not
            # fail with IndexError here either
            docinfo['name']=""
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Collects image, text, bibliographic and access information for
        the document described by the index.meta at url (or by dom).
        Raises IOError when no archive path can be determined.
        Returns docinfo.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames and (len(archiveNames) > 0):
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths and (len(archivePaths) > 0):
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an
            # empty element, where indexing [0] would fail)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs and (len(imageDirs) > 0):
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # no image tag is not an error because text mode is standard
            imageDir = ""
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls and (len(viewerUrls) > 0):
            viewerUrl = getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls and (len(textUrls) > 0):
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls and (len(textUrls) > 0):
            textUrl = getTextFromNode(textUrls[0])
            docinfo['textURLPath'] = textUrl
            # .get: an <image> element that is present but empty leaves
            # 'imagePath' unset
            if not docinfo.get('imagePath'):
                # text-only, no page images
                docinfo = self.getNumTextPages(docinfo)

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls and (len(presentationUrls) > 0):
            # overwrite the bib values with the presentation information;
            # the presentation URL is the metadata URL with index.meta
            # replaced by the relative path of the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The presentation document is always fetched from url; the dom
        argument is ignored.  author, title and year are overwritten in
        docinfo where the document provides them.  Returns docinfo.
        """
        if docinfo is None:
            # without this every assignment below failed silently and
            # None was returned
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            # best effort: keep the existing value when the element is missing
            try:
                docinfo[field]=getTextFromNode(dom.xpath(xpath)[0])
            except Exception:
                pass
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; the index.meta file is assumed to be one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        # index.meta is one level above the image directory, hence cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        A docinfo cached in the session is reused when its mode and url
        match; otherwise a new one is built and cached.  Raises
        ValueError for an unknown mode.
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        session = self.REQUEST.SESSION
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        session['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
        """returns pageinfo with the given parameters

        Thumbnail paging values are computed from current, start, rows
        and cols (defaulting to self.thumbrows / self.thumbcols); search
        and table-of-contents settings are read from the request.  The
        characterNormalization argument is not used; the value comes
        from the request.
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start: first page of the thumbnail group containing current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode

        request = self.REQUEST
        pageinfo['characterNormalization'] = request.get('characterNormalization',' ')
        pageinfo['query'] = request.get('query',' ')
        pageinfo['queryType'] = request.get('queryType',' ')
        pageinfo['querySearch'] =request.get('querySearch', 'fulltext')
        pageinfo['textPN'] = request.get('textPN','1')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =request.get('queryPageSize', '10')
        pageinfo['tocPN'] = request.get('tocPN', '1')
        toc = int (pageinfo['tocPN'])
        pageinfo['textPages'] = toc

        tocKey = 'tocSize_%s'%tocMode
        # docinfo defaults to None, so guard before the membership test
        if (docinfo is not None) and (tocKey in docinfo):
            tocSize = int(docinfo[tocKey])
            tocPageSize = int(pageinfo['tocPageSize'])
            # cached toc: clamp the requested page to the last toc page
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
        pageinfo['searchPN'] =request.get('searchPN','1')
        pageinfo['sn'] =request.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """Update the viewer configuration; redirects to manage_main when RESPONSE is given."""
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
812
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    bound = form.__of__(self)
    return bound()
817
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id inside self.

    Redirects to 'manage_main' when a RESPONSE object is supplied.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
825
826	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass that only sets its own meta_type."""

    meta_type = "DocumentViewer Template"
830
831
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    bound = form.__of__(self)
    return bound()
836
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled from the packaged viewer_main.zpt.

    id      -- id of the new template object inside self
    title   -- optional title for the template
    text    -- not used; the content always comes from zpt/viewer_main.zpt
    REQUEST -- when given, the browser is redirected to the new
               template's manage_main
    submit  -- not used

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaking the handle
    source = open(templatePath,'r')
    try:
        txt = source.read()
    finally:
        source.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # REQUEST defaults to None (programmatic call): nothing to
        # redirect, and dereferencing it below would fail
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
856
857
858

Note: See TracBrowser for help on using the repository browser.

Download in other formats: