Context Navigation

source: documentViewer/documentViewer.py @ 448:35e7ea6f2368

Last change on this file since 448:35e7ea6f2368 was 448:35e7ea6f2368, checked in by abukhman, 13 years ago
* empty log message *
File size: 38.9 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log ``txt`` directly followed by ``txt2`` through the root logger.

    ``method`` is used as the numeric log level when it is an int (callers in
    this file pass ``logging.INFO``).  Any other value falls back to INFO,
    the level that was used unconditionally before.
    """
    if isinstance(method, int):
        level = method
    else:
        level = logging.INFO
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only conversion errors are handled, so interrupts and programming errors
    are no longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    Returns an empty string when ``nodename`` is None.  Text inside child
    elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding="utf-8"):
    """Return ``node`` serialized as an XML string in the given encoding."""
    buf = cStringIO.StringIO()
    # Domlette writes the serialization into the in-memory buffer
    Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
    xml = buf.getvalue()
    buf.close()
    return xml
55
def browserCheck(self):
    """Inspect the User-Agent header of self.REQUEST and describe the browser.

    Returns a dict with the keys 'ua', 'isIE', 'isN4', 'versFirefox',
    'versIE', 'versSafariChrome', 'versOpera', 'isMac', 'isWin', 'isIEWin',
    'isIEMac' and 'staticHTML'.  Version entries stay "" when the respective
    browser is not recognized.  A missing header no longer raises; every probe
    then reports "not detected".
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # probe an empty string when the header is absent (None)
    ua = rawUa or ""

    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if ua.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (ua.find('Mozilla/4.') > -1)

    # Text from the first '(' and from the first ')' onwards.  When the
    # character is absent find() returns -1 and the slice keeps only the last
    # character -- same as before; the probes below then simply find nothing.
    fromParen = ua[ua.find('('):]
    afterParen = ua[ua.find(')'):]

    # Safari or Chrome identification: tokens after the second parenthesized group
    try:
        secondGroup = afterParen[afterParen.find('('):]
        tail = secondGroup[secondGroup.find(')'):]
        tokens = tail.split(" ")
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry inside the parentheses
    try:
        ie = fromParen.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third space-separated token after the first ')'.
    # The token is computed here instead of relying on a variable left over
    # from the Safari probe, which was undefined whenever that probe failed early.
    try:
        candidate = afterParen.split(" ")[2]
        if candidate.find("Firefox") > -1:
            version = candidate.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if ua.find("Opera") > -1:
            operaTail = fromParen[fromParen.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = ua.find('Macintosh') > -1
    bt['isWin'] = ua.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
116
117
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component."""
    cut = path.rfind('/')
    if cut < 0:
        # no separator at all: nothing is left after dropping the only component
        return ''
    return path[:cut]
121
122
123	def getHttpData(url, data=None, num_tries=3, timeout=10):
124	"""returns result from url+data HTTP request"""
125	# we do GET (by appending data to url)
126	if isinstance(data, str) or isinstance(data, unicode):
127	# if data is string then append
128	url = "%s?%s"%(url,data)
129	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
130	# urlencode
131	url = "%s?%s"%(url,urllib.urlencode(data))
132
133	response = None
134	errmsg = None
135	for cnt in range(num_tries):
136	try:
137	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
138	if sys.version_info < (2, 6):
139	# set timeout on socket -- ugly :-(
140	import socket
141	socket.setdefaulttimeout(float(timeout))
142	response = urllib2.urlopen(url)
143	else:
144	response = urllib2.urlopen(url,timeout=float(timeout))
145	# check result?
146	break
147	except urllib2.HTTPError, e:
148	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
149	errmsg = str(e)
150	# stop trying
151	break
152	except urllib2.URLError, e:
153	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
154	errmsg = str(e)
155	# stop trying
156	#break
157
158	if response is not None:
159	data = response.read()
160	response.close()
161	return data
162
163	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
164	#return None
165
166	##
167	## documentViewer class
168	##
class documentViewer(Folder):
    """document viewer"""
    # meta type under which this class is registered
    meta_type="Document viewer"

    # security declarations for this class are collected here
    security=ClassSecurityInfo()
    # the Folder management tabs plus one tab pointing to the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms (page template files shipped with the product)
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
198
199
200	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
201	"""init document viewer"""
202	self.id=id
203	self.title=title
204	self.thumbcols = thumbcols
205	self.thumbrows = thumbrows
206	# authgroups is list of authorized groups (delimited by ,)
207	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
208	# create template folder so we can always use template.something
209
210	templateFolder = Folder('template')
211	#self['template'] = templateFolder # Zope-2.12 style
212	self._setObject('template',templateFolder) # old style
213	try:
214	import MpdlXmlTextServer
215	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
216	#templateFolder['fulltextclient'] = xmlRpcClient
217	templateFolder._setObject('fulltextclient',textServer)
218	except Exception, e:
219	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
220	try:
221	from Products.zogiLib.zogiLib import zogiLib
222	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
223	#templateFolder['zogilib'] = zogilib
224	templateFolder._setObject('zogilib',zogilib)
225	except Exception, e:
226	logging.error("Unable to create zogiLib for zogilib: "+str(e))
227
228
229	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """get a text page (delegates to template.fulltextclient.getTextPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
233
def getOrigPages(self, **args):
    """get original pages (delegates to template.fulltextclient.getOrigPages)"""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
237
def getOrigPagesNorm(self, **args):
    """get normalized original pages (delegates to template.fulltextclient.getOrigPagesNorm)"""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
241
def getQuery(self, **args):
    """get query in search (delegates to template.fulltextclient.getQuery)"""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
245
def getSearch(self, **args):
    """get search (delegates to template.fulltextclient.getSearch)"""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
249
def getGisPlaces(self, **args):
    """get gis places (delegates to template.fulltextclient.getGisPlaces)"""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
253
def getAllGisPlaces(self, **args):
    """get all gis places (delegates to template.fulltextclient.getAllGisPlaces)"""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
257
def getTranslate(self, **args):
    """get translate (delegates to template.fulltextclient.getTranslate)"""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
261
def getLemma(self, **args):
    """get lemma (delegates to template.fulltextclient.getLemma)"""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
265
def getLemmaQuery(self, **args):
    """get lemma query (delegates to template.fulltextclient.getLemmaQuery)"""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
269
def getLex(self, **args):
    """get lex (delegates to template.fulltextclient.getLex)"""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
273
def getToc(self, **args):
    """get toc (delegates to template.fulltextclient.getToc)"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
277
def getTocPage(self, **args):
    """get toc page (delegates to template.fulltextclient.getTocPage)"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
281
282
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbnail overview with the thumbs_main_rss template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes text when the document has text information, images otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text when a text URL or text URL path is set
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
315
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    main view of the document, rendered with the viewer_main template
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, passed through generateMarks to the template
    The request parameters characterNormalization (reg, norm, none) and
    querySearch (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph,
    fulltextMorphLemma) are read from the request by getPageinfo.
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every toc mode except thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # auto mode: text view when a text URL or text URL path is set
        if docinfo.has_key('textURL') or docinfo.get('textURLPath',None):
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    viewerTemplate = getattr(getattr(self, 'template'), 'viewer_main')
    marks = self.generateMarks(mk)
    return viewerTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
358
def generateMarks(self,mk):
    # Build the "mk=..." query fragment for one mark or a list of marks.
    # Returns "" for None.  Several marks are joined with '&'; they used to be
    # concatenated without any separator ("mk=1mk=2").
    # (Deliberately a comment, not a docstring: the method had none before.)
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk=[mk]
    return "&".join(["mk=%s"%m for m in mk])
368
369
def getBrowser(self):
    """returns the browser information dict produced by browserCheck"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
375
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib

    Returns None when there is no template folder or no zogilib object in it
    (its creation in __init__ is allowed to fail), so callers can fall back to
    their default URL.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        logging.warning("documentViewer (findDigilibUrl) no zogilib in template folder")
        return None
    return zogilib.getDLBaseUrl()
380
def getDocumentViewerURL(self):
    """returns the absolute URL of this viewer instance"""
    viewerUrl = self.absolute_url()
    return viewerUrl
384
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended when idx equals selected"""
    if idx == selected:
        style = style + 'sel'
    return style
392
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    Starts from a copy of the current request form parameters.

    param, val -- single parameter to change; val=None removes it
    params     -- dict of parameters to change; a None value removes the key
    baseUrl    -- base of the link, defaults to REQUEST['URL1']
    paramSep   -- separator between the key=value pairs

    Non-string values are converted with str() and list/tuple values are
    written as repeated keys; both used to fail while quoting.
    """
    # copy existing request params
    urlParams=self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    # a link built from a first request in filepath mode points to the
    # parent directory in imagepath mode
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            if not isinstance(item, basestring):
                item = str(item)
            pairs.append("%s=%s"%(k,urllib.quote_plus(item,'/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
430
431
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """link to documentviewer with parameter param set to val

    Calls getLink with '&' as parameter separator.
    NOTE(review): the name suggests an HTML-escaped separator -- confirm that
    the plain '&' is intended.
    """
    separator = '&'
    return self.getLink(param, val, params, baseUrl, separator)
435
def getInfo_xml(self,url,mode):
    """returns info about the document as XML (rendered with the info_xml template)"""
    # getattr with default: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
445
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """returns the state of a toggle option kept in the session

    The first call stores initialState.  On later calls the state flips
    whenever newState differs from the value seen on the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
460
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' access is always granted.  A missing access type or one listed in
    self.authgroups requires a logged-in (non-anonymous) user.  Any other
    access type is refused and logged as an error.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
480
481
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """reads the directory info for path from digilib and stores the number of pages

    path    -- image directory path sent as fn parameter to dirInfo-xml.jsp
    docinfo -- dict to update (a new one is created if None)
    cut     -- number of trailing path components to drop before the request

    Sets docinfo['numPages'] (0 if the answer has no //dir/size element) and
    returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string lacked a placeholder, so the sizes were never logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
511
def getIndexMetaPath(self,url):
    """returns only the path: "/mpiwg/online/" + (experimental|permanent) + "/" + rest

    Returns "" when url contains neither "experimental/" nor "permanent/".
    """
    # The pattern in the listing (".(experimental\|permanent)/(.)") matched a
    # literal '|' and single characters only, so it never matched a real path.
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
520
521
522
def getIndexMetaUrl(self,url):
    """returns URL of the index.meta document at url

    A real URL (http:// or https://) is returned unchanged.  Anything else is
    treated as an online path: "/mpiwg/online" is removed, the path is sent to
    the Texter servlet of the digilib server, and "/index.meta" is appended if
    missing.
    """
    # https URLs used to be mistaken for online paths
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
538
def getDomFromIndexMeta(self, url):
    """fetches the index.meta document for url and returns it parsed as DOM"""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
551
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    A real URL (http:// or https://) is fetched as it is.  Anything else is
    treated as an online path and fetched through the Texter servlet of the
    digilib server with "/mpiwg/online" removed.
    """
    # https URLs used to be mistaken for online paths
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
570
571
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Sets docinfo['accessType'] to the type attribute of
    access-conditions/access.  For the types 'group' and 'institution' the
    lower-cased text of the access name element is stored instead.  The value
    is None when there is no access element.  Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        # index.meta is expected `cut` levels above path
        metaPath = path
        for _ in range(cut):
            metaPath = getParentDir(metaPath)
        dom = self.getDomFromIndexMeta(metaPath)

    accessType = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs and (len(typeAttrs)>0):
        accessType = typeAttrs[0].value
        if accessType in ('group', 'institution'):
            nameNodes = dom.xpath("//access-conditions/access/name")
            accessType = getTextFromNode(nameNodes[0]).lower()

    docinfo['accessType'] = accessType
    return docinfo
594
595
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Stores in docinfo: 'indexMetaPath', 'bib' (all raw fields below the bib
    element, if any), 'bib_type', and -- when a mapping for the bib type
    exists -- 'author', 'title', 'year' plus a set of fixed fields that
    default to ''.  Returns docinfo.

    NOTE(review): the nesting below was reconstructed from a listing without
    indentation; in particular confirm that the fixed fields (lang, city, ...)
    belong inside the mapping branch.
    """
    #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        # index.meta is expected `cut` levels above path
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    # (self.metadata is not defined in this class -- presumably provided by the
    # surrounding Zope site; verify)
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # mapped fields: left unset when the element is missing
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # fixed fields: set to '' when the element is missing
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            docinfo['lang']=''
        try:
            docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])
        except:
            docinfo['city']=''
        try:
            docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])
        except:
            docinfo['number_of_pages']=''
        try:
            docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])
        except:
            docinfo['series_volume']=''
        try:
            docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])
        except:
            docinfo['number_of_volumes']=''
        try:
            docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])
        except:
            docinfo['translator']=''
        try:
            docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])
        except:
            docinfo['edition']=''
        try:
            docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])
        except:
            docinfo['series_author']=''
        try:
            docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])
        except:
            docinfo['publisher']=''
        try:
            docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])
        except:
            docinfo['series_title']=''
        try:
            docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])
        except:
            docinfo['isbn_issn']=''
    #logging.debug("I NEED BIBTEX %s"%docinfo)
    return docinfo
690
691
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets name info from the index.meta file at path or given by dom

    Sets docinfo['name'] to the text of /resource/name, or to '' (with a
    warning) when the element is missing, instead of failing with an
    IndexError.  Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        # index.meta is expected `cut` levels above path
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=''
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
705
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Collects image path/URL, viewer URL, text URLs, bibliographical, name and
    access information from the index.meta at url (or the given dom) into
    docinfo and returns it.  Raises IOError when no archive path can be
    determined.

    NOTE(review): the nesting below was reconstructed from a listing without
    indentation -- confirm against the repository.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        # image directory is relative to the archive path; digilib paths do not
        # carry the /mpiwg/online prefix
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no URL scheme: path relative to the archive
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # short form: everything before the first '.'
        textUrlkurz = string.split(textUrl, ".")[0]
        docinfo['textURLPathkurz'] = textUrlkurz
        #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)


    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom)
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrls and (len(presentationUrls) > 0): # overwrite these with the presentation information
        # the presentation url results from replacing index.meta in the metadata url
        # with the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
813
814
815	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
816	"""gets the bibliographical information from the preseantion entry in texttools
817	"""
818	dom=self.getPresentationInfoXML(url)
819	try:
820	docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
821	except:
822	pass
823	try:
824	docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
825	except:
826	pass
827	try:
828	docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
829	except:
830	pass
831	return docinfo
832
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """collects docinfo for a path to the images

    The index.meta file is assumed to be one level above the image directory.

    path    -- image path; the "/mpiwg/online" prefix is removed
    docinfo -- dict to update (a new one is created if None)
    cut     -- number of trailing path components to drop to reach the image
               directory
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the scaler URL points to the directory `cut` levels above the given path
    scalerPath = imagePath
    for _ in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is one level higher than the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
854
855
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo cached in the session is reused when its mode and url match.
    Otherwise a new one is built for the mode ('texttool', 'imagepath' or
    'filepath') and stored in the session.  Raises ValueError for any other
    mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))

    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        cached = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    if not docinfo.has_key('textURLPath'):
        docinfo['textURLPath'] = None

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
886
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    current    -- current page number (anything getInt can convert)
    start      -- first page of the thumbnail group; defaults to the first
                  page of the group that contains current
    rows, cols -- thumbnail grid size, default self.thumbrows / self.thumbcols
    docinfo    -- document info dict; may be None (this used to raise a
                  TypeError in the table-of-contents part)
    viewMode, tocMode -- stored as given

    Search, highlight and paging parameters are read from the request with
    their defaults.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group of grpsize pages containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    # parameters taken from the request, with their defaults
    request = self.REQUEST
    for (key, default) in (
            ('characterNormalization', 'reg'),
            ('query', ''),
            ('queryType', ''),
            ('querySearch', 'fulltext'),
            ('textPN', '1'),
            ('highlightQuery', ''),
            ('highlightElementPos', ''),
            ('highlightElement', ''),
            ('tocPageSize', '30'),
            ('queryPageSize', '10'),
            ('tocPN', '1'),
            ('searchPN', '1'),
            ('s', ''),
            ):
        pageinfo[key] = request.get(key, default)

    # a non-numeric tocPN from the request falls back to page 1
    toc = getInt(pageinfo['tocPN'], default=1)
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        # cached toc: limit tocPN to the number of toc pages
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = getInt(pageinfo['tocPageSize'], default=30)
        if tocPageSize > 0:
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min(tocPages, toc)
    return pageinfo
942
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    authgroups is a comma separated list; the entries are stored stripped and
    lower-cased.  Redirects to manage_main when RESPONSE is given.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
952
def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
957
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and stores it in this
    container.  Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
965
966	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    # meta type under which this template class is registered
    meta_type="DocumentViewer Template"
970
971
def manage_addDocumentViewerTemplateForm(self):
    """Form for adding"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
976
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id in this container and
    fills it with the content of zpt/viewer_main.zpt from the package
    directory.  ``text`` and ``submit`` are not used.  When REQUEST is given
    the browser is redirected to the manage_main page of the new template;
    without a REQUEST (programmatic use) no redirect is attempted.
    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
996
997
998

Note: See TracBrowser for help on using the repository browser.

Download in other formats: