Context Navigation

source: documentViewer/documentViewer.py @ 425:fd8bef319208

Last change on this file since 425:fd8bef319208 was 425:fd8bef319208, checked in by abukhman, 13 years ago
browserCheck
File size: 36.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log the concatenation of ``txt`` and ``txt2``.

    @param txt: first part of the message (usually the caller's name)
    @param method: logging level such as ``logging.INFO``; values that are
        not integer levels fall back to INFO (the level that was
        previously used unconditionally)
    @param txt2: second part of the message
    """
    message = txt + txt2
    if isinstance(method, int):
        # honour the level the caller asked for instead of ignoring it
        logging.log(method, message)
    else:
        logging.info(message)
26
27
def getInt(number, default=0):
    """Convert ``number`` to an int, falling back to ``int(default)``.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when ``number`` cannot be converted
    @return: ``int(number)`` or, on a conversion problem, ``int(default)``
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are handled here; a bare except would
        # also swallow KeyboardInterrupt and SystemExit
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns the empty string when ``nodename`` is None or has no text
    children; text inside nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize ``node`` with Domlette's Print and return the XML string.

    @param node: node handed to ``Ft.Xml.Domlette.Print``
    @param encoding: output encoding passed through to ``Print``
    """
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        stream.close()
53
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads ``HTTP_USER_AGENT`` from ``self.REQUEST`` and returns a dict with
    the keys ``ua``, ``isIE``, ``isN4``, ``versIE``, ``versFirefox``,
    ``versSafariChrome``, ``versOpera``, ``isMac``, ``isWin``, ``isIEWin``,
    ``isIEMac`` and ``staticHTML``.  Version entries stay ``""`` when the
    respective browser is not recognised.

    A missing User-Agent header is treated as an empty string, and every
    detector only depends on the parts of the header it inspects itself.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # requests without a User-Agent header must not break the page
    ua = rawUa or ""

    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Tail of the header starting at the first "(" resp. ")".  When the
    # bracket is missing find() yields -1 and the slice is just the last
    # character -- the detectors below then simply do not match.
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome: "... (KHTML, like Gecko) <Name>/<version> Safari/..."
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE: second "; "-separated entry inside the brackets is "MSIE <version>"
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox: third space-separated token after ")" is "Firefox/<version>"
    try:
        ie1 = nav1.split(" ")[2]
        if "Firefox" in ie1:
            nav5 = ie1.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            # second component of the dotted version, as before
            bt['versFirefox'] = nav5.split(".")[1]
    except IndexError:
        pass

    # Opera: version is the text after the second "/" following ")"
    try:
        if "Opera" in ua:
            operaTail = nav[nav.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
114
115
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    head, _sep, _last = path.rpartition('/')
    return head
119
120
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch ``url`` with an HTTP GET request and return the response body.

    @param url: URL to fetch
    @param data: optional query; a string is appended after "?" as is, a
        dict/list/tuple is urlencoded first, anything else is ignored
    @param num_tries: maximum number of attempts
    @param timeout: timeout in seconds for each attempt
    @return: the data read from the response
    @raise IOError: when no response could be obtained
    """
    # we do GET (by appending data to url)
    if isinstance(data, str) or isinstance(data, unicode):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter before Python 2.6, so the
                # default timeout for all new sockets is changed instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # got a response -- no further attempts (the result is not checked)
            break
        except urllib2.HTTPError, e:
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError, e:
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no break here: URL errors are retried until num_tries is used up

    if response is not None:
        data = response.read()
        response.close()
        return data

    # all attempts failed; errmsg holds the text of the last error
    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
163
164
165
166	##
167	## documentViewer class
168	##
169	class documentViewer(Folder):
170	"""document viewer"""
171	meta_type="Document viewer"
172
173	security=ClassSecurityInfo()
174	manage_options=Folder.manage_options+(
175	{'label':'main config','action':'changeDocumentViewerForm'},
176	)
177
178	# templates and forms
179	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
180	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
181	toc_text = PageTemplateFile('zpt/toc_text', globals())
182	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
183	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
184	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
185	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
186	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
187	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
188	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
189	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
190	head_main = PageTemplateFile('zpt/head_main', globals())
191	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
192	info_xml = PageTemplateFile('zpt/info_xml', globals())
193
194
195	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
196	security.declareProtected('View management screens','changeDocumentViewerForm')
197	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
198
199
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a document viewer and its 'template' sub-folder.

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as ``dlServerURL``
    @param textServerName: passed to MpdlXmlTextServer as ``serverName``
    @param title: title of the viewer
    @param digilibBaseUrl: base URL of the digilib server (may be None)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups

    Failures to create the text server or the zogiLib object are logged
    and do not abort the construction.
    """
    self.id=id
    self.title=title
    # keep the configured base URL; the view methods read this attribute
    # and only fall back to zogilib / a default when it is empty
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        # sys.exc_info() keeps this independent of the except-clause syntax
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
227
228
229	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward the call to ``getTextPage`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
233
def getQuery(self, **args):
    """Forward the call to ``getQuery`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
237
def getSearch(self, **args):
    """Forward the call to ``getSearch`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
241
def getGisPlaces(self, **args):
    """Forward the call to ``getGisPlaces`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
245
def getAllGisPlaces(self, **args):
    """Forward the call to ``getAllGisPlaces`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
249
def getTranslate(self, **args):
    """Forward the call to ``getTranslate`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
253
def getLemma(self, **args):
    """Forward the call to ``getLemma`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
257
def getToc(self, **args):
    """Forward the call to ``getToc`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
261
def getTocPage(self, **args):
    """Forward the call to ``getTocPage`` of ``template.fulltextclient``."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
265
266
267	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" becomes "text" when
        the docinfo has a text URL and "images" otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute may be missing on the instance; index_html
    # guards the same lookup in the same way
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text URL set (and text viewer configured) -> show text
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
299
300	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the 'viewer_main' template for the document behind url.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, rendered through generateMarks

    Returns an error string when the 'template' folder is missing.
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        fallbackUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = self.findDigilibUrl() or fallbackUrl

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # text URL set (and text viewer configured) -> text view
        hasText = ('textURL' in docinfo) or docinfo.get('textURLPath',None)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    pt = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
341
def generateMarks(self,mk):
    """Return the marks as a string of ``mk=<value>`` entries.

    @param mk: None, a single mark or a list of marks
    @return: "" for None, otherwise the ``mk=...`` entries concatenated
        without any separator
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
351
352
def getBrowser(self):
    """Return the browser description dict produced by browserCheck."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
358
def findDigilibUrl(self):
    """Return the digilib base URL reported by ``template.zogilib``."""
    return self.template.zogilib.getDLBaseUrl()
363
def getDocumentViewerURL(self):
    """Return ``absolute_url()`` of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
367
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
375
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer built from the current request parameters.

    @param param: name of a single parameter to change
    @param val: new value for ``param``; None removes the parameter
    @param params: dict of further parameters to set; a value of None
        removes that parameter
    @param baseUrl: URL the query string is appended to; defaults to
        ``REQUEST['URL1']``
    @param paramSep: separator placed between the ``key=value`` pairs
    @return: ``baseUrl`` + "?" + query string ('/' is not escaped)
    """
    # copy existing request params
    urlParams=self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    # if the first call used mode "filepath", switch to "imagepath" now
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        if not isinstance(v, basestring):
            # quote_plus only handles strings; values given through
            # ``params`` or found in the form may be numbers
            v = str(v)
        pairs.append("%s=%s"%(k,urllib.quote_plus(v,'/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
413
414
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return the getLink URL for the given parameters with an explicit separator.

    NOTE(review): the separator passed here equals getLink's default; the
    method name suggests an HTML-escaped ampersand may have been intended
    -- confirm against the templates before changing it.
    """
    separator = '&'
    return self.getLink(param, val, params, baseUrl, separator)
418
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered by 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # getattr: the attribute may be missing on the instance; index_html
    # guards the same lookup in the same way
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
428
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of a session-stored option, toggling it on change.

    The session entry ``optionName`` holds ``{'lastState', 'state'}``.  On
    the first call ``state`` is ``initialState``; afterwards ``state`` is
    inverted whenever ``newState`` differs from the stored ``lastState``.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # the requested state changed since the last call -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session yet -- start with the initial state
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
443
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    'free' resources are always accessible.  A missing access type or one
    listed in ``self.authgroups`` requires a user other than
    "Anonymous User".  Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
463
464
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp for the number of pages below path.

    @param path: directory path sent to digilib as ``fn``
    @param docinfo: dict to update (a new dict is created when None)
    @param cut: number of trailing path components to remove first
    @return: docinfo with ``numPages`` set (0 when digilib reports no size)
    @raise IOError: when the directory info cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string needs a placeholder, otherwise the sizes are
    # silently dropped from the log message
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
494
def getIndexMetaPath(self,url):
    """Return only the archive path contained in url.

    Everything up to the last "experimental/" or "permanent/" component is
    replaced by "/mpiwg/online/".

    @return: "/mpiwg/online/<experimental|permanent>/<rest>" or "" when
        url contains neither component
    """
    # alternation must be an unescaped "|" and both wildcards need "*";
    # a literal "\|" can never match a real path
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
503
504
505
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A real URL (http:// or https://) is returned unchanged.  Any other
    value is treated as an archive path: "/mpiwg/online" is removed, the
    path is requested through digilib's Texter servlet and "/index.meta"
    is appended when missing.
    """
    # NOTE(review): nesting reconstructed -- the index.meta suffix is only
    # added for archive paths; confirm against the repository version.
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
521
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return its parsed DOM.

    @raise IOError: when no data could be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
534
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return its parsed DOM.

    A real URL (http:// or https://) is fetched as is; any other value is
    treated as an archive path and requested through digilib's Texter
    servlet after removing "/mpiwg/online".

    @raise IOError: when no data could be read
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
553
554
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the access type from the index.meta file at path or from dom.

    @param path: path of the index.meta (only used when dom is None)
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: already parsed index.meta; fetched from path when None
    @param cut: number of trailing path components to remove before fetching
    @return: docinfo with ``accessType`` set to the access type, to the
        lower-cased access name for 'group'/'institution' access, or to
        None when the document has no access conditions
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access = acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # keep the bare type instead of failing with an IndexError
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
577
578
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Collect bibliographical info from the index.meta file at path or from dom.

    @param path: path of the index.meta (also used for ``indexMetaPath``)
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: already parsed index.meta; fetched from path when None
    @param cut: number of trailing path components to remove before fetching
    @return: docinfo with ``indexMetaPath`` and ``bib_type`` set, ``bib``
        when the document has bib fields, and ``author``/``title``/``year``/
        ``lang`` when a usable mapping for the bib type exists

    NOTE(review): block nesting was reconstructed; in particular the
    placement of the final ``lang`` lookup inside the mapping branch
    should be confirmed against the repository version.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # types in index.meta use "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is optional: a missing element just leaves it unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            # no lang element -- fall back to the empty string
            docinfo['lang']=''

    return docinfo
633
634
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from index.meta as docinfo['name'].

    @param path: path of the index.meta (only used when dom is None)
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: already parsed index.meta; fetched from path when None
    @param cut: number of trailing path components to remove before fetching
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _step in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNode = dom.xpath("/resource/name")[0]
    docinfo['name'] = getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
648
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Build docinfo from the texttool tag of the index.meta at url.

    @param url: URL or archive path of the index.meta document
    @param dom: already parsed index.meta; fetched from url when None
    @param docinfo: dict to update (a new dict is created when None)
    @return: docinfo extended with image, text, bibliographic, name and
        access information found in the document
    @raise IOError: when no archive path can be determined

    NOTE(review): block nesting was reconstructed from the statement order;
    confirm against the repository version.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # a missing image tag is no longer an error because text mode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        # page count comes from digilib's directory info
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme -- not a URL, resolve against the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)


    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrls and (len(presentationUrls) > 0): # override the bib info with the presentation information
        # the presentation URL is the metadata URL with index.meta replaced
        # by the relative path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
753
754
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Get bibliographical information from the presentation entry in texttool.

    Fetches the presentation info XML at url and copies the text of the
    first ``author``, ``title`` and ``date`` elements into docinfo as
    ``author``, ``title`` and ``year``.  Fields without a matching element
    are left untouched.

    @param url: location of the presentation info document
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: accepted for call compatibility; the presentation document
        is always fetched from url
    @return: the updated docinfo
    """
    if docinfo is None:
        # same convention as the other docinfo helpers; previously the
        # resulting TypeError was swallowed and None was returned
        docinfo = {}

    dom = self.getPresentationInfoXML(url)
    fields = (('author', "//author"), ('title', "//title"), ('year', "//date"))
    for (key, expr) in fields:
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
772
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a directory of page images.

    path is the path to the images; the index.meta file is assumed to be
    one level above the image directory.

    @param path: image path ("/mpiwg/online" is removed)
    @param docinfo: dict to update (a new dict is created when None)
    @param cut: number of trailing path components to remove to get from
        path to the image directory
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # the Scaler URL points at the image directory (path shortened by cut)
    imageDir = path
    for _step in range(cut):
        imageDir = getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # index.meta lives one level above the images, hence cut+1
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    return docinfo
793
794
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, depending on mode.

    A docinfo cached in the session is reused when its mode and url match;
    otherwise a new one is built and stored in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url which contains display information
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    if 'textURLPath' not in docinfo:
        docinfo['textURLPath'] = None

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
825
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param current: current page number
    @param start: first page of the thumbnail group; defaults to the first
        page of the group that contains ``current``
    @param rows: thumbnail rows (defaults to ``self.thumbrows``)
    @param cols: thumbnail columns (defaults to ``self.thumbcols``)
    @param docinfo: docinfo dict; may be None
    @param viewMode: stored as ``pageinfo['viewMode']``
    @param tocMode: stored as ``pageinfo['tocMode']``; also selects the
        ``tocSize_<tocMode>`` entry of docinfo

    The remaining entries (query, text and toc paging parameters) are read
    from ``self.REQUEST`` with their defaults.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # explicit floor division keeps the result an int
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')

    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    tocSizeKey = 'tocSize_%s'%tocMode
    # docinfo defaults to None, which must not break the membership test
    if (docinfo is not None) and (tocSizeKey in docinfo):
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: limit the toc page number to the last toc page
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
878
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of this document viewer.

    @param title: new title
    @param digilibBaseUrl: new digilib base URL
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma-separated list of authorized groups
    @param RESPONSE: when given, redirected to 'manage_main'
    """
    groups = [name.strip().lower() for name in authgroups.split(',')]

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
888
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
893
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    When RESPONSE is given it is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
901
902	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class used for the templates of the document viewer."""

    # name under which this class is listed in Zope
    meta_type = "DocumentViewer Template"
906
907
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    addForm = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return addForm.__of__(self)()
912
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    @param id: id of the new template object
    @param title: optional title set on the template
    @param text: not used; the content always comes from zpt/viewer_main.zpt
    @param REQUEST: when given, the response is redirected to the new
        template's 'manage_main'
    @param submit: not used
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave closing the handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code rather than through the web: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
932
933
934

Note: See TracBrowser for help on using the repository browser.

Download in other formats: