Context Navigation

source: documentViewer/documentViewer.py @ 445:93b9a5dddb8b

Root_elementtree

Last change on this file since 445:93b9a5dddb8b was 445:93b9a5dddb8b, checked in by abukhman, 13 years ago
* empty log message *
File size: 38.4 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log txt followed by txt2 through the root logger.

    txt    -- first part of the message
    method -- logging level for the record (callers pass e.g. logging.INFO);
              any value that is not an integer level falls back to logging.INFO
    txt2   -- second part of the message, appended directly to txt
    """
    level = method
    if not isinstance(level, int):
        # legacy callers may hand in something that is not a level
        level = logging.INFO
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if it cannot be converted.

    Only conversion failures (wrong type, unparsable string, infinite float)
    trigger the fallback; other exceptions such as KeyboardInterrupt propagate.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    Only immediate children of type TEXT_NODE contribute; text nested in
    child elements is not included. A node of None yields "".
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding="utf-8"):
    """Return node printed as an XML string in the given encoding.

    The node is written with Ft.Xml.Domlette.Print into an in-memory
    buffer whose content is returned.
    """
    buf = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
    xml = buf.getvalue()
    buf.close()
    return xml
55
def _splitItem(text, separator, index):
    """Return element number index of text split on separator, or "" if there is none."""
    items = text.split(separator)
    if len(items) > index:
        return items[index]
    return ""


def browserCheck(self):
    """Inspect the request's User-Agent header and describe the browser.

    self must provide REQUEST.get_header(). Returns a dict with the keys
    'ua' (the raw header, "" if the header is missing), the booleans
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML'
    (always False) and the version strings 'versFirefox', 'versIE',
    'versSafariChrome' and 'versOpera' ("" when not detected).

    Every detection step is independent and never raises: a part of the
    header that cannot be parsed simply leaves the corresponding version "".
    """
    # a request without User-Agent header is treated like an empty one
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in ua
    # Netscape 4 is only considered when the browser is not IE
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in ua)
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # Tails of the header starting at the first "(" and at the first ")".
    # str.find returns -1 when the character is absent, so the slice then
    # degrades to the last character and nothing is detected below.
    fromOpen = ua[ua.find('('):]
    fromClose = ua[ua.find(')'):]

    # Safari or Chrome: "...) AppleWebKit/x (KHTML, like Gecko) Chrome/30.0 Safari/x"
    # -- the version is taken from the product right after the second ")"
    secondGroup = fromClose[fromClose.find('('):]
    afterSecondGroup = secondGroup[secondGroup.find(')'):]
    if "Safari" in _splitItem(afterSecondGroup, " ", 2):
        product = _splitItem(afterSecondGroup, " ", 1)
        bt['versSafariChrome'] = _splitItem(product, "/", 1)

    # IE: "(compatible; MSIE 7.0; ...)" -- second "; "-separated token
    ieToken = _splitItem(fromOpen, "; ", 1)
    if "MSIE" in ieToken:
        bt['versIE'] = _splitItem(ieToken, " ", 1)

    # Firefox: "...) Gecko/20100115 Firefox/3.6" -- second product after ")"
    ffToken = _splitItem(fromClose, " ", 2)
    if "Firefox" in ffToken:
        ffVersion = _splitItem(ffToken, "/", 1)
        logging.debug("FIREFOX: %s"%(ffVersion))
        # only the first three characters of the version are reported
        bt['versFirefox'] = ffVersion[0:3]

    # Opera: "Opera/9.80 (...) Presto/2.8.131 Version/11.10"
    if "Opera" in ua:
        afterGroup = fromOpen[fromOpen.find(')'):]
        bt['versOpera'] = _splitItem(afterGroup, "/", 2)

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
116
117
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    parent, _separator, _last = path.rpartition('/')
    return parent
121
122
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET request and return the response body.

    url       -- URL to fetch
    data      -- optional query: a ready-made query string, or a dict / list /
                 tuple that is passed through urllib.urlencode. It is appended
                 with '?' or, if url already contains a query string, with '&'.
    num_tries -- maximum number of attempts; only URLError failures are
                 retried, an HTTPError aborts immediately
    timeout   -- timeout in seconds for each attempt

    Raises IOError if no response could be obtained. The function never
    returns None.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        query = data
    elif isinstance(data, (dict, list, tuple)):
        query = urllib.urlencode(data)
    if query:
        # don't produce "...?a=1?b=2" when url already carries a query string
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s"%(url, separator, query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter before Python 2.6, so the
                # process-wide socket default has to be changed instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" keeps this block
            # parseable by every Python version
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection-level problem -- fall through and try again

    if response is None:
        raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))

    try:
        return response.read()
    finally:
        # release the connection even if reading fails
        response.close()
165
166	##
167	## documentViewer class
168	##
def _firstNodeText(dom, xpath, default=''):
    """Return the text of the first node matching xpath in dom, or default if nothing matches."""
    nodes = dom.xpath(xpath)
    if nodes:
        return getTextFromNode(nodes[0])
    return default


class documentViewer(Folder):
    """Folder-based viewer for digitized documents.

    Collects document information ("docinfo") from index.meta files and the
    digilib image server, computes paging information ("pageinfo") and
    renders both through the page templates found in the 'template'
    subfolder. Text related calls are forwarded to template.fulltextclient.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """Set up the viewer and its 'template' subfolder.

        id             -- Zope id of the viewer
        imageScalerUrl -- passed as dlServerURL to the zogiLib instance
        textServerName -- passed as serverName to the MpdlXmlTextServer
        title          -- title of the viewer
        digilibBaseUrl -- base URL of digilib; when empty it is determined on
                          first use (see _ensureDigilibBaseUrl)
        thumbcols, thumbrows -- layout of the thumbnail overview
        authgroups     -- comma separated names of authorized groups
        """
        self.id=id
        self.title=title
        # remember the configured URL; it used to be dropped here, so that
        # reading self.digilibBaseUrl later could fail
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            # the viewer is still created; text functions will not work
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))


    def _ensureDigilibBaseUrl(self, fallback):
        """Set self.digilibBaseUrl from zogilib, or to fallback, if it is not set yet."""
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback

    def _getTexterUrl(self, url):
        """Return url itself if it is a http(s) URL, else the digilib Texter servlet URL for the online path url."""
        if url.startswith(("http://", "https://")):
            # real URL
            return url
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        return server+url.replace("/mpiwg/online","")


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """forwards to getTextPage of the fulltextclient"""
        return self.template.fulltextclient.getTextPage(**args)

    def getOrigPages(self, **args):
        """forwards to getOrigPages of the fulltextclient"""
        return self.template.fulltextclient.getOrigPages(**args)

    def getOrigPagesNorm(self, **args):
        """forwards to getOrigPagesNorm of the fulltextclient"""
        return self.template.fulltextclient.getOrigPagesNorm(**args)

    def getQuery(self, **args):
        """forwards to getQuery of the fulltextclient"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """forwards to getSearch of the fulltextclient"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """forwards to getGisPlaces of the fulltextclient"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """forwards to getAllGisPlaces of the fulltextclient"""
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getTranslate(self, **args):
        """forwards to getTranslate of the fulltextclient"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """forwards to getLemma of the fulltextclient"""
        return self.template.fulltextclient.getLemma(**args)

    def getLemmaQuery(self, **args):
        """forwards to getLemmaQuery of the fulltextclient"""
        return self.template.fulltextclient.getLemmaQuery(**args)

    def getLex(self, **args):
        """forwards to getLex of the fulltextclient"""
        return self.template.fulltextclient.getLex(**args)

    def getToc(self, **args):
        """forwards to getToc of the fulltextclient"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """forwards to getTocPage of the fulltextclient"""
        return self.template.fulltextclient.getTocPage(**args)


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        render the thumbnail overview with the template thumbs_main_rss
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param start: first page of the thumbnail group
        @param pn: current page number
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # auto mode selected
            if ("textURL" in docinfo) or docinfo.get('textURLPath',None): # text url set and text viewer configured
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
        '''
        render the document with the template viewer_main
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param start: first page of the thumbnail group
        @param pn: current page number
        @param mk: mark or list of marks, passed to the template via generateMarks

        The following parameters are read from the request (see getPageinfo):
        characterNormalization: type of text display (reg, norm, none)
        querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # auto mode selected
            if ('textURL' in docinfo) or docinfo.get('textURLPath',None): # text url set and text viewer configured
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        if docinfo.get('textURLPath',None):
            page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)
            pageinfo['textPage'] = page
        tt = getattr(self, 'template')
        pt = getattr(tt, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Returns the query string fragment "mk=..." for one mark or a list
        # of marks ("" for None); several marks are joined with "&".
        # NOTE: deliberately documented with a comment instead of a
        # docstring -- adding a docstring would change whether Zope
        # publishes this method.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "&".join(["mk=%s"%m for m in mk])


    def getBrowser(self):
        """returns the browser description computed by browserCheck"""
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib (None if there is no template.zogilib)"""
        template = getattr(self, 'template', None)
        zogilib = getattr(template, 'zogilib', None)
        if zogilib is None:
            # zogilib could not be created -- let the caller use its fallback
            return None
        return zogilib.getDLBaseUrl()

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns style, with 'sel' appended if idx == selected"""
        if idx == selected:
            return style + 'sel'
        return style

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
        """returns URL to documentviewer with parameter param set to val or from dict params

        Starts from the parameters of the current request. A value of None
        removes the parameter. baseUrl defaults to REQUEST['URL1'];
        paramSep separates the parameters in the query string.
        """
        # copy existing request params
        urlParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                urlParams.pop(param, None)
            else:
                urlParams[param] = str(val)

        # change more params
        if params is not None:
            for k, v in params.items():
                if v is None:
                    # val=None removes param
                    urlParams.pop(k, None)
                else:
                    urlParams[k] = v

        # FIXME: does this belong here?
        if urlParams.get("mode", None) == "filepath": # if the first call used filepath, switch to imagepath now
            urlParams["mode"] = "imagepath"
            if "url" in urlParams:
                urlParams["url"] = getParentDir(urlParams["url"])

        # quote values and assemble into query string (not escaping '/')
        pairs = []
        for (k, v) in urlParams.items():
            if not isinstance(v, basestring):
                # quote_plus only accepts strings (values may be numbers)
                v = str(v)
            pairs.append("%s=%s"%(k,urllib.quote_plus(v,'/')))
        ps = paramSep.join(pairs)
        if baseUrl is None:
            baseUrl = self.REQUEST['URL1']

        url = "%s?%s"%(baseUrl, ps)
        return url


    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
        """like getLink, but with parameters separated by '&amp;' for use in HTML/XML"""
        return self.getLink(param, val, params, baseUrl, '&amp;')

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""
        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
        """returns the state of the option optionName kept in the session

        The state starts as initialState and is flipped every time newState
        differs from the newState of the previous call.
        """
        if not self.REQUEST.SESSION.has_key(optionName):
            # not in session -- initial
            opt = {'lastState': newState, 'state': initialState}
        else:
            opt = self.REQUEST.SESSION.get(optionName)
            if opt['lastState'] != newState:
                # state in session has changed -- toggle
                opt['state'] = not opt['state']
                opt['lastState'] = newState

        self.REQUEST.SESSION[optionName] = opt
        return opt['state']

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        Access type 'free' is always granted. No access type or one of the
        configured authgroups requires a user other than "Anonymous User".
        Every other access type is refused.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """puts the number of images in the digilib directory path into docinfo['numPages']

        cut is the number of trailing path components removed from path
        before digilib is asked. Returns docinfo (a new dict if None).
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

        if sizes:
            # unparsable size counts as 0 pages
            docinfo['numPages'] = getInt(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: /mpiwg/online/(experimental|permanent)/... or "" if url does not contain one"""
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))



    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url"""
        if url.startswith(("http://", "https://")):
            # real URL -- used unchanged
            return url

        # online path
        metaUrl = self._getTexterUrl(url)
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

        return metaUrl

    def getDomFromIndexMeta(self, url):
        """returns the parsed index.meta document for url (raises IOError if it can not be read)"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url (raises IOError if it can not be read)"""
        metaUrl = self._getTexterUrl(url)

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the type attribute of
        access-conditions/access (None if there is none). For the types
        'group' and 'institution' the lower-cased content of the name
        element is used instead, if there is one.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    # keep the bare type; isAccessible refuses it unless it
                    # happens to be one of the authgroups
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets in docinfo: 'indexMetaPath', 'bib' (dict of all children of the
        bib element, only if there are any), 'bib_type', the mapped fields
        'author', 'title' and 'year' (only if a mapping for the type exists
        and the element is present) and a fixed list of optional fields
        ('lang', 'city', ...) which are "" when the element is missing.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # types in index meta use "-" instead of " "
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        try:
            hasMapping = len(bibmap) > 0 and len(bibmap['author'][0]) > 0
        except (KeyError, IndexError, TypeError):
            hasMapping = False

        if hasMapping:
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    # best effort: without mapping entry or element the
                    # field is simply left unset
                    logging.debug("documentViewer (getbibinfofromindexmeta) no %s: %s"%(field, sys.exc_info()[1]))
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)

        # optional fields that are read directly by element name
        for field in ('lang', 'city', 'number_of_pages', 'series_volume',
                      'number_of_volumes', 'translator', 'edition',
                      'series_author', 'publisher', 'series_title',
                      'isbn_issn'):
            docinfo[field]=_firstNodeText(dom, "//bib/%s"%field, '')
        return docinfo


    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom

        Sets docinfo['name'] to the content of /resource/name ("" if the
        element is missing).
        """
        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['name']=_firstNodeText(dom, "/resource/name", '')
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image information ('imagePath', 'imageURL',
        'numPages'), 'viewerURL', text information ('textURL',
        'textURLPath', 'textURLPathkurz'), bibliographical, name and
        access information. Raises IOError if no archive path can be
        determined.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # no image tag is not an error anymore because textmode is now standard
            imageDir = ""
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            docinfo['textURLPath'] = textUrl
            # part of the path before the first "."
            docinfo['textURLPathkurz'] = textUrl.split(".")[0]

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls: # overwrite with the presentation information
            # the presentation url is the url of the metadata with
            # index.meta replaced by the relative path of the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        Reads the info.xml at url and overwrites 'author', 'title' and
        'year' in docinfo with the first author, title and date element
        found. The dom argument is not used.
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            nodes = dom.xpath(xpath)
            if nodes:
                docinfo[field]=getTextFromNode(nodes[0])
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        # the index.meta file is one level above the images, hence cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        mode is one of 'texttool', 'imagepath', 'filepath' (ValueError
        otherwise). The docinfo is kept in the session and reused as long
        as mode and url stay the same.
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        # FIXME: fake texturlpath
        if 'textURLPath' not in docinfo:
            docinfo['textURLPath'] = None

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
        """returns pageinfo with the given parameters

        current    -- current page number (0 if not a number)
        start      -- first page of the thumbnail group; defaults to the
                      first page of the group that contains current
        rows, cols -- thumbnail layout; default to thumbrows / thumbcols
        docinfo    -- if it has 'numPages', 'end' is limited and 'numgroups'
                      is set; if it has 'tocSize_<tocMode>', 'tocPN' is
                      limited to the number of toc pages
        The remaining entries are copied from the request (with defaults).
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            numPages = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], numPages)
            numgroups = numPages // grpsize
            if numPages % grpsize > 0:
                # partially filled last group
                numgroups += 1
            pageinfo['numgroups'] = numgroups
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        pageinfo['query'] = self.REQUEST.get('query','')
        pageinfo['queryType'] = self.REQUEST.get('queryType','')
        pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
        pageinfo['textPN'] = self.REQUEST.get('textPN','1')
        pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
        pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
        pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
        # request values are untrusted: fall back to the defaults instead
        # of failing on something that is not a number
        toc = getInt(pageinfo['tocPN'], 1)
        pageinfo['textPages'] = toc

        tocKey = 'tocSize_%s'%tocMode
        if (docinfo is not None) and (tocKey in docinfo):
            # cached toc
            tocSize = int(docinfo[tocKey])
            tocPageSize = getInt(pageinfo['tocPageSize'], 30)
            if tocPageSize > 0:
                # number of toc pages, rounded up
                tocPages = (tocSize + tocPageSize - 1) // tocPageSize
                pageinfo['tocPN'] = min(tocPages, toc)
        pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
        pageinfo['sn'] =self.REQUEST.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the viewer and redirect to manage_main if RESPONSE is given"""
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
943
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer (zpt/addDocumentViewer)"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
948
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """create a documentViewer with the given id in self and redirect to manage_main if RESPONSE is given"""
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
956
957	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class for the document viewer; a ZopePageTemplate with its own meta_type."""
    # type label of this class (presumably what the Zope management interface shows -- confirm)
    meta_type="DocumentViewer Template"
961
962
def manage_addDocumentViewerTemplateForm(self):
    """render the form for adding a viewer template (zpt/addDocumentViewerTemplate)"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
967
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate initialized from zpt/viewer_main.zpt.

    id      -- id of the new template inside self
    title   -- optional title of the template
    REQUEST -- if given, the browser is redirected to the manage_main page
               of the new template
    text and submit are accepted but not used.

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        # don't leave the file handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        # redirecting only makes sense (and only works) with a request
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
987
988
989

Note: See TracBrowser for help on using the repository browser.

Download in other formats: