Context Navigation

source: documentViewer/documentViewer.py @ 435:94baa8dbc7e8

Last change on this file since 435:94baa8dbc7e8 was 435:94baa8dbc7e8, checked in by abukhman, 13 years ago
* empty log message *
File size: 37.9 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 on the root logger.

    method is the numeric logging level (callers in this file pass
    logging.INFO).  Anything that is not an integer falls back to
    logging.INFO, which is the level this helper always used before.
    """
    level = logging.INFO
    if isinstance(method, int):
        level = method
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only conversion failures (wrong type, unparsable text, infinite float)
    select the default; unrelated exceptions such as KeyboardInterrupt are
    no longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    nodename is a DOM node (or None).  Only children whose nodeType is
    TEXT_NODE contribute; text nested in child elements is not included.
    Returns "" for None and for nodes without text children.
    """
    if nodename is None:
        return ""
    parts = [child.data for child in nodename.childNodes
             if child.nodeType == child.TEXT_NODE]
    return "".join(parts)
45
def serializeNode(node, encoding="utf-8"):
    """Return a string containing node pretty-printed as XML.

    The node is written with Ft.Xml.Domlette.PrettyPrint into an in-memory
    buffer using the given encoding.  The buffer is closed even when
    serialization raises.
    """
    stream = cStringIO.StringIO()
    try:
        logging.debug("BUF: %s"%(stream))
        Ft.Xml.Domlette.PrettyPrint(node, stream=stream, encoding=encoding)
        s = stream.getvalue()
        logging.debug("BUF: %s"%(s))
        return s
    finally:
        stream.close()
55
def browserCheck(self):
    """Inspect the request's User-Agent header and classify the browser.

    NOTE: this definition shadows the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the file.

    Returns a dict with
      'ua'               -- the raw header value (None if the header is absent)
      'isIE', 'isN4'     -- MSIE / Netscape 4 flags
      'isMac', 'isWin', 'isIEWin', 'isIEMac' -- platform flags
      'versIE', 'versFirefox', 'versSafariChrome', 'versOpera'
                         -- version strings, "" when not detected
      'staticHTML'       -- always False

    Version detection is best effort: a user agent that does not have the
    expected shape simply leaves the corresponding version empty.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header yields None; sniff an empty string
    # instead of failing, but still report the raw value
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in agent
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in agent)
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # text from the first '(' and from the first ')' onwards; str.find()
    # returns -1 when the character is missing, which leaves only the last
    # character -- the checks below then find nothing
    after_open = agent[agent.find('('):]
    after_close = agent[agent.find(')'):]

    # Safari or Chrome: "...) <Name>/<version> Safari/<build>" after the
    # second parenthesised group
    try:
        second_group = after_close[after_close.find('('):]
        tokens = second_group[second_group.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE: "(compatible; MSIE <version>; ...)"
    try:
        ie = after_open.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox: "...) Gecko/<date> Firefox/<version>".  The token is looked
    # up here instead of reusing a variable from the Safari check, so it is
    # found even when the platform group has a single entry.
    try:
        token = after_close.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: "Opera/9.80 (...) Presto/<x> Version/<version>"
    try:
        if "Opera" in agent:
            rest = after_open[after_open.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
116
117
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
121
122
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Return the body of an HTTP GET request for url (plus data).

    data may be a ready-made query string, or a dict / list / tuple that is
    url-encoded; it is appended to url.  The request is tried up to
    num_tries times with the given timeout (seconds): an HTTP error response
    stops immediately, other URL errors are retried.

    Raises IOError if no response could be obtained.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)
    if query is not None:
        # keep an existing query string intact instead of adding a second '?'
        if '?' in url:
            sep = '&'
        else:
            sep = '?'
        url = "%s%s%s"%(url, sep, query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen() has no timeout argument before Python 2.6, so the
                # process-wide socket default has to be set instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" so the code parses on
            # every Python version
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection-level problem -- try again

    if response is not None:
        try:
            return response.read()
        finally:
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
165
166	##
167	## documentViewer class
168	##
class documentViewer(Folder):
    """document viewer"""

    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # the Folder management tabs plus one for the viewer configuration form
    manage_options = Folder.manage_options + (
        {'label':'main config','action':'changeDocumentViewerForm'},
    )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is restricted to the management permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
198
199
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    Stores the basic settings and creates a 'template' sub-folder holding
    a 'fulltextclient' (MpdlXmlTextServer for textServerName) and a
    'zogilib' object (for imageScalerUrl).  Failure to create either helper
    is logged and otherwise ignored.

    authgroups is a comma separated list of authorized group names.
    """
    self.id=id
    self.title=title
    # remember the configured digilib URL; other methods read this attribute
    # and look the URL up themselves when it is empty
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
227
228
229	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """get page (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """get query in search (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """get search (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getGisPlaces(self, **args):
    """get gis places (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """get all gis places (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)

def getTranslate(self, **args):
    """get translate (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """get lemma (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """get toc (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """get tocpage (forwarded to template.fulltextclient)"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
265
266
267	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" becomes "text" when
        the document has a text URL and "images" otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute may not have been set on this instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
299
300	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the main viewer page (template.viewer_main) for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "auto" becomes "text_dict" when the document has a
        text URL and "images" otherwise; other values are passed through
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, passed through generateMarks()

    Further display options (characterNormalization, querySearch, ...) are
    not parameters here; getPageinfo reads them from the request.
    '''

    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto": # automatic mode selected
        if ('textURL' in docinfo) or docinfo.get('textURLPath',None):
            # a text URL is set
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    pt = getattr(self.template, 'viewer_main')
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
342
def generateMarks(self,mk):
    """Return the marks in mk as concatenated "mk=<value>" fragments.

    mk may be None (gives ""), a single value or a list of values.  The
    fragments are joined without any separator.
    """
    if mk is None:
        return ""
    if isinstance(mk, list):
        marks = mk
    else:
        marks = [mk]
    return "".join(["mk=%s"%m for m in marks])
352
353
def getBrowser(self):
    """Return the browserCheck() result for the current request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
359
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib."""
    return self.template.zogilib.getDLBaseUrl()
364
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
368
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx == selected:
        return style + 'sel'
    return style
376
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer built from the current request parameters.

    The request's form parameters are copied, then modified:
      param/val -- set one parameter; val=None removes it
      params    -- dict of several parameters; a None value removes the key
    baseUrl defaults to REQUEST['URL1']; paramSep separates the parameters
    in the query string.  Values are quoted ('/' is left unescaped),
    parameter names are not.
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            elif isinstance(v, basestring):
                urlParams[k] = v
            else:
                # quote_plus() below only accepts strings; convert like the
                # single-parameter case does
                urlParams[k] = str(v)

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # filepath was set on the first call -- switch to imagepath now
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
414
415
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Like getLink(), but for embedding the link in HTML/XML markup.

    The parameters are separated by the ampersand character entity instead
    of a bare '&'.  With a bare '&' this method would be identical to
    getLink().
    """
    # the entity is assembled from two pieces so that tools which decode
    # character entities cannot turn it back into a bare '&'
    entitySep = '&' + 'amp;'
    return self.getLink(param, val, params, baseUrl, entitySep)
419
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template.info_xml).

    url and mode are passed to getDocinfo().
    """
    # getattr: the attribute may not have been set on this instance yet
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
429
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the current state of a toggle option kept in the session.

    The first call for optionName stores initialState.  Afterwards the
    state is inverted each time newState differs from the value passed on
    the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
444
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    docinfo['accessType'] 'free' is always accessible.  A missing access
    type, or one listed in self.authgroups, requires a user other than
    "Anonymous User".  Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
464
465
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of images in a digilib directory in docinfo.

    path is shortened by cut trailing components and digilib's
    dirInfo-xml.jsp is queried for it.  docinfo['numPages'] is set to the
    text of the first //dir/size element, or 0 if there is none.  Returns
    docinfo (a new dict when none was given).
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string had no placeholder, so the sizes were never shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size: %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
495
def getIndexMetaPath(self,url):
    """Return the "/mpiwg/online/..." path contained in url.

    url must contain "experimental/" or "permanent/"; the result is
    "/mpiwg/online/" + that directory name + "/" + everything after it.
    Returns "" when url contains neither.
    """
    # the pattern needs the '*' quantifiers and an unescaped '|': with
    # ".(experimental\|permanent)/(.)" it can only match the literal text
    # "experimental|permanent" and never a real path
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
504
505
506
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    An http:// or https:// URL is returned unchanged.  Anything else is
    treated as an archive path: "/mpiwg/online" is removed, the path is
    appended to digilib's Texter servlet URL and "/index.meta" is added
    unless the path already ends with "index.meta".
    """
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        return url

    # online path
    # NOTE(review): the listing this code was taken from had lost its
    # indentation; the index.meta suffix is assumed to apply to online
    # paths only -- confirm against the repository
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
522
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as DOM.

    Raises IOError when no data could be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
535
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as DOM.

    An http:// or https:// URL is fetched as is; anything else is treated
    as an archive path below "/mpiwg/online" and fetched through digilib's
    Texter servlet.  Raises IOError when no data could be read.
    """
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
554
555
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    The document is given by dom, or fetched for path shortened by cut
    trailing components.  docinfo['accessType'] becomes the type attribute
    of access-conditions/access (None if absent); for the types 'group' and
    'institution' it becomes the lower-cased access name instead.  Returns
    docinfo (a new dict when none was given).
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # keep the bare type instead of failing with an IndexError
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
578
579
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store bibliographical info from an index.meta document in docinfo.

    The document is given by dom, or fetched for path shortened by cut
    trailing components.  Sets
      'indexMetaPath' -- getIndexMetaPath(path)
      'bib'           -- dict of all children of the bib element
      'bib_type'      -- type attribute of bib ('generic' if absent),
                         with '-' replaced by ' '
    and, when the metadata mapping for that type has an author field,
    'author', 'title', 'year' (looked up through the mapping, left unset on
    failure) plus a fixed list of further fields ('' when absent).
    Returns docinfo (a new dict when none was given).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # index.meta uses "-" where the mapping uses " "
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # best effort: the mapping may lack the field or name an
                # element that is missing; the key then stays unset
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # NOTE(review): the listing this code was taken from had lost its
        # indentation; these fixed fields are assumed to belong to the
        # mapped branch -- confirm against the repository
        for field in ('lang', 'city', 'number_of_pages', 'series_volume',
                      'number_of_volumes', 'translator', 'edition',
                      'series_author', 'publisher', 'series_title',
                      'isbn_issn'):
            nodes = dom.xpath("//bib/%s"%field)
            if nodes:
                docinfo[field]=getTextFromNode(nodes[0])
            else:
                docinfo[field]=''
    return docinfo
673
674
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the resource name from an index.meta document in docinfo.

    The document is given by dom, or fetched for path shortened by cut
    trailing components.  docinfo['name'] becomes the text of
    /resource/name, or "" when the element is missing.  Returns docinfo
    (a new dict when none was given).
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNode = None
    names = dom.xpath("/resource/name")
    if names:
        nameNode = names[0]
    # getTextFromNode(None) gives "", so a missing name no longer raises
    docinfo['name']=getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
688
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    The document is given by dom or fetched for url.  Depending on what the
    document contains this sets 'imagePath', 'imageURL' and 'numPages'
    (texttool/image), 'viewerURL' (texttool/digiliburlprefix), 'textURL'
    (texttool/text), 'textURLPath' and 'textURLPathkurz'
    (texttool/text-url-path), then adds the bibliographical, name and access
    information.  Author, title and year are overwritten from the
    presentation file when texttool/presentation is given.

    Raises IOError when no archive path can be determined.
    Returns docinfo (a new dict when none was given).
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path (startswith also copes with an empty element)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # a missing image tag is not an error because text mode is standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, so not a URL: relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # the part before the first '.'
        docinfo['textURLPathkurz'] = textUrl.split(".")[0]

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # overwrite with the presentation information: its URL is the
        # index.meta URL with "index.meta" replaced by the relative path of
        # the presentation file
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
795
796
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Store author, title and year from the presentation file in docinfo.

    The file at url is fetched with getPresentationInfoXML(); the dom
    argument is accepted but not used.  'author', 'title' and 'year' are
    set from the first //author, //title and //date element respectively;
    a key is left untouched when its element is missing.  Returns docinfo
    (a new dict when none was given).
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for key, query in (('author', "//author"),
                       ('title', "//title"),
                       ('year', "//date")):
        nodes = dom.xpath(query)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
814
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    "/mpiwg/online" is removed from path, which is stored as 'imagePath'.
    The path shortened by cut trailing components is the image directory:
    it is used for the page count and for 'imageURL'.  The index.meta file
    is assumed to be one level above the image directory; it supplies the
    bibliographical and access information.  Returns docinfo (a new dict
    when none was given).
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path=path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    imageDir=path
    for x in range(cut):
        imageDir=getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # the index.meta file is one level above the images; fetch and parse it
    # once and hand the same DOM to both readers
    metaPath=getParentDir(imageDir)
    dom = self.getDomFromIndexMeta(metaPath)
    docinfo = self.getBibinfoFromIndexMeta(metaPath,docinfo=docinfo,dom=dom)
    docinfo = self.getAuthinfoFromIndexMeta(metaPath,docinfo=docinfo,dom=dom)
    return docinfo
835
836
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, read according to mode.

    mode is one of 'texttool' (index.meta with texttool information),
    'imagepath' (path to the image directory) or 'filepath' (path to an
    image file).  The result is cached in the session under 'docinfo' and
    reused as long as mode and url stay the same.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        docinfo = session['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
867
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the current request.

    current is the current page number.  rows and cols give the thumbnail
    grid (default: self.thumbrows / self.thumbcols); start is the first
    page of the thumbnail group and defaults to the first page of the group
    that contains current.  When docinfo has 'numPages', 'end' is limited
    to it and 'numgroups' is set.  The remaining entries are copied from
    the request with their defaults.  'tocPN' is limited to the number of
    table-of-contents pages when docinfo has a 'tocSize_<tocMode>' entry.
    """
    request = self.REQUEST
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        numPages = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], numPages)
        # number of groups, rounded up ('//' is integer division on every
        # Python version)
        pageinfo['numgroups'] = numPages // grpsize
        if numPages % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    # tocPN comes from the request: a non-numeric value falls back to 1
    toc = getInt(pageinfo['tocPN'], 1)
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    # docinfo is optional, so it has to be checked before looking into it
    if (docinfo is not None) and (tocSizeKey in docinfo):
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
917
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values for the viewer.

    authgroups is a comma separated list of group names; it is stored as a
    list of stripped, lower-cased names.  When RESPONSE is given the
    browser is redirected to 'manage_main'.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
927
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
932
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    When RESPONSE is given the browser is redirected to 'manage_main'.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
940
941	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""

    # the only difference to a plain ZopePageTemplate is the meta type
    meta_type = "DocumentViewer Template"
945
946
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
951
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled from zpt/viewer_main.zpt.

    The template content always comes from the product's
    zpt/viewer_main.zpt file; the text and submit arguments are accepted
    but not used.  title is set when given.  When REQUEST is given the
    browser is redirected to the new template's manage_main page; without a
    request (programmatic use) no redirect is attempted.  Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
971
972
973

Note: See TracBrowser for help on using the repository browser.

Download in other formats: