Context Navigation

source: documentViewer/documentViewer.py @ 430:d6a2125a4b09

Last change on this file since 430:d6a2125a4b09 was 430:d6a2125a4b09, checked in by abukhman, 13 years ago
bibliographical info
File size: 38.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt,method,txt2):
    """Log the concatenation of txt and txt2 through the root logger.

    @param txt: first part of the message (usually the name of the caller)
    @param method: logging level (e.g. logging.INFO); anything that is not
        an integer level falls back to logging.INFO
    @param txt2: second part of the message
    """
    if isinstance(method, int):
        level = method
    else:
        # callers that pass something else get the historical INFO level
        level = logging.INFO
    # let the logging module do the formatting so non-string parts don't raise
    logging.log(level, "%s%s", txt, txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when number cannot be converted (0 if omitted)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are expected here; everything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        return int(default)
34
def getTextFromNode(nodename):
    """Return the text of all direct text-node children of a DOM node.

    Child elements are skipped (their text is not collected).
    Returns "" when nodename is None.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize a DOM node to an XML string.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory
    buffer using the given encoding; the buffer content is returned.
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xmlText = out.getvalue()
    out.close()
    return xmlText
53
def browserCheck(self):
    """Inspect the User-Agent header of self.REQUEST and describe the browser.

    Note: this definition shadows the browserCheck imported from
    Products.zogiLib.zogiLib at the top of this module.

    Returns a dict with the keys
      ua                -- the User-Agent string ("" if the header is missing)
      isIE, isN4        -- True for MSIE / Netscape 4 style agents
      versIE, versFirefox, versSafariChrome, versOpera
                        -- version strings ("" if not detected)
      isMac, isWin, isIEWin, isIEMac
      staticHTML        -- always False
    """
    # a request without User-Agent header must not break the viewer
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if ua.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (ua.find('Mozilla/4.') > -1)

    # the parsing below is positional: a user agent that lacks the expected
    # parentheses or tokens ends up in IndexError and the field stays ""
    inParen = ua[ua.find('('):]      # from the first "(" on
    afterParen = ua[ua.find(')'):]   # from the first ")" on

    # Safari or Chrome identification:
    # "... (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
    try:
        secondParen = afterParen[afterParen.find('('):]
        tail = secondParen[secondParen.find(')'):]
        tokens = tail.split(" ")
        product = tokens[1]
        engine = tokens[2]
        if engine.find("Safari") > -1:
            bt['versSafariChrome'] = product.split("/")[1]
    except IndexError:
        pass

    # IE identification: "(compatible; MSIE 7.0; Windows NT 5.1)"
    try:
        ie = inParen.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: ") Gecko/20100101 Firefox/10.0"
    # (computed locally; it used to rely on a variable left over from the
    # Safari block and silently failed whenever that block aborted early)
    try:
        candidate = afterParen.split(" ")[2]
        if candidate.find("Firefox") > -1:
            fullVersion = candidate.split("/")[1]
            logging.debug("FIREFOX: %s"%(fullVersion))
            # keep "major.minor" only; plain [0:3] broke on versions >= 10
            m = re.match(r"\d+(?:\.\d+)?", fullVersion)
            if m is not None:
                bt['versFirefox'] = m.group(0)
            else:
                bt['versFirefox'] = fullVersion[0:3]
    except IndexError:
        pass

    # Opera identification: "Opera/9.80 (...) Presto/2.7.62 Version/11.01"
    try:
        if ua.find("Opera") > -1:
            operaTail = inParen[inParen.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = ua.find('Macintosh') > -1
    bt['isWin'] = ua.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
114
115
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields "".
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
119
120
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with HTTP GET and return the response body.

    @param url: URL to fetch
    @param data: optional query; a string is appended as is, a dict or a
        sequence of pairs is urlencoded first
    @param num_tries: number of attempts; only URLError (e.g. connection
        problems) is retried, an HTTPError stops immediately
    @param timeout: timeout in seconds for each attempt
    @raise IOError: if no attempt produced a response
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, (str, unicode)):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # don't produce a second "?" if the url already has a query string
        if '?' in url:
            sep = '&'
        else:
            sep = '?'
        url = "%s%s%s"%(url,sep,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter before 2.6, so the
                # process-wide socket default is set -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # sys.exc_info() keeps this valid on every Python 2 release
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no answer -- fall through to the next attempt

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
163
164
165
166	##
167	## documentViewer class
168	##
class documentViewer(Folder):
    """document viewer

    Zope folder that displays a document (page images served by digilib
    and/or text served by the 'fulltextclient' text server) described
    by an index.meta file or an image directory.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        @param id: Zope id of the viewer
        @param imageScalerUrl: digilib server URL handed to the zogilib object
        @param textServerName: server name handed to the text server object
        @param title: title of the viewer
        @param digilibBaseUrl: base URL of digilib (looked up later if empty)
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: authorized groups as comma separated string
        """
        self.id=id
        self.title=title
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        if digilibBaseUrl:
            # only stored when given so that an unset value keeps being
            # resolved lazily (see _ensureDigilibBaseUrl)
            self.digilibBaseUrl = digilibBaseUrl
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something

        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        # both helper objects are optional: failing to create one is logged
        # but does not prevent the viewer from being created
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page"""
        return self.template.fulltextclient.getTextPage(**args)

    def getText(self, **args):
        """get page text"""
        return self.template.fulltextclient.getText(**args)

    def getXML(self, **args):
        """get page xml"""
        return self.template.fulltextclient.getXML(**args)

    def getDictionary(self, **args):
        """get page dictionary"""
        return self.template.fulltextclient.getDictionary(**args)

    def getQuery(self, **args):
        """get query in search"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places """
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getTranslate(self, **args):
        """get translate"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage"""
        return self.template.fulltextclient.getTocPage(**args)


    def _ensureDigilibBaseUrl(self, fallback):
        """sets self.digilibBaseUrl from zogilib (or fallback) if it is not configured yet"""
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or fallback


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # auto mode selected
            if "textURL" in docinfo or docinfo.get('textURLPath',None): # text URL set and text viewer configured
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl("http://digilib.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # auto mode selected
            if 'textURL' in docinfo or docinfo.get('textURLPath',None): # text URL set and text viewer configured
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        if (docinfo.get('textURLPath',None)):
            page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)
            pageinfo['textPage'] = page
        tt = getattr(self, 'template')
        pt = getattr(tt, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Turns one mark or a list of marks into query parameters "mk=...".
        # Several marks are joined with "&" (they used to be glued together
        # without any separator). Returns "" if there are no marks.
        # Deliberately no docstring: the original method had none.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "&".join(["mk=%s"%m for m in mk])


    def getBrowser(self):
        """getBrowser the version of browser """
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        url = self.template.zogilib.getDLBaseUrl()
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
        """returns URL to documentviewer with parameter param set to val or from dict params

        Starts from the parameters of the current request. A value of None
        removes the parameter. List values result in repeated parameters.
        @param baseUrl: URL the query is appended to (default: REQUEST['URL1'])
        @param paramSep: separator between the parameters
        """
        # copy existing request params
        urlParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                urlParams.pop(param, None)
            else:
                urlParams[param] = str(val)

        # change more params
        if params is not None:
            for (k, v) in params.items():
                if v is None:
                    # val=None removes param
                    urlParams.pop(k, None)
                else:
                    urlParams[k] = v

        # FIXME: does this belong here?
        if urlParams.get("mode", None) == "filepath": # if filepath was set on the first call, change it to imagepath now
            urlParams["mode"] = "imagepath"
            urlParams["url"] = getParentDir(urlParams["url"])

        # quote values and assemble into query string (not escaping '/')
        pairs = []
        for (k, v) in urlParams.items():
            if isinstance(v, (list, tuple)):
                # multi-valued request parameter
                values = v
            else:
                values = [v]
            for item in values:
                # quote_plus only handles byte strings reliably
                if isinstance(item, unicode):
                    item = item.encode('utf-8')
                elif not isinstance(item, str):
                    item = str(item)
                pairs.append("%s=%s"%(k,urllib.quote_plus(item,'/')))
        ps = paramSep.join(pairs)
        if baseUrl is None:
            baseUrl = self.REQUEST['URL1']

        url = "%s?%s"%(baseUrl, ps)
        return url


    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
        """link to documentviewer with parameter param set to val"""
        # NOTE(review): the separator passed here is the same as getLink's
        # default; the method name suggests '&amp;' may have been intended --
        # confirm against the templates before changing it.
        return self.getLink(param, val, params, baseUrl, '&')

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""

        self._ensureDigilibBaseUrl("http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary")

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
        """returns new option state

        The state is kept in the session under optionName and flipped
        whenever newState differs from the value seen on the last call.
        """
        session = self.REQUEST.SESSION
        if not session.has_key(optionName):
            # not in session -- initial
            opt = {'lastState': newState, 'state': initialState}
        else:
            opt = session.get(optionName)
            if opt['lastState'] != newState:
                # state in session has changed -- toggle
                opt['state'] = not opt['state']
                opt['lastState'] = newState

        session[optionName] = opt
        return opt['state']

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' documents are open to everybody. Documents without access
        type or with an access type listed in self.authgroups require a
        user other than "Anonymous User". Everything else is denied.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """returns docinfo with 'numPages' set from the digilib dirInfo of path

        @param path: image directory path
        @param docinfo: dict to update (a new one is created if None)
        @param cut: number of path components removed from the end of path
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string had no placeholder, the value was never logged
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: "/mpiwg/online/<experimental|permanent>/<rest>" or "" if url does not match"""
        # the previous pattern r".(experimental\|permanent)/(.)" required a
        # literal "|" in the url and therefore never matched a real path
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))


    def _getTexterUrl(self, url):
        """returns url if it is a real URL, else the digilib Texter URL for the online path url"""
        if url.startswith(("http://", "https://")):
            # real URL
            return url
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        return server+url.replace("/mpiwg/online","")

    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url"""
        if url.startswith(("http://", "https://")):
            # real URL
            return url

        # online path
        metaUrl = self._getTexterUrl(url)
        # NOTE(review): the indentation of the reviewed source was lost; the
        # suffix is assumed to be added for online paths only -- confirm.
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url"""
        metaUrl = self._getTexterUrl(url)

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)


    def _getDomForPath(self, path, dom=None, cut=0):
        """returns (path, dom); without dom, path is shortened by cut levels and its index.meta is read"""
        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)
        return path, dom

    def _getFirstText(self, dom, xpath, default=None):
        """returns the text of the first node matching xpath or default"""
        try:
            return getTextFromNode(dom.xpath(xpath)[0])
        except Exception:
            # no such node or unusable expression -- best effort
            return default


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the access type; for the types
        'group' and 'institution' the lowercased access name is used.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        path, dom = self._getDomForPath(path, dom=dom, cut=cut)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype and (len(acctype)>0):
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    # keep the bare type; isAccessible denies unknown types
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets 'indexMetaPath', 'bib' (all raw bib fields), 'bib_type' and,
        if a metadata mapping exists for the type, 'author', 'title', 'year'
        and a list of further fields that default to ''.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        path, dom = self._getDomForPath(path, dom=dom, cut=cut)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib and len(bib)>0:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype and (len(bibtype)>0):
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    # unmapped or empty field name, or no such element:
                    # leave docinfo[field] untouched
                    pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            # NOTE(review): the indentation of the reviewed source was lost;
            # these fields are assumed to be read only when a mapping exists
            # -- confirm against the repository.
            for field in ('lang', 'city', 'number_of_pages', 'series_volume',
                          'number_of_volumes', 'translator', 'edition',
                          'series_author', 'publisher', 'series_title',
                          'isbn_issn'):
                docinfo[field]=self._getFirstText(dom, "//bib/%s"%field, '')
        return docinfo


    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom"""
        if docinfo is None:
            docinfo = {}

        path, dom = self._getDomForPath(path, dom=dom, cut=cut)

        names = dom.xpath("/resource/name")
        if names:
            docinfo['name']=getTextFromNode(names[0])
        else:
            # a missing name is only worth a warning, not an IndexError
            logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
            docinfo['name']=''
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image, viewer and text URLs, bibliographical
        info, name and access info of the document at url.
        @raise IOError: if no archive path can be determined
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames and (len(archiveNames) > 0):
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths and (len(archivePaths) > 0):
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs and (len(imageDirs) > 0):
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # no image tag is not an error anymore because textmode is now standard
            imageDir = ""
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls and (len(viewerUrls) > 0):
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls and (len(textUrls) > 0):
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls and (len(textUrls) > 0):
            docinfo['textURLPath'] = getTextFromNode(textUrls[0])

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls and (len(presentationUrls) > 0): # override these with the presentation information
            # the presentation URL is obtained by replacing index.meta in the
            # metadata URL with the relative path to the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The dom argument is not used: the presentation document is always
        read from url. Fields that are missing there are left untouched.
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            text = self._getFirstText(dom, xpath, None)
            if text is not None:
                docinfo[field]=text
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        # path is the path to the images; it assumes that the index.meta file is one level higher.
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The docinfo is cached in the session and reused as long as mode
        and url stay the same.
        @raise ValueError: if mode is not 'texttool', 'imagepath' or 'filepath'
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        session = self.REQUEST.SESSION
        # look for cached docinfo in session
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        # FIXME: fake texturlpath
        if 'textURLPath' not in docinfo:
            docinfo['textURLPath'] = None

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        session['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
        """returns pageinfo with the given parameters

        @param current: current page number
        @param start: first page of the thumbnail group (default: start of
            the group that contains current)
        @param rows, cols: thumbnail grid size (default: thumbrows/thumbcols)
        @param docinfo: docinfo of the document (may be None)
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start is the first page of the group that contains current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            numPages = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], numPages)
            pageinfo['numgroups'] = numPages // grpsize
            if numPages % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        request = self.REQUEST
        pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
        pageinfo['optionToggle'] = request.get('optionToggle','')
        pageinfo['query'] = request.get('query','')
        pageinfo['queryType'] = request.get('queryType','')
        pageinfo['querySearch'] =request.get('querySearch', 'fulltext')
        pageinfo['textPN'] = request.get('textPN','1')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =request.get('queryPageSize', '10')
        pageinfo['tocPN'] = request.get('tocPN', '1')

        # a malformed tocPN in the request falls back to the first page
        toc = getInt(pageinfo['tocPN'], 1)
        pageinfo['textPages'] = toc

        tocSizeKey = 'tocSize_%s'%tocMode
        if (docinfo is not None) and (tocSizeKey in docinfo):
            tocSize = int(docinfo[tocSizeKey])
            tocPageSize = int(pageinfo['tocPageSize'])
            # cached toc
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
        pageinfo['searchPN'] =request.get('searchPN','1')
        pageinfo['sn'] =request.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer"""
        # NOTE(review): the defaults for thumbrows/thumbcols are the reverse
        # of those in __init__ (thumbcols=2, thumbrows=5) -- left unchanged.
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
940
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer.

    The page template 'zpt/addDocumentViewer' is wrapped in the context
    of self and called.
    """
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
945
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id inside self.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
953
954	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used for the templates of a document viewer.

    Behaves like ZopePageTemplate; only the meta type differs.
    """
    meta_type = "DocumentViewer Template"
958
959
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template.

    The page template 'zpt/addDocumentViewerTemplate' is wrapped in the
    context of self and called.
    """
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
964
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    A DocumentViewerTemplate with the given id is created in self and
    filled with the content of 'zpt/viewer_main.zpt' from this package.
    The text and submit arguments are accepted but not used.
    With a REQUEST the browser is redirected to the new template's
    manage_main; without one the template is only created.
    Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # don't leak the file handle
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
984
985
986

Note: See TracBrowser for help on using the repository browser.

Download in other formats: