Context Navigation

source: documentViewer/documentViewer.py @ 436:0baf4e761be7

Last change on this file since 436:0baf4e761be7 was 436:0baf4e761be7, checked in by abukhman, 13 years ago
* empty log message *
File size: 38.2 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log txt directly followed by txt2 at INFO level on the root logger.

    The method argument is accepted but not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that is not possible.

    Only conversion failures (wrong type, unparsable string, infinite float)
    fall back to the default, so KeyboardInterrupt and SystemExit are no
    longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the text nodes directly below nodename.

    Returns "" when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding="utf-8"):
    """Return node pretty-printed as an XML string in the given encoding."""
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.PrettyPrint(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        # release the buffer even when PrettyPrint raises
        stream.close()
55
def browserCheck(self):
    """Classify the browser from the User-Agent header of self.REQUEST.

    Returns a dict with the keys 'ua', 'isIE', 'isN4', 'versFirefox',
    'versIE', 'versSafariChrome', 'versOpera', 'isMac', 'isWin', 'isIEWin',
    'isIEMac' and 'staticHTML'. A version entry stays "" when no version
    could be extracted from the header.
    """
    # a request without User-Agent header is treated like an empty one
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # header text from the first "(" and from the first ")" onwards; when the
    # character is missing find() returns -1 and only the last character is left
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification: the tokens behind the second
    # parenthesised group are expected to read ") <name>/<version> Safari/..."
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry reads "MSIE <version>"
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: uses its own token lookup so it no longer
    # depends on a variable set by the Safari/Chrome detection
    try:
        candidate = nav1.split(" ")[2]
        if "Firefox" in candidate:
            version = candidate.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            tail = nav[nav.find(')'):]
            bt['versOpera'] = tail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
116
117
def getParentDir(path):
    """Return path without its last '/'-separated component ('' if there is no '/')."""
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
121
122
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET request and return the response body.

    data is appended to url as query string: a string verbatim, a dict, list
    or tuple after urllib.urlencode(). Up to num_tries requests are made with
    a timeout of timeout seconds each. An HTTPError ends the attempts
    immediately, a URLError is retried.

    Raises IOError when no attempt produced a response.
    """
    # we do GET (by appending data to url)
    if isinstance(data, basestring):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument here, so the process-wide
                # socket default is set instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() keeps this parseable on the pre-2.6 interpreters
            # handled above
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # fall through to the next attempt

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even when reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
165
166	##
167	## documentViewer class
168	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    # used for the security.declareProtected(...) declarations in this class
    security=ClassSecurityInfo()
    # Folder's management options plus a 'main config' entry that points at
    # changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms, loaded from the zpt/ and css/ directories
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
198
199
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores the settings and creates the 'template' subfolder holding a
    'fulltextclient' (MpdlXmlTextServer) and a 'zogilib' object. Failure to
    create either helper object is logged and does not abort construction.
    """
    self.id=id
    self.title=title
    # keep the configured URL; the view methods fall back to zogilib or a
    # default when it is empty
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
227
228
229	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """return getTextPage(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getTextPage(**args)

def getOrigPages(self, **args):
    """return getOrigPages(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getOrigPages(**args)

def getOrigPagesNorm(self, **args):
    """return getOrigPagesNorm(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getOrigPagesNorm(**args)

def getQuery(self, **args):
    """return getQuery(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getQuery(**args)

def getSearch(self, **args):
    """return getSearch(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getSearch(**args)

def getGisPlaces(self, **args):
    """return getGisPlaces(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """return getAllGisPlaces(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getAllGisPlaces(**args)

def getTranslate(self, **args):
    """return getTranslate(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getTranslate(**args)

def getLemma(self, **args):
    """return getLemma(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getLemma(**args)

def getToc(self, **args):
    """return getToc(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getToc(**args)

def getTocPage(self, **args):
    """return getTocPage(**args) of template.fulltextclient"""
    return self.template.fulltextclient.getTocPage(**args)
273
274
275	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the document through the template 'thumbs_main_rss'
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes text when the document has a text URL, images otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute does not exist on instances that never had it set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
307
308	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    render the document through the template 'viewer_main'
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: text, images or auto; auto becomes text_dict when the document has a text URL, images otherwise
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number
    @param mk: mark or list of marks, handed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # choose text display when the document has a text URL
        hasText = docinfo.has_key('textURL') or docinfo.get('textURLPath',None)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    viewerTemplate = self.template.viewer_main
    return viewerTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
350
def generateMarks(self,mk):
    # Returns the marks in mk (a single value or a list) as query string
    # fragment "mk=a&mk=b"; returns "" for None. Several marks are joined
    # with "&" so that each one stays a separate parameter.
    # (kept as comment: the method had no docstring before)
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk=[mk]
    return "&".join(["mk=%s"%m for m in mk])
360
361
def getBrowser(self):
    """return the browserCheck() result for the current request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
367
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib

    Returns None when there is no template folder or no zogilib object in
    it, so that callers can fall back to their default URL.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        return None
    return zogilib.getDLBaseUrl()
372
def getDocumentViewerURL(self):
    """return absolute_url() of this instance"""
    viewerUrl = self.absolute_url()
    return viewerUrl
376
def getStyle(self, idx, selected, style=""):
    """return style, with 'sel' appended if idx equals selected"""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
384
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    Starts from a copy of the current request's form parameters. A value of
    None (val, or an entry of params) removes that parameter. baseUrl
    defaults to REQUEST['URL1']; paramSep separates the parameters in the
    query string.
    """
    # copy existing request params
    urlParams=self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            if param in urlParams:
                del urlParams[param]
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k in params.keys():
            v = params[k]
            if v is None:
                # val=None removes param
                if k in urlParams:
                    del urlParams[k]
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    # a first call with mode=filepath is turned into mode=imagepath on the
    # parent directory
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/');
    # str() because values from params or the request form need not be strings
    ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(str(v),'/')) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    url = "%s?%s"%(baseUrl, ps)
    return url
422
423
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """link to documentviewer with parameter param set to val, parameters separated by '&amp;'

    Same as getLink() but with the HTML-escaped separator, for use inside
    HTML attributes. With a plain '&' this method would be identical to
    getLink() with its default separator.
    """
    return self.getLink(param, val, params, baseUrl, '&amp;')
427
def getInfo_xml(self,url,mode):
    """returns info about the document as XML (rendered by the template 'info_xml')"""

    # getattr: the attribute does not exist on instances that never had it set
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
437
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """returns the state of option optionName kept in the session

    The first call stores initialState. Later calls flip the stored state
    whenever newState differs from the newState of the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
452
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    'free' documents are open to everybody. Documents without access type
    or with one listed in self.authgroups require a user other than
    "Anonymous User". Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
472
473
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """reads the digilib dirInfo for path and stores the number of pages in docinfo

    path is shortened by cut components before the request. Sets
    docinfo['numPages'] to the value of the first //dir/size element, or to
    0 if there is none, and returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the message needs a placeholder; the 1-tuple keeps a list argument
    # from being read as the format arguments
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
503
def getIndexMetaPath(self,url):
    """returns only the path: "/mpiwg/online/" + the part of url from
    "experimental/" or "permanent/" onwards, or "" if url contains neither"""
    # group 1 is the archive area, group 2 everything behind the following "/"
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
512
513
514
def getIndexMetaUrl(self,url):
    """returns URL of index.meta document at url"""
    if url.startswith("http://"):
        # real URL
        return url

    # online path: fetch through the Texter servlet of digilib
    # NOTE(review): nesting reconstructed from a listing without indentation;
    # "/index.meta" is assumed to be appended for online paths only -- confirm
    metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
530
def getDomFromIndexMeta(self, url):
    """fetch the index.meta document for url and return it parsed as dom"""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
543
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url"""
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through the Texter servlet of digilib
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
562
563
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Sets docinfo['accessType'] to the type attribute of
    access-conditions/access (None if absent). For the types 'group' and
    'institution' the lower-cased text of the access name element is used
    instead; if that element is missing the type itself is kept.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ('group', 'institution'):
            names = dom.xpath("//access-conditions/access/name")
            # a missing name element no longer raises IndexError
            if names:
                access = getTextFromNode(names[0]).lower()

    docinfo['accessType'] = access
    return docinfo
586
587
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Sets docinfo['indexMetaPath'], docinfo['bib'] (all raw bib fields, if
    any) and docinfo['bib_type']. If there is a mapping for the bib type,
    author, title and year are read through the mapping and a fixed list of
    further fields is read directly ('' when a field is missing).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # the type is looked up with " " in place of "-"
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    # NOTE(review): nesting reconstructed from a listing without indentation;
    # all field extraction below is assumed to sit under this check -- confirm
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # mapped fields: left unset when they cannot be read
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # unmapped fields: '' when they cannot be read
        for field in ('lang', 'city', 'number_of_pages', 'series_volume',
                      'number_of_volumes', 'translator', 'edition',
                      'series_author', 'publisher', 'series_title',
                      'isbn_issn'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%field)[0])
            except Exception:
                docinfo[field]=''
    return docinfo
681
682
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets name info from the index.meta file at path or given by dom

    Sets docinfo['name'] to the text of /resource/name, or to "" when the
    element is missing.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        # getDocinfoFromTextTool only warns about a missing name, so this
        # must not raise
        docinfo['name']=""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
696
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Reads archive path, image directory, viewer URL and text URLs from the
    index.meta dom (fetched from url if dom is None), then adds bib, name
    and access info. Raises IOError if no archive path can be determined.
    """
    # NOTE(review): block nesting reconstructed from a listing without
    # indentation -- confirm against the repository
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        # adds numPages for the image directory
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme: not a URL, resolve against archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # part of the path before the first "."
        textUrlkurz = string.split(textUrl, ".")[0]
        docinfo['textURLPathkurz'] = textUrlkurz
        #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)


    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrls and (len(presentationUrls) > 0): # overwrite these with the presentation information
        # the presentation url is the metadata url with index.meta replaced
        # by the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
803
804
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Reads the info.xml at url and copies the first author, title and date
    element into docinfo['author'], ['title'] and ['year']. Entries whose
    element is missing are left untouched. The dom argument is not used;
    the presentation document is always fetched from url.
    """
    if docinfo is None:
        # without this the default silently produced None
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for key, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
822
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """fill docinfo for the image directory at path

    The index.meta file is looked for one level above the directory that
    remains after removing cut components from path.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the scaler URL points at the directory shortened by cut components
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is one level higher, hence cut+1
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
843
844
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo cached in the session is reused when its mode and url match;
    otherwise a new one is built, stored in the session and returned.
    Raises ValueError for a mode other than texttool, imagepath or filepath.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
875
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    pageinfo holds the thumbnail paging values (current, rows, cols,
    groupsize, start, end and, if docinfo has numPages, numgroups), the
    view and toc mode, and the display/search options read from the request.
    rows and cols default to self.thumbrows and self.thumbcols.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the thumbnail group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    # docinfo may be None (its default), so test it before the lookup
    if (docinfo is not None) and ('tocSize_%s'%tocMode in docinfo):
        tocSize = int(docinfo['tocSize_%s'%tocMode])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: limit the toc page number to the last toc page;
        # explicit floor division keeps the page count an integer
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
925
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the settings of the document viewer

    authgroups is a comma-separated list; it is stored as a list of
    stripped, lower-cased names. Redirects to manage_main if RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
935
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
940
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """create a documentViewer with the given id in self and redirect to manage_main if RESPONSE is given"""
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
948
949	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass for document viewer templates (created by manage_addDocumentViewerTemplate)"""
    meta_type="DocumentViewer Template"
953
954
def manage_addDocumentViewerTemplateForm(self):
    """render the form for adding a document viewer template"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
959
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    The template is added to self under id and gets title if one is given.
    The text and submit arguments are not used. With a REQUEST the browser
    is redirected to the new template's manage_main; without one no
    redirect is attempted. Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # the file handle used to be left open
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
979
980
981

Note: See TracBrowser for help on using the repository browser.

Download in other formats: