Context Navigation

source: documentViewer/documentViewer.py @ 453:beb7ccb92564

elementtree

Last change on this file since 453:beb7ccb92564 was 453:beb7ccb92564, checked in by casties, 13 years ago
first version using elementtree instead of 4suite xml
File size: 38.9 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	#from Ft.Xml import EMPTY_NAMESPACE, Parse
11	#import Ft.Xml.Domlette
12
13	import xml.etree.ElementTree as ET
14
15	import os.path
16	import sys
17	import urllib
18	import urllib2
19	import logging
20	import math
21	import urlparse
22	import re
23	import string
24
def logger(txt,method,txt2):
    """Log txt immediately followed by txt2 at INFO level on the root logger.

    method is accepted for call compatibility only and is not used.
    """
    message = txt + txt2
    logging.info(message)
28
29
def getInt(number, default=0):
    """Return number converted to int, or int(default) if it cannot be converted.

    Only conversion failures are handled (wrong type, unparsable string,
    infinite float); anything else, e.g. KeyboardInterrupt, propagates
    instead of being silently turned into the default.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
36
def getText(node):
    """Return the concatenated text content of an ElementTree node.

    Collects the node's own text, then for every child element the child's
    text content (recursively) followed by the child's tail text.
    Returns "" if node is None.
    """
    if node is None:
        return ""
    parts = [node.text or ""]
    for child in node:
        # recurse into the child (the original called the undefined name
        # 'gettext' here and failed for any node with child elements)
        parts.append(getText(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)

# older name, kept for existing callers
getTextFromNode = getText
58
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as XML by ElementTree.

    NOTE: encoding is accepted but is not passed on to ET.tostring, so the
    ElementTree default encoding is what is used.
    """
    return ET.tostring(node)
69
def browserCheck(self):
    """Inspect the request's User-Agent header and return browser information.

    Returns a dict with the keys 'ua' (the raw header value), 'isIE', 'isN4',
    'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' (always False) and
    the version strings 'versIE', 'versFirefox', 'versSafariChrome' and
    'versOpera' ("" when not detected).

    A missing User-Agent header is treated as an empty string so that no
    browser is detected instead of raising.

    NOTE: this definition shadows the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the file.
    """
    raw_ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    ua = raw_ua or ""
    bt = {
        'ua': raw_ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # text from the first '(' and from the first ')' on; find() returns -1
    # when the character is absent, which leaves only the last character
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]
    nav_fields = nav.split("; ")
    after_paren = nav1.split(" ")

    # Safari or Chrome identification: version token after the second ')'
    ie1 = ""
    if len(nav_fields) > 1 and len(after_paren) > 2:
        # third token after the first ')' -- also used for Firefox below
        ie1 = after_paren[2]
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tail = nav3.split(" ")
        if len(tail) > 2 and "Safari" in tail[2]:
            version = tail[1].split("/")
            if len(version) > 1:
                bt['versSafariChrome'] = version[1]

    # IE identification: second ';'-separated field is "MSIE <version>"
    if len(nav_fields) > 1 and "MSIE" in nav_fields[1]:
        ie_parts = nav_fields[1].split(" ")
        if len(ie_parts) > 1:
            bt['versIE'] = ie_parts[1]

    # Firefox identification: token has the form "Firefox/<version>"
    if "Firefox" in ie1:
        ff_parts = ie1.split("/")
        if len(ff_parts) > 1:
            nav5 = ff_parts[1]
            logging.debug("FIREFOX: %s"%(nav5))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nav5[0:3]

    # Opera identification
    if "Opera" in ua:
        opera_parts = nav[nav.find(')'):].split("/")
        if len(opera_parts) > 2:
            bt['versOpera'] = opera_parts[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
130
131
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
135
136
137	def getHttpData(url, data=None, num_tries=3, timeout=10):
138	"""returns result from url+data HTTP request"""
139	# we do GET (by appending data to url)
140	if isinstance(data, str) or isinstance(data, unicode):
141	# if data is string then append
142	url = "%s?%s"%(url,data)
143	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
144	# urlencode
145	url = "%s?%s"%(url,urllib.urlencode(data))
146
147	response = None
148	errmsg = None
149	for cnt in range(num_tries):
150	try:
151	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
152	if sys.version_info < (2, 6):
153	# set timeout on socket -- ugly :-(
154	import socket
155	socket.setdefaulttimeout(float(timeout))
156	response = urllib2.urlopen(url)
157	else:
158	response = urllib2.urlopen(url,timeout=float(timeout))
159	# check result?
160	break
161	except urllib2.HTTPError, e:
162	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
163	errmsg = str(e)
164	# stop trying
165	break
166	except urllib2.URLError, e:
167	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
168	errmsg = str(e)
169	# stop trying
170	#break
171
172	if response is not None:
173	data = response.read()
174	response.close()
175	return data
176
177	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
178	#return None
179
180	##
181	## documentViewer class
182	##
class documentViewer(Folder):
    """document viewer"""
    # name under which this class is registered as Zope meta type
    meta_type="Document viewer"

    # collects the permission declarations made with security.declareProtected
    security=ClassSecurityInfo()
    # management tabs of Folder plus one tab for the viewer configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms, loaded from files relative to this module
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
212
213
214	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
215	"""init document viewer"""
216	self.id=id
217	self.title=title
218	self.thumbcols = thumbcols
219	self.thumbrows = thumbrows
220	# authgroups is list of authorized groups (delimited by ,)
221	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
222	# create template folder so we can always use template.something
223
224	templateFolder = Folder('template')
225	#self['template'] = templateFolder # Zope-2.12 style
226	self._setObject('template',templateFolder) # old style
227	try:
228	import MpdlXmlTextServer
229	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
230	#templateFolder['fulltextclient'] = xmlRpcClient
231	templateFolder._setObject('fulltextclient',textServer)
232	except Exception, e:
233	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
234	try:
235	from Products.zogiLib.zogiLib import zogiLib
236	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
237	#templateFolder['zogilib'] = zogilib
238	templateFolder._setObject('zogilib',zogilib)
239	except Exception, e:
240	logging.error("Unable to create zogiLib for zogilib: "+str(e))
241
242
243	# proxy text server methods to fulltextclient
244	def getTextPage(self, **args):
245	"""get page"""
246	return self.template.fulltextclient.getTextPage(**args)
247
248	def getOrigPages(self, **args):
249	"""get page"""
250	return self.template.fulltextclient.getOrigPages(**args)
251
252	def getOrigPagesNorm(self, **args):
253	"""get page"""
254	return self.template.fulltextclient.getOrigPagesNorm(**args)
255
256	def getQuery(self, **args):
257	"""get query in search"""
258	return self.template.fulltextclient.getQuery(**args)
259
260	def getSearch(self, **args):
261	"""get search"""
262	return self.template.fulltextclient.getSearch(**args)
263
264	def getGisPlaces(self, **args):
265	"""get gis places"""
266	return self.template.fulltextclient.getGisPlaces(**args)
267
268	def getAllGisPlaces(self, **args):
269	"""get all gis places """
270	return self.template.fulltextclient.getAllGisPlaces(**args)
271
272	def getTranslate(self, **args):
273	"""get translate"""
274	return self.template.fulltextclient.getTranslate(**args)
275
276	def getLemma(self, **args):
277	"""get lemma"""
278	return self.template.fulltextclient.getLemma(**args)
279
280	def getLemmaQuery(self, **args):
281	"""get query"""
282	return self.template.fulltextclient.getLemmaQuery(**args)
283
284	def getLex(self, **args):
285	"""get lex"""
286	return self.template.fulltextclient.getLex(**args)
287
288	def getToc(self, **args):
289	"""get toc"""
290	return self.template.fulltextclient.getToc(**args)
291
292	def getTocPage(self, **args):
293	"""get tocpage"""
294	return self.template.fulltextclient.getTocPage(**args)
295
296
297	security.declareProtected('View','thumbs_rss')
298	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
299	'''
300	view it
301	@param mode: defines how to access the document behind url
302	@param url: url which contains display information
303	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
304
305	'''
306	logging.debug("HHHHHHHHHHHHHH:load the rss")
307	logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
308
309	if not hasattr(self, 'template'):
310	# create template folder if it doesn't exist
311	self.manage_addFolder('template')
312
313	if not self.digilibBaseUrl:
314	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
315
316	docinfo = self.getDocinfo(mode=mode,url=url)
317	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
318	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
319	''' ZDES '''
320	pt = getattr(self.template, 'thumbs_main_rss')
321
322	if viewMode=="auto": # automodus gewaehlt
323	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
324	viewMode="text"
325	else:
326	viewMode="images"
327
328	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
329
330	security.declareProtected('View','index_html')
331	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
332	'''
333	view it
334	@param mode: defines how to access the document behind url
335	@param url: url which contains display information
336	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
337	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
338	@param characterNormalization type of text display (reg, norm, none)
339	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
340	'''
341
342	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
343
344	if not hasattr(self, 'template'):
345	# this won't work
346	logging.error("template folder missing!")
347	return "ERROR: template folder missing!"
348
349	if not getattr(self, 'digilibBaseUrl', None):
350	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
351
352	docinfo = self.getDocinfo(mode=mode,url=url)
353
354	if tocMode != "thumbs":
355	# get table of contents
356	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
357
358	if viewMode=="auto": # automodus gewaehlt
359	if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
360	viewMode="text_dict"
361	else:
362	viewMode="images"
363
364	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
365
366	if (docinfo.get('textURLPath',None)):
367	page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)
368	pageinfo['textPage'] = page
369	tt = getattr(self, 'template')
370	pt = getattr(tt, 'viewer_main')
371	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
372
373	def generateMarks(self,mk):
374	ret=""
375	if mk is None:
376	return ""
377	if not isinstance(mk, list):
378	mk=[mk]
379	for m in mk:
380	ret+="mk=%s"%m
381	return ret
382
383
384	def getBrowser(self):
385	"""getBrowser the version of browser """
386	bt = browserCheck(self)
387	logging.debug("BROWSER VERSION: %s"%(bt))
388	return bt
389
390	def findDigilibUrl(self):
391	"""try to get the digilib URL from zogilib"""
392	url = self.template.zogilib.getDLBaseUrl()
393	return url
394
395	def getDocumentViewerURL(self):
396	"""returns the URL of this instance"""
397	return self.absolute_url()
398
399	def getStyle(self, idx, selected, style=""):
400	"""returns a string with the given style and append 'sel' if path == selected."""
401	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
402	if idx == selected:
403	return style + 'sel'
404	else:
405	return style
406
407	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
408	"""returns URL to documentviewer with parameter param set to val or from dict params"""
409	# copy existing request params
410	urlParams=self.REQUEST.form.copy()
411	# change single param
412	if param is not None:
413	if val is None:
414	if urlParams.has_key(param):
415	del urlParams[param]
416	else:
417	urlParams[param] = str(val)
418
419	# change more params
420	if params is not None:
421	for k in params.keys():
422	v = params[k]
423	if v is None:
424	# val=None removes param
425	if urlParams.has_key(k):
426	del urlParams[k]
427
428	else:
429	urlParams[k] = v
430
431	# FIXME: does this belong here?
432	if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
433	urlParams["mode"] = "imagepath"
434	urlParams["url"] = getParentDir(urlParams["url"])
435
436	# quote values and assemble into query string (not escaping '/')
437	ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
438	#ps = urllib.urlencode(urlParams)
439	if baseUrl is None:
440	baseUrl = self.REQUEST['URL1']
441
442	url = "%s?%s"%(baseUrl, ps)
443	return url
444
445
446	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
447	"""link to documentviewer with parameter param set to val"""
448	return self.getLink(param, val, params, baseUrl, '&')
449
450	def getInfo_xml(self,url,mode):
451	"""returns info about the document as XML"""
452
453	if not self.digilibBaseUrl:
454	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
455
456	docinfo = self.getDocinfo(mode=mode,url=url)
457	pt = getattr(self.template, 'info_xml')
458	return pt(docinfo=docinfo)
459
460	def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
461	"""returns new option state"""
462	if not self.REQUEST.SESSION.has_key(optionName):
463	# not in session -- initial
464	opt = {'lastState': newState, 'state': initialState}
465	else:
466	opt = self.REQUEST.SESSION.get(optionName)
467	if opt['lastState'] != newState:
468	# state in session has changed -- toggle
469	opt['state'] = not opt['state']
470	opt['lastState'] = newState
471
472	self.REQUEST.SESSION[optionName] = opt
473	return opt['state']
474
475	def isAccessible(self, docinfo):
476	"""returns if access to the resource is granted"""
477	access = docinfo.get('accessType', None)
478	logging.debug("documentViewer (accessOK) access type %s"%access)
479	if access is not None and access == 'free':
480	logging.debug("documentViewer (accessOK) access is free")
481	return True
482	elif access is None or access in self.authgroups:
483	# only local access -- only logged in users
484	user = getSecurityManager().getUser()
485	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
486	if user is not None:
487	#print "user: ", user
488	return (user.getUserName() != "Anonymous User")
489	else:
490	return False
491
492	logging.error("documentViewer (accessOK) unknown access type %s"%access)
493	return False
494
495
496	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
497	"""gibt param von dlInfo aus"""
498	if docinfo is None:
499	docinfo = {}
500
501	for x in range(cut):
502
503	path=getParentDir(path)
504
505	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
506
507	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
508
509	txt = getHttpData(infoUrl)
510	if txt is None:
511	raise IOError("Unable to get dir-info from %s"%(infoUrl))
512
513	dom = ET.fromstring(txt)
514	#dom = Parse(txt)
515	size=getText(dom.find("size"))
516	#sizes=dom.xpath("//dir/size")
517	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
518
519	if size:
520	docinfo['numPages'] = int(size)
521	else:
522	docinfo['numPages'] = 0
523
524	# TODO: produce and keep list of image names and numbers
525
526	return docinfo
527
528	def getIndexMetaPath(self,url):
529	"""gib nur den Pfad zurueck"""
530	regexp = re.compile(r".(experimental\|permanent)/(.)")
531	regpath = regexp.match(url)
532	if (regpath==None):
533	return ""
534	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
535	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
536
537
538
539	def getIndexMetaUrl(self,url):
540	"""returns utr of index.meta document at url"""
541
542	metaUrl = None
543	if url.startswith("http://"):
544	# real URL
545	metaUrl = url
546	else:
547	# online path
548	server=self.digilibBaseUrl+"/servlet/Texter?fn="
549	metaUrl=server+url.replace("/mpiwg/online","")
550	if not metaUrl.endswith("index.meta"):
551	metaUrl += "/index.meta"
552
553	return metaUrl
554
555	def getDomFromIndexMeta(self, url):
556	"""get dom from index meta"""
557	dom = None
558	metaUrl = self.getIndexMetaUrl(url)
559
560	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
561	txt=getHttpData(metaUrl)
562	if txt is None:
563	raise IOError("Unable to read index meta from %s"%(url))
564
565	dom = ET.fromstring(txt)
566	#dom = Parse(txt)
567	return dom
568
569	def getPresentationInfoXML(self, url):
570	"""returns dom of info.xml document at url"""
571	dom = None
572	metaUrl = None
573	if url.startswith("http://"):
574	# real URL
575	metaUrl = url
576	else:
577	# online path
578	server=self.digilibBaseUrl+"/servlet/Texter?fn="
579	metaUrl=server+url.replace("/mpiwg/online","")
580
581	txt=getHttpData(metaUrl)
582	if txt is None:
583	raise IOError("Unable to read infoXMLfrom %s"%(url))
584
585	dom = ET.fromstring(txt)
586	#dom = Parse(txt)
587	return dom
588
589
590	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
591	"""gets authorization info from the index.meta file at path or given by dom"""
592	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
593
594	access = None
595
596	if docinfo is None:
597	docinfo = {}
598
599	if dom is None:
600	for x in range(cut):
601	path=getParentDir(path)
602	dom = self.getDomFromIndexMeta(path)
603
604	acc = dom.find(".//access-conditions/access")
605	if acc is not None:
606	acctype = acc.get('type')
607	#acctype = dom.xpath("//access-conditions/access/@type")
608	if acctype:
609	access=acctype
610	if access in ['group', 'institution']:
611	access = dom.find(".//access-conditions/access/name").text.lower()
612
613	docinfo['accessType'] = access
614	return docinfo
615
616
617	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
618	"""gets bibliographical info from the index.meta file at path or given by dom"""
619	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
620
621	if docinfo is None:
622	docinfo = {}
623
624	if dom is None:
625	for x in range(cut):
626	path=getParentDir(path)
627	dom = self.getDomFromIndexMeta(path)
628
629	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
630
631	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
632	# put in all raw bib fields as dict "bib"
633	bib = dom.find(".//bib/*")
634	#bib = dom.xpath("//bib/*")
635	if bib and len(bib)>0:
636	bibinfo = {}
637	for e in bib:
638	bibinfo[e.localName] = getTextFromNode(e)
639	docinfo['bib'] = bibinfo
640
641	# extract some fields (author, title, year) according to their mapping
642	metaData=self.metadata.main.meta.bib
643	bib = dom.find(".//bib")
644	bibtype=bib.get("type")
645	#bibtype=dom.xpath("//bib/@type")
646	if not bibtype:
647	bibtype="generic"
648
649	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
650	docinfo['bib_type'] = bibtype
651	bibmap=metaData.generateMappingForType(bibtype)
652	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
653	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
654	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
655	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0:
656	try:
657	docinfo['author']=getText(bib.find(bibmap['author'][0]))
658	except: pass
659	try:
660	docinfo['title']=getText(bib.find(bibmap['title'][0]))
661	except: pass
662	try:
663	docinfo['year']=getText(bib.find(bibmap['year'][0]))
664	except: pass
665
666	# ROC: why is this here?
667	# logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
668	# try:
669	# docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])
670	# except:
671	# docinfo['lang']=''
672	# try:
673	# docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])
674	# except:
675	# docinfo['city']=''
676	# try:
677	# docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])
678	# except:
679	# docinfo['number_of_pages']=''
680	# try:
681	# docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])
682	# except:
683	# docinfo['series_volume']=''
684	# try:
685	# docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])
686	# except:
687	# docinfo['number_of_volumes']=''
688	# try:
689	# docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])
690	# except:
691	# docinfo['translator']=''
692	# try:
693	# docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])
694	# except:
695	# docinfo['edition']=''
696	# try:
697	# docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])
698	# except:
699	# docinfo['series_author']=''
700	# try:
701	# docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])
702	# except:
703	# docinfo['publisher']=''
704	# try:
705	# docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])
706	# except:
707	# docinfo['series_title']=''
708	# try:
709	# docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])
710	# except:
711	# docinfo['isbn_issn']=''
712	return docinfo
713
714
715	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
716	"""gets name info from the index.meta file at path or given by dom"""
717	if docinfo is None:
718	docinfo = {}
719
720	if dom is None:
721	for x in range(cut):
722	path=getParentDir(path)
723	dom = self.getDomFromIndexMeta(path)
724
725	docinfo['name']=getText(dom.find("name"))
726	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
727	return docinfo
728
    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        url: path or URL of the index.meta document
        dom: parsed index.meta root element (loaded from url if None)
        docinfo: dict to update (a new dict if None)

        Fills docinfo with image path and URL, viewer URL, text URLs, then
        with bibliographical, name and access information and returns it.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveName = getTextFromNode(dom.find("name"))
        if not archiveName:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePath = getTextFromNode(dom.find("archive-path"))
        if archivePath:
            # clean up archive path
            if archivePath[0] != '/':
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        # NOTE(review): getTextFromNode returns "" for a missing element, so
        # archivePath is "" rather than None here when it could not be
        # determined -- this check looks like it can never fire; confirm
        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDir = getText(dom.find(".//texttool/image"))

        if not imageDir:
            # no image tag is not an error anymore because text mode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            # adds the number of pages of the image directory
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
        if viewerUrl:
            docinfo['viewerURL'] = viewerUrl

        # old style text URL
        textUrl = getText(dom.find(".//texttool/text"))
        if textUrl:
            if urlparse.urlparse(textUrl)[0] == "": # no scheme, i.e. not an URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrl = getText(dom.find(".//texttool/text-url-path"))
        if textUrl:
            docinfo['textURLPath'] = textUrl
            # part of the path before the first "."
            textUrlkurz = string.split(textUrl, ".")[0]
            docinfo['textURLPathkurz'] = textUrlkurz
            #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)


        presentationUrl = getText(dom.find(".//texttool/presentation"))
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


        if presentationUrl: # overwrite these with the presentation information
            # the presentation URL is the URL of the metadata with index.meta
            # replaced by the relative path of the presentation info
            presentationPath = presentationUrl
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo
827
828
829	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
830	"""gets the bibliographical information from the preseantion entry in texttools
831	"""
832	dom=self.getPresentationInfoXML(url)
833	try:
834	docinfo['author']=getText(dom.find(".//author"))
835	except:
836	pass
837	try:
838	docinfo['title']=getText(dom.find(".//title"))
839	except:
840	pass
841	try:
842	docinfo['year']=getText(dom.find(".//date"))
843	except:
844	pass
845	return docinfo
846
847	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
848	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
849	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
850	if docinfo is None:
851	docinfo = {}
852	path=path.replace("/mpiwg/online","")
853	docinfo['imagePath'] = path
854	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
855
856	pathorig=path
857	for x in range(cut):
858	path=getParentDir(path)
859	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
860	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
861	docinfo['imageURL'] = imageUrl
862
863	#path ist the path to the images it assumes that the index.meta file is one level higher.
864	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
865	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
866	return docinfo
867
868
869	def getDocinfo(self, mode, url):
870	"""returns docinfo depending on mode"""
871	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
872	# look for cached docinfo in session
873	if self.REQUEST.SESSION.has_key('docinfo'):
874	docinfo = self.REQUEST.SESSION['docinfo']
875	# check if its still current
876	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
877	logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
878	return docinfo
879	# new docinfo
880	docinfo = {'mode': mode, 'url': url}
881	if mode=="texttool": #index.meta with texttool information
882	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
883	elif mode=="imagepath":
884	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
885	elif mode=="filepath":
886	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
887	else:
888	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
889	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
890
891	# FIXME: fake texturlpath
892	if not docinfo.has_key('textURLPath'):
893	docinfo['textURLPath'] = None
894
895	logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
896	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%)
897	self.REQUEST.SESSION['docinfo'] = docinfo
898	return docinfo
899
900	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
901	"""returns pageinfo with the given parameters"""
902	pageinfo = {}
903	current = getInt(current)
904
905	pageinfo['current'] = current
906	rows = int(rows or self.thumbrows)
907	pageinfo['rows'] = rows
908	cols = int(cols or self.thumbcols)
909	pageinfo['cols'] = cols
910	grpsize = cols * rows
911	pageinfo['groupsize'] = grpsize
912	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
913	# int(current / grpsize) * grpsize +1))
914	pageinfo['start'] = start
915	pageinfo['end'] = start + grpsize
916	if (docinfo is not None) and ('numPages' in docinfo):
917	np = int(docinfo['numPages'])
918	pageinfo['end'] = min(pageinfo['end'], np)
919	pageinfo['numgroups'] = int(np / grpsize)
920	if np % grpsize > 0:
921	pageinfo['numgroups'] += 1
922	pageinfo['viewMode'] = viewMode
923	pageinfo['tocMode'] = tocMode
924	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
925	#pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
926	pageinfo['query'] = self.REQUEST.get('query','')
927	pageinfo['queryType'] = self.REQUEST.get('queryType','')
928	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
929	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
930	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
931	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
932	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
933	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
934	toc = int (pageinfo['tocPN'])
935	pageinfo['textPages'] =int (toc)
936
937	if 'tocSize_%s'%tocMode in docinfo:
938	tocSize = int(docinfo['tocSize_%s'%tocMode])
939	tocPageSize = int(pageinfo['tocPageSize'])
940	# cached toc
941	if tocSize%tocPageSize>0:
942	tocPages=tocSize/tocPageSize+1
943	else:
944	tocPages=tocSize/tocPageSize
945	pageinfo['tocPN'] = min (tocPages,toc)
946	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
947	pageinfo['sn'] =self.REQUEST.get('sn','')
948	return pageinfo
949
950	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
951	"""init document viewer"""
952	self.title=title
953	self.digilibBaseUrl = digilibBaseUrl
954	self.thumbrows = thumbrows
955	self.thumbcols = thumbcols
956	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
957	if RESPONSE is not None:
958	RESPONSE.redirect('manage_main')
959
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
964
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id in this container.

    If RESPONSE is given the browser is redirected to manage_main.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
972
973	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template used by the document viewer."""
    # name under which this class is registered as Zope meta type
    meta_type = "DocumentViewer Template"
977
978
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
983
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate with the content of zpt/viewer_main.zpt.

    id: id of the new template
    title: title of the new template (optional)
    text, submit: not used
    REQUEST: if given, the browser is redirected to the manage_main
        screen of the new template

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file open
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
1003
1004
1005

Note: See TracBrowser for help on using the repository browser.

Download in other formats: