Context Navigation

source: documentViewer/documentViewer.py @ 454:73e3273c7624

elementtree

Last change on this file since 454:73e3273c7624 was 454:73e3273c7624, checked in by casties, 13 years ago
more work
File size: 38.7 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	#from Ft.Xml import EMPTY_NAMESPACE, Parse
11	#import Ft.Xml.Domlette
12
13	import xml.etree.ElementTree as ET
14
15	import os.path
16	import sys
17	import urllib
18	import urllib2
19	import logging
20	import math
21	import urlparse
22	import re
23	import string
24
def logger(txt, method, txt2):
    """Log ``txt`` immediately followed by ``txt2`` at INFO level.

    ``method`` is accepted for call compatibility but is not used; the
    message always goes through ``logging.info``.
    """
    message = txt + txt2
    logging.info(message)
28
29
def getInt(number, default=0):
    """Return ``number`` converted to int, falling back to ``int(default)``.

    The fallback is used when ``number`` cannot be converted (e.g. ``None``,
    an empty or non-numeric string, or an infinite float).  Only conversion
    errors are caught, so KeyboardInterrupt/SystemExit are no longer
    swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
36
def getText(node):
    """Return the text content of an ElementTree node and all its descendants.

    The node's own text, the text of every child (recursively) and each
    child's tail text are concatenated in document order.  Returns "" when
    ``node`` is None.
    """
    if node is None:
        return ""
    text = node.text or ""
    for child in node:
        # recurse into the child (the original called the undefined name
        # 'gettext' here, which raised NameError for any nested element)
        text += getText(child)
        if child.tail:
            text += child.tail
    return text
56
57	getTextFromNode = getText
58
def serializeNode(node, encoding="utf-8"):
    """Return ``node`` serialized as XML via ``ET.tostring``.

    NOTE(review): ``encoding`` is accepted but not handed to
    ``ET.tostring``, so the serializer's default encoding applies.
    """
    return ET.tostring(node)
69
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads ``HTTP_USER_AGENT`` via ``self.REQUEST.get_header`` and returns a
    dict with these keys:

    - ``ua``: the raw header value (may be None)
    - ``isIE``, ``isN4``, ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac``: flags
    - ``versIE``, ``versFirefox``, ``versSafariChrome``, ``versOpera``:
      version strings, "" when not detected
    - ``staticHTML``: always False

    Every detection step is self-contained; a User-Agent that does not have
    the expected shape simply leaves the corresponding version empty.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header is treated as an empty string for all checks
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # str.find() yields -1 when nothing is found, so these slices then start
    # at the last character -- same as the original parsing.
    platform = agent[agent.find('('):]          # "(platform; details) ..."
    after_platform = agent[agent.find(')'):]    # ") engine (...) product/version ..."

    # Safari or Chrome: ") Version/x Safari/y" after the second parenthesis
    try:
        second_group = after_platform[after_platform.find('('):]
        tokens = second_group[second_group.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE: second "; "-separated token inside the parenthesis is "MSIE x.y"
    try:
        ie_token = platform.split("; ")[1]
        if "MSIE" in ie_token:
            bt['versIE'] = ie_token.split(" ")[1]
    except IndexError:
        pass

    # Firefox: third space-separated token after the parenthesis
    try:
        product = after_platform.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s" % (version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: text after the second "/" following the parenthesis
    try:
        if "Opera" in agent:
            rest = platform[platform.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
130
131
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path containing no '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
135
136
137	def getHttpData(url, data=None, num_tries=3, timeout=10):
138	"""returns result from url+data HTTP request"""
139	# we do GET (by appending data to url)
140	if isinstance(data, str) or isinstance(data, unicode):
141	# if data is string then append
142	url = "%s?%s"%(url,data)
143	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
144	# urlencode
145	url = "%s?%s"%(url,urllib.urlencode(data))
146
147	response = None
148	errmsg = None
149	for cnt in range(num_tries):
150	try:
151	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
152	if sys.version_info < (2, 6):
153	# set timeout on socket -- ugly :-(
154	import socket
155	socket.setdefaulttimeout(float(timeout))
156	response = urllib2.urlopen(url)
157	else:
158	response = urllib2.urlopen(url,timeout=float(timeout))
159	# check result?
160	break
161	except urllib2.HTTPError, e:
162	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
163	errmsg = str(e)
164	# stop trying
165	break
166	except urllib2.URLError, e:
167	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
168	errmsg = str(e)
169	# stop trying
170	#break
171
172	if response is not None:
173	data = response.read()
174	response.close()
175	return data
176
177	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
178	#return None
179
180	##
181	## documentViewer class
182	##
183	class documentViewer(Folder):
184	"""document viewer"""
185	meta_type="Document viewer"
186
187	security=ClassSecurityInfo()
188	manage_options=Folder.manage_options+(
189	{'label':'main config','action':'changeDocumentViewerForm'},
190	)
191
192	# templates and forms
193	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
194	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
195	toc_text = PageTemplateFile('zpt/toc_text', globals())
196	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
197	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
198	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
199	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
200	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
201	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
202	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
203	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
204	head_main = PageTemplateFile('zpt/head_main', globals())
205	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
206	info_xml = PageTemplateFile('zpt/info_xml', globals())
207
208
209	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
210	security.declareProtected('View management screens','changeDocumentViewerForm')
211	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
212
213
214	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
215	"""init document viewer"""
216	self.id=id
217	self.title=title
218	self.thumbcols = thumbcols
219	self.thumbrows = thumbrows
220	# authgroups is list of authorized groups (delimited by ,)
221	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
222	# create template folder so we can always use template.something
223
224	templateFolder = Folder('template')
225	#self['template'] = templateFolder # Zope-2.12 style
226	self._setObject('template',templateFolder) # old style
227	try:
228	import MpdlXmlTextServer
229	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
230	#templateFolder['fulltextclient'] = xmlRpcClient
231	templateFolder._setObject('fulltextclient',textServer)
232	except Exception, e:
233	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
234	try:
235	from Products.zogiLib.zogiLib import zogiLib
236	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
237	#templateFolder['zogilib'] = zogilib
238	templateFolder._setObject('zogilib',zogilib)
239	except Exception, e:
240	logging.error("Unable to create zogiLib for zogilib: "+str(e))
241
242
243	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward the keyword arguments to ``getTextPage`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
247
def getOrigPages(self, **args):
    """Forward the keyword arguments to ``getOrigPages`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getOrigPages(**args)
251
def getOrigPagesNorm(self, **args):
    """Forward the keyword arguments to ``getOrigPagesNorm`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getOrigPagesNorm(**args)
255
def getQuery(self, **args):
    """Forward the keyword arguments to ``getQuery`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
259
def getSearch(self, **args):
    """Forward the keyword arguments to ``getSearch`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
263
def getGisPlaces(self, **args):
    """Forward the keyword arguments to ``getGisPlaces`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)
267
def getAllGisPlaces(self, **args):
    """Forward the keyword arguments to ``getAllGisPlaces`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)
271
def getTranslate(self, **args):
    """Forward the keyword arguments to ``getTranslate`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
275
def getLemma(self, **args):
    """Forward the keyword arguments to ``getLemma`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
279
def getLemmaQuery(self, **args):
    """Forward the keyword arguments to ``getLemmaQuery`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getLemmaQuery(**args)
283
def getLex(self, **args):
    """Forward the keyword arguments to ``getLex`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getLex(**args)
287
def getToc(self, **args):
    """Forward the keyword arguments to ``getToc`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getToc(**args)
291
def getTocPage(self, **args):
    """Forward the keyword arguments to ``getTocPage`` of the template folder's fulltextclient."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
295
296
297	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        the docinfo has a text URL and 'images' otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @return: the rendered template
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may not exist at all on this instance, so use getattr
    # (same check as index_html) instead of reading it directly
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: prefer text when a text URL is configured
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
329
330	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    """Render the main viewer page ('viewer_main' template).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'auto' is resolved to 'text_dict' when the docinfo has
        a text URL and to 'images' otherwise; other values are passed on
    @param tocMode: type of table of contents; for anything but 'thumbs'
        the table of contents is fetched through getToc
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, rendered through generateMarks
    @return: the rendered template, or an error string when the template
        folder is missing
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: text view when a text URL is configured
        has_text = ('textURL' in docinfo) or docinfo.get('textURLPath',None)
        if has_text:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)

    folder = getattr(self, 'template')
    renderer = getattr(folder, 'viewer_main')
    return renderer(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
372
def generateMarks(self,mk):
    """Return the marks in ``mk`` as concatenated ``mk=<value>`` fragments.

    ``mk`` may be None (gives ""), a single value or a list of values.

    NOTE(review): the fragments are joined without any separator, so two
    marks give "mk=1mk=2" -- confirm this is what the templates expect.
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s" % m for m in marks])
382
383
def getBrowser(self):
    """Return the browser description dict produced by ``browserCheck`` for this request."""
    info = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(info))
    return info
389
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
394
def getDocumentViewerURL(self):
    """Return this instance's URL as given by ``absolute_url()``."""
    viewer_url = self.absolute_url()
    return viewer_url
398
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if not (idx == selected):
        return style
    return style + 'sel'
406
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the document viewer built from the current request parameters.

    @param param: name of a single parameter to change
    @param val: new value for ``param``; None removes the parameter
    @param params: dict of further parameters to change; a value of None
        removes that parameter
    @param baseUrl: URL to use instead of the request's ``URL1``
    @param paramSep: separator placed between the query parameters
    @return: ``baseUrl`` + '?' + the quoted query string ('/' is not escaped)
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # a first call with mode=filepath is turned into imagepath links
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for k, v in urlParams.items():
        if not isinstance(v, basestring):
            # quote_plus only accepts strings; values from params (e.g. a
            # page number) may be ints
            v = str(v)
        pairs.append("%s=%s" % (k, urllib.quote_plus(v, '/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s" % (baseUrl, ps)
444
445
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return ``getLink(...)`` for the same arguments with an explicit '&' separator.

    NOTE(review): the separator passed here equals getLink's default, so
    both methods currently give the same result -- confirm whether an
    HTML-escaped ampersand was intended.
    """
    separator = '&'
    return self.getLink(param, val, params, baseUrl, separator)
449
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered 'info_xml' template).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # the attribute may not exist at all on this instance, so use getattr
    # (same check as index_html) instead of reading it directly
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
459
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the current state of a toggle option kept in the session.

    On first use the option starts with ``initialState``.  Afterwards the
    state is flipped whenever ``newState`` differs from the ``newState``
    seen on the previous call.  The option record is written back to the
    session under ``optionName`` on every call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
474
def isAccessible(self, docinfo):
    """Return whether access to the resource described by ``docinfo`` is granted.

    - access type 'free': always accessible
    - no access type, or a type listed in ``self.authgroups``: accessible
      only when the current user is not "Anonymous User"
    - any other access type: not accessible (logged as an error)
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
494
495
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib for the directory info of ``path`` and store the page count.

    @param path: image directory path
    @param docinfo: dict to update (a new one is created when None)
    @param cut: number of trailing path components to strip before asking
    @return: docinfo with 'numPages' set to the <size> reported by digilib's
        dirInfo-xml.jsp, or 0 when no size is reported
    """
    if docinfo is None:
        docinfo = {}

    for _ in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
526
def getIndexMetaPath(self,url):
    """Return the '/mpiwg/online/...' path contained in ``url``.

    Everything from the last 'experimental/' or 'permanent/' component
    onwards is kept and prefixed with '/mpiwg/online/'.  Returns "" when
    ``url`` contains neither component.
    """
    # the pattern in the original was garbled (literal '|', no '*'
    # quantifiers) and could never match a real path
    found = re.match(r".*(experimental|permanent)/(.*)", url)
    if found is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%found.group(2))
    return "/mpiwg/online/" + found.group(1) + "/" + found.group(2)
535
536
537
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for ``url``.

    A ``url`` starting with "http://" is returned unchanged.  Anything else
    is treated as an online path: "/mpiwg/online" is removed, the path is
    appended to digilib's Texter servlet URL, and "/index.meta" is added
    unless the result already ends with "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
553
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for ``url`` and return it parsed as an ElementTree element."""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return ET.fromstring(txt)
567
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at ``url`` and return it parsed as an ElementTree element.

    A ``url`` starting with "http://" is fetched directly; anything else is
    treated as an online path and read through digilib's Texter servlet
    after removing "/mpiwg/online".
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return ET.fromstring(txt)
587
588
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get authorization info from the index.meta file at path or given by dom.

    @param path: path of the resource (only used when ``dom`` is None)
    @param docinfo: dict to update (a new one is created when None)
    @param dom: already parsed index.meta document
    @param cut: number of trailing path components to strip from ``path``
        before loading index.meta
    @return: docinfo with 'accessType' set to the type attribute of the
        first access-conditions/access element, to the lower-cased <name>
        for the types 'group' and 'institution', or to None when there is
        no access information
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                name = dom.find(".//access-conditions/access/name")
                if name is not None and name.text:
                    access = name.text.lower()
                else:
                    # keep the bare type instead of failing on a missing
                    # or empty <name> element
                    logging.warning("documentViewer (getauthinfofromindexmeta) access type %s without name in: %s"%(acctype,path))

    docinfo['accessType'] = access
    return docinfo
614
615
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get bibliographical info from the index.meta file at path or given by dom.

    @param path: path of the resource
    @param docinfo: dict to update (a new one is created when None)
    @param dom: already parsed index.meta document
    @param cut: number of trailing path components to strip from ``path``
        before loading index.meta (only used when ``dom`` is None)
    @return: docinfo with 'indexMetaPath' and, when the document has a
        <bib> element, 'bib' (all raw bib fields), 'bib_type' and -- as far
        as the metadata mapping provides them -- 'author', 'title', 'year'
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    bib = dom.find(".//bib")
    if bib is None:
        # no bibliographical data at all
        return docinfo

    # put in all raw bib fields as dict "bib"
    bibinfo = {}
    for e in bib:
        bibinfo[e.tag] = getText(e)
    docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = bib.get("type") or "generic"
    # index.meta writes types with "-" where the mapping uses " "
    bibtype = bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))

    # if there is no mapping bibmap is empty (mapping sometimes has empty
    # fields); the parentheses keep an empty bibmap from being indexed
    if len(bibmap) > 0 and (len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0):
        for field in ('author', 'title', 'year'):
            # best effort: a field missing from the mapping or an unusable
            # path just leaves that entry unset
            try:
                docinfo[field] = getText(bib.find(bibmap[field][0]))
            except Exception:
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s for type %s"%(field,bibtype))

    return docinfo
712
713
714	# TODO: is this needed?
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of the index.meta <name> element as docinfo['name'].

    The document is taken from ``dom`` or, when that is None, loaded from
    ``path`` after stripping ``cut`` trailing path components.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    name = getText(dom.find("name"))
    docinfo['name'] = name
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
728
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Build docinfo from the texttool section of an index.meta document.

    @param url: URL or online path of the index.meta document
    @param dom: already parsed index.meta (loaded from ``url`` when None)
    @param docinfo: dict to update (a new one is created when None)
    @return: docinfo with image, text, viewer, bibliographical, name and
        access information as far as the document provides it
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = ''  # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archiveName = getText(dom.find("name"))
    if not archiveName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePath = getText(dom.find("archive-path"))
    if not archivePath:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if not url.startswith('http'):
            archivePath = url.replace('index.meta', '')
    else:
        # clean up archive path
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and not archivePath.endswith(archiveName):
            archivePath = archivePath + "/" + archiveName

    # NOTE(review): getText yields "" rather than None for a missing
    # element, so this check looks unreachable -- confirm intended behavior.
    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = getText(dom.find(".//texttool/image"))
    if not imageDir:
        # no image tag: text mode is the standard, so this is not an error
        imageDir = ""
        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""
    elif archivePath:
        imageDir = os.path.join(archivePath, imageDir).replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    oldTextUrl = getText(dom.find(".//texttool/text"))
    if oldTextUrl:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no scheme, so it is a path relative to the archive
            oldTextUrl = os.path.join(archivePath, oldTextUrl)
            # fix URLs starting with /mpiwg/online
            if oldTextUrl.startswith("/mpiwg/online"):
                oldTextUrl = oldTextUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = getText(dom.find(".//texttool/text-url-path"))
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath
        docinfo['textURLPathkurz'] = textUrlPath.split(".")[0]

    presentationPath = getText(dom.find(".//texttool/presentation"))
    # get info from bib tag
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationPath:
        # bib info from the presentation file overrides the index.meta
        # values; the file is addressed relative to the index.meta location
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get access info
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    return docinfo
828
829
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Get the bibliographical information from the presentation entry in texttools.

    @param url: location of the presentation info document
    @param docinfo: dict to update (a new one is created when None)
    @param dom: not used; the presentation document is always loaded from
        ``url``
    @return: docinfo with 'author', 'title' and 'year' taken from the
        document's author, title and date elements
    """
    if docinfo is None:
        # the default used to be indexed directly, which raised TypeError
        docinfo = {}

    presentation = self.getPresentationInfoXML(url)
    docinfo['author'] = getText(presentation.find(".//author"))
    docinfo['title'] = getText(presentation.find(".//title"))
    docinfo['year'] = getText(presentation.find(".//date"))
    return docinfo
838
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document addressed by its image directory.

    The index.meta file is assumed to be one level above the image
    directory, so the bib and auth lookups use ``cut + 1``.

    @param path: path to the images ("/mpiwg/online" is removed)
    @param docinfo: dict to update (a new one is created when None)
    @param cut: number of trailing path components to strip to get from
        ``path`` to the image directory
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # the uncut path is still needed for the index.meta lookups below
    pathorig = path
    for _ in range(cut):
        path = getParentDir(path)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=metaCut)
    return docinfo
859
860
def getDocinfo(self, mode, url):
    """Return the docinfo for ``url``, reusing the copy cached in the session.

    @param mode: 'texttool' (index.meta with texttool information),
        'imagepath' or 'filepath'
    @param url: document location, interpreted according to ``mode``
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # only reuse it when it was built for the same mode and url
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
891
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters and the current request.

    @param current: current page number
    @param start: first page of the thumbnail group; defaults to the first
        page of the group that contains ``current``
    @param rows: thumbnail rows (default: self.thumbrows)
    @param cols: thumbnail columns (default: self.thumbcols)
    @param docinfo: document info; 'numPages' limits 'end' and gives
        'numgroups', 'tocSize_<tocMode>' limits 'tocPN'.  May be None.
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']

    The query/search/toc settings are read from self.REQUEST with their
    defaults.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the thumbnail group containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            # a partially filled last group counts as well
            pageinfo['numgroups'] += 1

    request = self.REQUEST
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo defaults to None, so it must be checked before the lookup
    if docinfo is not None and 'tocSize_%s'%tocMode in docinfo:
        tocSize = int(docinfo['tocSize_%s'%tocMode])
        tocPageSize = int(pageinfo['tocPageSize'])
        # a page size of 0 from the request would divide by zero; the toc
        # page number is left unclamped in that case
        if tocPageSize > 0:
            # cached toc: clamp the toc page number to the last toc page
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
941
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Update the viewer configuration.

    Stores title, digilib base URL and thumbnail rows/columns as given and
    splits ``authgroups`` (comma separated) into a list of stripped,
    lower-cased group names.  Redirects to 'manage_main' when a RESPONSE is
    supplied.
    """
    groups = [name.strip().lower() for name in authgroups.split(',')]

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
951
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer ('zpt/addDocumentViewer')."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
956
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    Redirects to 'manage_main' when a RESPONSE is supplied.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
964
965	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used by the document viewer."""

    # name under which Zope lists this object type
    meta_type = "DocumentViewer Template"
969
970
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate ('zpt/addDocumentViewerTemplate')."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
975
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged 'zpt/viewer_main.zpt'.

    @param id: id of the new template object
    @param title: optional title for the template
    @param text: not used; the content always comes from the packaged file
    @param REQUEST: when given, the response is redirected to the new
        object's 'manage_main' page
    @param submit: not used
    @return: empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    template_path = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    template_file = open(template_path, 'r')
    try:
        txt = template_file.read()
    finally:
        # the original never closed the file
        template_file.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code), so only redirect when
    # there really is a request
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
995
996
997

Note: See TracBrowser for help on using the repository browser.

Download in other formats: