Context Navigation

source: documentViewer/documentViewer.py @ 459:aabfa6124cfb

elementtree

Last change on this file since 459:aabfa6124cfb was 459:aabfa6124cfb, checked in by casties, 13 years ago
nicer comments
File size: 37.1 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log txt followed by txt2 at INFO level on the root logger.

    The method argument is accepted for compatibility but is not used.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as an XML string.

    Serialization is done by ElementTree's tostring() with its default
    settings; the encoding argument is accepted but not passed on.
    """
    return ET.tostring(node)
39
def browserCheck(self):
    """Inspect the User-Agent header of self.REQUEST and classify the browser.

    Returns a dict with the keys:
      ua               -- the raw header value (None if the header is missing)
      isIE, isN4       -- booleans for MSIE / Netscape 4 ("Mozilla/4." without MSIE)
      versIE, versFirefox, versSafariChrome, versOpera
                       -- version strings, "" when not detected
                          (versFirefox is cut to the first three characters)
      isMac, isWin, isIEWin, isIEMac
                       -- platform booleans
      staticHTML       -- always False

    Each detection step is independent; a user agent that does not have the
    expected shape simply leaves the corresponding version empty.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header yields None; use "" so the string operations below work
    ua = rawUa or ""

    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = ua.find('MSIE') > -1
    bt['isN4'] = (not bt['isIE']) and ua.find('Mozilla/4.') > -1
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # rest of the user agent starting at the first "(" resp. the first ")"
    # (find() returning -1 deliberately leaves just the last character)
    afterOpen = ua[ua.find('('):]
    afterClose = ua[ua.find(')'):]

    # Safari or Chrome identification: look behind the second parenthesis group
    try:
        secondGroup = afterClose[afterClose.find('('):]
        tokens = secondGroup[secondGroup.find(')'):].split(" ")
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry of the first group
    try:
        ieToken = afterOpen.split("; ")[1]
        if ieToken.find("MSIE") > -1:
            bt['versIE'] = ieToken.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third space-separated token after the first ")"
    try:
        ffToken = afterClose.split(" ")[2]
        if ffToken.find("Firefox") > -1:
            ffVersion = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if ua.find("Opera") > -1:
            operaRest = afterOpen[afterOpen.find(')'):]
            bt['versOpera'] = operaRest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = ua.find('Macintosh') > -1
    bt['isWin'] = ua.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
104
def getBibdataFromDom(dom):
    """Collect the contents of the meta/bib element of dom in a dict.

    The bib element's type attribute is stored under '@type'; every child
    element is stored under its tag name with the value returned by getText.
    An empty dict is returned when there is no meta/bib element.
    """
    bibNode = dom.find(".//meta/bib")
    if bibNode is None:
        return {}

    # the type attribute goes in first, then all sub-elements
    bibinfo = {'@type': bibNode.get('type')}
    for child in bibNode:
        bibinfo[child.tag] = getText(child)
    return bibinfo
118
119	##
120	## documentViewer class
121	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # the Folder management tabs plus one tab for the viewer configuration form
    manage_options = Folder.manage_options + (
        {'label':'main config','action':'changeDocumentViewerForm'},
    )

    # page templates and forms shipped with the product
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form needs the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
151
152
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    Stores id, title, digilibBaseUrl and the thumbnail grid size, parses
    authgroups (comma separated group names) into a lower-cased list and
    creates the 'template' sub-folder holding the 'fulltextclient'
    (MpdlXmlTextServer for textServerName) and 'zogilib' (zogiLib for
    imageScalerUrl) helper objects. A helper that cannot be created is
    logged as an error and left out.
    """
    self.id = id
    self.title = title
    # keep the configured digilib URL; thumbs_rss, index_html and getInfo_xml
    # look the URL up via zogilib when this is empty
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder) # old style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
180
181
182	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward all keyword arguments to getTextPage of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
186
def getOrigPages(self, **args):
    """Forward all keyword arguments to getOrigPages of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
190
def getOrigPagesNorm(self, **args):
    """Forward all keyword arguments to getOrigPagesNorm of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
194
def getQuery(self, **args):
    """Forward all keyword arguments to getQuery (query in search) of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
198
def getSearch(self, **args):
    """Forward all keyword arguments to getSearch of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
202
def getGisPlaces(self, **args):
    """Forward all keyword arguments to getGisPlaces of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
206
def getAllGisPlaces(self, **args):
    """Forward all keyword arguments to getAllGisPlaces of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
210
def getTranslate(self, **args):
    """Forward all keyword arguments to getTranslate of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
214
def getLemma(self, **args):
    """Forward all keyword arguments to getLemma of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
218
def getLemmaQuery(self, **args):
    """Forward all keyword arguments to getLemmaQuery of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
222
def getLex(self, **args):
    """Forward all keyword arguments to getLex of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
226
def getToc(self, **args):
    """Forward all keyword arguments to getToc of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
230
def getTocPage(self, **args):
    """Forward all keyword arguments to getTocPage of template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
234
235
236	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail RSS view (template 'thumbs_main_rss').

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto (default); auto becomes "text" if
        the docinfo has a textURL key or a non-empty textURLPath, else "images"
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may be unset or empty; in both cases ask zogilib and
    # finally fall back to the fixed server URL
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text url is set and a text viewer is configured
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
268
269	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the main viewer page (template 'viewer_main').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: text, images or auto (default); auto becomes "text_dict"
        when the docinfo has a textURL or textURLPath, else "images"
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: current page number
    @param mk: mark or list of marks, handed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every tocMode except the thumbnails needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # auto viewMode: text_dict if there is text, images otherwise
    if viewMode=="auto":
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    # fetch the full text page for all text view modes
    if viewMode != 'images' and docinfo.get('textURLPath', None):
        pageinfo['textPage'] = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)

    # get template /template/viewer_main and execute it with the parameters
    mainTemplate = self.template.viewer_main
    return mainTemplate(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
316
def generateMarks(self,mk):
    # Builds "mk=<value>" for mk, or for every entry when mk is a list; the
    # pieces are concatenated without a separator. None gives "".
    # (Deliberately no docstring: that keeps this helper from being
    # published through the web by Zope.)
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
326
327
def getBrowser(self):
    """Return the browser information dict that browserCheck builds for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
333
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
338
def getDocumentViewerURL(self):
    """Return absolute_url() of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
342
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
350
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the document viewer built from the current request parameters.

    The parameters of the current request form are copied and then changed:
    param is set to str(val), or removed when val is None; every entry of
    the dict params is applied the same way. A mode of "filepath" is turned
    into "imagepath" with url shortened by its last component.

    @param param: name of a single parameter to change
    @param val: new value for param (None removes it)
    @param params: dict of further parameters to change (None values remove)
    @param baseUrl: URL to append the query to (default: REQUEST['URL1'])
    @param paramSep: separator between the parameters of the query string
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                # stringify like the single param above; quote_plus below
                # cannot handle e.g. integer page numbers
                urlParams[k] = str(v)

    # FIXME: does this belong here?
    # if filepath was set on the first call, change it to imagepath now
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
388
389
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return the same link as getLink, with the parameters separated by
    the HTML entity '&amp;' instead of a bare '&'."""
    return self.getLink(param, val, params, baseUrl, '&amp;')
393
def getInfo_xml(self,url,mode):
    """Return info about the document at url as XML (template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # the attribute may be unset or empty; in both cases ask zogilib and
    # finally fall back to the fixed server URL
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
403
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of the option optionName kept in the session.

    The first call stores initialState together with newState. On later
    calls the stored state is inverted whenever newState differs from the
    newState of the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
418
def isAccessible(self, docinfo):
    """Return True when access to the resource described by docinfo is granted.

    docinfo['accessType'] 'free' is always granted. A missing access type or
    one listed in self.authgroups is granted to every user whose name is not
    "Anonymous User". Any other access type is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
438
439
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of images in the digilib directory path as docinfo['numPages'].

    The last cut components are removed from path first. The directory
    information is read from digilib's dirInfo-xml.jsp; numPages is 0 when
    no size is given there.

    Raises IOError when the directory information cannot be fetched.
    """
    if docinfo is None:
        docinfo = {}

    for _step in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    numPages = 0
    if size:
        numPages = int(size)
    docinfo['numPages'] = numPages

    # TODO: produce and keep list of image names and numbers

    return docinfo
470
def getIndexMetaPath(self,url):
    """Return only the path part of url below /mpiwg/online.

    Everything in front of the first-level folder "experimental" or
    "permanent" is replaced by "/mpiwg/online/". Returns "" when url
    contains neither "experimental/" nor "permanent/".
    """
    # group 1: the folder name, group 2: everything after it
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
479
480
481
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned as it is. Anything else is
    taken as an online path: "/mpiwg/online" is removed, the path is handed
    to digilib's Texter servlet and "/index.meta" is appended unless the
    result already ends with "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
497
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed by ElementTree.

    Raises IOError when the document cannot be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return ET.fromstring(txt)
511
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed by ElementTree.

    A url starting with "http://" is fetched directly; anything else is
    taken as an online path and read through digilib's Texter servlet with
    "/mpiwg/online" removed.

    Raises IOError when the document cannot be read.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return ET.fromstring(txt)
531
532
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from the index.meta file in docinfo['accessType'].

    The index.meta is taken from dom or, when dom is None, read from path
    shortened by cut components. The access type is the type attribute of
    access-conditions/access; for the types 'group' and 'institution' it is
    replaced by the lower-cased text of the name element. When that name
    element is missing or empty the type itself is kept. Without access
    information the access type is None.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                # the name of the group/institution is the access type;
                # the element or its text may be absent
                nameNode = dom.find(".//access-conditions/access/name")
                if nameNode is not None and nameNode.text:
                    access = nameNode.text.lower()

    docinfo['accessType'] = access
    return docinfo
558
559
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store bibliographical info from the index.meta file in docinfo.

    The index.meta is taken from dom or, when dom is None, read from path
    shortened by cut components. Sets in docinfo:
      indexMetaPath -- result of getIndexMetaPath(path)
      bib           -- dict of all raw bib fields (see getBibdataFromDom)
      bib_type      -- type of the bib element ("generic" when missing),
                       with "-" replaced by " "
      author, title, year -- taken from bib through the field mapping that
                       self.metadata.main.meta.bib generates for bib_type;
                       only when the mapping has an author or title entry
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put all raw bib fields in dict "bib"
    bib = getBibdataFromDom(dom)
    docinfo['bib'] = bib

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = bib.get("@type")
    if not bibtype:
        bibtype = "generic"

    bibtype = bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))

    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if bibmap.get('author',None) or bibmap.get('title',None):
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field] = bib.get(bibmap[field][0])
            except (KeyError, IndexError, TypeError):
                # field not in the mapping or mapped to an empty entry
                pass

    return docinfo
650
651
652	# TODO: is this needed?
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of the name element of the index.meta file in docinfo['name'].

    The index.meta is taken from dom or, when dom is None, read from path
    shortened by cut components.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _step in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    name = getText(dom.find("name"))
    docinfo['name'] = name
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
666
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool tag of the index.meta at url.

    Reads name, archive-path and the texttool entries image,
    digiliburlprefix, text, text-url-path and presentation from dom (fetched
    from url when dom is None) and sets the image, viewer and text entries
    of docinfo. Bibliographical, name and access information is added by
    getBibinfoFromIndexMeta, getNameFromIndexMeta,
    getBibinfoFromTextToolPresentation (only with a presentation entry) and
    getAuthinfoFromIndexMeta.

    Raises IOError when the archive path is None.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        # default: no language set
        docinfo['lang'] = ''
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archiveName = getText(dom.find("name"))
    if not archiveName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePath = getText(dom.find("archive-path"))
    if not archivePath:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if not url.startswith('http'):
            archivePath = url.replace('index.meta', '')
    else:
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and not archivePath.endswith(archiveName):
            archivePath += "/" + archiveName

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = getText(dom.find(".//texttool/image"))
    if not imageDir:
        # a missing image tag is no error because text mode is standard now
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""
    elif archivePath:
        imageDir = os.path.join(archivePath, imageDir).replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrl = getText(dom.find(".//texttool/text"))
    if textUrl:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: path relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrlPath = getText(dom.find(".//texttool/text-url-path"))
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath
        # short form: everything before the first "."
        docinfo['textURLPathkurz'] = textUrlPath.split(".")[0]

    presentationPath = getText(dom.find(".//texttool/presentation"))
    # get info from bib tag
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationPath:
        # overwrite the bib info with the presentation information; the
        # presentation URL is the metadata url with index.meta replaced by
        # the relative path of the presentation info
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath
        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get access info
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    return docinfo
766
767
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Set author, title and year of docinfo from the presentation info at url.

    The presentation document is always fetched from url with
    getPresentationInfoXML; the dom argument is not used. year is taken
    from the date element.
    """
    if docinfo is None:
        docinfo = {}
    dom = self.getPresentationInfoXML(url)
    docinfo['author'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['year'] = getText(dom.find(".//date"))
    return docinfo
776
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for the image folder at path.

    "/mpiwg/online" is removed from path. imagePath is the full remaining
    path, imageURL the Scaler URL of the path shortened by cut components.
    The index.meta file is assumed to be one level above the images, so the
    bibliographical and access information is read with cut+1.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    pathorig = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = pathorig
    docinfo = self.getDirinfoFromDigilib(pathorig,docinfo=docinfo,cut=cut)

    imageDir = pathorig
    for _step in range(cut):
        imageDir = getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # index.meta is one level higher than the images
    docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    return docinfo
797
798
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    The docinfo stored in the session is reused when it was built for the
    same mode and url. Otherwise a new docinfo is created by
    getDocinfoFromTextTool ("texttool") or getDocinfoFromImagePath
    ("imagepath"; "filepath" with cut=1) and stored in the session.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session and check if it is still current
    if session.has_key('docinfo'):
        docinfo = session['docinfo']
        if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo with the url of this viewer
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()

    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        # folder with images, index.meta optional
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        # filename
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    if 'textURLPath' not in docinfo:
        docinfo['textURLPath'] = None

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    session['docinfo'] = docinfo
    return docinfo
835
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    @param current: current page number
    @param start: first page of the thumbnail group (default: start of the
        group that contains current)
    @param rows: thumbnail rows (default: self.thumbrows)
    @param cols: thumbnail columns (default: self.thumbcols)
    @param docinfo: docinfo dict; its numPages and tocSize_<tocMode> entries
        limit 'end', 'numgroups' and 'tocPN' (may be None)
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']

    The query, search and table of contents settings are read from
    self.REQUEST.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start is the first page of the group containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        numPages = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], numPages)
        # number of groups, rounded up
        pageinfo['numgroups'] = numPages // grpsize
        if numPages % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # limit the toc page number to the number of pages of the cached toc;
    # docinfo is optional, so it must be checked before the lookup
    tocSizeKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
885
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbrows and thumbcols, parses authgroups
    (comma separated group names) into a lower-cased list and redirects to
    'manage_main' when a RESPONSE is given.
    """
    groups = []
    for groupName in authgroups.split(','):
        groups.append(groupName.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
895
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
900
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
908
909	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta type for the document viewer"""
    meta_type = "DocumentViewer Template"
913
914
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a viewer template (zpt/addDocumentViewerTemplate)."""
    addForm = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return addForm.__of__(self)()
919
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the product's zpt/viewer_main.zpt.

    The title is set when given. With a REQUEST the browser is redirected
    to the manage_main screen of the new template. The text and submit
    arguments are accepted but not used. Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template, making sure the file is closed again
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # only redirect when called through the web
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
939
940
941

Note: See TracBrowser for help on using the repository browser.

Download in other formats: