Context Navigation

source: documentViewer/documentViewer.py @ 397:5b078a6d8909

Last change on this file since 397:5b078a6d8909 was 397:5b078a6d8909, checked in by casties, 14 years ago
new toggle option method
File size: 35.3 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt,method,txt2):
    """Log the concatenation of txt and txt2 at INFO level on the root logger.

    NOTE(review): ``method`` is accepted but never used; callers in this
    file pass a logging level there, which has no effect -- confirm
    whether the level was meant to be honoured.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    number  -- any value accepted by int(); unconvertible values
               (None, non-numeric strings, infinities) select the default
    default -- fallback value, itself passed through int()

    Only conversion errors are caught, so interrupts and unrelated
    failures are no longer silently swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    nodename -- a DOM node (despite the parameter name) or None.
    Returns "" for None; text inside nested child elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize node to XML with Ft.Xml.Domlette.Print and return the string.

    node     -- the node handed to Domlette.Print
    encoding -- output encoding passed through to Domlette.Print
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def browserCheck(self):
    """Inspect the request's User-Agent header and return browser flags.

    self -- object whose REQUEST provides get_header()

    Returns a dict with the raw header under 'ua' and the boolean flags
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and
    'staticHTML' (always False). 'versIE' is added only when an MSIE
    version token can be parsed from the header.

    Note: this definition replaces the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the module.
    """
    bt = {}
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt['ua'] = ua
    # presumably get_header() yields None when the header is absent;
    # search an empty string in that case instead of raising
    agent = ua or ""
    bt['isIE'] = 'MSIE' in agent
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in agent)

    try:
        # second "; "-separated token inside the parentheses, e.g. "MSIE 6.0"
        nav = agent[agent.find('('):]
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        # header does not have the expected shape -- no version info
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    segments = path.split('/')
    # split() always returns at least one element, so pop() is safe
    segments.pop()
    return '/'.join(segments)
85
86
87	def getHttpData(url, data=None, num_tries=3, timeout=10):
88	"""returns result from url+data HTTP request"""
89	# we do GET (by appending data to url)
90	if isinstance(data, str) or isinstance(data, unicode):
91	# if data is string then append
92	url = "%s?%s"%(url,data)
93	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
94	# urlencode
95	url = "%s?%s"%(url,urllib.urlencode(data))
96
97	response = None
98	errmsg = None
99	for cnt in range(num_tries):
100	try:
101	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
102	if sys.version_info < (2, 6):
103	# set timeout on socket -- ugly :-(
104	import socket
105	socket.setdefaulttimeout(float(timeout))
106	response = urllib2.urlopen(url)
107	else:
108	response = urllib2.urlopen(url,timeout=float(timeout))
109	# check result?
110	break
111	except urllib2.HTTPError, e:
112	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
113	errmsg = str(e)
114	# stop trying
115	break
116	except urllib2.URLError, e:
117	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
118	errmsg = str(e)
119	# stop trying
120	#break
121
122	if response is not None:
123	data = response.read()
124	response.close()
125	return data
126
127	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
128	#return None
129
130
131
132	##
133	## documentViewer class
134	##
135	class documentViewer(Folder):
136	"""document viewer"""
137	meta_type="Document viewer"
138
139	security=ClassSecurityInfo()
140	manage_options=Folder.manage_options+(
141	{'label':'main config','action':'changeDocumentViewerForm'},
142	)
143
144	# templates and forms
145	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
146	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
147	toc_text = PageTemplateFile('zpt/toc_text', globals())
148	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
149	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
150	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
151	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
152	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
153	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
154	head_main = PageTemplateFile('zpt/head_main', globals())
155	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
156	info_xml = PageTemplateFile('zpt/info_xml', globals())
157
158
159	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
160	security.declareProtected('View management screens','changeDocumentViewerForm')
161	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
162
163
164	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
165	"""init document viewer"""
166	self.id=id
167	self.title=title
168	self.thumbcols = thumbcols
169	self.thumbrows = thumbrows
170	# authgroups is list of authorized groups (delimited by ,)
171	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
172	# create template folder so we can always use template.something
173
174	templateFolder = Folder('template')
175	#self['template'] = templateFolder # Zope-2.12 style
176	self._setObject('template',templateFolder) # old style
177	try:
178	import MpdlXmlTextServer
179	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
180	#templateFolder['fulltextclient'] = xmlRpcClient
181	templateFolder._setObject('fulltextclient',textServer)
182	except Exception, e:
183	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
184	try:
185	from Products.zogiLib.zogiLib import zogiLib
186	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
187	#templateFolder['zogilib'] = zogilib
188	templateFolder._setObject('zogilib',zogilib)
189	except Exception, e:
190	logging.error("Unable to create zogiLib for zogilib: "+str(e))
191
192
193	# proxy text server methods to fulltextclient
194	def getTextPage(self, **args):
195	"""get page"""
196	return self.template.fulltextclient.getTextPage(**args)
197
198	def getQuery(self, **args):
199	"""get query"""
200	return self.template.fulltextclient.getQuery(**args)
201
202	def getQueryResultHits(self, **args):
203	"""get query"""
204	return self.template.fulltextclient.getQueryResultHits(**args)
205
206	def getQueryResultHitsText(self, **args):
207	"""get query"""
208	return self.template.fulltextclient.getQueryResultHitsText(**args)
209
210	def getQueryResultHitsFigures(self, **args):
211	"""get query"""
212	return self.template.fulltextclient.getQueryResultHitsFigures(**args)
213
214	def getPDF(self, **args):
215	"""get query"""
216	return self.template.fulltextclient.getPDF(**args)
217
218	def getSearch(self, **args):
219	"""get search"""
220	return self.template.fulltextclient.getSearch(**args)
221
222	def getGisPlaces(self, **args):
223	"""get gis places"""
224	return self.template.fulltextclient.getGisPlaces(**args)
225
226	def getAllGisPlaces(self, **args):
227	"""get all gis places """
228	return self.template.fulltextclient.getAllGisPlaces(**args)
229
230	def getOrigPages(self, **args):
231	"""get original page number """
232	return self.template.fulltextclient.getOrigPages(**args)
233
234	def getNumPages(self, docinfo):
235	"""get numpages"""
236	return self.template.fulltextclient.getNumPages(docinfo)
237
238	def getNumTextPages(self, docinfo):
239	"""get numpages text"""
240	return self.template.fulltextclient.getNumTextPages(docinfo)
241
242	def getTranslate(self, **args):
243	"""get translate"""
244	return self.template.fulltextclient.getTranslate(**args)
245
246	def getLemma(self, **args):
247	"""get lemma"""
248	return self.template.fulltextclient.getLemma(**args)
249
250	def getToc(self, **args):
251	"""get toc"""
252	return self.template.fulltextclient.getToc(**args)
253
254	def getTocPage(self, **args):
255	"""get tocpage"""
256	return self.template.fulltextclient.getTocPage(**args)
257
258
259	security.declareProtected('View','thumbs_rss')
260	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
261	'''
262	view it
263	@param mode: defines how to access the document behind url
264	@param url: url which contains display information
265	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
266
267	'''
268	logging.debug("HHHHHHHHHHHHHH:load the rss")
269	logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
270
271	if not hasattr(self, 'template'):
272	# create template folder if it doesn't exist
273	self.manage_addFolder('template')
274
275	if not self.digilibBaseUrl:
276	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
277
278	docinfo = self.getDocinfo(mode=mode,url=url)
279	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
280	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
281	''' ZDES '''
282	pt = getattr(self.template, 'thumbs_main_rss')
283
284	if viewMode=="auto": # automodus gewaehlt
285	if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
286	viewMode="text"
287	else:
288	viewMode="images"
289
290	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
291
292	security.declareProtected('View','index_html')
293	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization="", optionsClose=None):
294	'''
295	view it
296	@param mode: defines how to access the document behind url
297	@param url: url which contains display information
298	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
299	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
300	@param characterNormalization type of text display (reg, norm, none)
301	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
302	'''
303
304	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
305
306	if not hasattr(self, 'template'):
307	# this won't work
308	logging.error("template folder missing!")
309	return "ERROR: template folder missing!"
310
311	if not getattr(self, 'digilibBaseUrl', None):
312	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
313
314	docinfo = self.getDocinfo(mode=mode,url=url)
315
316	if tocMode != "thumbs":
317	# get table of contents
318	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
319
320	if viewMode=="auto": # automodus gewaehlt
321	if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert
322	viewMode="text_dict"
323	else:
324	viewMode="images"
325
326	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
327
328	pt = getattr(self.template, 'viewer_main')
329	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
330
331	def generateMarks(self,mk):
332	ret=""
333	if mk is None:
334	return ""
335	if not isinstance(mk, list):
336	mk=[mk]
337	for m in mk:
338	ret+="mk=%s"%m
339	return ret
340
341
342	def getBrowser(self):
343	"""getBrowser the version of browser """
344	names=""
345	names = browserCheck(self)
346	#logging.debug("XXXXXXXXXXXXXXXX: %s"%names)
347	return names
348
349	def findDigilibUrl(self):
350	"""try to get the digilib URL from zogilib"""
351	url = self.template.zogilib.getDLBaseUrl()
352	return url
353
354	def getDocumentViewerURL(self):
355	"""returns the URL of this instance"""
356	return self.absolute_url()
357
358	def getStyle(self, idx, selected, style=""):
359	"""returns a string with the given style and append 'sel' if path == selected."""
360	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
361	if idx == selected:
362	return style + 'sel'
363	else:
364	return style
365
366	def getLink(self,param=None,val=None):
367	"""link to documentviewer with parameter param set to val"""
368	params=self.REQUEST.form.copy()
369	if param is not None:
370	if val is None:
371	if params.has_key(param):
372	del params[param]
373	else:
374	params[param] = str(val)
375
376	if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
377	params["mode"] = "imagepath"
378	params["url"] = getParentDir(params["url"])
379
380	# quote values and assemble into query string
381	#ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
382	ps = urllib.urlencode(params)
383	url=self.REQUEST['URL1']+"?"+ps
384	return url
385
386	def getLinkAmp(self,param=None,val=None):
387	"""link to documentviewer with parameter param set to val"""
388	params=self.REQUEST.form.copy()
389	if param is not None:
390	if val is None:
391	if params.has_key(param):
392	del params[param]
393	else:
394	params[param] = str(val)
395
396	# quote values and assemble into query string
397	logging.debug("XYXXXXX: %s"%repr(params.items()))
398	ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
399	url=self.REQUEST['URL1']+"?"+ps
400	return url
401
402	def getInfo_xml(self,url,mode):
403	"""returns info about the document as XML"""
404
405	if not self.digilibBaseUrl:
406	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
407
408	docinfo = self.getDocinfo(mode=mode,url=url)
409	pt = getattr(self.template, 'info_xml')
410	return pt(docinfo=docinfo)
411
412	def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
413	"""returns new option state"""
414	if not self.REQUEST.SESSION.has_key(optionName):
415	# not in session -- initial
416	opt = {'lastState': newState, 'state': initialState}
417	else:
418	opt = self.REQUEST.SESSION.get(optionName)
419	if opt['lastState'] != newState:
420	# state in session has changed -- toggle
421	opt['state'] = not opt['state']
422	opt['lastState'] = newState
423
424	self.REQUEST.SESSION[optionName] = opt
425	return opt['state']
426
427	def isAccessible(self, docinfo):
428	"""returns if access to the resource is granted"""
429	access = docinfo.get('accessType', None)
430	logging.debug("documentViewer (accessOK) access type %s"%access)
431	if access is not None and access == 'free':
432	logging.debug("documentViewer (accessOK) access is free")
433	return True
434	elif access is None or access in self.authgroups:
435	# only local access -- only logged in users
436	user = getSecurityManager().getUser()
437	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
438	if user is not None:
439	#print "user: ", user
440	return (user.getUserName() != "Anonymous User")
441	else:
442	return False
443
444	logging.error("documentViewer (accessOK) unknown access type %s"%access)
445	return False
446
447
448	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
449	"""gibt param von dlInfo aus"""
450	if docinfo is None:
451	docinfo = {}
452
453	for x in range(cut):
454
455	path=getParentDir(path)
456
457	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
458
459	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
460
461	txt = getHttpData(infoUrl)
462	if txt is None:
463	raise IOError("Unable to get dir-info from %s"%(infoUrl))
464
465	dom = Parse(txt)
466	sizes=dom.xpath("//dir/size")
467	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
468
469	if sizes:
470	docinfo['numPages'] = int(getTextFromNode(sizes[0]))
471	else:
472	docinfo['numPages'] = 0
473
474	# TODO: produce and keep list of image names and numbers
475
476	return docinfo
477
478	def getIndexMetaPath(self,url):
479	"""gib nur den Pfad zurueck"""
480	regexp = re.compile(r".(experimental\|permanent)/(.)")
481	regpath = regexp.match(url)
482	if (regpath==None):
483	return ""
484	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
485	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
486
487
488
489	def getIndexMetaUrl(self,url):
490	"""returns utr of index.meta document at url"""
491
492	metaUrl = None
493	if url.startswith("http://"):
494	# real URL
495	metaUrl = url
496	else:
497	# online path
498	server=self.digilibBaseUrl+"/servlet/Texter?fn="
499	metaUrl=server+url.replace("/mpiwg/online","")
500	if not metaUrl.endswith("index.meta"):
501	metaUrl += "/index.meta"
502
503	return metaUrl
504
505	def getDomFromIndexMeta(self, url):
506	"""get dom from index meta"""
507	dom = None
508	metaUrl = self.getIndexMetaUrl(url)
509
510	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
511	txt=getHttpData(metaUrl)
512	if txt is None:
513	raise IOError("Unable to read index meta from %s"%(url))
514
515	dom = Parse(txt)
516	return dom
517
518	def getPresentationInfoXML(self, url):
519	"""returns dom of info.xml document at url"""
520	dom = None
521	metaUrl = None
522	if url.startswith("http://"):
523	# real URL
524	metaUrl = url
525	else:
526	# online path
527	server=self.digilibBaseUrl+"/servlet/Texter?fn="
528	metaUrl=server+url.replace("/mpiwg/online","")
529
530	txt=getHttpData(metaUrl)
531	if txt is None:
532	raise IOError("Unable to read infoXMLfrom %s"%(url))
533
534	dom = Parse(txt)
535	return dom
536
537
538	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
539	"""gets authorization info from the index.meta file at path or given by dom"""
540	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
541
542	access = None
543
544	if docinfo is None:
545	docinfo = {}
546
547	if dom is None:
548	for x in range(cut):
549	path=getParentDir(path)
550	dom = self.getDomFromIndexMeta(path)
551
552	acctype = dom.xpath("//access-conditions/access/@type")
553	if acctype and (len(acctype)>0):
554	access=acctype[0].value
555	if access in ['group', 'institution']:
556	access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
557
558	docinfo['accessType'] = access
559	return docinfo
560
561
562	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
563	"""gets bibliographical info from the index.meta file at path or given by dom"""
564	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
565
566	if docinfo is None:
567	docinfo = {}
568
569	if dom is None:
570	for x in range(cut):
571	path=getParentDir(path)
572	dom = self.getDomFromIndexMeta(path)
573
574	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
575
576	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
577	# put in all raw bib fields as dict "bib"
578	bib = dom.xpath("//bib/*")
579	if bib and len(bib)>0:
580	bibinfo = {}
581	for e in bib:
582	bibinfo[e.localName] = getTextFromNode(e)
583	docinfo['bib'] = bibinfo
584
585	# extract some fields (author, title, year) according to their mapping
586	metaData=self.metadata.main.meta.bib
587	bibtype=dom.xpath("//bib/@type")
588	if bibtype and (len(bibtype)>0):
589	bibtype=bibtype[0].value
590	else:
591	bibtype="generic"
592
593	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
594	docinfo['bib_type'] = bibtype
595	bibmap=metaData.generateMappingForType(bibtype)
596	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
597	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
598	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
599	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
600	try:
601	docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
602	except: pass
603	try:
604	docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
605	except: pass
606	try:
607	docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
608	except: pass
609	logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
610	try:
611	docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
612	except:
613	docinfo['lang']=''
614
615	return docinfo
616
617
618	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
619	"""gets name info from the index.meta file at path or given by dom"""
620	if docinfo is None:
621	docinfo = {}
622
623	if dom is None:
624	for x in range(cut):
625	path=getParentDir(path)
626	dom = self.getDomFromIndexMeta(path)
627
628	docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
629	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
630	return docinfo
631
632	def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
633	"""parse texttool tag in index meta"""
634	logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
635	if docinfo is None:
636	docinfo = {}
637	if docinfo.get('lang', None) is None:
638	docinfo['lang'] = '' # default keine Sprache gesetzt
639	if dom is None:
640	dom = self.getDomFromIndexMeta(url)
641
642	archivePath = None
643	archiveName = None
644
645	archiveNames = dom.xpath("//resource/name")
646	if archiveNames and (len(archiveNames) > 0):
647	archiveName = getTextFromNode(archiveNames[0])
648	else:
649	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
650
651	archivePaths = dom.xpath("//resource/archive-path")
652	if archivePaths and (len(archivePaths) > 0):
653	archivePath = getTextFromNode(archivePaths[0])
654	# clean up archive path
655	if archivePath[0] != '/':
656	archivePath = '/' + archivePath
657	if archiveName and (not archivePath.endswith(archiveName)):
658	archivePath += "/" + archiveName
659	else:
660	# try to get archive-path from url
661	logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
662	if (not url.startswith('http')):
663	archivePath = url.replace('index.meta', '')
664
665	if archivePath is None:
666	# we balk without archive-path
667	raise IOError("Missing archive-path (for text-tool) in %s" % (url))
668
669	imageDirs = dom.xpath("//texttool/image")
670	if imageDirs and (len(imageDirs) > 0):
671	imageDir = getTextFromNode(imageDirs[0])
672
673	else:
674	# we balk with no image tag / not necessary anymore because textmode is now standard
675	#raise IOError("No text-tool info in %s"%(url))
676	imageDir = ""
677	#xquery="//pb"
678	docinfo['imagePath'] = "" # keine Bilder
679	docinfo['imageURL'] = ""
680
681	if imageDir and archivePath:
682	#print "image: ", imageDir, " archivepath: ", archivePath
683	imageDir = os.path.join(archivePath, imageDir)
684	imageDir = imageDir.replace("/mpiwg/online", '')
685	docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
686	docinfo['imagePath'] = imageDir
687
688	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
689
690	viewerUrls = dom.xpath("//texttool/digiliburlprefix")
691	if viewerUrls and (len(viewerUrls) > 0):
692	viewerUrl = getTextFromNode(viewerUrls[0])
693	docinfo['viewerURL'] = viewerUrl
694
695	# old style text URL
696	textUrls = dom.xpath("//texttool/text")
697	if textUrls and (len(textUrls) > 0):
698	textUrl = getTextFromNode(textUrls[0])
699	if urlparse.urlparse(textUrl)[0] == "": #keine url
700	textUrl = os.path.join(archivePath, textUrl)
701	# fix URLs starting with /mpiwg/online
702	if textUrl.startswith("/mpiwg/online"):
703	textUrl = textUrl.replace("/mpiwg/online", '', 1)
704
705	docinfo['textURL'] = textUrl
706
707	# new style text-url-path
708	textUrls = dom.xpath("//texttool/text-url-path")
709	if textUrls and (len(textUrls) > 0):
710	textUrl = getTextFromNode(textUrls[0])
711	docinfo['textURLPath'] = textUrl
712	if not docinfo['imagePath']:
713	# text-only, no page images
714	docinfo = self.getNumTextPages(docinfo)
715
716	presentationUrls = dom.xpath("//texttool/presentation")
717	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
718	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
719
720
721	if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen
722	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
723	# durch den relativen Pfad auf die presentation infos
724	presentationPath = getTextFromNode(presentationUrls[0])
725	if url.endswith("index.meta"):
726	presentationUrl = url.replace('index.meta', presentationPath)
727	else:
728	presentationUrl = url + "/" + presentationPath
729
730	docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
731
732	docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
733
734	return docinfo
735
736
737	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
738	"""gets the bibliographical information from the preseantion entry in texttools
739	"""
740	dom=self.getPresentationInfoXML(url)
741	try:
742	docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
743	except:
744	pass
745	try:
746	docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
747	except:
748	pass
749	try:
750	docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
751	except:
752	pass
753	return docinfo
754
755	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
756	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
757	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
758	if docinfo is None:
759	docinfo = {}
760	path=path.replace("/mpiwg/online","")
761	docinfo['imagePath'] = path
762	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
763
764	pathorig=path
765	for x in range(cut):
766	path=getParentDir(path)
767	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
768	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
769	docinfo['imageURL'] = imageUrl
770
771	#path ist the path to the images it assumes that the index.meta file is one level higher.
772	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
773	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
774	return docinfo
775
776
777	def getDocinfo(self, mode, url):
778	"""returns docinfo depending on mode"""
779	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
780	# look for cached docinfo in session
781	if self.REQUEST.SESSION.has_key('docinfo'):
782	docinfo = self.REQUEST.SESSION['docinfo']
783	# check if its still current
784	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
785	logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
786	return docinfo
787	# new docinfo
788	docinfo = {'mode': mode, 'url': url}
789	if mode=="texttool": #index.meta with texttool information
790	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
791	elif mode=="imagepath":
792	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
793	elif mode=="filepath":
794	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
795	else:
796	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
797	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
798
799	logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
800	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%)
801	self.REQUEST.SESSION['docinfo'] = docinfo
802	return docinfo
803
804	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
805	"""returns pageinfo with the given parameters"""
806	pageinfo = {}
807	current = getInt(current)
808
809	pageinfo['current'] = current
810	rows = int(rows or self.thumbrows)
811	pageinfo['rows'] = rows
812	cols = int(cols or self.thumbcols)
813	pageinfo['cols'] = cols
814	grpsize = cols * rows
815	pageinfo['groupsize'] = grpsize
816	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
817	# int(current / grpsize) * grpsize +1))
818	pageinfo['start'] = start
819	pageinfo['end'] = start + grpsize
820	if (docinfo is not None) and ('numPages' in docinfo):
821	np = int(docinfo['numPages'])
822	pageinfo['end'] = min(pageinfo['end'], np)
823	pageinfo['numgroups'] = int(np / grpsize)
824	if np % grpsize > 0:
825	pageinfo['numgroups'] += 1
826	pageinfo['viewMode'] = viewMode
827	pageinfo['tocMode'] = tocMode
828	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
829	pageinfo['query'] = self.REQUEST.get('query','')
830	pageinfo['optionsClose']= self.REQUEST.get('optionsClose','')
831	pageinfo['queryType'] = self.REQUEST.get('queryType','')
832	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
833	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
834	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
835	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
836	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
837	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
838	toc = int (pageinfo['tocPN'])
839	pageinfo['textPages'] =int (toc)
840
841
842
843	if 'tocSize_%s'%tocMode in docinfo:
844	tocSize = int(docinfo['tocSize_%s'%tocMode])
845	tocPageSize = int(pageinfo['tocPageSize'])
846	# cached toc
847	if tocSize%tocPageSize>0:
848	tocPages=tocSize/tocPageSize+1
849	else:
850	tocPages=tocSize/tocPageSize
851	pageinfo['tocPN'] = min (tocPages,toc)
852	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
853	pageinfo['sn'] =self.REQUEST.get('sn','')
854	return pageinfo
855
856	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
857	"""init document viewer"""
858	self.title=title
859	self.digilibBaseUrl = digilibBaseUrl
860	self.thumbrows = thumbrows
861	self.thumbcols = thumbcols
862	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
863	if RESPONSE is not None:
864	RESPONSE.redirect('manage_main')
865
def manage_AddDocumentViewerForm(self):
    """Render the zpt/addDocumentViewer form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
870
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to manage_main when a RESPONSE is supplied.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
878
879	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used by the document viewer.

    Adds nothing to ZopePageTemplate except its own meta_type.
    """

    meta_type = "DocumentViewer Template"
883
884
def manage_addDocumentViewerTemplateForm(self):
    """Render the zpt/addDocumentViewerTemplate form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
889
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title set on the template
    text    -- unused; the content always comes from zpt/viewer_main.zpt
    REQUEST -- current request; if given, the browser is redirected to
               the new object's manage_main
    submit  -- unused

    Returns the empty string.
    """

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # close the template file explicitly instead of leaking the handle
    # (try/finally rather than "with": the module still supports Python < 2.6)
    f = file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = f.read()
    finally:
        f.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None: without a request there is nothing to redirect
    if REQUEST is None:
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
909
910
911

Note: See TracBrowser for help on using the repository browser.

Download in other formats: