Context Navigation

source: documentViewer/documentViewer.py @ 394:ce9fa69f81c3

Last change on this file since 394:ce9fa69f81c3 was 394:ce9fa69f81c3, checked in by casties, 14 years ago
new toggle option method
File size: 34.8 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt,method,txt2):
    """Write txt followed directly by txt2 to the root logger at INFO level.

    The `method` argument is accepted for call compatibility but is not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return `number` converted to int, or int(default) if that fails.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when `number` cannot be converted (0 if omitted)
    """
    try:
        return int(number)
    except Exception:
        # any conversion problem falls back to the default; unlike a bare
        # "except:" this does not swallow KeyboardInterrupt/SystemExit on
        # interpreters where those derive from BaseException
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    Returns "" when `nodename` is None. Text inside child elements is not
    included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize a DOM node to an XML string using Ft.Xml.Domlette.Print.

    @param node: node to serialize
    @param encoding: encoding passed on to the printer
    """
    out = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
        return out.getvalue()
    finally:
        out.close()
53
def browserCheck(self):
    """Inspect the request's User-Agent header and return a dict of browser flags.

    Keys: 'ua' (raw header value), 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' (always False) and, when an MSIE
    version token could be extracted, 'versIE'.

    NOTE(review): this definition shadows the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the file -- confirm that is intended.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not crash the check
    ua_text = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in ua_text
    # Netscape 4 is only considered when the browser is not IE
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in ua_text)

    # the IE version is expected as second "; "-separated field after the
    # opening parenthesis, e.g. "(compatible; MSIE 6.0; Windows NT 5.1)"
    fields = ua_text[ua_text.find('('):].split("; ")
    if len(fields) > 1 and 'MSIE' in fields[1]:
        version_parts = fields[1].split(" ")
        if len(version_parts) > 1:
            bt['versIE'] = version_parts[1]

    bt['isMac'] = 'Macintosh' in ua_text
    bt['isWin'] = 'Windows' in ua_text
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return `path` without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    segments = path.split('/')
    # split() always returns at least one element, so pop() cannot fail
    segments.pop()
    return '/'.join(segments)
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Return the body of an HTTP GET request to `url`.

    @param url: URL to fetch
    @param data: optional query; a string is appended as is, a dict, list or
        tuple is urlencoded first
    @param num_tries: maximum number of attempts; only URLError failures are
        retried, an HTTPError stops immediately
    @param timeout: timeout in seconds for each attempt
    @raise IOError: if no attempt produced a response
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, (str, unicode)):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # do not start a second query string if the url already has one
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s"%(url,separator,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # set timeout on socket -- ugly :-(
                # (this changes the default timeout for the whole process)
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # sys.exc_info() keeps the handler parseable by old and new interpreters
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection problem -- try again

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Document viewer: a Zope Folder that displays page images and text of a document.

    Page images are addressed through the digilib server at digilibBaseUrl.
    Text pages, tables of contents and searches are delegated to the
    'fulltextclient' object in the 'template' sub-folder created by __init__.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form is restricted to the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """Initialise the viewer and create its 'template' sub-folder.

        @param id: id of the new object
        @param imageScalerUrl: passed to zogiLib as dlServerURL
        @param textServerName: passed to MpdlXmlTextServer as serverName
        @param title: title of the object
        @param digilibBaseUrl: base URL of the digilib server (looked up lazily if None)
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma separated list of authorized groups
        """
        self.id=id
        self.title=title
        # was silently dropped before, although several methods read self.digilibBaseUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something

        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            #templateFolder['fulltextclient'] = xmlRpcClient
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            # best effort: the viewer is created even without text server
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            #templateFolder['zogilib'] = zogilib
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            # best effort: the viewer is created even without zogilib
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page (delegates to template.fulltextclient.getTextPage)"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query (delegates to template.fulltextclient.getQuery)"""
        return self.template.fulltextclient.getQuery(**args)

    def getPDF(self, **args):
        """get PDF (delegates to template.fulltextclient.getPDF)"""
        return self.template.fulltextclient.getPDF(**args)

    def getSearch(self, **args):
        """get search (delegates to template.fulltextclient.getSearch)"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places (delegates to template.fulltextclient.getGisPlaces)"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places (delegates to template.fulltextclient.getAllGisPlaces)"""
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getOrigPages(self, **args):
        """get original page number (delegates to template.fulltextclient.getOrigPages)"""
        return self.template.fulltextclient.getOrigPages(**args)

    def getNumPages(self, docinfo):
        """get numpages (delegates to template.fulltextclient.getNumPages)"""
        return self.template.fulltextclient.getNumPages(docinfo)

    def getNumTextPages(self, docinfo):
        """get numpages text (delegates to template.fulltextclient.getNumTextPages)"""
        return self.template.fulltextclient.getNumTextPages(docinfo)

    def getTranslate(self, **args):
        """get translate (delegates to template.fulltextclient.getTranslate)"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma (delegates to template.fulltextclient.getLemma)"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc (delegates to template.fulltextclient.getToc)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage (delegates to template.fulltextclient.getTocPage)"""
        return self.template.fulltextclient.getTocPage(**args)


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        Render the document with the 'thumbs_main_rss' template.

        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: text, images or auto (auto: text if the document has a text URL, images otherwise)
        @param start: first page of the thumbnail group (computed from pn if not given)
        @param pn: current page number
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        # getattr: the attribute may be missing on instances created before it was stored
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # automatic mode selected
            if 'textURL' in docinfo or 'textURLPath' in docinfo: # text URL is set
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization="", optionsClose=None):
        '''
        Render the document with the 'viewer_main' template.

        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        @param mk: mark or list of marks, handed to the template via generateMarks
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # automatic mode selected
            if 'textURL' in docinfo or 'textURLPath' in docinfo: # text URL is set
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Returns the marks as query string fragment: "mk=a" for a single mark,
        # "mk=a&mk=b" for a list of marks, "" for None.
        # (Documented with comments on purpose: the original method has no
        # docstring, and a docstring would make it publishable through the web.)
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        # parameters have to be separated, otherwise several marks run together
        return "&".join(["mk=%s"%m for m in mk])


    def getBrowser(self):
        """returns the browser information dict produced by browserCheck"""
        return browserCheck(self)

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        return self.template.zogilib.getDLBaseUrl()

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns style with 'sel' appended if idx == selected, otherwise style unchanged."""
        if idx == selected:
            return style + 'sel'
        return style

    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Starts from the current request's form parameters. A val of None
        removes param. The query string is built with urllib.urlencode.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        if params.get("mode", None) == "filepath": # if the first call used filepath, switch to imagepath now
            params["mode"] = "imagepath"
            params["url"] = getParentDir(params["url"])

        # quote values and assemble into query string
        ps = urllib.urlencode(params)
        return self.REQUEST['URL1']+"?"+ps

    def getLinkAmp(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Like getLink, but values are quoted with urllib.quote, keys are not
        quoted and a "filepath" mode is left untouched.

        NOTE(review): the method name suggests the parameters were meant to
        be joined with "&amp;"; the code joins them with "&" -- confirm.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        logging.debug("XYXXXXX: %s"%repr(params.items()))
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        return self.REQUEST['URL1']+"?"+ps

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""

        # getattr: the attribute may be missing on instances created before it was stored
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
        """returns new option state

        The state is kept in the session under optionName. It starts as
        initialState and is inverted whenever newState differs from the
        newState of the previous call.
        """
        opt = self.REQUEST.SESSION.get(optionName,None)
        if opt is None:
            # not in session -- initial
            opt = {'lastState': newState, 'state': initialState}
        elif opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState

        self.REQUEST.SESSION[optionName] = opt
        return opt['state']

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' is always accessible. A missing access type or one of
        self.authgroups requires a user other than "Anonymous User".
        Everything else is refused.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """stores the number of images in the digilib directory as docinfo['numPages']

        @param path: digilib directory path
        @param docinfo: dict to update (a new one is created if None)
        @param cut: number of trailing path components to remove first
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string had no placeholder, so the value was never logged
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%repr(sizes))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: "/mpiwg/online/(experimental|permanent)/..." or "" if url has no such part"""
        # the pattern previously matched a literal "|" and captured only one
        # character of the remaining path
        regpath = re.match(r".*(experimental|permanent)/(.*)", url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return "/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)



    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url

        http(s) URLs are returned unchanged. Anything else is treated as an
        online path and turned into a digilib Texter URL ending in index.meta.
        """
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url"""
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the access type, to the lower-cased
        group/institution name for those types, or to None if the document
        has no access element.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    # keep the bare type; isAccessible only grants it if it is in authgroups
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets docinfo['indexMetaPath'], docinfo['bib'] (all raw bib fields),
        docinfo['bib_type'] and, if a mapping for the bib type exists,
        'author', 'title', 'year' and 'lang'.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    # best effort: field not mapped or not present in this document
                    pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            langs = dom.xpath("//bib/lang")
            if langs:
                docinfo['lang']=getTextFromNode(langs[0])
            else:
                docinfo['lang']=''

        return docinfo


    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom

        Sets docinfo['name'] to the text of /resource/name ("" if missing).
        """
        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        names = dom.xpath("/resource/name")
        if names:
            docinfo['name']=getTextFromNode(names[0])
        else:
            # getDocinfoFromTextTool tolerates a missing name, so do not fail here
            docinfo['name']=""
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image information ('imagePath', 'imageURL',
        'numPages'), 'viewerURL', text locations ('textURL', 'textURLPath')
        and with bibliographical, name and access information.

        @raise IOError: if no archive path can be determined
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # no image tag is not an error any more because text mode is now standard
            imageDir = ""
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls:
            docinfo['textURLPath'] = getTextFromNode(textUrls[0])
            # .get: 'imagePath' is not set when the image tag exists but is empty
            if not docinfo.get('imagePath'):
                # text-only, no page images
                docinfo = self.getNumTextPages(docinfo)

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls: # overwrite the bib info with the presentation information
            # the presentation URL is the metadata URL with index.meta replaced
            # by the relative path of the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        Reads the info XML at url and copies the first //author, //title and
        //date into docinfo['author'], ['title'] and ['year'] when present.
        The dom argument is not used; the document at url is always loaded.
        """
        if docinfo is None:
            # previously every assignment failed silently and None was returned
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (key, expr) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            nodes = dom.xpath(expr)
            if nodes:
                docinfo[key]=getTextFromNode(nodes[0])
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

        # the index.meta file is expected one level above the images, hence cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        A docinfo cached in the session is reused when its mode and url match.
        Otherwise a new one is built and stored in the session.

        @raise ValueError: if mode is not 'texttool', 'imagepath' or 'filepath'
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
        """returns pageinfo with the given parameters

        @param current: current page number
        @param start: first page of the thumbnail group (computed from current if not given)
        @param rows: thumbnail rows (self.thumbrows if not given)
        @param cols: thumbnail columns (self.thumbcols if not given)
        @param docinfo: document info; 'numPages' and 'tocSize_<tocMode>' are used if present
        @param characterNormalization: not used, the value is read from the request
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start is the first page of the group that contains current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
        pageinfo['query'] = self.REQUEST.get('query','')
        pageinfo['optionsClose']= self.REQUEST.get('optionsClose','')
        pageinfo['queryType'] = self.REQUEST.get('queryType','')
        pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
        pageinfo['textPN'] = self.REQUEST.get('textPN','1')
        pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
        pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
        pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
        toc = int (pageinfo['tocPN'])
        pageinfo['textPages'] =int (toc)

        # docinfo defaults to None, so it has to be checked before the lookup
        if (docinfo is not None) and ('tocSize_%s'%tocMode in docinfo):
            tocSize = int(docinfo['tocSize_%s'%tocMode])
            tocPageSize = int(pageinfo['tocPageSize'])
            # cached toc: limit tocPN to the number of toc pages (rounded up)
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
        pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
        pageinfo['sn'] =self.REQUEST.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        NOTE(review): the thumbrows/thumbcols defaults here (2/5) are the
        reverse of the __init__ defaults (5/2) -- confirm which is intended.
        """
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
853
def manage_AddDocumentViewerForm(self):
    """add the viewer form (renders zpt/addDocumentViewer in the context of self)"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    bound_form = form.__of__(self)
    return bound_form()
858
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and stores it in self; then
    redirects to 'manage_main' if a RESPONSE is given.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
866
867	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    # ZopePageTemplate subclass that only sets its own meta_type
    meta_type="DocumentViewer Template"
871
872
def manage_addDocumentViewerTemplateForm(self):
    """Form for adding (renders zpt/addDocumentViewerTemplate in the context of self)"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    bound_form = form.__of__(self)
    return bound_form()
877
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title
    @param text: not used, the content always comes from zpt/viewer_main.zpt
    @param REQUEST: if given, the browser is redirected to the new
        template's manage_main
    @param submit: not used
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    template_file = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = template_file.read()
    finally:
        # do not leak the file handle
        template_file.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code), then there is nothing to redirect
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
897
898
899

Note: See TracBrowser for help on using the repository browser.

Download in other formats: