Context Navigation

source: documentViewer/documentViewer.py @ 419:69205c9d9404

Last change on this file since 419:69205c9d9404 was 419:69205c9d9404, checked in by abukhman, 13 years ago
* empty log message *
File size: 34.9 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt,method,txt2):
    """Log txt followed by txt2 at INFO level.

    The ``method`` argument is accepted for the callers' sake but is not used.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, falling back to int(default).

    Only conversion failures (wrong type, unparsable text, infinite float)
    trigger the fallback; KeyboardInterrupt and SystemExit are no longer
    swallowed as they were by the former bare ``except``.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None. Only immediate children of type
    TEXT_NODE contribute; text inside nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize node to XML with Ft.Xml.Domlette.Print and return the string."""
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml_text = out.getvalue()
    out.close()
    return xml_text
53
def browserCheck(self):
    """Inspect the request's User-Agent header and return browser flags.

    Returns a dict with the keys 'ua' (raw header value, may be None),
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML'
    and, when an MSIE version token could be extracted, 'versIE'.

    NOTE(review): this module-level definition shadows the ``browserCheck``
    imported from Products.zogiLib.zogiLib at the top of the file.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without a User-Agent header yields None; run the substring
    # tests against "" so that such a request does not raise
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in agent
    # Netscape 4 is only considered when the agent is not MSIE
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in agent)

    try:
        # second "; "-separated token inside the parenthesised part,
        # e.g. "(compatible; MSIE 6.0; Windows NT 5.1)" -> "MSIE 6.0"
        nav = agent[agent.find('('):]
        ie = nav.split("; ")[1]
        if 'MSIE' in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        # agent string does not have the expected shape -- no version info
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with HTTP GET and return the response body.

    data is appended to url as query string: a string is appended as is,
    a dict/list/tuple is passed through urllib.urlencode first.

    The request is attempted up to num_tries times. An HTTPError (the
    server answered with an error status) stops the attempts immediately;
    a URLError (e.g. connection problem) is retried.

    Raises IOError when no response could be obtained.
    """
    # we do GET (by appending data to url)
    if isinstance(data, (str, unicode)):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument before 2.6, so the
                # process-wide socket default has to be changed -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # got a response -- stop trying
            break
        except urllib2.HTTPError:
            # sys.exc_info() is used instead of binding the exception in the
            # except clause so the code parses on every interpreter version
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error -- retrying is pointless
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # fall through to the next attempt

    if response is not None:
        # close the connection even when reading the body fails
        try:
            return response.read()
        finally:
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Folder-based document viewer.

    The class-level attributes below bind the page templates shipped in the
    product's zpt/ and css/ directories and declare the management tab for
    the configuration form.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # add a "main config" management tab that opens changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    # table-of-contents variants
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    # main page variants
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up the viewer and its 'template' sub-folder.

    id, title          -- Zope id and title of the viewer
    imageScalerUrl     -- passed to zogiLib as dlServerURL
    textServerName     -- passed to MpdlXmlTextServer as serverName
    digilibBaseUrl     -- base URL of the digilib server (may be None)
    thumbcols/thumbrows -- thumbnail grid size
    authgroups         -- comma separated list of authorized group names

    Failure to create the 'fulltextclient' or 'zogilib' helper objects is
    logged and otherwise ignored.
    """
    self.id=id
    self.title=title
    # store the configured URL; the argument used to be dropped, which left
    # the attribute undefined for methods that read self.digilibBaseUrl
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward to getTextPage of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
199
def getQuery(self, **args):
    """Forward to getQuery of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
203
def getSearch(self, **args):
    """Forward to getSearch of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
207
def getGisPlaces(self, **args):
    """Forward to getGisPlaces of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)
211
def getAllGisPlaces(self, **args):
    """Forward to getAllGisPlaces of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)
215
def getTranslate(self, **args):
    """Forward to getTranslate of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
219
def getLemma(self, **args):
    """Forward to getLemma of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
223
def getToc(self, **args):
    """Forward to getToc of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getToc(**args)
227
def getTocPage(self, **args):
    """Forward to getTocPage of template.fulltextclient and return its result."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
231
232
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" resolves to "text"
        when the docinfo carries a text URL and to "images" otherwise
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute may never have been set on this
    # instance (same guard as in index_html)
    # NOTE(review): the fallback host differs from the one in index_html
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text URL is set, so a text view is available
        if ('textURL' in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
265
266	security.declareProtected('View','index_html')
267	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
268	'''
269	view it
270	@param mode: defines how to access the document behind url
271	@param url: url which contains display information
272	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
273	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
274	@param characterNormalization type of text display (reg, norm, none)
275	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
276	'''
277
278	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
279
280	if not hasattr(self, 'template'):
281	# this won't work
282	logging.error("template folder missing!")
283	return "ERROR: template folder missing!"
284
285	if not getattr(self, 'digilibBaseUrl', None):
286	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
287
288	docinfo = self.getDocinfo(mode=mode,url=url)
289
290	if tocMode != "thumbs":
291	# get table of contents
292	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
293
294	if viewMode=="auto": # automodus gewaehlt
295	if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
296	viewMode="text_dict"
297	else:
298	viewMode="images"
299
300	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
301
302	if (docinfo.get('textURLPath',None)):
303	page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)
304	pageinfo['textPage'] = page
305	pt = getattr(self.template, 'viewer_main')
306	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
307
def generateMarks(self,mk):
    # Render mk (a single value, a list of values or None) as a string of
    # "mk=<value>" items; None yields "".
    # NOTE(review): the items are concatenated without any separator --
    # confirm against the templates that this is intended.
    # (Deliberately no docstring: none existed before.)
    if mk is None:
        return ""
    if isinstance(mk, list):
        marks = mk
    else:
        marks = [mk]
    return "".join(["mk=%s"%m for m in marks])
317
318
def getBrowser(self):
    """Return the browser flag dict produced by browserCheck for this request."""
    return browserCheck(self)
323
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
328
def getDocumentViewerURL(self):
    """Return absolute_url() of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
332
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
340
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a viewer URL built from the current request parameters.

    The parameters of the current request form are copied and then changed:
    param/val sets a single parameter, params (a dict) sets several. A value
    of None removes the parameter. The result is baseUrl (default:
    REQUEST['URL1']) followed by '?' and the parameters joined by paramSep.

    Values are quoted with urllib.quote_plus, leaving '/' unescaped.
    Non-string values are converted with str() and list/tuple values are
    written as one key=value pair per item; both used to raise inside
    quote_plus.
    """
    # copy existing request params
    urlParams=self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            if param in urlParams:
                del urlParams[param]
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k in params.keys():
            v = params[k]
            if v is None:
                # val=None removes param
                if k in urlParams:
                    del urlParams[k]
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    # if the first call used mode=filepath, switch to imagepath on the
    # parent directory for all following links
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            if not isinstance(item, basestring):
                item = str(item)
            pairs.append("%s=%s"%(k,urllib.quote_plus(item,'/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
378
379
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Like getLink, but with the parameters separated by '&amp;'.

    The HTML-escaped separator makes the result usable verbatim inside HTML
    markup. (Passing a plain '&' here made this method indistinguishable
    from getLink with its default separator.)
    """
    return self.getLink(param, val, params, baseUrl, '&amp;')
383
def getInfo_xml(self,url,mode):
    """Return the info_xml template rendered with the docinfo for url/mode."""
    # getattr with default: the attribute may never have been set on this
    # instance (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
393
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of the session-stored option optionName.

    The first call stores initialState together with newState. On later
    calls the stored state is inverted whenever newState differs from the
    newState seen on the previous call. The option record is written back
    to the session on every call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
408
def isAccessible(self, docinfo):
    """Return True when access to the resource described by docinfo is granted.

    'free' resources are always accessible. A missing access type, or one
    listed in self.authgroups, requires a user other than "Anonymous User".
    Any other access type is logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
428
429
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp about path and store the page count.

    path is shortened by cut trailing components first. docinfo['numPages']
    is set to the integer content of the first //dir/size element of the
    answer, or to 0 when there is none. Returns docinfo (a new dict when
    none was passed in).
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string lacked a placeholder, so sizes was never logged;
    # the one-element tuple keeps a list argument from being misread
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
459
def getIndexMetaPath(self,url):
    """Return only the path of url, rooted at /mpiwg/online.

    The part of url from "experimental/" or "permanent/" onwards is appended
    to "/mpiwg/online/". Returns "" when url contains neither.
    """
    # the previous pattern, r".(experimental\|permanent)/(.)", matched a
    # literal '|' and single characters only, so real paths never matched
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
468
469
470
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    A url starting with "http://" is returned unchanged. Anything else is
    treated as an online path: "/mpiwg/online" is removed, the digilib
    Texter servlet URL is put in front and "/index.meta" is appended unless
    the result already ends with "index.meta".
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if metaUrl.endswith("index.meta"):
        return metaUrl
    return metaUrl + "/index.meta"
486
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as DOM."""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
499
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as DOM.

    A url starting with "http://" is fetched as is; anything else is treated
    as an online path and fetched through the digilib Texter servlet with
    "/mpiwg/online" removed.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        onlinePath = url.replace("/mpiwg/online","")
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+onlinePath

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
518
519
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from index.meta in docinfo['accessType'].

    The index.meta DOM is taken from dom or, when dom is None, loaded from
    path shortened by cut trailing components. The access type is the type
    attribute of //access-conditions/access; for 'group' and 'institution'
    it is replaced by the lower-cased text of the access/name element.
    None is stored when there is no such attribute. Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs:
        access = typeAttrs[0].value
        if access in ('group', 'institution'):
            names = dom.xpath("//access-conditions/access/name")
            access = getTextFromNode(names[0]).lower()

    docinfo['accessType'] = access
    return docinfo
542
543
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Collect bibliographical info from index.meta into docinfo.

    The index.meta DOM is taken from dom or, when dom is None, loaded from
    path shortened by cut trailing components.

    Keys written to docinfo:
      'indexMetaPath' -- result of getIndexMetaPath(path)
      'bib'           -- dict of all children of //bib (local name -> text),
                         only when there are any
      'bib_type'      -- type attribute of //bib with '-' replaced by ' ',
                         or "generic"
      'author', 'title', 'year', 'lang' -- only when a non-empty mapping
                         exists for the bib type

    Returns docinfo (a new dict when none was passed in).

    NOTE(review): the nesting of the statements below was reconstructed from
    a copy of the source that had lost its indentation -- confirm against
    the repository.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    # (self.metadata is not defined in this class -- presumably acquired
    # from the surrounding Zope site; verify)
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # index.meta uses "-" where the mapping names use " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is best effort: any failure leaves the key unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # language falls back to '' when //bib/lang is missing
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            docinfo['lang']=''

    return docinfo
598
599
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from index.meta in docinfo['name'].

    The index.meta DOM is taken from dom or, when dom is None, loaded from
    path shortened by cut trailing components. Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNodes = dom.xpath("/resource/name")
    docinfo['name'] = getTextFromNode(nameNodes[0])
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
613
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    The DOM is taken from dom or loaded with getDomFromIndexMeta(url).

    Keys that may be written to docinfo: 'lang', 'imagePath', 'imageURL',
    'viewerURL', 'textURL', 'textURLPath', plus whatever
    getDirinfoFromDigilib, getBibinfoFromIndexMeta, getNameFromIndexMeta,
    getBibinfoFromTextToolPresentation and getAuthinfoFromIndexMeta add.

    Raises IOError when no archive path can be determined.

    NOTE(review): the nesting of the statements below was reconstructed from
    a copy of the source that had lost its indentation -- confirm against
    the repository.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path: make it absolute and make it end with
        # the resource name
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # a missing image tag is no longer an error because text mode is
        # now the standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no URL scheme -- relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        #if not docinfo['imagePath']:
        # text-only, no page images
        #docinfo = self.getNumTextPages(docinfo)


    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrls and (len(presentationUrls) > 0): # override the bib values with the presentation info
        # the presentation URL is the metadata URL with index.meta replaced
        # by the relative path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
718
719
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Override author, title and year in docinfo from a presentation info.xml.

    The document at url is loaded with getPresentationInfoXML; the dom
    argument is accepted for signature compatibility but not used.
    docinfo['author'], ['title'] and ['year'] are set from the first
    //author, //title and //date element; a field whose element is missing
    or unreadable is left untouched (best effort).

    Returns docinfo; a new dict is created when none was passed in
    (previously None was returned in that case, with every field silently
    skipped).
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for (key, query) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[key]=getTextFromNode(dom.xpath(query)[0])
        except Exception:
            # best effort: keep whatever value docinfo already has
            pass
    return docinfo
737
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a directory of page images.

    path is the path to the images (or, with cut > 0, to something cut
    levels below them); the index.meta file is assumed to be one level
    above the image directory. "/mpiwg/online" is removed from path first.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    fullPath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = fullPath
    docinfo = self.getDirinfoFromDigilib(fullPath,docinfo=docinfo,cut=cut)

    # image directory: the full path shortened by cut components
    imageDir = fullPath
    for x in range(cut):
        imageDir = getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # index.meta is one level above the images, hence cut+1 on the full path
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(fullPath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(fullPath,docinfo=docinfo,cut=metaCut)
    return docinfo
758
759
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match. Otherwise a new one is built ('texttool', 'imagepath' or
    'filepath'), given a 'textURLPath' of None if it has none, stored in
    the session and returned.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url points to a file inside the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    # FIXME: fake texturlpath
    if 'textURLPath' not in docinfo:
        docinfo['textURLPath'] = None

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
790
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current  -- current page number (anything getInt accepts)
    start    -- first page of the thumbnail group; by default the first
                page of the group that contains current
    rows/cols -- thumbnail grid size, default self.thumbrows/self.thumbcols
    docinfo  -- docinfo dict; its 'numPages' limits 'end' and yields
                'numgroups', its 'tocSize_<tocMode>' limits 'tocPN'.
                May be None (this used to raise TypeError).
    viewMode/tocMode -- stored unchanged

    The text, query and toc settings are read from the request, with the
    defaults visible below.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group of grpsize pages containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, counting a partially filled last group
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')

    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        # cached toc: clamp the requested toc page to the last existing one
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
843
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values and redirect to manage_main.

    authgroups is a comma separated list of group names; it is stored as a
    list of stripped, lower-cased names. The redirect only happens when a
    RESPONSE is given.

    NOTE(review): the thumbrows/thumbcols defaults (2/5) are the reverse of
    those in __init__ (5/2) -- confirm which is intended.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
853
def manage_AddDocumentViewerForm(self):
    """Render the zpt/addDocumentViewer form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
858
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id inside self.

    Redirects to manage_main when a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
866
867	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer.

    A ZopePageTemplate subclass that only sets its own meta_type.
    """
    meta_type="DocumentViewer Template"
871
872
def manage_addDocumentViewerTemplateForm(self):
    """Render the zpt/addDocumentViewerTemplate form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
877
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the shipped viewer_main.zpt.

    The new template gets the content of zpt/viewer_main.zpt from the
    product directory and, if given, the title. With a REQUEST the browser
    is redirected to the manage_main page of the new object; without one
    the function just returns (it used to fail in that case).

    The text and submit arguments are accepted but not used.
    Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to garbage collection
    f = open(templatePath,'r')
    try:
        txt = f.read()
    finally:
        f.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code -- nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
897
898
899

Note: See TracBrowser for help on using the repository browser.

Download in other formats: