Context Navigation

source: documentViewer/documentViewer.py @ 413:ea9aaecd73f1

Last change on this file since 413:ea9aaecd73f1 was 413:ea9aaecd73f1, checked in by casties, 13 years ago
better getLink method
File size: 36.0 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log ``txt + txt2`` through the root logger.

    txt    -- first part of the message
    method -- logging level (e.g. ``logging.INFO``); any value that is not an
              integer falls back to ``logging.INFO``
    txt2   -- second part of the message (appended without separator)
    """
    # the level argument used to be ignored, everything went out as INFO
    if isinstance(method, int):
        level = method
    else:
        level = logging.INFO
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to an int.

    If ``number`` cannot be converted (wrong type, unparsable string,
    infinite float) ``int(default)`` is returned instead.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are expected here; anything else should surface
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of all direct text-node children.

    ``nodename`` is a DOM node (or None, which yields an empty string).
    Only direct children whose ``nodeType`` is ``TEXT_NODE`` contribute.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize ``node`` with ``Ft.Xml.Domlette.Print`` and return the result as a string.

    ``encoding`` is handed to the printer unchanged.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xml = stream.getvalue()
    stream.close()
    return xml
53
def browserCheck(self):
    """Inspect the request's User-Agent header and return a dict of browser flags.

    Keys: 'ua' (raw header value), 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' and, when it can be parsed, 'versIE'.
    A missing User-Agent header is treated like an empty string.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # the header may be absent (None) -- do all matching on a real string
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in agent
    # Netscape 4 is only considered when the browser is not IE
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in agent)

    try:
        # second "; "-separated token inside the parentheses, e.g. "MSIE 6.0"
        nav = agent[agent.find('('):]
        ie = nav.split("; ")[1]
        if 'MSIE' in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        # User-Agent does not have the expected layout -- no version info
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path containing no '/' yields an empty string.
    """
    head, _sep, _tail = path.rpartition('/')
    return head
85
86
87	def getHttpData(url, data=None, num_tries=3, timeout=10):
88	"""returns result from url+data HTTP request"""
89	# we do GET (by appending data to url)
90	if isinstance(data, str) or isinstance(data, unicode):
91	# if data is string then append
92	url = "%s?%s"%(url,data)
93	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
94	# urlencode
95	url = "%s?%s"%(url,urllib.urlencode(data))
96
97	response = None
98	errmsg = None
99	for cnt in range(num_tries):
100	try:
101	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
102	if sys.version_info < (2, 6):
103	# set timeout on socket -- ugly :-(
104	import socket
105	socket.setdefaulttimeout(float(timeout))
106	response = urllib2.urlopen(url)
107	else:
108	response = urllib2.urlopen(url,timeout=float(timeout))
109	# check result?
110	break
111	except urllib2.HTTPError, e:
112	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
113	errmsg = str(e)
114	# stop trying
115	break
116	except urllib2.URLError, e:
117	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
118	errmsg = str(e)
119	# stop trying
120	#break
121
122	if response is not None:
123	data = response.read()
124	response.close()
125	return data
126
127	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
128	#return None
129
130
131
##
## documentViewer class
##
class documentViewer(Folder):
    """Zope folder that displays a document (page images and/or text).

    The class-level attributes below bind the page templates used for
    rendering and declare the management tab for the configuration form.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds a "main config" tab (changeDocumentViewerForm) to the Folder tabs
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
164
165
166	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
167	"""init document viewer"""
168	self.id=id
169	self.title=title
170	self.thumbcols = thumbcols
171	self.thumbrows = thumbrows
172	# authgroups is list of authorized groups (delimited by ,)
173	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
174	# create template folder so we can always use template.something
175
176	templateFolder = Folder('template')
177	#self['template'] = templateFolder # Zope-2.12 style
178	self._setObject('template',templateFolder) # old style
179	try:
180	import MpdlXmlTextServer
181	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
182	#templateFolder['fulltextclient'] = xmlRpcClient
183	templateFolder._setObject('fulltextclient',textServer)
184	except Exception, e:
185	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
186	try:
187	from Products.zogiLib.zogiLib import zogiLib
188	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
189	#templateFolder['zogilib'] = zogilib
190	templateFolder._setObject('zogilib',zogilib)
191	except Exception, e:
192	logging.error("Unable to create zogiLib for zogilib: "+str(e))
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Delegate to ``getTextPage`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
199
def getQuery(self, **args):
    """Delegate to ``getQuery`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getQuery(**args)
203
def getQueryResultHits(self, **args):
    """Delegate to ``getQueryResultHits`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getQueryResultHits(**args)
207
def getQueryResultHitsText(self, **args):
    """Delegate to ``getQueryResultHitsText`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsText(**args)
211
def getQueryResultHitsFigures(self, **args):
    """Delegate to ``getQueryResultHitsFigures`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsFigures(**args)
215
def getSearch(self, **args):
    """Delegate to ``getSearch`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getSearch(**args)
219
def getGisPlaces(self, **args):
    """Delegate to ``getGisPlaces`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)
223
def getAllGisPlaces(self, **args):
    """Delegate to ``getAllGisPlaces`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)
227
def getOrigPages(self, **args):
    """Delegate to ``getOrigPages`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getOrigPages(**args)
231
def getAllPlaces(self, **args):
    """Delegate to ``getAllPlaces`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getAllPlaces(**args)
235
def getTocEntries(self, **args):
    """Delegate to ``getTocEntries`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getTocEntries(**args)
239
def getFigureEntries(self, **args):
    """Delegate to ``getFigureEntries`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getFigureEntries(**args)
243
def getNumPages(self, docinfo):
    """Delegate to ``getNumPages`` of ``self.template.fulltextclient`` with ``docinfo`` as the only argument."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)
247
def getNumTextPages(self, docinfo):
    """Delegate to ``getNumTextPages`` of ``self.template.fulltextclient`` with ``docinfo`` as the only argument."""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)
251
def getTranslate(self, **args):
    """Delegate to ``getTranslate`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
255
def getLemma(self, **args):
    """Delegate to ``getLemma`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getLemma(**args)
259
def getToc(self, **args):
    """Delegate to ``getToc`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getToc(**args)
263
def getTocPage(self, **args):
    """Delegate to ``getTocPage`` of ``self.template.fulltextclient``; keyword arguments are passed through."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
267
268
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" resolves to "text"
                     when the docinfo carries a text URL, otherwise "images"
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: prefer text when a text URL is known
        if 'textURL' in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
301
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the 'viewer_main' template for the document behind url.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "auto" resolves to "text_dict" when the docinfo carries
                     a text URL, otherwise to "images"; other values are
                     passed on unchanged
    @param tocMode: type of 'table of contents' for navigation; for anything
                    but "thumbs" the table of contents is fetched via getToc
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, handed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: prefer text when a text URL is known
        if 'textURL' in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
    template = self.template.viewer_main
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
340
def generateMarks(self,mk):
    # Build the "mk=..." query fragment for one mark or a sequence of marks.
    # None yields an empty string; several marks are joined with '&' so the
    # result stays a valid query string ("mk=a&mk=b").
    # (kept as comments: this method has no docstring in the original)
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    return "&".join(["mk=%s" % m for m in mk])
350
351
def getBrowser(self):
    """Return the browser information dict produced by ``browserCheck`` for this request."""
    return browserCheck(self)
356
def findDigilibUrl(self):
    """Return whatever ``getDLBaseUrl()`` of the 'zogilib' object in the template folder reports."""
    return self.template.zogilib.getDLBaseUrl()
361
def getDocumentViewerURL(self):
    """Return the result of ``absolute_url()`` for this instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
365
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if idx != selected:
        return style
    return style + 'sel'
373
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer built from the current request parameters.

    param/val -- set a single parameter; val=None removes it
    params    -- dict of parameters to set; a value of None removes the key
    baseUrl   -- URL in front of the query string (default REQUEST['URL1'])
    paramSep  -- separator between the key=value pairs

    Values are quoted with ``urllib.quote_plus`` (leaving '/' alone).
    Non-string values are converted with ``str``, unicode values are encoded
    as UTF-8 and list/tuple values produce one key=value pair per item.
    """
    def _pairs(key, value):
        # repeated form fields arrive as lists -> one pair per item
        if isinstance(value, (list, tuple)):
            items = value
        else:
            items = [value]
        result = []
        for item in items:
            if isinstance(item, unicode):
                item = item.encode('utf-8')
            elif not isinstance(item, str):
                item = str(item)
            result.append("%s=%s" % (key, urllib.quote_plus(item, '/')))
        return result

    # copy existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = val

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    # if the first call used mode=filepath, follow-up links use imagepath
    # on the parent directory instead
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for k, v in urlParams.items():
        pairs.extend(_pairs(k, v))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s" % (baseUrl, ps)
411
412
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return the link from ``getLink`` using '&' as parameter separator."""
    # NOTE(review): '&' is also getLink's default separator, so this is
    # currently the same as calling getLink directly; the name suggests an
    # HTML-escaped separator may have been intended -- confirm.
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&')
416
def getInfo_xml(self,url,mode):
    """Render the 'info_xml' template with the docinfo for ``url``.

    url  -- url which contains display information
    mode -- defines how to access the document behind url
    """
    # getattr guard: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
426
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the current state of a toggle option kept in the session.

    The option is stored under ``optionName`` as a dict with the keys
    'lastState' and 'state'.  On first use the state is ``initialState``;
    afterwards the state flips whenever ``newState`` differs from the
    remembered 'lastState'.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
441
def isAccessible(self, docinfo):
    """Return True if access to the resource described by ``docinfo`` is granted.

    'accessType' == 'free' is always accessible.  A missing access type or
    one listed in ``self.authgroups`` requires a user whose name is not
    "Anonymous User".  Any other access type is logged and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
461
462
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Query digilib's dirInfo-xml.jsp for ``path`` and store the page count.

    path    -- image directory path handed to digilib as ``fn``
    docinfo -- dict to update (a new one is created when None)
    cut     -- number of trailing path components to strip first

    Sets docinfo['numPages'] to the text of the first //dir/size element,
    or to 0 if there is none, and returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string used to lack a placeholder, so nothing was shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%r"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
492
def getIndexMetaPath(self,url):
    """Return the '/mpiwg/online/...' path for ``url``.

    Everything from the last 'experimental/' or 'permanent/' component on is
    kept and prefixed with '/mpiwg/online/'.  If ``url`` contains neither,
    an empty string is returned.
    """
    # the previous pattern escaped the '|' and had lost its quantifiers,
    # so it could never match a real path
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
501
502
503
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at ``url``.

    A real URL (http:// or https://) is returned unchanged.  Anything else
    is treated as an online path: '/mpiwg/online' is removed, the digilib
    Texter servlet URL is put in front and '/index.meta' is appended unless
    the result already ends with 'index.meta'.
    """
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
519
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for ``url`` and return it parsed as a DOM.

    Raises IOError if no data could be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
532
def getPresentationInfoXML(self, url):
    """Fetch the presentation info.xml document at ``url`` and return it parsed as a DOM.

    A real URL (http:// or https://) is fetched as it is; anything else is
    treated as an online path and read through the digilib Texter servlet
    after removing '/mpiwg/online'.
    Raises IOError if no data could be read.
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
551
552
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the access type from index.meta and store it in docinfo['accessType'].

    path    -- path of the index.meta (only used when ``dom`` is None)
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta; fetched from ``path`` when None
    cut     -- number of trailing path components to strip before fetching

    The value is the ``type`` attribute of //access-conditions/access.  For
    the types 'group' and 'institution' it is replaced by the lower-cased
    text of the <name> child, if there is one.  Without access information
    the value is None.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            # without a <name> the bare type is kept instead of failing
            # with an IndexError

    docinfo['accessType'] = access
    return docinfo
575
576
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read bibliographical info from index.meta into docinfo.

    path    -- path of the index.meta; also used for docinfo['indexMetaPath']
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta; fetched from ``path`` when None
    cut     -- number of trailing path components to strip before fetching

    Sets 'indexMetaPath', 'bib_type' and, where available, 'bib' (all raw
    fields below //bib), 'author', 'title', 'year' and 'lang'.
    Returns docinfo.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    # NOTE(review): self.metadata is not defined in this file -- presumably
    # acquired from the surrounding Zope site; confirm
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # type names in index.meta use "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is best effort: a missing element or mapping entry
        # simply leaves the docinfo key unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # NOTE(review): the nesting of this lang block was reconstructed --
        # confirm it belongs inside the mapping branch
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            docinfo['lang']=''

    return docinfo
631
632
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from index.meta in docinfo['name'].

    path    -- path of the index.meta (only used when ``dom`` is None)
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta; fetched from ``path`` when None
    cut     -- number of trailing path components to strip before fetching

    If the document has no /resource/name element the name is set to an
    empty string.  Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name'] = getTextFromNode(names[0])
    else:
        # a missing name used to end in an IndexError
        docinfo['name'] = ""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
646
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    url     -- location of the index.meta (URL or online path)
    dom     -- already parsed index.meta; fetched from ``url`` when None
    docinfo -- dict to update (a new one is created when None)

    Reads archive path, image directory, viewer URL and text URLs from the
    dom, then adds bibliographical, name and access information through the
    other get...FromIndexMeta methods.
    Raises IOError when no archive path can be determined.
    Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames and (len(archiveNames) > 0):
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths and (len(archivePaths) > 0):
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs and (len(imageDirs) > 0):
        imageDir = getTextFromNode(imageDirs[0])

    else:
        # no image tag: not an error any more because text mode is now standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls and (len(viewerUrls) > 0):
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL (no scheme)
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls and (len(textUrls) > 0):
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        # NOTE(review): 'imagePath' is only set above when there is no image
        # tag or the image dir is non-empty -- confirm it is always present here
        if not docinfo['imagePath']:
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrls and (len(presentationUrls) > 0): # overwrite the bib info with the presentation information
        # the presentation url is built by replacing index.meta in the
        # metadata url with the relative path to the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
750
751
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Read author, title and year from the presentation info document at ``url``.

    url     -- location of the presentation info XML
    docinfo -- dict to update (a new one is created when None)
    dom     -- ignored; the presentation document is always fetched from url

    docinfo['author'], ['title'] and ['year'] are taken from the first
    //author, //title and //date element; a key is left untouched when the
    element is missing.  Returns docinfo.
    """
    if docinfo is None:
        # used to fail silently and return None
        docinfo = {}

    dom = self.getPresentationInfoXML(url)
    fields = (('author', "//author"), ('title', "//title"), ('year', "//date"))
    for key, query in fields:
        nodes = dom.xpath(query)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
769
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    path    -- path to the images; the index.meta file is assumed to be one
               level higher
    docinfo -- dict to update (a new one is created when None)
    cut     -- number of trailing path components to strip for the image URL

    Sets 'imagePath' and 'imageURL', then adds directory, bibliographical
    and access information.  Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # the scaler URL points at the path shortened by ``cut`` components
    scalerPath = path
    for _ in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is expected one level above the images, hence cut+1
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    return docinfo
790
791
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, built according to ``mode``.

    mode -- 'texttool', 'imagepath' or 'filepath'
    url  -- location of the document

    A docinfo cached in the session is reused when its mode and url match.
    A freshly built docinfo is stored in the session under 'docinfo'.
    Raises ValueError for an unknown mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
822
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict used by the templates.

    current  -- current page number
    start    -- first page of the thumbnail group; by default the start of
                the group that contains ``current``
    rows     -- thumbnail rows (default self.thumbrows)
    cols     -- thumbnail columns (default self.thumbcols)
    docinfo  -- docinfo dict; 'numPages' and 'tocSize_<tocMode>' are used
                when present (None is treated like an empty dict)
    viewMode -- stored as pageinfo['viewMode']
    tocMode  -- stored as pageinfo['tocMode']

    The remaining entries are copied from the request with defaults.
    """
    if docinfo is None:
        # the tocSize lookup below used to fail on None
        docinfo = {}
    request = self.REQUEST

    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains ``current``
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if 'numPages' in docinfo:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up (explicit floor division)
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        # cached toc: clamp the requested toc page to the last existing one
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
874
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on the viewer.

    authgroups is a comma separated list; it is stored as a list of
    stripped, lower-cased names.  With a RESPONSE the browser is redirected
    to 'manage_main' afterwards.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
884
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of ``self``."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
889
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to ``self``.

    With a RESPONSE the browser is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
897
## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer

    A ZopePageTemplate subclass that only sets its own meta_type.
    """
    meta_type="DocumentViewer Template"
902
903
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of ``self``."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
908
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title for the template
    REQUEST -- when given, the browser is redirected to the new object's
               'manage_main'; without it no redirect takes place

    ``text`` and ``submit`` are accepted but not used.  Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to garbage collection
    fh = open(templatePath, 'r')
    try:
        txt = fh.read()
    finally:
        fh.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
928
929
930

Note: See TracBrowser for help on using the repository browser.

Download in other formats: