Context Navigation

source: documentViewer/documentViewer.py @ 424:6646227c281e

Last change on this file since 424:6646227c281e was 424:6646227c281e, checked in by abukhman, 13 years ago
* empty log message *
File size: 34.9 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log ``txt`` immediately followed by ``txt2`` at INFO level.

    ``method`` is accepted so that three-argument callers keep working, but
    it is not used: the message always goes to ``logging.info``.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to an int, or ``int(default)`` on failure.

    Only conversion failures are handled: ``TypeError`` (e.g. ``None``),
    ``ValueError`` (non-numeric text, NaN) and ``OverflowError`` (infinite
    floats).  The former bare ``except`` also swallowed unrelated exceptions
    such as ``KeyboardInterrupt``.

    ``default`` is itself passed through ``int()`` so float defaults are
    accepted; a ``default`` that cannot be converted still raises.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Only children whose ``nodeType`` equals their ``TEXT_NODE`` constant
    contribute; text nested inside child elements is ignored.  ``None``
    yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return ``node`` serialized as an XML string in ``encoding``.

    The node is written with ``Ft.Xml.Domlette.Print`` into an in-memory
    buffer; the buffer content is returned and the buffer closed.
    """
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        stream.close()
53
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Reads ``HTTP_USER_AGENT`` through ``self.REQUEST.get_header`` and
    returns a dict with these keys:

    - ``ua``: the raw header value (``None`` when the header is absent)
    - ``isIE``: the agent contains ``MSIE``
    - ``isN4``: the agent contains ``Mozilla/4.`` and is not MSIE
    - ``versIE``: the MSIE version token; only present when it can be parsed
    - ``isMac`` / ``isWin``: the agent contains ``Macintosh`` / ``Windows``
    - ``isIEWin`` / ``isIEMac``: combinations of the flags above
    - ``staticHTML``: always ``False``

    A missing header used to raise because ``None`` was searched like a
    string; it is now treated as an empty agent so every flag is ``False``.

    Note: this module-level definition replaces the ``browserCheck`` name
    imported from ``Products.zogiLib.zogiLib`` at the top of the file.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    agent = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in agent
    # MSIE is checked first; N4 is only considered for non-MSIE agents.
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in agent)

    # The version is taken from the second "; "-separated field after the
    # first "(", e.g. "(compatible; MSIE 6.0; Windows NT 5.1)" -> "6.0".
    fields = agent[agent.find('('):].split("; ")
    if len(fields) > 1 and 'MSIE' in fields[1]:
        tokens = fields[1].split(" ")
        if len(tokens) > 1:
            bt['versIE'] = tokens[1]

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    Everything from the last '/' onwards is dropped; a path that contains
    no '/' at all yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch ``url`` with an HTTP GET request and return the response body.

    Parameters:
    url -- address to fetch
    data -- optional query: a string is appended as is; a dict, list or
            tuple is encoded with ``urllib.urlencode`` first
    num_tries -- maximum number of attempts
    timeout -- timeout in seconds for each attempt

    An ``urllib2.HTTPError`` ends the attempts immediately; any other
    ``urllib2.URLError`` is logged and the request is tried again until
    ``num_tries`` attempts were made.

    Raises ``IOError`` when no response could be obtained.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        query = data
    elif isinstance(data, (dict, list, tuple)):
        query = urllib.urlencode(data)
    if query is not None:
        # a URL that already carries a query string must be extended with
        # '&'; a second '?' would corrupt the last existing parameter
        if '?' in url:
            sep = '&'
        else:
            sep = '?'
        url = "%s%s%s"%(url,sep,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument before 2.6, so the
                # process-wide socket default has to be set -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # HTTPError is a URLError subclass and must be handled first.
            # sys.exc_info() is used because this code also supports
            # Python < 2.6, which has no "except ... as" syntax.
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection-level problem: fall through and try again

    if response is None:
        raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))

    # close the response even when reading fails
    try:
        return response.read()
    finally:
        response.close()
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """document viewer (Zope ``Folder`` subclass)"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # adds the "main config" tab to the tabs inherited from Folder
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # Class-level default: __init__ historically did not assign this
    # attribute, while thumbs_rss, getInfo_xml and the digilib helpers read
    # ``self.digilibBaseUrl``.  changeDocumentViewer and index_html set the
    # per-instance value.
    digilibBaseUrl = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is restricted to 'View management screens'
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores the basic settings and creates the ``template`` sub-folder with
    two helper objects:

    - ``fulltextclient``: a ``MpdlXmlTextServer`` for ``textServerName``
    - ``zogilib``: a ``zogiLib`` for ``imageScalerUrl``

    Failure to create either helper is logged and does not abort creation.

    ``authgroups`` is a comma separated list of authorized groups; it is
    stored as a list of stripped, lower-cased names.
    """
    self.id=id
    self.title=title
    # this argument used to be accepted but silently dropped
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        # sys.exc_info() instead of "except ... as": the module also
        # supports Python < 2.6
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """get page (forwards all keyword arguments to template.fulltextclient.getTextPage)"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """get query in search (forwards to template.fulltextclient.getQuery)"""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """get search (forwards to template.fulltextclient.getSearch)"""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getGisPlaces(self, **args):
    """get gis places (forwards to template.fulltextclient.getGisPlaces)"""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """get all gis places (forwards to template.fulltextclient.getAllGisPlaces)"""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)

def getTranslate(self, **args):
    """get translate (forwards to template.fulltextclient.getTranslate)"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """get lemma (forwards to template.fulltextclient.getLemma)"""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """get toc (forwards to template.fulltextclient.getToc)"""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """get tocpage (forwards to template.fulltextclient.getTocPage)"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
231
232
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbs_main_rss template for a document
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes "text" when the docinfo has a text URL and "images" otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard as in index_html: the attribute may never have been
    # assigned on this instance, and a plain read would raise
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): # text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
265
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    main view: renders the viewer_main template for a document
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: text, images or auto; auto becomes "text_dict" when the docinfo has a text URL and "images" otherwise
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @param mk: mark or list of marks, handed to the template via generateMarks
    further request parameters (characterNormalization, querySearch, ...) are read by getPageinfo
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every table-of-contents mode except "thumbs" is filled in by getToc
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # auto mode: show text when a text URL is known, page images otherwise
        hasText = ('textURL' in docinfo) or docinfo.get('textURLPath',None)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    template = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
307
def generateMarks(self,mk):
    # Builds the "mk" text that index_html hands to the viewer_main template:
    # one "mk=<value>" piece per mark, concatenated without any separator.
    # A single value is treated as a one-element list; None gives "".
    # NOTE(review): documented with comments instead of a docstring, as in
    # the original -- in Zope 2 a docstring would presumably make this method
    # publishable through the web; confirm before adding one.
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    return "".join(["mk=%s"%mark for mark in marks])
317
318
def getBrowser(self):
    """returns the browser information dict from browserCheck (also logged at debug level)"""
    info = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(info,))
    return info
324
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib

    Returns the result of ``template.zogilib.getDLBaseUrl()``, or ``None``
    when the ``template`` folder or its ``zogilib`` object is missing
    (``__init__`` only logs an error when zogilib cannot be created).
    Callers combine the result with ``or "<default url>"``, so ``None``
    selects their default instead of raising ``AttributeError``.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        logging.warning("documentViewer (findDigilibUrl) no zogilib in template folder")
        return None
    return zogilib.getDLBaseUrl()
329
def getDocumentViewerURL(self):
    """returns the URL of this instance, i.e. the result of absolute_url()"""
    viewerUrl = self.absolute_url()
    return viewerUrl
333
def getStyle(self, idx, selected, style=""):
    """returns style with 'sel' appended when idx equals selected, otherwise style unchanged"""
    if not (idx == selected):
        return style
    return style + 'sel'
341
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    Starts from a copy of the current request form, then:

    - ``param``/``val`` set one parameter; ``val=None`` removes it
    - ``params`` is a dict of further parameters; a ``None`` value removes
      that parameter
    - ``mode=filepath`` is rewritten to ``mode=imagepath`` with ``url``
      shortened by its last path component

    Values are quoted with ``urllib.quote_plus`` keeping '/' unescaped and
    joined with ``paramSep``.  ``baseUrl`` defaults to ``REQUEST['URL1']``.

    Values no longer have to be byte strings: unicode is encoded as UTF-8,
    other objects are converted with ``str()``, and list or tuple values
    give one ``key=value`` pair per element.  Such values previously made
    the quoting fail.
    """
    # copy existing request params
    urlParams=self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = val

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # if filepath was set on the first call, change it to imagepath now
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    def quoted(value):
        # quote_plus needs a byte string
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        return urllib.quote_plus(str(value), '/')

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for k, v in urlParams.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            pairs.append("%s=%s"%(k, quoted(item)))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
379
380
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """link to documentviewer with parameter param set to val

    Same as getLink, but the parameters are joined with the escaped
    separator '&amp;' so the result can be embedded in XML/HTML markup.
    The separator passed here used to be a plain '&', which made this
    method identical to getLink with its default separator.
    """
    return self.getLink(param, val, params, baseUrl, '&amp;')
384
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Renders the info_xml template with the docinfo for ``mode``/``url``.
    """
    # getattr guard as in index_html: the attribute may never have been
    # assigned on this instance, and a plain read would raise
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
394
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """returns new option state

    The session keeps, under optionName, a dict with the last seen newState
    ('lastState') and the current boolean 'state'.  The first call stores
    initialState; afterwards the state flips whenever newState differs from
    the value seen on the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    # always written back to the session, even when nothing changed
    session[optionName] = opt
    return opt['state']
409
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    - accessType 'free': always granted
    - accessType missing or listed in self.authgroups: granted to any user
      whose name is not "Anonymous User"
    - anything else: logged as unknown access type and denied
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
429
430
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """returns docinfo with 'numPages' taken from the digilib dirInfo of path

    path -- digilib directory path; shortened by ``cut`` trailing components first
    docinfo -- dict to update (a new dict is used when None)
    cut -- number of trailing path components to remove

    The directory info is requested from ``dirInfo-xml.jsp`` below
    ``self.digilibBaseUrl``; 'numPages' is the integer value of the first
    ``//dir/size`` element, or 0 when there is none.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string had no placeholder, so the value was never logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
460
def getIndexMetaPath(self,url):
    """returns only the archive path: "/mpiwg/online/" plus the part of url
    starting at the first "experimental/" or "permanent/"; "" when url
    contains neither

    The former pattern contained an escaped "|" and single-character
    wildcards, so it could only match a literal "experimental|permanent"
    and never matched a real path.
    """
    regpath = re.search(r"(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
469
470
471
def getIndexMetaUrl(self,url):
    """returns url of index.meta document at url

    A real URL (http:// or https://) is used as it is; anything else is
    taken as an online path and requested through the Texter servlet below
    ``self.digilibBaseUrl`` with "/mpiwg/online" removed.  "/index.meta" is
    appended unless the result already ends with "index.meta".
    """
    if url.startswith(("http://", "https://")):
        # real URL (https used to be mistaken for an online path)
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
487
def getDomFromIndexMeta(self, url):
    """get dom from index meta

    The address is resolved with getIndexMetaUrl, fetched with getHttpData
    and the content parsed with Parse.  Raises IOError when the fetched
    content is None.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    content = getHttpData(metaUrl)
    if content is not None:
        return Parse(content)

    raise IOError("Unable to read index meta from %s"%(url))
500
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    A real URL (http:// or https://) is fetched as it is; anything else is
    taken as an online path and requested through the Texter servlet below
    ``self.digilibBaseUrl`` with "/mpiwg/online" removed.
    Raises IOError when the fetched content is None.
    """
    if url.startswith(("http://", "https://")):
        # real URL (https used to be mistaken for an online path)
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
519
520
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Sets docinfo['accessType'] to:

    - None when there is no access-conditions/access/@type
    - the lower-cased access name for the types 'group' and 'institution'
    - the type value itself otherwise

    When dom is None the index.meta is loaded from path, shortened by
    ``cut`` trailing components.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # A missing name used to raise IndexError.  The bare type is
                # kept instead; isAccessible only grants it when that value
                # is configured in authgroups.
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
543
544
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Fills these docinfo entries:

    - 'indexMetaPath': result of getIndexMetaPath(path)
    - 'bib': dict of all child elements of //bib (local name -> text), only
      when there are any
    - 'bib_type': //bib/@type with "-" replaced by " ", "generic" when absent
    - 'author', 'title', 'year': looked up through the field mapping for the
      bib type, each only when it can be found
    - 'lang': text of //bib/lang, '' when it cannot be read

    When dom is None the index.meta is loaded from path, shortened by
    ``cut`` trailing components.

    NOTE(review): the nesting of the last part (the 'lang' lookup inside the
    mapping branch) was reconstructed; the indentation of the original
    source was not available -- confirm.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # index.meta uses "-" where the mapping uses " "
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))

    def bibText(name):
        # text of the first //bib/<name> element; raises when there is none
        return getTextFromNode(dom.xpath("//bib/%s"%name)[0])

    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=bibText(bibmap[field][0])
            except Exception:
                # Best effort, as before: a field that is missing or not
                # mapped is skipped.  "except Exception" instead of a bare
                # except lets KeyboardInterrupt/SystemExit through.
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s for %s"%(field,bibtype))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        try:
            docinfo['lang']=bibText("lang")
        except Exception:
            docinfo['lang']=''

    return docinfo
599
600
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets name info from the index.meta file at path or given by dom

    Sets docinfo['name'] to the text of the first /resource/name element.
    A missing element used to raise IndexError although
    getDocinfoFromTextTool only warns about it before calling this method;
    the name is now set to '' in that case.

    When dom is None the index.meta is loaded from path, shortened by
    ``cut`` trailing components.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name']=getTextFromNode(names[0])
    else:
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=''
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
614
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Fills docinfo from the index.meta document at url (or the given dom):

    - 'imagePath', 'imageURL' and the digilib directory info from
      texttool/image, both set to "" when there is no image tag
    - 'viewerURL' from texttool/digiliburlprefix
    - 'textURL' from texttool/text (old style)
    - 'textURLPath' from texttool/text-url-path (new style)
    - bibliographical info, name and access info through the *FromIndexMeta
      methods; author, title and year are overwritten from the
      texttool/presentation document when there is one

    Raises IOError when no archive path can be determined.

    An empty resource/archive-path element used to raise IndexError; it is
    now handled like a missing element.

    NOTE(review): block nesting was reconstructed; the indentation of the
    original source was not available -- confirm.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
    if archivePath:
        # clean up archive path
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # element missing or empty: try to get archive-path from url
        archivePath = None
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error any more because text mode is standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme: not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # Overwrite these with the presentation information.  The
        # presentation URL is the metadata URL with index.meta replaced by
        # the relative path to the presentation info.
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
719
720
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Loads the presentation document at url and copies the text of its first
    //author, //title and //date elements to docinfo['author'],
    docinfo['title'] and docinfo['year'].  An element that cannot be read
    leaves the corresponding entry untouched.

    The ``dom`` argument is not used; the document is always loaded from url.

    With docinfo left at None the method used to return None, because the
    failing assignments were swallowed; a new dict is now used instead.
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for key, xpath in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            docinfo[key]=getTextFromNode(dom.xpath(xpath)[0])
        except Exception:
            # Best effort, as before.  "except Exception" instead of a bare
            # except lets KeyboardInterrupt/SystemExit through.
            pass
    return docinfo
738
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """path is the path to the images; it assumes that the index.meta file is one level higher.

    Fills docinfo with 'imagePath' (path without "/mpiwg/online"), the
    digilib directory info, 'imageURL' (Scaler servlet URL for the path
    shortened by ``cut`` components) and the bibliographical and access
    info from the index.meta ``cut+1`` levels above the image path.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # directory that is handed to the scaler: image path minus ``cut`` levels
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # the index.meta file is assumed to be one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
759
760
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo stored in the session under 'docinfo' is returned as it is
    when its 'mode' and 'url' match the arguments.  Otherwise a new docinfo
    is built for mode 'texttool', 'imagepath' or 'filepath', stored in the
    session and returned.  Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))

    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    fresh = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        fresh = self.getDocinfoFromTextTool(url, docinfo=fresh)
    elif mode=="imagepath":
        fresh = self.getDocinfoFromImagePath(url, docinfo=fresh)
    elif mode=="filepath":
        fresh = self.getDocinfoFromImagePath(url, docinfo=fresh,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    if 'textURLPath' not in fresh:
        fresh['textURLPath'] = None

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%fresh)
    self.REQUEST.SESSION['docinfo'] = fresh
    return fresh
791
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    current -- current page number (converted with getInt)
    start -- first page of the thumbnail group; defaults to the first page
             of the group that contains current
    rows, cols -- thumbnail grid size, defaulting to self.thumbrows and
                  self.thumbcols
    docinfo -- optional docinfo; 'numPages' limits 'end' and gives
               'numgroups', 'tocSize_<tocMode>' limits 'tocPN'
    viewMode, tocMode -- stored in the result as they are

    The remaining entries are copied from the request with these defaults:
    characterNormalization 'reg', querySearch 'fulltext', textPN '1',
    tocPageSize '30', queryPageSize '10', tocPN '1', searchPN '1', all
    others ''.

    docinfo=None (the default) used to raise TypeError in the table of
    contents part; it is now skipped like the 'numPages' part.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group of grpsize pages containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up; '//' keeps integer division explicit
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] = self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')

    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        # cached toc: limit tocPN to the number of toc pages (rounded up)
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = self.REQUEST.get('searchPN','1')
    pageinfo['sn'] = self.REQUEST.get('sn','')
    return pageinfo
844
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the configuration of the document viewer

    Stores title, digilibBaseUrl, thumbrows and thumbcols as given;
    authgroups is a comma separated list that is stored as a list of
    stripped, lower-cased names.  Redirects to manage_main when a RESPONSE
    is given.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
854
def manage_AddDocumentViewerForm(self):
    """add the viewer form (renders zpt/addDocumentViewer in the context of self)"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundForm = form.__of__(self)
    return boundForm()
859
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id, stores it in self and
    redirects to manage_main when a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
867
868	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer (a ZopePageTemplate with its own meta_type)"""

    meta_type = "DocumentViewer Template"
872
873
def manage_addDocumentViewerTemplateForm(self):
    """Form for adding (renders zpt/addDocumentViewerTemplate in the context of self)"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    boundForm = form.__of__(self)
    return boundForm()
878
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id in self and fills it
    with the content of zpt/viewer_main.zpt from this package; title is set
    when given.  ``text`` and ``submit`` are accepted but not used.

    With a REQUEST the browser is redirected to manage_main of the new
    template.  Without one (the default) the method used to fail when it
    reached the redirect; it now returns after creating the template.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly; the handle used to be left open
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
898
899
900

Note: See TracBrowser for help on using the repository browser.

Download in other formats: