Context Navigation

source: documentViewer/documentViewer.py @ 412:759ed7577eef

Last change on this file since 412:759ed7577eef was 412:759ed7577eef, checked in by abukhman, 13 years ago
zwei neue Methoden
File size: 35.7 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log txt followed directly by txt2 on the root logger.

    method is used as the logging level when it is an integer (the caller
    in this module passes logging.INFO).  Any other value falls back to
    logging.INFO, the level every message used to be logged at.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    # lazy %-arguments: '%' characters inside txt/txt2 are never interpreted
    logging.log(level, "%s%s", txt, txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    Only conversion failures (TypeError, ValueError, OverflowError) select
    the default; unrelated exceptions such as KeyboardInterrupt are no
    longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None or has no text-node children.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node printed as XML by Ft.Xml.Domlette.Print, as a string."""
    out = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
        return out.getvalue()
    finally:
        out.close()
53
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Returns a dict with the keys 'ua', 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac' and 'staticHTML' (always False), plus 'versIE'
    when an "MSIE <version>" token is the second "; "-separated field after
    the first '('.  A missing header is treated as an empty string.

    NOTE: this definition shadows the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the module.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    isIE = 'MSIE' in ua
    bt = {
        'ua': ua,
        'isIE': isIE,
        # Netscape 4 is only considered when the agent is not IE
        'isN4': (not isIE) and ('Mozilla/4.' in ua),
    }

    paren = ua.find('(')
    if paren > -1:
        fields = ua[paren:].split("; ")
        if len(fields) > 1 and 'MSIE' in fields[1]:
            tokens = fields[1].split(" ")
            if len(tokens) > 1:
                bt['versIE'] = tokens[1]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False
    return bt
80
81
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET and return the response body.

    data is appended to url as the query string: a string is appended as
    is, a dict/list/tuple is urlencoded first.  If url already contains a
    '?', the data is joined with '&' instead of a second '?'.

    The request is attempted up to num_tries times.  An HTTPError stops the
    retries immediately, any other URLError triggers the next attempt.

    Raises IOError when no response could be obtained.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        query = data
    elif isinstance(data, (dict, list, tuple)):
        query = urllib.urlencode(data)
    if query is not None:
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s" % (url, separator, query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen() has no timeout argument before 2.6: set the
                # socket default for the call, then restore it so other
                # code in the process is not affected
                import socket
                previous = socket.getdefaulttimeout()
                socket.setdefaulttimeout(float(timeout))
                try:
                    response = urllib2.urlopen(url)
                finally:
                    socket.setdefaulttimeout(previous)
            else:
                response = urllib2.urlopen(url, timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e"
            # keeps this block parseable on every supported interpreter
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection-level problem: fall through to the next attempt

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if read() fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Folder-based document viewer.

    Pages are rendered through the page templates declared below.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # Class-level default: instances that never went through
    # changeDocumentViewer() have no instance attribute, and several methods
    # test "not self.digilibBaseUrl" before looking the URL up.
    digilibBaseUrl = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is restricted to managers
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
164
165
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the viewer and create its 'template' sub-folder.

    The template folder receives a 'fulltextclient' (MpdlXmlTextServer for
    textServerName) and a 'zogilib' (zogiLib for imageScalerUrl).  Failure
    to create either one is logged and does not abort construction.

    authgroups is a comma-separated string; it is stored as a list of
    stripped, lower-cased group names.
    """
    self.id=id
    self.title=title
    # previously accepted but silently dropped; None keeps the lazy lookup
    # through findDigilibUrl() in the view methods
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        # best effort: log message and traceback, keep the viewer usable
        logging.exception("Unable to create MpdlXmlTextServer for fulltextclient")
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.exception("Unable to create zogiLib for zogilib")
193
194
195	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """Forward to template.fulltextclient.getTextPage and return its result."""
    client = self.template.fulltextclient
    return client.getTextPage(**kwargs)


def getQuery(self, **kwargs):
    """Forward to template.fulltextclient.getQuery and return its result."""
    client = self.template.fulltextclient
    return client.getQuery(**kwargs)


def getQueryResultHits(self, **kwargs):
    """Forward to template.fulltextclient.getQueryResultHits and return its result."""
    client = self.template.fulltextclient
    return client.getQueryResultHits(**kwargs)


def getQueryResultHitsText(self, **kwargs):
    """Forward to template.fulltextclient.getQueryResultHitsText and return its result."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsText(**kwargs)


def getQueryResultHitsFigures(self, **kwargs):
    """Forward to template.fulltextclient.getQueryResultHitsFigures and return its result."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsFigures(**kwargs)


def getSearch(self, **kwargs):
    """Forward to template.fulltextclient.getSearch and return its result."""
    client = self.template.fulltextclient
    return client.getSearch(**kwargs)


def getGisPlaces(self, **kwargs):
    """Forward to template.fulltextclient.getGisPlaces and return its result."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**kwargs)


def getAllGisPlaces(self, **kwargs):
    """Forward to template.fulltextclient.getAllGisPlaces and return its result."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**kwargs)


def getOrigPages(self, **kwargs):
    """Forward to template.fulltextclient.getOrigPages and return its result."""
    client = self.template.fulltextclient
    return client.getOrigPages(**kwargs)


def getAllPlaces(self, **kwargs):
    """Forward to template.fulltextclient.getAllPlaces and return its result."""
    client = self.template.fulltextclient
    return client.getAllPlaces(**kwargs)


def getTocEntries(self, **kwargs):
    """Forward to template.fulltextclient.getTocEntries and return its result."""
    client = self.template.fulltextclient
    return client.getTocEntries(**kwargs)


def getFigureEntries(self, **kwargs):
    """Forward to template.fulltextclient.getFigureEntries and return its result."""
    client = self.template.fulltextclient
    return client.getFigureEntries(**kwargs)


def getNumPages(self, docinfo):
    """Forward docinfo to template.fulltextclient.getNumPages and return its result."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)


def getNumTextPages(self, docinfo):
    """Forward docinfo to template.fulltextclient.getNumTextPages and return its result."""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)


def getTranslate(self, **kwargs):
    """Forward to template.fulltextclient.getTranslate and return its result."""
    client = self.template.fulltextclient
    return client.getTranslate(**kwargs)


def getLemma(self, **kwargs):
    """Forward to template.fulltextclient.getLemma and return its result."""
    client = self.template.fulltextclient
    return client.getLemma(**kwargs)


def getToc(self, **kwargs):
    """Forward to template.fulltextclient.getToc and return its result."""
    client = self.template.fulltextclient
    return client.getToc(**kwargs)


def getTocPage(self, **kwargs):
    """Forward to template.fulltextclient.getTocPage and return its result."""
    client = self.template.fulltextclient
    return client.getTocPage(**kwargs)
267
268
269	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the 'thumbs_main_rss' template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', otherwise
        'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)mode: %s url:%s start:%s pn:%s", mode, url, start, pn)

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute does not exist on instances that were never
    # configured through changeDocumentViewer()
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text view if a text URL is configured, page images otherwise
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
301
302	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    """Render the main viewer page ('viewer_main' template).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'auto' becomes 'text_dict' when docinfo has a
        'textURL' key or a non-empty 'textURLPath', otherwise 'images';
        any other value is passed through unchanged
    @param tocMode: type of 'table of contents' for navigation; for any
        value other than 'thumbs' the table of contents is fetched with
        getToc
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @param mk: mark(s), handed to the template after generateMarks()

    Returns the string "ERROR: template folder missing!" when the instance
    has no 'template' attribute.
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # text view if a text URL is configured, page images otherwise
        if ('textURL' in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    pt = getattr(self.template, 'viewer_main')
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
340
def generateMarks(self,mk):
    """Return the marks in mk as a query-string fragment.

    mk may be None (result ""), a single value ("mk=<value>") or a
    list/tuple of values, which are joined as "mk=a&mk=b".  Values are
    inserted as given, without URL quoting.
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    # separate the parameters with '&'; plain concatenation ("mk=amk=b")
    # would merge all marks into one unusable value
    return "&".join(["mk=%s" % m for m in mk])
350
351
def getBrowser(self):
    """Return the browser description dict computed by browserCheck(self)."""
    return browserCheck(self)
357
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
362
def getDocumentViewerURL(self):
    """Return this instance's URL as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
366
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
374
def getLink(self,param=None,val=None):
    """Return URL1 of the request plus the (modified) form as query string.

    A copy of REQUEST.form is used.  If param is given, it is removed when
    val is None and set to str(val) otherwise.  A 'mode' of 'filepath' is
    rewritten to 'imagepath' and 'url' is shortened by one path component.
    The parameters are encoded with urllib.urlencode.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    # if the first call used mode=filepath, switch to imagepath from now on
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        # 'url' may be absent from the form; nothing to shorten then
        if "url" in params:
            params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    ps = urllib.urlencode(params)
    return self.REQUEST['URL1'] + "?" + ps
394
def getLinkAmp(self,param=None,val=None):
    """Return URL1 of the request plus the (modified) form as query string.

    A copy of REQUEST.form is used.  If param is given, it is removed when
    val is None and set to str(val) otherwise.  Values are converted with
    str() and quoted with urllib.quote; keys are inserted as they are.

    NOTE(review): the method name suggests the parameters were meant to be
    joined with '&amp;' (for embedding in XML/HTML), but the code joins
    them with a plain '&' -- confirm against the templates before changing.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            params.pop(param, None)
        else:
            params[param] = str(val)

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    # str(): form values are not necessarily strings (e.g. converted ints)
    # and urllib.quote() only accepts strings
    ps = "&".join(["%s=%s"%(k,urllib.quote(str(v))) for (k, v) in params.items()])
    return self.REQUEST['URL1'] + "?" + ps
410
def getInfo_xml(self,url,mode):
    """Return the 'info_xml' template rendered with the docinfo for mode/url."""
    # getattr: the attribute does not exist on instances that were never
    # configured through changeDocumentViewer()
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
420
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the current state of a toggle option kept in the session.

    On the first call for optionName the state is initialState.  On later
    calls the stored state is inverted whenever newState differs from the
    newState of the previous call.  The option record is written back to
    REQUEST.SESSION[optionName] on every call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # the requested state changed since the last call -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session yet -- initial state
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
435
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    docinfo['accessType'] == 'free' grants access to everybody.  A missing
    access type, or one listed in self.authgroups, grants access to every
    user whose name is not "Anonymous User".  Any other access type is
    logged as an error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
455
456
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the page count of the digilib directory path in docinfo.

    path is first shortened by cut trailing components.  The directory
    info is requested from <digilibBaseUrl>/dirInfo-xml.jsp and
    docinfo['numPages'] is set to the number in the first //dir/size
    element, or to 0 if there is none.  Returns docinfo (a new dict when
    none was passed).
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the old format string had no placeholder, so sizes was never shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s", sizes)

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
486
def getIndexMetaPath(self,url):
    """Return only the repository path contained in url.

    The part of url from the last 'experimental/' or 'permanent/' segment
    onwards is returned, prefixed with '/mpiwg/online/'.  Returns "" when
    url contains neither segment.
    """
    # ".*" and "|" restored: the pattern as found in the file could only
    # match a literal "experimental|permanent" after exactly one character
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
495
496
497
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    A url starting with http:// or https:// is returned unchanged.  Any
    other value is treated as a repository path: '/mpiwg/online' is
    removed, the path is appended to <digilibBaseUrl>/servlet/Texter?fn=
    and '/index.meta' is added unless the result already ends with
    'index.meta'.
    """
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
513
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as a DOM.

    Raises IOError when no data could be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is not None:
        return Parse(txt)
    raise IOError("Unable to read index meta from %s"%(url))
526
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as a DOM.

    A url starting with http:// or https:// is fetched as it is.  Any
    other value is treated as a repository path: '/mpiwg/online' is
    removed and the path is appended to <digilibBaseUrl>/servlet/Texter?fn=.

    Raises IOError when no data could be read.
    """
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
545
546
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    The document is taken from dom or, when dom is None, loaded from path
    shortened by cut trailing components.  docinfo['accessType'] is set to
    the type attribute of the first //access-conditions/access element
    (None if there is none).  For the types 'group' and 'institution' the
    lower-cased text of the access <name> element is stored instead; if
    that element is missing, the type itself is kept and a warning is
    logged.  Returns docinfo (a new dict when none was passed).
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # used to fail with IndexError
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing for type %s in: %s"%(access,path))

    docinfo['accessType'] = access
    return docinfo
569
570
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store bibliographical data from an index.meta document in docinfo.

    The document is taken from dom or, when dom is None, loaded from path
    shortened by cut trailing components.  The following keys are set:

    - 'indexMetaPath': getIndexMetaPath(path)
    - 'bib': dict of all child elements of <bib> (only if there are any)
    - 'bib_type': type attribute of <bib> with '-' replaced by ' ',
      'generic' if the attribute is missing
    - 'author', 'title', 'year': looked up through the field mapping for
      the bib type; a field that cannot be resolved is left untouched
    - 'lang': text of //bib/lang, '' if there is no such element

    Returns docinfo (a new dict when none was passed).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # index.meta uses "-" where the mapping names use " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))

    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    useMapping = False
    if len(bibmap) > 0:
        try:
            useMapping = len(bibmap['author'][0]) > 0
        except (KeyError, IndexError):
            useMapping = False

    if useMapping:
        for field in ('author', 'title', 'year'):
            try:
                nodes = dom.xpath("//bib/%s"%bibmap[field][0])
                docinfo[field]=getTextFromNode(nodes[0])
            except Exception:
                # best effort: a missing mapping entry, an empty tag name
                # (invalid XPath) or a missing element skips this field
                logging.debug("documentViewer (getbibinfofromindexmeta) no %s for %s"%(field,bibtype))
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)

    # the language does not depend on the mapping
    langNodes = dom.xpath("//bib/lang")
    if langNodes:
        docinfo['lang']=getTextFromNode(langNodes[0])
    else:
        docinfo['lang']=''

    return docinfo
625
626
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from an index.meta in docinfo['name'].

    The document is taken from dom or, when dom is None, loaded from path
    shortened by cut trailing components.  A missing name element is
    logged and stored as ''.  Returns docinfo (a new dict when none was
    passed).
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNodes = dom.xpath("/resource/name")
    if nameNodes:
        docinfo['name']=getTextFromNode(nameNodes[0])
    else:
        # used to fail with IndexError
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name']=''
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
640
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    The document is taken from dom or loaded for url.  Depending on the
    elements found, the keys 'imagePath', 'imageURL', 'numPages' (through
    getDirinfoFromDigilib or getNumTextPages), 'viewerURL', 'textURL' and
    'textURLPath' are set, followed by the bibliographical, name and
    access information of the other get...FromIndexMeta methods.  A
    texttool/presentation entry overrides author, title and year.

    Raises IOError when no archive path can be determined.
    Returns docinfo (a new dict when none was passed).
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        # an empty <archive-path/> is treated like a missing one
        # (indexing the empty string used to raise IndexError)
        archivePath = getTextFromNode(archivePaths[0]) or None

    if archivePath:
        # clean up archive path
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error anymore: text mode is the standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: path relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])
        # .get(): 'imagePath' is not set when the image tag exists but is
        # empty (direct indexing used to raise KeyError)
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # presentation information overrides the bib information; its URL
        # is the metadata URL with index.meta replaced by the relative
        # path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
744
745
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Override author, title and year in docinfo from a presentation info.xml.

    The document is always loaded from url with getPresentationInfoXML;
    the dom argument is accepted but not used.  'author', 'title' and
    'year' are taken from the first //author, //title and //date element;
    a key whose element is missing is left untouched.  Returns docinfo (a
    new dict when none was passed).
    """
    if docinfo is None:
        # used to return None, because every assignment failed silently
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for key, expr in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
763
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a directory of page images.

    path is the path to the images ('/mpiwg/online' is removed); the
    index.meta file is assumed to be one level higher, so the bib and auth
    lookups are called with cut+1.  'imagePath' is the path before cutting,
    'imageURL' the Scaler URL of the path shortened by cut components.
    Returns docinfo (a new dict when none was passed).
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    shortened = imagePath
    for step in range(cut):
        shortened = getParentDir(shortened)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+shortened)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+shortened

    # index.meta is expected one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
784
785
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    A docinfo stored in the session under 'docinfo' is reused when its
    'mode' and 'url' match.  Otherwise a new one is built ('texttool':
    getDocinfoFromTextTool; 'imagepath': getDocinfoFromImagePath;
    'filepath': getDocinfoFromImagePath with cut=1), stored in the session
    and returned.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it is still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
816
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the current page and request.

    rows and cols default to self.thumbrows and self.thumbcols; their
    product is the thumbnail group size.  start defaults to the first page
    of the group that contains current.  When docinfo has 'numPages', 'end'
    is limited to it and 'numgroups' is set.  The text, query and table of
    contents settings are read from the request, with defaults.  When
    docinfo has a 'tocSize_<tocMode>' entry, 'tocPN' is limited to the
    number of table-of-contents pages.
    """
    if docinfo is None:
        # the 'in docinfo' test further down cannot handle None
        docinfo = {}
    request = self.REQUEST

    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the thumbnail group containing current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if 'numPages' in docinfo:
        numPages = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], numPages)
        # number of groups, rounded up ('//' is integer division on every
        # Python version)
        pageinfo['numgroups'] = numPages // grpsize
        if numPages % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        # cached toc: limit the toc page number to the last toc page
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
868
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new configuration values on the viewer.

    authgroups is a comma-separated string; it is stored as a list of
    stripped, lower-cased group names.  If RESPONSE is given, it is
    redirected to 'manage_main'.

    NOTE(review): the defaults here (thumbrows=2, thumbcols=5) are the
    reverse of those in __init__ (thumbcols=2, thumbrows=5) -- confirm
    which pair is intended.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
878
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
883
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    If RESPONSE is given, it is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
891
892	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta_type for the document viewer."""

    meta_type = "DocumentViewer Template"
896
897
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
902
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    The new template is stored in self under id and its text is read from
    'zpt/viewer_main.zpt' in the package directory; the text and submit
    arguments are not used.  When a REQUEST is given, the response is
    redirected to the manage_main page of the new template.  Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # the file handle used to be left to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is no response to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
922
923
924

Note: See TracBrowser for help on using the repository browser.

Download in other formats: