Context Navigation

source: documentViewer/documentViewer.py @ 410:13dd83f46f6f

Last change on this file since 410:13dd83f46f6f was 410:13dd83f46f6f, checked in by abukhman, 13 years ago
* empty log message *
File size: 35.5 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log txt and txt2 (concatenated without separator) on the root logger.

    txt    -- message prefix
    method -- logging level such as logging.INFO; any value that is not an
              integer level falls back to logging.INFO
    txt2   -- message text appended to txt
    """
    level = logging.INFO
    # bool is an int subclass but never a meaningful logging level
    if isinstance(method, int) and not isinstance(method, bool):
        level = method
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) if it cannot be converted.

    number  -- value to convert (string, number, None, ...)
    default -- fallback value, itself passed through int(); defaults to 0
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None; children whose nodeType is not
    TEXT_NODE are skipped.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node printed as XML by Ft.Xml.Domlette.Print.

    node     -- node handed to Domlette's Print
    encoding -- encoding passed through to Print (default 'utf-8')
    """
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def browserCheck(self):
    """Inspect the request's User-Agent header and return browser flags.

    Returns a dict with the keys 'ua', 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac' and 'staticHTML' (always False), plus 'versIE'
    when an MSIE version token is found.  A missing header is treated as
    an empty string, so all flags are False in that case.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    if ua is None:
        # clients may omit the User-Agent header altogether
        ua = ""

    bt = {'ua': ua, 'isIE': False, 'isN4': False}
    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # the IE version is looked up in the second "; "-separated token of the
    # parenthesised part, e.g. "(compatible; MSIE 6.0; Windows NT 5.1)"
    tokens = ua[ua.find('('):].split("; ")
    if len(tokens) > 1 and 'MSIE' in tokens[1]:
        parts = tokens[1].split(" ")
        if len(parts) > 1:
            bt['versIE'] = parts[1]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path containing no '/' yields the empty string.
    """
    head, _sep, _tail = path.rpartition('/')
    return head
85
86
87	def getHttpData(url, data=None, num_tries=3, timeout=10):
88	"""returns result from url+data HTTP request"""
89	# we do GET (by appending data to url)
90	if isinstance(data, str) or isinstance(data, unicode):
91	# if data is string then append
92	url = "%s?%s"%(url,data)
93	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
94	# urlencode
95	url = "%s?%s"%(url,urllib.urlencode(data))
96
97	response = None
98	errmsg = None
99	for cnt in range(num_tries):
100	try:
101	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
102	if sys.version_info < (2, 6):
103	# set timeout on socket -- ugly :-(
104	import socket
105	socket.setdefaulttimeout(float(timeout))
106	response = urllib2.urlopen(url)
107	else:
108	response = urllib2.urlopen(url,timeout=float(timeout))
109	# check result?
110	break
111	except urllib2.HTTPError, e:
112	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
113	errmsg = str(e)
114	# stop trying
115	break
116	except urllib2.URLError, e:
117	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
118	errmsg = str(e)
119	# stop trying
120	#break
121
122	if response is not None:
123	data = response.read()
124	response.close()
125	return data
126
127	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
128	#return None
129
130
131
132	##
133	## documentViewer class
134	##
135	class documentViewer(Folder):
136	"""document viewer"""
137	meta_type="Document viewer"
138
139	security=ClassSecurityInfo()
140	manage_options=Folder.manage_options+(
141	{'label':'main config','action':'changeDocumentViewerForm'},
142	)
143
144	# templates and forms
145	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
146	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
147	toc_text = PageTemplateFile('zpt/toc_text', globals())
148	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
149	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
150	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
151	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
152	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
153	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
154	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
155	head_main = PageTemplateFile('zpt/head_main', globals())
156	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
157	info_xml = PageTemplateFile('zpt/info_xml', globals())
158
159
160	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
161	security.declareProtected('View management screens','changeDocumentViewerForm')
162	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
163
164
165	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
166	"""init document viewer"""
167	self.id=id
168	self.title=title
169	self.thumbcols = thumbcols
170	self.thumbrows = thumbrows
171	# authgroups is list of authorized groups (delimited by ,)
172	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
173	# create template folder so we can always use template.something
174
175	templateFolder = Folder('template')
176	#self['template'] = templateFolder # Zope-2.12 style
177	self._setObject('template',templateFolder) # old style
178	try:
179	import MpdlXmlTextServer
180	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
181	#templateFolder['fulltextclient'] = xmlRpcClient
182	templateFolder._setObject('fulltextclient',textServer)
183	except Exception, e:
184	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
185	try:
186	from Products.zogiLib.zogiLib import zogiLib
187	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
188	#templateFolder['zogilib'] = zogilib
189	templateFolder._setObject('zogilib',zogilib)
190	except Exception, e:
191	logging.error("Unable to create zogiLib for zogilib: "+str(e))
192
193
194	# proxy text server methods to fulltextclient
def getTextPage(self, **kwargs):
    """proxy for fulltextclient.getTextPage"""
    client = self.template.fulltextclient
    return client.getTextPage(**kwargs)

def getQuery(self, **kwargs):
    """proxy for fulltextclient.getQuery"""
    client = self.template.fulltextclient
    return client.getQuery(**kwargs)

def getQueryResultHits(self, **kwargs):
    """proxy for fulltextclient.getQueryResultHits"""
    client = self.template.fulltextclient
    return client.getQueryResultHits(**kwargs)

def getQueryResultHitsText(self, **kwargs):
    """proxy for fulltextclient.getQueryResultHitsText"""
    client = self.template.fulltextclient
    return client.getQueryResultHitsText(**kwargs)

def getQueryResultHitsFigures(self, **kwargs):
    """proxy for fulltextclient.getQueryResultHitsFigures"""
    client = self.template.fulltextclient
    return client.getQueryResultHitsFigures(**kwargs)

def getPDF(self, **kwargs):
    """proxy for fulltextclient.getPDF"""
    client = self.template.fulltextclient
    return client.getPDF(**kwargs)

def getSearch(self, **kwargs):
    """proxy for fulltextclient.getSearch"""
    client = self.template.fulltextclient
    return client.getSearch(**kwargs)

def getGisPlaces(self, **kwargs):
    """proxy for fulltextclient.getGisPlaces"""
    client = self.template.fulltextclient
    return client.getGisPlaces(**kwargs)

def getAllGisPlaces(self, **kwargs):
    """proxy for fulltextclient.getAllGisPlaces"""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**kwargs)

def getOrigPages(self, **kwargs):
    """proxy for fulltextclient.getOrigPages"""
    client = self.template.fulltextclient
    return client.getOrigPages(**kwargs)

def getAllPlaces(self, **kwargs):
    """proxy for fulltextclient.getAllPlaces"""
    client = self.template.fulltextclient
    return client.getAllPlaces(**kwargs)

def getNumPages(self, docinfo):
    """proxy for fulltextclient.getNumPages"""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """proxy for fulltextclient.getNumTextPages"""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)

def getTranslate(self, **kwargs):
    """proxy for fulltextclient.getTranslate"""
    client = self.template.fulltextclient
    return client.getTranslate(**kwargs)

def getLemma(self, **kwargs):
    """proxy for fulltextclient.getLemma"""
    client = self.template.fulltextclient
    return client.getLemma(**kwargs)

def getToc(self, **kwargs):
    """proxy for fulltextclient.getToc"""
    client = self.template.fulltextclient
    return client.getToc(**kwargs)

def getTocPage(self, **kwargs):
    """proxy for fulltextclient.getTocPage"""
    client = self.template.fulltextclient
    return client.getTocPage(**kwargs)
262
263
264	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" resolves to "text"
        when docinfo has a textURL key or a non-empty textURLPath, else
        to "images"
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr guard: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # pick the mode from what the document offers
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
296
297	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    """Render the 'viewer_main' template for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "auto" resolves to "text_dict" when docinfo has a
        textURL key or a non-empty textURLPath, otherwise to "images";
        any other value is passed on unchanged
    @param tocMode: type of table of contents; for anything but "thumbs"
        the table of contents is fetched through getToc
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @param mk: mark(s), handed to the template via generateMarks

    Returns an error string when the 'template' folder is missing.
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    templateExists = hasattr(self, 'template')
    if not templateExists:
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        detectedUrl = self.findDigilibUrl()
        self.digilibBaseUrl = detectedUrl or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    info = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        info = self.getToc(mode=tocMode, docinfo=info)

    if viewMode == "auto":
        hasText = ('textURL' in info) or info.get('textURLPath',None)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pages = self.getPageinfo(start=start,current=pn, docinfo=info,viewMode=viewMode,tocMode=tocMode)

    renderer = getattr(self.template, 'viewer_main')
    return renderer(docinfo=info,pageinfo=pages,viewMode=viewMode,mk=self.generateMarks(mk))
335
def generateMarks(self,mk):
    """Return the marks as a query-string fragment ("mk=a&mk=b").

    mk -- None, a single mark, or a list/tuple of marks

    Returns "" for None.  Values are inserted as given, without URL quoting.
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    # separate the parameters with "&" so that several marks stay distinct
    return "&".join(["mk=%s"%m for m in mk])
345
346
def getBrowser(self):
    """Return the result of browserCheck for this object."""
    browserInfo = browserCheck(self)
    return browserInfo
353
def findDigilibUrl(self):
    """Return what the zogilib object in the template folder reports as its base URL."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
358
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer object."""
    viewerUrl = self.absolute_url()
    return viewerUrl
362
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    isSelected = (idx == selected)
    if not isSelected:
        return style
    return style + 'sel'
370
def getLink(self,param=None,val=None):
    """Return a link to this viewer with the request parameter param set to val.

    The link is built from a copy of the current request form.  A val of
    None removes param from the query.  A request in mode "filepath" is
    rewritten to mode "imagepath" with url shortened by one path level.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif query.has_key(param):
            del query[param]

    # a first call in filepath mode is continued in imagepath mode
    if query.get("mode", None) == "filepath":
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    encoded = urllib.urlencode(query)
    return self.REQUEST['URL1']+"?"+encoded
390
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer with the request parameter param set to val.

    The link is built from a copy of the current request form; a val of
    None removes param.  Parameters are joined with "&" and each value is
    passed through urllib.quote.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif query.has_key(param):
            del query[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(query.items()))
    pairs = []
    for (key, value) in query.items():
        pairs.append("%s=%s"%(key,urllib.quote(value)))
    encoded = "&".join(pairs)
    return self.REQUEST['URL1']+"?"+encoded
406
def getInfo_xml(self,url,mode):
    """Render the 'info_xml' template with the docinfo for url and mode.

    url  -- url which contains display information
    mode -- defines how to access the document behind url
    """
    # getattr guard: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
416
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of a toggle option kept in the session.

    On first use the option starts with initialState.  Afterwards the
    state is inverted every time newState differs from the newState seen
    on the previous call.  The option is stored in the session under
    optionName.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        option = session.get(optionName)
        changed = option['lastState'] != newState
        if changed:
            # state in session has changed -- toggle
            option['state'] = not option['state']
            option['lastState'] = newState
    else:
        # not in session -- initial
        option = {'lastState': newState, 'state': initialState}

    session[optionName] = option
    return option['state']
431
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted.  A missing access type, or one
    listed in self.authgroups, is granted to any user whose name is not
    "Anonymous User".  Every other access type is logged and denied.
    """
    accessType = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%accessType)

    if accessType == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if accessType is not None and accessType not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%accessType)
        return False

    # only local access -- only logged in users
    currentUser = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(currentUser,self.REQUEST.getClientAddr()))
    if currentUser is None:
        return False
    return (currentUser.getUserName() != "Anonymous User")
451
452
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages reported by digilib's dirInfo in docinfo.

    path    -- directory path, sent as the "fn" parameter
    docinfo -- dict to update (a new one is created when None)
    cut     -- number of trailing path components to drop from path first

    docinfo['numPages'] is set from the first //dir/size element of the
    answer, or to 0 when there is none.  Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string needs a placeholder, otherwise sizes is never shown
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
482
def getIndexMetaPath(self,url):
    """Return the "/mpiwg/online/..." path for url, or "" if it has none.

    url is expected to contain "experimental/" or "permanent/"; the result
    is "/mpiwg/online/" followed by that directory name and everything
    after it.
    """
    # "|" must be a plain alternation and both wildcards need their "*",
    # otherwise no real path can ever match
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
491
492
493
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    An http:// or https:// URL is returned unchanged.  Anything else is
    treated as an online path: "/mpiwg/online" is removed, the digilib
    Texter servlet URL is prepended and "/index.meta" is appended unless
    the path already ends with "index.meta".
    """
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    # NOTE(review): nesting reconstructed -- "/index.meta" is assumed to be
    # appended for online paths only; confirm against the repository
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
509
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as a DOM.

    Raises IOError when no data could be read.
    """
    indexMetaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%indexMetaUrl)

    content = getHttpData(indexMetaUrl)
    if content is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(content)
522
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as a DOM.

    An http:// or https:// URL is fetched as is.  Anything else is treated
    as an online path: "/mpiwg/online" is removed and the digilib Texter
    servlet URL is prepended.  Raises IOError when no data could be read.
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
541
542
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    path    -- path of the index.meta; only used when dom is None, after
               dropping cut trailing components
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta, if available

    docinfo['accessType'] is the type attribute of
    //access-conditions/access, or the lower-cased text of its name element
    when the type is 'group' or 'institution', or None when there is no
    such attribute.  Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    accessType = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs:
        accessType = typeAttrs[0].value
        if accessType in ('group', 'institution'):
            # for these types the group or institution name is what counts
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            accessType = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = accessType
    return docinfo
565
566
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store bibliographical info from an index.meta document in docinfo.

    path    -- path of the index.meta; when dom is None it is shortened by
               cut trailing components and fetched
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta, if available

    Sets 'indexMetaPath', 'bib' (all raw //bib/* fields, if any) and
    'bib_type'.  When the metadata mapping for the bib type has an author
    field, 'author', 'title', 'year' and 'lang' are looked up as well.
    Returns docinfo.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype:
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    # index.meta uses "-" where the mapping uses " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            try:
                docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
            except Exception:
                # best effort: a missing node or mapping entry leaves the
                # field unset, but interrupts are no longer swallowed
                pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # NOTE(review): nesting reconstructed -- the lang lookup is assumed
        # to belong to this branch; confirm against the repository
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except Exception:
            docinfo['lang']=''

    return docinfo
621
622
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from an index.meta document in docinfo['name'].

    path    -- path of the index.meta; only used when dom is None, after
               dropping cut trailing components
    docinfo -- dict to update (a new one is created when None)
    dom     -- already parsed index.meta, if available

    Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameNode = dom.xpath("/resource/name")[0]
    docinfo['name'] = getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
636
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of an index.meta document.

    url     -- path or URL of the index.meta
    dom     -- already parsed index.meta (fetched from url when None)
    docinfo -- dict to update (a new one is created when None)

    Sets image information ('imagePath', 'imageURL', page count), the
    optional 'viewerURL', 'textURL' and 'textURLPath', then adds
    bibliographical, name and access information.  Raises IOError when no
    archive path can be determined.  Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
    if archivePath:
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # an empty archive-path element is treated like a missing one:
        # try to get archive-path from url
        archivePath = None
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error because text mode is the standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # no scheme, i.e. not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])
        # .get(): imagePath is unset when the image tag exists but is empty
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # presentation info overrides the bib info; its URL is the index.meta
        # URL with "index.meta" replaced by the relative presentation path
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
740
741
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Store author, title and year from a presentation info document in docinfo.

    url     -- location of the presentation info document
    docinfo -- dict to update (a new one is created when None)
    dom     -- ignored; the document is always fetched from url

    Each value is the text of the first //author, //title and //date
    element; a field that cannot be read is left untouched.  Returns docinfo.
    """
    if docinfo is None:
        # without a dict the results would be lost and None returned
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        try:
            nodes = dom.xpath(xpath)
            if nodes:
                docinfo[field]=getTextFromNode(nodes[0])
        except Exception:
            # best effort: a broken field must not prevent the others
            pass
    return docinfo
759
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    path    -- image path; "/mpiwg/online" is removed from it
    docinfo -- dict to update (a new one is created when None)
    cut     -- number of trailing path components to drop for the digilib
               directory and the scaler URL

    The index.meta file is assumed to sit one level above the images, so
    bib and access info are read with cut+1.  Returns docinfo.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # the index.meta file is assumed to be one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
780
781
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, building it according to mode.

    mode -- 'texttool' (url points to an index.meta with texttool
            information), 'imagepath' or 'filepath'
    url  -- url which contains display information

    The docinfo stored in the session is reused when its mode and url
    match; a newly built one is stored there.  Raises ValueError for an
    unknown mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        # filepath: url names a file inside the image directory
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
812
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the current request.

    current  -- current page number (converted with getInt)
    start    -- first page of the thumbnail group; when missing or invalid
                it is computed as the first page of the group holding current
    rows/cols -- thumbnail grid size, defaulting to self.thumbrows/thumbcols
    docinfo  -- document info dict; 'numPages' and 'tocSize_<tocMode>' are
                used when present (None is treated as empty)
    viewMode, tocMode -- stored in pageinfo unchanged

    The remaining entries are read from the request with defaults.
    """
    if docinfo is None:
        # both lookups below need a container
        docinfo = {}

    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if 'numPages' in docinfo:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up; // keeps this an integer division
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    request = self.REQUEST
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','regPlusNorm')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        # cached toc: limit the toc page number to the last toc page
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages,toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
864
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store the main configuration of the viewer.

    authgroups is a comma separated list of group names; it is stored as a
    list of stripped, lower-cased names.  Redirects to 'manage_main' when a
    RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
874
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
879
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
887
888	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    # ZopePageTemplate subclass that only sets its own meta_type
    meta_type="DocumentViewer Template"
892
893
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
898
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title for the template
    REQUEST -- when given, the browser is redirected to the management
               screen of the new template
    text and submit are accepted but not used.

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nobody to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
918
919
920

Note: See TracBrowser for help on using the repository browser.

Download in other formats: