Context Navigation

source: documentViewer/documentViewer.py @ 387:580db757e0eb

Last change on this file since 387:580db757e0eb was 387:580db757e0eb, checked in by abukhman, 14 years ago
* empty log message *
File size: 34.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21
22
def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 on the root logger at INFO level.

    The method argument is accepted but not used; callers in this file pass
    a logging level there.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return number converted to int, or int(default) when that fails.

    number  -- any value accepted by int(); None, non-numeric strings and
               similar values trigger the fallback
    default -- fallback value, itself passed through int()
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures are handled; anything else (for example
        # KeyboardInterrupt) is no longer swallowed
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the text nodes directly below nodename.

    Returns "" when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return node printed as XML by Ft.Xml.Domlette.Print in the given encoding."""
    out = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
53
def browserCheck(self):
    """Return a dict of browser flags derived from the request's User-Agent.

    self must provide REQUEST.get_header(). The result has the keys 'ua',
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML'
    (always False), plus 'versIE' when an MSIE version can be extracted.

    Note that this definition replaces the browserCheck imported from
    Products.zogiLib.zogiLib at the top of the module.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header is searched as an empty string instead of crashing
    text = ua or ""

    bt = {'ua': ua, 'isIE': False, 'isN4': False}
    if 'MSIE' in text:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in text

    # the IE version is the second word of the second "; "-separated field
    # after the opening parenthesis, e.g. "(compatible; MSIE 6.0; ...)"
    paren = text.find('(')
    if paren > -1:
        fields = text[paren:].split("; ")
        if len(fields) > 1 and "MSIE" in fields[1]:
            words = fields[1].split(" ")
            if len(words) > 1:
                bt['versIE'] = words[1]

    bt['isMac'] = 'Macintosh' in text
    bt['isWin'] = 'Windows' in text
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return path without its last '/'-separated component ("" if there is no '/')."""
    parent = path.rpartition('/')[0]
    return parent
85
86
87	def getHttpData(url, data=None, num_tries=3, timeout=10):
88	"""returns result from url+data HTTP request"""
89	# we do GET (by appending data to url)
90	if isinstance(data, str) or isinstance(data, unicode):
91	# if data is string then append
92	url = "%s?%s"%(url,data)
93	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
94	# urlencode
95	url = "%s?%s"%(url,urllib.urlencode(data))
96
97	response = None
98	errmsg = None
99	for cnt in range(num_tries):
100	try:
101	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
102	if sys.version_info < (2, 6):
103	# set timeout on socket -- ugly :-(
104	import socket
105	socket.setdefaulttimeout(float(timeout))
106	response = urllib2.urlopen(url)
107	else:
108	response = urllib2.urlopen(url,timeout=float(timeout))
109	# check result?
110	break
111	except urllib2.HTTPError, e:
112	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
113	errmsg = str(e)
114	# stop trying
115	break
116	except urllib2.URLError, e:
117	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
118	errmsg = str(e)
119	# stop trying
120	#break
121
122	if response is not None:
123	data = response.read()
124	response.close()
125	return data
126
127	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
128	#return None
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Document viewer: a Folder with the page templates and settings of the viewer."""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # the Folder management tabs plus one for the configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is protected by 'View management screens'
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
162
163
164	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
165	"""init document viewer"""
166	self.id=id
167	self.title=title
168	self.thumbcols = thumbcols
169	self.thumbrows = thumbrows
170	# authgroups is list of authorized groups (delimited by ,)
171	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
172	# create template folder so we can always use template.something
173
174	templateFolder = Folder('template')
175	#self['template'] = templateFolder # Zope-2.12 style
176	self._setObject('template',templateFolder) # old style
177	try:
178	import MpdlXmlTextServer
179	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
180	#templateFolder['fulltextclient'] = xmlRpcClient
181	templateFolder._setObject('fulltextclient',textServer)
182	except Exception, e:
183	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
184	try:
185	from Products.zogiLib.zogiLib import zogiLib
186	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
187	#templateFolder['zogilib'] = zogilib
188	templateFolder._setObject('zogilib',zogilib)
189	except Exception, e:
190	logging.error("Unable to create zogiLib for zogilib: "+str(e))
191
192
193	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return template.fulltextclient.getTextPage() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
197
def getQuery(self, **args):
    """Return template.fulltextclient.getQuery() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
201
def getPDF(self, **args):
    """Return template.fulltextclient.getPDF() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getPDF(**args)
205
def getSearch(self, **args):
    """Return template.fulltextclient.getSearch() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
209
def getGisPlaces(self, **args):
    """Return template.fulltextclient.getGisPlaces() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
213
def getAllGisPlaces(self, **args):
    """Return template.fulltextclient.getAllGisPlaces() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
217
def getOrigPages(self, **args):
    """Return template.fulltextclient.getOrigPages() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
221
def getNumPages(self, docinfo):
    """Return template.fulltextclient.getNumPages(docinfo)."""
    textServer = self.template.fulltextclient
    return textServer.getNumPages(docinfo)
225
def getNumTextPages(self, docinfo):
    """Return template.fulltextclient.getNumTextPages(docinfo)."""
    textServer = self.template.fulltextclient
    return textServer.getNumTextPages(docinfo)
229
def getTranslate(self, **args):
    """Return template.fulltextclient.getTranslate() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
233
def getLemma(self, **args):
    """Return template.fulltextclient.getLemma() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
237
def getToc(self, **args):
    """Return template.fulltextclient.getToc() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
241
def getTocPage(self, **args):
    """Return template.fulltextclient.getTocPage() called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
245
246
247	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template of the template folder.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto becomes text when docinfo has
                     a textURL or textURLPath entry, images otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)" + "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with a default: the attribute may never have been set on this
    # instance, and a plain attribute access would then raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # a text URL is set, so the text view can be used
        if "textURL" in docinfo or 'textURLPath' in docinfo:
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
279
280	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
    '''
    Render the 'viewer_main' template of the template folder.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param characterNormalization type of text display (reg, norm, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto": # automatic mode selected
        # a text URL is set, so the text view can be used
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    pt = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
318
def generateMarks(self,mk):
    # Builds one "mk=<value>" fragment per mark and concatenates them without
    # any separator; None gives "". A single value is treated as a list of one.
    # NOTE(review): left without a docstring as in the original; in Zope 2 a
    # docstring would presumably make the method URL-publishable -- confirm.
    if mk is None:
        return ""
    if isinstance(mk, list):
        marks = mk
    else:
        marks = [mk]
    return "".join(["mk=%s"%m for m in marks])
328
def getBrowser(self):
    """Return the browser information dict produced by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("XXXXXXXXXXXXXXXX: %s"%browserInfo)
    return browserInfo
334
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    return self.template.zogilib.getDLBaseUrl()
339
def getDocumentViewerURL(self):
    """Return self.absolute_url(), the URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
343
def getStyle(self, idx, selected, style=""):
    """Return style with 'sel' appended when idx equals selected, else style unchanged."""
    if not idx == selected:
        return style
    return style + 'sel'
351
def getLink(self,param=None,val=None):
    """Return the URL of this viewer with the current form parameters, param set to val.

    When val is None the parameter is removed from the query instead.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # if the first call used mode "filepath", switch to "imagepath" on the
    # parent directory of the url
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    query = urllib.urlencode(params)
    return self.REQUEST['URL1']+"?"+query
371
def getLinkAmp(self,param=None,val=None):
    """Return the URL of this viewer with the current form parameters, param set to val.

    When val is None the parameter is removed from the query instead. The
    query string is assembled by hand from urllib.quote()d values.
    """
    # NOTE(review): the name suggests an "&amp;" separator, but the visible
    # source joins with "&" -- confirm against the repository.
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    query = "&".join(pairs)
    return self.REQUEST['URL1']+"?"+query
387
def getInfo_xml(self,url,mode):
    """Return the 'info_xml' template rendered with the docinfo for mode and url."""
    # getattr with a default: the attribute may never have been set on this
    # instance, and a plain attribute access would then raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
397
398
def isAccessible(self, docinfo):
    """Return True when access to the resource described by docinfo is granted.

    docinfo['accessType'] 'free' is always granted. A missing access type or
    one listed in self.authgroups is granted to any user whose name is not
    "Anonymous User". Every other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
418
419
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp about path and store the page count in docinfo.

    path    -- directory path sent as the fn parameter
    docinfo -- dict to update; a new one is created when None
    cut     -- number of trailing path components to strip first

    Sets docinfo['numPages'] from the first //dir/size element (0 when there
    is none) and returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes = dom.xpath("//dir/size")
    # the format string used to lack a placeholder, so sizes was never logged
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
449
def getIndexMetaPath(self,url):
    """Return only the path part of url, rebased onto /mpiwg/online.

    The part of url from the last 'experimental/' or 'permanent/' component
    onwards is appended to "/mpiwg/online/". Returns "" when url contains
    neither component.
    """
    # the pattern as found (".(experimental\|permanent)/(.)") could only match
    # a literal "experimental|permanent", so the result was always ""
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return "/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)
458
459
460
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned unchanged. Anything else is
    treated as an online path: "/mpiwg/online" is removed, the digilib Texter
    servlet URL is put in front and "/index.meta" is appended when missing.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl = metaUrl + "/index.meta"
    return metaUrl
476
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as a DOM."""
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(txt)
489
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as a DOM.

    A url starting with "http://" is used as it is; anything else is treated
    as an online path and requested through the digilib Texter servlet.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
508
509
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo['accessType'].

    path    -- path of the index.meta file; only read when dom is None
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; fetched from path when None
    cut     -- number of trailing path components to strip before fetching

    The access type is the type attribute of //access-conditions/access. For
    'group' and 'institution' it is replaced by the lower-cased text of the
    access name element when there is one. None is stored when the document
    has no access type.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access = acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # a missing name used to raise IndexError; keep the raw type
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
532
533
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Add bibliographical info from an index.meta document to docinfo.

    path    -- path of the index.meta file; only fetched when dom is None
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; fetched from path when None
    cut     -- number of trailing path components to strip before fetching

    Sets 'indexMetaPath', 'bib' (all children of the bib element, when there
    are any) and 'bib_type'. When a field mapping exists for the bib type,
    'author', 'title', 'year' and 'lang' are filled in on a best-effort basis.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path);

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put in all raw bib fields as dict "bib"
    bib = dom.xpath("//bib/*")
    if bib and len(bib)>0:
        bibinfo = {}
        for e in bib:
            bibinfo[e.localName] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype=dom.xpath("//bib/@type")
    if bibtype and (len(bibtype)>0):
        bibtype=bibtype[0].value
    else:
        bibtype="generic"

    bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # each field is looked up on its own; a missing node or mapping entry
        # simply leaves that field unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        except: pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        except: pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
        except: pass
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # NOTE(review): the visible source has lost its indentation; the lang
        # lookup is assumed to belong to this mapping branch -- confirm.
        try:
            docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
        except:
            docinfo['lang']=''

    return docinfo
588
589
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from an index.meta document in docinfo['name'].

    path    -- path of the index.meta file; only read when dom is None
    docinfo -- dict to update; a new one is created when None
    dom     -- already parsed index.meta; fetched from path when None
    cut     -- number of trailing path components to strip before fetching

    docinfo['name'] is set to '' when the document has no /resource/name.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    names = dom.xpath("/resource/name")
    if names:
        docinfo['name'] = getTextFromNode(names[0])
    else:
        # a missing name used to raise IndexError here, although
        # getDocinfoFromTextTool treats it as a warning only
        logging.warning("documentViewer (getnamefromindexmeta) resource/name missing in: %s"%(path))
        docinfo['name'] = ''
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
603
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of the index.meta document at url.

    url     -- location of the index.meta file
    dom     -- already parsed index.meta; fetched from url when None
    docinfo -- dict to update; a new one is created when None

    Depending on what the document contains this sets 'imagePath',
    'imageURL', 'viewerURL', 'textURL' and 'textURLPath', and then adds the
    bibliographical, name and access information.

    Raises IOError when no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path (startswith also copes with an empty element,
        # where indexing the first character raised IndexError)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])
    else:
        # no image tag is not an error any more because text mode is now standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        docinfo['textURLPath'] = getTextFromNode(textUrls[0])
        # .get(): 'imagePath' is not set when the image tag exists but is
        # empty, which used to raise KeyError here
        if not docinfo.get('imagePath'):
            # text-only, no page images
            docinfo = self.getNumTextPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationUrls:
        # overwrite the bib fields with the presentation information; the
        # presentation URL is the index.meta URL with 'index.meta' replaced
        # by the relative path of the presentation info
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
707
708
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Add author, title and year from the presentation info document at url to docinfo.

    url     -- location of the presentation info XML
    docinfo -- dict to update; a new one is created when None
    dom     -- not used; the presentation document is always fetched from url

    Fields whose element (//author, //title, //date) is missing are left
    untouched. Returns docinfo.
    """
    if docinfo is None:
        # with None every assignment below used to fail silently and None
        # was returned
        docinfo = {}
    dom = self.getPresentationInfoXML(url)
    for (key, expr) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(expr)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
726
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a document given by the path to its images.

    The index.meta file is assumed to be one level above the image directory.

    path    -- image path; "/mpiwg/online" is removed from it
    docinfo -- dict to update; a new one is created when None
    cut     -- number of trailing path components to strip for the image URL
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # the Scaler URL uses the shortened path, the meta lookups get the
    # unshortened one together with a cut count
    shortened = path
    for x in range(cut):
        shortened = getParentDir(shortened)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+shortened)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+shortened

    # the index.meta file is assumed to be one level higher than the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    return docinfo
747
748
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    mode is one of 'texttool', 'imagepath' or 'filepath'. The result is kept
    in the session and reused while mode and url stay the same.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    if mode not in ("texttool", "imagepath", "filepath"):
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    else:
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
775
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
    """Return a dict with the paging state and the request options of the viewer.

    current -- current page number; converted with getInt
    start   -- first page of the thumbnail group; computed from current and
               the group size when it cannot be converted to an int
    rows, cols -- thumbnail grid size; default to self.thumbrows / self.thumbcols
    docinfo -- optional docinfo dict; 'numPages' and 'tocSize_<tocMode>' are
               read from it when present
    viewMode, tocMode -- stored unchanged in the result
    characterNormalization -- not used; the value is read from the REQUEST
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current

    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # default start: first page of the group of grpsize pages containing current
    defaultStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=defaultStart)
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize

    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode

    # options taken directly from the request
    request = self.REQUEST
    pageinfo['characterNormalization'] = request.get('characterNormalization','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo defaults to None, and the membership test used to raise
    # TypeError in that case
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        # never point past the last toc page
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
827
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the settings of the viewer and redirect to manage_main when RESPONSE is given.

    authgroups is a comma separated list; the entries are stored stripped
    and lower-cased.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
837
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
842
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in self; redirect to manage_main when RESPONSE is given."""
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
850
851	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass used for document viewer templates."""
    meta_type = "DocumentViewer Template"
855
856
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
861
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title set on the template
    text, submit -- accepted but not used
    REQUEST -- when given, the response is redirected to the manage_main
               page of the new object

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template, closing the file even when reading fails
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code rather than through the web: there is nothing to
        # redirect, and the lookups below used to fail on None
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
881
882
883

Note: See TracBrowser for help on using the repository browser.

Download in other formats: