Context Navigation

source: documentViewer/documentViewer.py @ 390:6f8910846430

Last change on this file since 390:6f8910846430 was 390:6f8910846430, checked in by abukhman, 14 years ago
* empty log message *
File size: 34.2 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log ``txt`` immediately followed by ``txt2`` through the root logger.

    txt    -- first part of the message
    method -- logging level; used when it is an integer (callers in this
              file pass ``logging.INFO``), otherwise INFO is used
    txt2   -- second part of the message, appended without a separator
    """
    level = logging.INFO
    if isinstance(method, int):
        # honour the level the caller asked for instead of ignoring it
        level = method
    logging.log(level, txt + txt2)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only conversion failures (wrong type, unparsable string, infinite float)
    trigger the fallback; unrelated errors are no longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when ``nodename`` is None or has no text children; text inside
    nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Return ``node`` written out as an XML string in the given encoding.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory buffer.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xmlText = stream.getvalue()
    stream.close()
    return xmlText
53
def browserCheck(self):
    """Inspect the request's user agent and return a dict of browser flags.

    Keys: 'ua' (the raw header value), 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' (always False) and, when an MSIE
    version can be parsed, 'versIE'.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # the header value may be None; test against "" so detection cannot raise
    agent = ua or ""

    bt = {'ua': ua, 'isIE': False, 'isN4': False}
    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    try:
        # second "; "-separated token inside the parentheses, e.g. "MSIE 6.0"
        nav = agent[agent.find('('):]
        ie = nav.split("; ")[1]
        if 'MSIE' in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        # user agent does not have the expected shape: no version reported
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path containing no '/' yields the empty string.
    """
    lastSlash = path.rfind('/')
    if lastSlash < 0:
        return ''
    return path[:lastSlash]
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body.

    url       -- address to fetch
    data      -- optional query: a string is appended as is, a dict/list/tuple
                 is urlencoded first
    num_tries -- number of attempts; only URLError failures are retried,
                 an HTTPError ends the loop immediately
    timeout   -- timeout in seconds for each attempt

    Raises IOError when no attempt produced a response.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        query = data
    elif isinstance(data, (dict, list, tuple)):
        query = urllib.urlencode(data)
    if query is not None:
        # do not add a second '?' when url already carries a query string
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s" % (url, separator, query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument here: set the process-wide
                # socket default instead -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # the server answered with an error status: stop trying
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            break
        except urllib2.URLError:
            # connection level problem: fall through to the next attempt
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Zope folder that displays a document as page images and/or text.

    Document information is read from index.meta files and from a digilib
    server (``digilibBaseUrl``); text related calls are delegated to the
    ``fulltextclient`` object inside the ``template`` sub-folder.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """Set up the viewer and its 'template' folder.

        The template folder receives a 'fulltextclient' (MpdlXmlTextServer)
        and a 'zogilib' object; failure to create either one is logged and
        otherwise ignored.
        """
        self.id=id
        self.title=title
        if digilibBaseUrl is not None:
            # the argument used to be dropped; keep it when it is given
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))

    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getQuery(**args)

    def getPDF(self, **args):
        """get PDF (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getPDF(**args)

    def getSearch(self, **args):
        """get search (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getOrigPages(self, **args):
        """get original page number (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getOrigPages(**args)

    def getNumPages(self, docinfo):
        """get numpages (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getNumPages(docinfo)

    def getNumTextPages(self, docinfo):
        """get numpages text (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getNumTextPages(docinfo)

    def getTranslate(self, **args):
        """get translate (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage (delegates to template.fulltextclient)"""
        return self.template.fulltextclient.getTocPage(**args)

    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        render the document with the thumbs_main_rss template
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param start: first page of the thumbnail group (passed to getPageinfo)
        @param pn: current page number
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        # getattr: the attribute may never have been set on this instance
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        # ZDES
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # auto mode selected
            if "textURL" in docinfo or 'textURLPath' in docinfo: # a text URL is set
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
        '''
        render the document with the viewer_main template
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # auto mode selected
            if 'textURL' in docinfo or 'textURLPath' in docinfo: # a text URL is set
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Returns "mk=<value>" for every mark in mk (a single value or a list),
        # concatenated without a separator; "" when mk is None.
        # Documented with comments on purpose: the original method has no
        # docstring and gaining one could change how Zope treats it.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "".join(["mk=%s"%m for m in mk])

    def getBrowser(self):
        """returns the browser information dict produced by browserCheck"""
        return browserCheck(self)

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        return self.template.zogilib.getDLBaseUrl()

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns style with 'sel' appended if idx == selected, else style unchanged"""
        if idx == selected:
            return style + 'sel'
        return style

    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        The link carries the current request's form parameters; val=None
        removes param. mode 'filepath' is rewritten to 'imagepath' with the
        url shortened by one path component.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        if params.get("mode", None) == "filepath": # first call used filepath: switch to imagepath now
            params["mode"] = "imagepath"
            params["url"] = getParentDir(params["url"])

        # quote values and assemble into query string
        ps = urllib.urlencode(params)
        return self.REQUEST['URL1']+"?"+ps

    def getLinkAmp(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Like getLink but without the 'filepath' rewrite; values are quoted
        with urllib.quote.
        """
        # NOTE(review): the name suggests the parameters should be joined
        # with "&amp;" rather than "&" -- confirm against the templates.
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        logging.debug("XYXXXXX: %s"%repr(params.items()))
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        return self.REQUEST['URL1']+"?"+ps

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""
        # getattr: the attribute may never have been set on this instance
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' is always accessible; a missing access type or one listed in
        authgroups requires a user other than "Anonymous User"; anything
        else is refused.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """puts the number of pages reported by digilib's dirInfo into docinfo['numPages']

        path is shortened by ``cut`` components before the request.
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string lacked a placeholder, so sizes was never logged
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%sizes)

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: /mpiwg/online/(experimental|permanent)/<rest>, or "" if url has no such part"""
        # the pattern had lost its '*' quantifiers and escaped the '|', so it
        # could only match the literal text "experimental|permanent"
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))

    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url"""
        if url.startswith("http://"):
            # real URL
            return url

        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta"""
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return Parse(txt)

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url"""
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return Parse(txt)

    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom"""
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                # for these types the access name is used instead
                access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()

        docinfo['accessType'] = access
        return docinfo

    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom"""
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # each field is best effort: a missing element leaves the key unset
            try:
                docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
            except Exception:
                pass
            try:
                docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
            except Exception:
                pass
            try:
                docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
            except Exception:
                pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            try:
                docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
            except Exception:
                docinfo['lang']=''

        return docinfo

    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom"""
        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image path/URL, viewer URL, text URLs, bib, name
        and access information. Raises IOError if no archive path can be
        determined.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty value)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # no image tag is fine because text mode is now standard
            imageDir = ""
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        # old style text URL
        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls:
            docinfo['textURLPath'] = getTextFromNode(textUrls[0])
            # .get: imagePath is unset when the image tag exists but is empty
            if not docinfo.get('imagePath'):
                # text-only, no page images
                docinfo = self.getNumTextPages(docinfo)

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls: # overwrite with the presentation information
            # the presentation url is the metadata url with index.meta
            # replaced by the relative path to the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo

    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The dom argument is not used; the info document is always fetched
        from url.
        """
        if docinfo is None:
            # without this every assignment below failed silently and None was returned
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        # each field is best effort: a missing element leaves the key unset
        try:
            docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
        except Exception:
            pass
        try:
            docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
        except Exception:
            pass
        try:
            docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
        except Exception:
            pass
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

        # the index.meta file is assumed one level above the images: cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo

    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        A docinfo stored in the session is reused when its mode and url
        match; a newly built one is stored in the session. Raises ValueError
        for an unknown mode.
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        session = self.REQUEST.SESSION
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
        """returns pageinfo with the given parameters

        Thumbnail grouping is computed from rows/cols (defaulting to
        thumbrows/thumbcols); the query related entries are read from the
        request, including characterNormalization (the argument of that name
        is not used).
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start: first page of the group that contains current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = int(np / grpsize)
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode

        request = self.REQUEST
        pageinfo['characterNormalization'] = request.get('characterNormalization','')
        pageinfo['query'] = request.get('query','')
        pageinfo['optionsClose']= request.get('optionsClose','')
        pageinfo['queryType'] = request.get('queryType','')
        pageinfo['querySearch'] =request.get('querySearch', 'fulltext')
        pageinfo['textPN'] = request.get('textPN','1')
        pageinfo['highlightQuery'] = request.get('highlightQuery','')
        pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =request.get('queryPageSize', '10')
        pageinfo['tocPN'] = request.get('tocPN', '1')
        toc = int (pageinfo['tocPN'])
        pageinfo['textPages'] =int (toc)

        tocSizeKey = 'tocSize_%s'%tocMode
        # docinfo defaults to None, which does not support 'in'
        if (docinfo is not None) and (tocSizeKey in docinfo):
            tocSize = int(docinfo[tocSizeKey])
            tocPageSize = int(pageinfo['tocPageSize'])
            # cached toc: number of toc pages, rounded up
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
        pageinfo['searchPN'] =request.get('searchPN','1')
        pageinfo['sn'] =request.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        Redirects to manage_main when RESPONSE is given.
        """
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
840
def manage_AddDocumentViewerForm(self):
    """Render the 'zpt/addDocumentViewer' form in the context of self."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
845
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer named id inside self.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
853
854	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class for the document viewer.

    Adds nothing to ZopePageTemplate except its own meta_type.
    """
    # meta_type string for this class
    meta_type="DocumentViewer Template"
858
859
def manage_addDocumentViewerTemplateForm(self):
    """Render the 'zpt/addDocumentViewerTemplate' form in the context of self."""
    addForm = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return addForm.__of__(self)()
864
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate named id, filled from zpt/viewer_main.zpt.

    id      -- id of the new template object
    title   -- optional title set on the template
    text    -- not used; the content always comes from the packaged file
    REQUEST -- when given, the response is redirected to the new object's
               manage_main; when None no redirect is attempted
    submit  -- not used

    Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        # the file handle used to be left open
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called without a request (the default): nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
884
885
886

Note: See TracBrowser for help on using the repository browser.

Download in other formats: