Context Navigation

source: documentViewer/documentViewer.py @ 455:0a53fea83df7

elementtree

Last change on this file since 455:0a53fea83df7 was 455:0a53fea83df7, checked in by casties, 13 years ago
more work renovating
File size: 39.1 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	#from Ft.Xml import EMPTY_NAMESPACE, Parse
11	#import Ft.Xml.Domlette
12
13	import xml.etree.ElementTree as ET
14
15	import os.path
16	import sys
17	import urllib
18	import urllib2
19	import logging
20	import math
21	import urlparse
22	import re
23	import string
24
def logger(txt,method,txt2):
    """Write txt followed by txt2 to the root logger at INFO level.

    The ``method`` argument is accepted for call compatibility but is not used.
    """
    message = txt + txt2
    logging.info(message)
28
29
def getInt(number, default=0):
    """Return ``number`` converted to int, or ``int(default)`` if that fails.

    Only the errors int() raises for unusable input are handled (wrong type,
    unparsable string, infinite float), so KeyboardInterrupt and SystemExit
    are no longer swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
36
def getText(node):
    """Return the concatenated text content of an ElementTree node.

    Collects the node's own text, then for every child element the child's
    text content (recursively) followed by the child's tail text.
    Returns "" if node is None.
    """
    if node is None:
        return ""

    text = node.text or ""
    for e in node:
        # recurse into the child (the original called the undefined name "gettext")
        text += getText(e)
        if e.tail:
            text += e.tail

    return text


# alias kept for callers that use the older name
getTextFromNode = getText
58
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of node as produced by ET.tostring().

    NOTE: the ``encoding`` argument is accepted but not passed on;
    ET.tostring() is called with its default encoding.
    """
    return ET.tostring(node)
69
def browserCheck(self):
    """Inspect the request's User-Agent header and return a dict of browser facts.

    NOTE: this definition replaces the ``browserCheck`` name imported from
    Products.zogiLib.zogiLib at the top of the module.

    Keys of the returned dict:
      ua                -- the User-Agent string ("" if the header is missing)
      isIE, isN4        -- MSIE / Netscape 4 flags
      versIE, versFirefox, versSafariChrome, versOpera
                        -- version strings, "" when not detected
      isMac, isWin, isIEWin, isIEMac -- platform flags
      staticHTML        -- always False
    """
    bt = {}
    # a request without User-Agent header yields None; treat it as empty
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if ua.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (ua.find('Mozilla/4.') > -1)

    # remainder of the UA from the first '(' resp. the first ')' onwards
    # (if the character is absent find() returns -1 and only the last char remains)
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # All detections are best effort: a UA that lacks the expected tokens
    # raises IndexError and simply leaves the version empty.

    # Safari or Chrome identification: tokens after the second ')'
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        product = tokens[1]
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = product.split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated field inside the parentheses
    try:
        ie = nav.split("; ")[1]
        if ie.find("MSIE") > -1:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third space-separated token after the first ')'
    try:
        ie1 = nav1.split(" ")[2]
        if ie1.find("Firefox") > -1:
            nav5 = ie1.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nav5[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if ua.find("Opera") > -1:
            operaTail = nav[nav.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = ua.find('Macintosh') > -1
    bt['isWin'] = ua.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
130
131
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    segments = path.split('/')
    return '/'.join(segments[:-1])
135
136
137	def getHttpData(url, data=None, num_tries=3, timeout=10):
138	"""returns result from url+data HTTP request"""
139	# we do GET (by appending data to url)
140	if isinstance(data, str) or isinstance(data, unicode):
141	# if data is string then append
142	url = "%s?%s"%(url,data)
143	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
144	# urlencode
145	url = "%s?%s"%(url,urllib.urlencode(data))
146
147	response = None
148	errmsg = None
149	for cnt in range(num_tries):
150	try:
151	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
152	if sys.version_info < (2, 6):
153	# set timeout on socket -- ugly :-(
154	import socket
155	socket.setdefaulttimeout(float(timeout))
156	response = urllib2.urlopen(url)
157	else:
158	response = urllib2.urlopen(url,timeout=float(timeout))
159	# check result?
160	break
161	except urllib2.HTTPError, e:
162	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
163	errmsg = str(e)
164	# stop trying
165	break
166	except urllib2.URLError, e:
167	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
168	errmsg = str(e)
169	# stop trying
170	#break
171
172	if response is not None:
173	data = response.read()
174	response.close()
175	return data
176
177	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
178	#return None
179
180	##
181	## documentViewer class
182	##
183	class documentViewer(Folder):
184	"""document viewer"""
185	meta_type="Document viewer"
186
187	security=ClassSecurityInfo()
188	manage_options=Folder.manage_options+(
189	{'label':'main config','action':'changeDocumentViewerForm'},
190	)
191
192	# templates and forms
193	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
194	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
195	toc_text = PageTemplateFile('zpt/toc_text', globals())
196	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
197	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
198	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
199	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
200	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
201	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
202	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
203	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
204	head_main = PageTemplateFile('zpt/head_main', globals())
205	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
206	info_xml = PageTemplateFile('zpt/info_xml', globals())
207
208
209	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
210	security.declareProtected('View management screens','changeDocumentViewerForm')
211	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
212
213
214	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
215	"""init document viewer"""
216	self.id=id
217	self.title=title
218	self.thumbcols = thumbcols
219	self.thumbrows = thumbrows
220	# authgroups is list of authorized groups (delimited by ,)
221	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
222	# create template folder so we can always use template.something
223
224	templateFolder = Folder('template')
225	#self['template'] = templateFolder # Zope-2.12 style
226	self._setObject('template',templateFolder) # old style
227	try:
228	import MpdlXmlTextServer
229	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
230	#templateFolder['fulltextclient'] = xmlRpcClient
231	templateFolder._setObject('fulltextclient',textServer)
232	except Exception, e:
233	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
234	try:
235	from Products.zogiLib.zogiLib import zogiLib
236	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
237	#templateFolder['zogilib'] = zogilib
238	templateFolder._setObject('zogilib',zogilib)
239	except Exception, e:
240	logging.error("Unable to create zogiLib for zogilib: "+str(e))
241
242
243	# proxy text server methods to fulltextclient
244	def getTextPage(self, **args):
245	"""get page"""
246	return self.template.fulltextclient.getTextPage(**args)
247
248	def getOrigPages(self, **args):
249	"""get page"""
250	return self.template.fulltextclient.getOrigPages(**args)
251
252	def getOrigPagesNorm(self, **args):
253	"""get page"""
254	return self.template.fulltextclient.getOrigPagesNorm(**args)
255
256	def getQuery(self, **args):
257	"""get query in search"""
258	return self.template.fulltextclient.getQuery(**args)
259
260	def getSearch(self, **args):
261	"""get search"""
262	return self.template.fulltextclient.getSearch(**args)
263
264	def getGisPlaces(self, **args):
265	"""get gis places"""
266	return self.template.fulltextclient.getGisPlaces(**args)
267
268	def getAllGisPlaces(self, **args):
269	"""get all gis places """
270	return self.template.fulltextclient.getAllGisPlaces(**args)
271
272	def getTranslate(self, **args):
273	"""get translate"""
274	return self.template.fulltextclient.getTranslate(**args)
275
276	def getLemma(self, **args):
277	"""get lemma"""
278	return self.template.fulltextclient.getLemma(**args)
279
280	def getLemmaQuery(self, **args):
281	"""get query"""
282	return self.template.fulltextclient.getLemmaQuery(**args)
283
284	def getLex(self, **args):
285	"""get lex"""
286	return self.template.fulltextclient.getLex(**args)
287
288	def getToc(self, **args):
289	"""get toc"""
290	return self.template.fulltextclient.getToc(**args)
291
292	def getTocPage(self, **args):
293	"""get tocpage"""
294	return self.template.fulltextclient.getTocPage(**args)
295
296
297	security.declareProtected('View','thumbs_rss')
298	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
299	'''
300	view it
301	@param mode: defines how to access the document behind url
302	@param url: url which contains display information
303	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
304
305	'''
306	logging.debug("HHHHHHHHHHHHHH:load the rss")
307	logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
308
309	if not hasattr(self, 'template'):
310	# create template folder if it doesn't exist
311	self.manage_addFolder('template')
312
313	if not self.digilibBaseUrl:
314	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
315
316	docinfo = self.getDocinfo(mode=mode,url=url)
317	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
318	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
319	''' ZDES '''
320	pt = getattr(self.template, 'thumbs_main_rss')
321
322	if viewMode=="auto": # automodus gewaehlt
323	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
324	viewMode="text"
325	else:
326	viewMode="images"
327
328	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
329
330	security.declareProtected('View','index_html')
331	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
332	'''
333	view it
334	@param mode: defines how to access the document behind url
335	@param url: url which contains display information
336	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
337	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
338	@param characterNormalization type of text display (reg, norm, none)
339	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
340	'''
341
342	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
343
344	if not hasattr(self, 'template'):
345	# this won't work
346	logging.error("template folder missing!")
347	return "ERROR: template folder missing!"
348
349	if not getattr(self, 'digilibBaseUrl', None):
350	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
351
352	docinfo = self.getDocinfo(mode=mode,url=url)
353
354	if tocMode != "thumbs":
355	# get table of contents
356	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
357
358	# auto viewMode: text_dict if text else images
359	if viewMode=="auto":
360	if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
361	#texturl gesetzt und textViewer konfiguriert
362	viewMode="text_dict"
363	else:
364	viewMode="images"
365
366	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
367
368	if viewMode != 'images' and docinfo.get('textURLPath', None):
369	# get full text page
370	page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
371	pageinfo['textPage'] = page
372
373	# get template /template/viewer_main
374	pt = getattr(self.template, 'viewer_main')
375	# and execute with parameters
376	return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
377
378	def generateMarks(self,mk):
379	ret=""
380	if mk is None:
381	return ""
382	if not isinstance(mk, list):
383	mk=[mk]
384	for m in mk:
385	ret+="mk=%s"%m
386	return ret
387
388
389	def getBrowser(self):
390	"""getBrowser the version of browser """
391	bt = browserCheck(self)
392	logging.debug("BROWSER VERSION: %s"%(bt))
393	return bt
394
395	def findDigilibUrl(self):
396	"""try to get the digilib URL from zogilib"""
397	url = self.template.zogilib.getDLBaseUrl()
398	return url
399
400	def getDocumentViewerURL(self):
401	"""returns the URL of this instance"""
402	return self.absolute_url()
403
404	def getStyle(self, idx, selected, style=""):
405	"""returns a string with the given style and append 'sel' if path == selected."""
406	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
407	if idx == selected:
408	return style + 'sel'
409	else:
410	return style
411
412	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
413	"""returns URL to documentviewer with parameter param set to val or from dict params"""
414	# copy existing request params
415	urlParams=self.REQUEST.form.copy()
416	# change single param
417	if param is not None:
418	if val is None:
419	if urlParams.has_key(param):
420	del urlParams[param]
421	else:
422	urlParams[param] = str(val)
423
424	# change more params
425	if params is not None:
426	for k in params.keys():
427	v = params[k]
428	if v is None:
429	# val=None removes param
430	if urlParams.has_key(k):
431	del urlParams[k]
432
433	else:
434	urlParams[k] = v
435
436	# FIXME: does this belong here?
437	if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
438	urlParams["mode"] = "imagepath"
439	urlParams["url"] = getParentDir(urlParams["url"])
440
441	# quote values and assemble into query string (not escaping '/')
442	ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
443	#ps = urllib.urlencode(urlParams)
444	if baseUrl is None:
445	baseUrl = self.REQUEST['URL1']
446
447	url = "%s?%s"%(baseUrl, ps)
448	return url
449
450
451	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
452	"""link to documentviewer with parameter param set to val"""
453	return self.getLink(param, val, params, baseUrl, '&')
454
455	def getInfo_xml(self,url,mode):
456	"""returns info about the document as XML"""
457
458	if not self.digilibBaseUrl:
459	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
460
461	docinfo = self.getDocinfo(mode=mode,url=url)
462	pt = getattr(self.template, 'info_xml')
463	return pt(docinfo=docinfo)
464
465	def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
466	"""returns new option state"""
467	if not self.REQUEST.SESSION.has_key(optionName):
468	# not in session -- initial
469	opt = {'lastState': newState, 'state': initialState}
470	else:
471	opt = self.REQUEST.SESSION.get(optionName)
472	if opt['lastState'] != newState:
473	# state in session has changed -- toggle
474	opt['state'] = not opt['state']
475	opt['lastState'] = newState
476
477	self.REQUEST.SESSION[optionName] = opt
478	return opt['state']
479
480	def isAccessible(self, docinfo):
481	"""returns if access to the resource is granted"""
482	access = docinfo.get('accessType', None)
483	logging.debug("documentViewer (accessOK) access type %s"%access)
484	if access is not None and access == 'free':
485	logging.debug("documentViewer (accessOK) access is free")
486	return True
487	elif access is None or access in self.authgroups:
488	# only local access -- only logged in users
489	user = getSecurityManager().getUser()
490	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
491	if user is not None:
492	#print "user: ", user
493	return (user.getUserName() != "Anonymous User")
494	else:
495	return False
496
497	logging.error("documentViewer (accessOK) unknown access type %s"%access)
498	return False
499
500
501	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
502	"""gibt param von dlInfo aus"""
503	if docinfo is None:
504	docinfo = {}
505
506	for x in range(cut):
507	path=getParentDir(path)
508
509	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
510
511	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
512
513	txt = getHttpData(infoUrl)
514	if txt is None:
515	raise IOError("Unable to get dir-info from %s"%(infoUrl))
516
517	dom = ET.fromstring(txt)
518	#dom = Parse(txt)
519	size=getText(dom.find("size"))
520	#sizes=dom.xpath("//dir/size")
521	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
522
523	if size:
524	docinfo['numPages'] = int(size)
525	else:
526	docinfo['numPages'] = 0
527
528	# TODO: produce and keep list of image names and numbers
529
530	return docinfo
531
532	def getIndexMetaPath(self,url):
533	"""gib nur den Pfad zurueck"""
534	regexp = re.compile(r".(experimental\|permanent)/(.)")
535	regpath = regexp.match(url)
536	if (regpath==None):
537	return ""
538	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
539	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
540
541
542
543	def getIndexMetaUrl(self,url):
544	"""returns utr of index.meta document at url"""
545
546	metaUrl = None
547	if url.startswith("http://"):
548	# real URL
549	metaUrl = url
550	else:
551	# online path
552	server=self.digilibBaseUrl+"/servlet/Texter?fn="
553	metaUrl=server+url.replace("/mpiwg/online","")
554	if not metaUrl.endswith("index.meta"):
555	metaUrl += "/index.meta"
556
557	return metaUrl
558
559	def getDomFromIndexMeta(self, url):
560	"""get dom from index meta"""
561	dom = None
562	metaUrl = self.getIndexMetaUrl(url)
563
564	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
565	txt=getHttpData(metaUrl)
566	if txt is None:
567	raise IOError("Unable to read index meta from %s"%(url))
568
569	dom = ET.fromstring(txt)
570	#dom = Parse(txt)
571	return dom
572
573	def getPresentationInfoXML(self, url):
574	"""returns dom of info.xml document at url"""
575	dom = None
576	metaUrl = None
577	if url.startswith("http://"):
578	# real URL
579	metaUrl = url
580	else:
581	# online path
582	server=self.digilibBaseUrl+"/servlet/Texter?fn="
583	metaUrl=server+url.replace("/mpiwg/online","")
584
585	txt=getHttpData(metaUrl)
586	if txt is None:
587	raise IOError("Unable to read infoXMLfrom %s"%(url))
588
589	dom = ET.fromstring(txt)
590	#dom = Parse(txt)
591	return dom
592
593
594	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
595	"""gets authorization info from the index.meta file at path or given by dom"""
596	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
597
598	access = None
599
600	if docinfo is None:
601	docinfo = {}
602
603	if dom is None:
604	for x in range(cut):
605	path=getParentDir(path)
606	dom = self.getDomFromIndexMeta(path)
607
608	acc = dom.find(".//access-conditions/access")
609	if acc is not None:
610	acctype = acc.get('type')
611	#acctype = dom.xpath("//access-conditions/access/@type")
612	if acctype:
613	access=acctype
614	if access in ['group', 'institution']:
615	access = dom.find(".//access-conditions/access/name").text.lower()
616
617	docinfo['accessType'] = access
618	return docinfo
619
620
621	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
622	"""gets bibliographical info from the index.meta file at path or given by dom"""
623	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
624
625	if docinfo is None:
626	docinfo = {}
627
628	if dom is None:
629	for x in range(cut):
630	path=getParentDir(path)
631	dom = self.getDomFromIndexMeta(path)
632
633	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
634
635	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
636	# put in all raw bib fields as dict "bib"
637	bib = dom.find(".//bib")
638	#bib = dom.xpath("//bib/*")
639	if bib is not None:
640	bibinfo = {}
641	for e in bib:
642	bibinfo[e.tag] = getText(e)
643
644	docinfo['bib'] = bibinfo
645
646	# extract some fields (author, title, year) according to their mapping
647	metaData=self.metadata.main.meta.bib
648	bibtype=bib.get("type")
649	#bibtype=dom.xpath("//bib/@type")
650	if not bibtype:
651	bibtype="generic"
652
653	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
654	docinfo['bib_type'] = bibtype
655	bibmap=metaData.generateMappingForType(bibtype)
656	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
657	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
658	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
659	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0:
660	try:
661	docinfo['author']=getText(bib.find(bibmap['author'][0]))
662	except: pass
663	try:
664	docinfo['title']=getText(bib.find(bibmap['title'][0]))
665	except: pass
666	try:
667	docinfo['year']=getText(bib.find(bibmap['year'][0]))
668	except: pass
669
670	# ROC: why is this here?
671	# logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
672	# try:
673	# docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])
674	# except:
675	# docinfo['lang']=''
676	# try:
677	# docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])
678	# except:
679	# docinfo['city']=''
680	# try:
681	# docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])
682	# except:
683	# docinfo['number_of_pages']=''
684	# try:
685	# docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])
686	# except:
687	# docinfo['series_volume']=''
688	# try:
689	# docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])
690	# except:
691	# docinfo['number_of_volumes']=''
692	# try:
693	# docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])
694	# except:
695	# docinfo['translator']=''
696	# try:
697	# docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])
698	# except:
699	# docinfo['edition']=''
700	# try:
701	# docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])
702	# except:
703	# docinfo['series_author']=''
704	# try:
705	# docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])
706	# except:
707	# docinfo['publisher']=''
708	# try:
709	# docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])
710	# except:
711	# docinfo['series_title']=''
712	# try:
713	# docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])
714	# except:
715	# docinfo['isbn_issn']=''
716	return docinfo
717
718
719	# TODO: is this needed?
720	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
721	"""gets name info from the index.meta file at path or given by dom"""
722	if docinfo is None:
723	docinfo = {}
724
725	if dom is None:
726	for x in range(cut):
727	path=getParentDir(path)
728	dom = self.getDomFromIndexMeta(path)
729
730	docinfo['name']=getText(dom.find("name"))
731	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
732	return docinfo
733
734	def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
735	"""parse texttool tag in index meta"""
736	logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
737	if docinfo is None:
738	docinfo = {}
739	if docinfo.get('lang', None) is None:
740	docinfo['lang'] = '' # default keine Sprache gesetzt
741	if dom is None:
742	dom = self.getDomFromIndexMeta(url)
743
744	archivePath = None
745	archiveName = None
746
747	archiveName = getText(dom.find("name"))
748	if not archiveName:
749	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
750
751	archivePath = getText(dom.find("archive-path"))
752	if archivePath:
753	# clean up archive path
754	if archivePath[0] != '/':
755	archivePath = '/' + archivePath
756	if archiveName and (not archivePath.endswith(archiveName)):
757	archivePath += "/" + archiveName
758	else:
759	# try to get archive-path from url
760	logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
761	if (not url.startswith('http')):
762	archivePath = url.replace('index.meta', '')
763
764	if archivePath is None:
765	# we balk without archive-path
766	raise IOError("Missing archive-path (for text-tool) in %s" % (url))
767
768	imageDir = getText(dom.find(".//texttool/image"))
769
770	if not imageDir:
771	# we balk with no image tag / not necessary anymore because textmode is now standard
772	#raise IOError("No text-tool info in %s"%(url))
773	imageDir = ""
774	#xquery="//pb"
775	docinfo['imagePath'] = "" # keine Bilder
776	docinfo['imageURL'] = ""
777
778	if imageDir and archivePath:
779	#print "image: ", imageDir, " archivepath: ", archivePath
780	imageDir = os.path.join(archivePath, imageDir)
781	imageDir = imageDir.replace("/mpiwg/online", '')
782	docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
783	docinfo['imagePath'] = imageDir
784
785	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
786
787	viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
788	if viewerUrl:
789	docinfo['viewerURL'] = viewerUrl
790
791	# old style text URL
792	textUrl = getText(dom.find(".//texttool/text"))
793	if textUrl:
794	if urlparse.urlparse(textUrl)[0] == "": #keine url
795	textUrl = os.path.join(archivePath, textUrl)
796	# fix URLs starting with /mpiwg/online
797	if textUrl.startswith("/mpiwg/online"):
798	textUrl = textUrl.replace("/mpiwg/online", '', 1)
799
800	docinfo['textURL'] = textUrl
801
802	# new style text-url-path
803	textUrl = getText(dom.find(".//texttool/text-url-path"))
804	if textUrl:
805	docinfo['textURLPath'] = textUrl
806	textUrlkurz = string.split(textUrl, ".")[0]
807	docinfo['textURLPathkurz'] = textUrlkurz
808	#if not docinfo['imagePath']:
809	# text-only, no page images
810	#docinfo = self.getNumTextPages(docinfo)
811
812
813	presentationUrl = getText(dom.find(".//texttool/presentation"))
814	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
815	# TODO: is this needed here?
816	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
817
818
819	if presentationUrl: # ueberschreibe diese durch presentation informationen
820	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
821	# durch den relativen Pfad auf die presentation infos
822	presentationPath = presentationUrl
823	if url.endswith("index.meta"):
824	presentationUrl = url.replace('index.meta', presentationPath)
825	else:
826	presentationUrl = url + "/" + presentationPath
827
828	docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
829
830	docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
831
832	return docinfo
833
834
835	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
836	"""gets the bibliographical information from the preseantion entry in texttools
837	"""
838	dom=self.getPresentationInfoXML(url)
839	docinfo['author']=getText(dom.find(".//author"))
840	docinfo['title']=getText(dom.find(".//title"))
841	docinfo['year']=getText(dom.find(".//date"))
842	return docinfo
843
844	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
845	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
846	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
847	if docinfo is None:
848	docinfo = {}
849	path=path.replace("/mpiwg/online","")
850	docinfo['imagePath'] = path
851	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
852
853	pathorig=path
854	for x in range(cut):
855	path=getParentDir(path)
856	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
857	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
858	docinfo['imageURL'] = imageUrl
859
860	#path ist the path to the images it assumes that the index.meta file is one level higher.
861	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
862	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
863	return docinfo
864
865
866	def getDocinfo(self, mode, url):
867	"""returns docinfo depending on mode"""
868	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
869	# look for cached docinfo in session
870	if self.REQUEST.SESSION.has_key('docinfo'):
871	docinfo = self.REQUEST.SESSION['docinfo']
872	# check if its still current
873	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
874	logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
875	return docinfo
876
877	# new docinfo
878	docinfo = {'mode': mode, 'url': url}
879	# add self url
880	docinfo['viewerUrl'] = self.getDocumentViewerURL()
881	if mode=="texttool":
882	# index.meta with texttool information
883	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
884	elif mode=="imagepath":
885	# folder with images, index.meta optional
886	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
887	elif mode=="filepath":
888	# filename
889	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
890	else:
891	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
892	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
893
894	# FIXME: fake texturlpath
895	if not docinfo.has_key('textURLPath'):
896	docinfo['textURLPath'] = None
897
898	logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
899	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%)
900	self.REQUEST.SESSION['docinfo'] = docinfo
901	return docinfo
902
903	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
904	"""returns pageinfo with the given parameters"""
905	pageinfo = {}
906	current = getInt(current)
907
908	pageinfo['current'] = current
909	rows = int(rows or self.thumbrows)
910	pageinfo['rows'] = rows
911	cols = int(cols or self.thumbcols)
912	pageinfo['cols'] = cols
913	grpsize = cols * rows
914	pageinfo['groupsize'] = grpsize
915	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
916	# int(current / grpsize) * grpsize +1))
917	pageinfo['start'] = start
918	pageinfo['end'] = start + grpsize
919	if (docinfo is not None) and ('numPages' in docinfo):
920	np = int(docinfo['numPages'])
921	pageinfo['end'] = min(pageinfo['end'], np)
922	pageinfo['numgroups'] = int(np / grpsize)
923	if np % grpsize > 0:
924	pageinfo['numgroups'] += 1
925	pageinfo['viewMode'] = viewMode
926	pageinfo['tocMode'] = tocMode
927	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
928	#pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
929	pageinfo['query'] = self.REQUEST.get('query','')
930	pageinfo['queryType'] = self.REQUEST.get('queryType','')
931	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
932	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
933	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
934	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
935	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
936	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
937	toc = int (pageinfo['tocPN'])
938	pageinfo['textPages'] =int (toc)
939
940	if 'tocSize_%s'%tocMode in docinfo:
941	tocSize = int(docinfo['tocSize_%s'%tocMode])
942	tocPageSize = int(pageinfo['tocPageSize'])
943	# cached toc
944	if tocSize%tocPageSize>0:
945	tocPages=tocSize/tocPageSize+1
946	else:
947	tocPages=tocSize/tocPageSize
948	pageinfo['tocPN'] = min (tocPages,toc)
949	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
950	pageinfo['sn'] =self.REQUEST.get('sn','')
951	return pageinfo
952
953	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
954	"""init document viewer"""
955	self.title=title
956	self.digilibBaseUrl = digilibBaseUrl
957	self.thumbrows = thumbrows
958	self.thumbcols = thumbcols
959	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
960	if RESPONSE is not None:
961	RESPONSE.redirect('manage_main')
962
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
967
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    Redirects to manage_main afterwards if RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
975
976	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used by the document viewer.

    Adds nothing to ZopePageTemplate apart from its own meta_type.
    """
    meta_type = "DocumentViewer Template"
980
981
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a viewer template (zpt/addDocumentViewerTemplate)."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
986
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged zpt/viewer_main.zpt.

    id      -- id of the new template
    title   -- optional title for the template
    REQUEST -- if given, the browser is redirected to the new template's manage_main

    The text and submit arguments are accepted but not used.
    Returns the empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file even if reading fails (try/finally also works before Python 2.5)
    f = open(templatePath,'r')
    try:
        txt = f.read()
    finally:
        f.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code): nothing to redirect then
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
1006
1007
1008

Note: See TracBrowser for help on using the repository browser.

Download in other formats: