Context Navigation

source: documentViewer/documentViewer.py @ 407:1cea48640992

Last change on this file since 407:1cea48640992 was 405:b8fb4c750d74, checked in by casties, 14 years ago
fix for characternormalization default
File size: 35.2 KB

Line
1
2	from OFS.Folder import Folder
3	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8	from Products.zogiLib.zogiLib import browserCheck
9
10	from Ft.Xml import EMPTY_NAMESPACE, Parse
11	import Ft.Xml.Domlette
12	import os.path
13	import sys
14	import urllib
15	import urllib2
16	import logging
17	import math
18	import urlparse
19	import cStringIO
20	import re
21	import string
22
def logger(txt, method, txt2):
    """Log the concatenation of ``txt`` and ``txt2`` at INFO level.

    ``method`` is accepted so existing call sites keep working, but it is
    not used: the message always goes to ``logging.info``.
    """
    message = txt + txt2
    logging.info(message)
26
27
def getInt(number, default=0):
    """Return ``number`` converted to int, falling back to ``int(default)``.

    Only conversion failures trigger the fallback: TypeError (e.g. None or
    a list), ValueError (non-numeric string) and OverflowError (infinite
    float). Other exceptions such as KeyboardInterrupt are no longer
    swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
34
def getTextFromNode(nodename):
    """Return the text of the direct text-node children of ``nodename``.

    Child nodes of any other type are skipped (their text is not collected).
    Returns "" when ``nodename`` is None.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
45
def serializeNode(node, encoding='utf-8'):
    """Serialize ``node`` with Ft.Xml.Domlette.Print and return the result.

    The XML is printed into an in-memory buffer using ``encoding`` and the
    buffer's content is returned as a string.
    """
    stream = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    xml_text = stream.getvalue()
    stream.close()
    return xml_text
53
def browserCheck(self):
    """Classify the requesting browser from its User-Agent header.

    Returns a dict with the keys 'ua' (the raw header value, possibly None),
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML'
    and, when an MSIE version token can be parsed, 'versIE'.

    A request without a User-Agent header is treated as an unknown browser
    (all flags False) instead of raising.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # the header may be absent; do all substring tests on a real string
    agent = ua or ""

    is_ie = 'MSIE' in agent
    bt = {
        'ua': ua,
        'isIE': is_ie,
        # Netscape 4 is only considered when the agent is not IE
        'isN4': (not is_ie) and ('Mozilla/4.' in agent),
    }

    # the IE version is the second "; "-separated token inside the
    # parenthesised part, e.g. "(compatible; MSIE 6.0; Windows NT 5.1)"
    tokens = agent[agent.find('('):].split("; ")
    if len(tokens) > 1 and "MSIE" in tokens[1]:
        parts = tokens[1].split(" ")
        if len(parts) > 1:
            bt['versIE'] = parts[1]

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
80
81
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
85
86
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch ``url`` with an HTTP GET request and return the response body.

    @param url: URL to fetch
    @param data: optional query; a string is appended as-is after '?',
        a dict/list/tuple is urlencoded first
    @param num_tries: maximum number of attempts
    @param timeout: timeout in seconds for each attempt
    @return: the response body
    @raise IOError: when no response could be obtained

    An HTTP error status ends the attempts immediately; other URL errors
    (urllib2.URLError) are retried until ``num_tries`` is exhausted.
    """
    # we do GET (by appending data to url)
    if isinstance(data, str) or isinstance(data, unicode):
        # if data is string then append
        url = "%s?%s" % (url, data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s" % (url, urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument before 2.6, so the
                # process-wide socket default has to be set instead
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url, timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() is used because this code path must also work
            # on interpreters older than 2.6 (no "except ... as" syntax)
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # possibly transient -- try again

    if response is not None:
        try:
            return response.read()
        finally:
            # release the connection even if reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
129
130
131
132	##
133	## documentViewer class
134	##
class documentViewer(Folder):
    """Folder-based document viewer."""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()

    # extra management tab for the viewer configuration form
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates and forms shipped with the product
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # the configuration form is restricted to managers
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
163
164
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the viewer and create its 'template' sub-folder.

    @param id: id of the new object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the object
    @param digilibBaseUrl: base URL of digilib; when empty the viewer
        methods determine it on first use
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups

    Failure to create the text server or zogiLib helper objects is logged
    and does not abort the construction.
    """
    self.id=id
    self.title=title
    # store the argument so methods reading self.digilibBaseUrl always
    # find the attribute (it was previously accepted but dropped)
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        # sys.exc_info() keeps this compatible with pre-2.6 interpreters
        e = sys.exc_info()[1]
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        e = sys.exc_info()[1]
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
192
193
194	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Delegate to fulltextclient.getTextPage with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getQuery(self, **args):
    """Delegate to fulltextclient.getQuery with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getQueryResultHits(self, **args):
    """Delegate to fulltextclient.getQueryResultHits with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getQueryResultHits(**args)

def getQueryResultHitsText(self, **args):
    """Delegate to fulltextclient.getQueryResultHitsText with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsText(**args)

def getQueryResultHitsFigures(self, **args):
    """Delegate to fulltextclient.getQueryResultHitsFigures with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getQueryResultHitsFigures(**args)

def getPDF(self, **args):
    """Delegate to fulltextclient.getPDF with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getPDF(**args)

def getSearch(self, **args):
    """Delegate to fulltextclient.getSearch with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getGisPlaces(self, **args):
    """Delegate to fulltextclient.getGisPlaces with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """Delegate to fulltextclient.getAllGisPlaces with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)

def getOrigPages(self, **args):
    """Delegate to fulltextclient.getOrigPages with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getOrigPages(**args)

def getNumPages(self, docinfo):
    """Delegate to fulltextclient.getNumPages, passing ``docinfo``."""
    client = self.template.fulltextclient
    return client.getNumPages(docinfo)

def getNumTextPages(self, docinfo):
    """Delegate to fulltextclient.getNumTextPages, passing ``docinfo``."""
    client = self.template.fulltextclient
    return client.getNumTextPages(docinfo)

def getTranslate(self, **args):
    """Delegate to fulltextclient.getTranslate with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """Delegate to fulltextclient.getLemma with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getToc(self, **args):
    """Delegate to fulltextclient.getToc with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """Delegate to fulltextclient.getTocPage with the given keyword arguments."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
258
259
260	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the thumbnail overview with the thumbs_main_rss template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: "text", "images" or "auto"; "auto" becomes "text" when
        the document info contains a text URL and "images" otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.info("documentViewer (index)mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # use getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if ("textURL" in docinfo) or ('textURLPath' in docinfo): # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
292
293	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    render the document with the viewer_main template
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: "auto" is resolved to "text_dict" when the document info
        has a text URL and to "images" otherwise; other values are passed on
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    @param mk: mark(s), converted with generateMarks and handed to the template
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # every other toc mode needs the table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        hasText = ('textURL' in docinfo) or ('textURLPath' in docinfo)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    template = getattr(self.template, 'viewer_main')
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
331
def generateMarks(self,mk):
    """Build the 'mk' query-string fragment for the given mark(s).

    @param mk: None, a single mark or a list of marks
    @return: "" for None, otherwise one "mk=<value>" pair per mark, joined
        with "&" (previously several marks were run together without any
        separator)
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s" % m for m in mk])
341
342
def getBrowser(self):
    """Return the browser information dict produced by browserCheck for this request."""
    return browserCheck(self)
349
def findDigilibUrl(self):
    """Try to get the digilib base URL from the zogilib object.

    @return: the value of template.zogilib.getDLBaseUrl(), or None when no
        zogilib object is available, so that callers can apply their
        fallback URL
    """
    try:
        zogilib = self.template.zogilib
    except AttributeError:
        logging.debug("documentViewer (findDigilibUrl) no zogilib in template folder")
        return None
    return zogilib.getDLBaseUrl()
354
def getDocumentViewerURL(self):
    """Return this object's absolute URL."""
    viewer_url = self.absolute_url()
    return viewer_url
358
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
366
def getLink(self,param=None,val=None):
    """Return a link to this viewer that repeats the current request's
    form parameters, with ``param`` set to ``val``.

    When ``val`` is None the parameter is removed instead. A request in
    mode "filepath" is turned into mode "imagepath" with the url shortened
    by one path component.
    """
    query = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            query[param] = str(val)
        elif param in query:
            del query[param]

    if query.get("mode", None) == "filepath":
        # the first call used filepath; later links use imagepath
        query["mode"] = "imagepath"
        query["url"] = getParentDir(query["url"])

    # quote values and assemble into query string
    encoded = urllib.urlencode(query)
    return self.REQUEST['URL1'] + "?" + encoded
386
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer that repeats the current request's
    form parameters, with ``param`` set to ``val`` (removed when ``val``
    is None).

    Values are quoted with urllib.quote. List or tuple values (multi-valued
    form fields) produce one key=value pair per item and other non-string
    values are converted with str(); previously both raised in
    urllib.quote.
    """
    # NOTE(review): despite the name, pairs are joined with a plain "&"
    # in this revision -- confirm whether "&amp;" was intended.
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            if param in params:
                del params[param]
        else:
            params[param] = str(val)

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            if not isinstance(item, basestring):
                item = str(item)
            pairs.append("%s=%s"%(k,urllib.quote(item)))
    ps = "&".join(pairs)
    url=self.REQUEST['URL1']+"?"+ps
    return url
402
def getInfo_xml(self,url,mode):
    """Return info about the document, rendered with the info_xml template.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # use getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
412
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of a session-stored option, toggling it on change.

    The session entry ``optionName`` holds the last seen ``newState`` and
    the current boolean state. On first use the state is ``initialState``;
    afterwards the state flips each time ``newState`` differs from the
    stored value. The entry is written back to the session on every call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # the caller's state changed since the last call -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # first call in this session
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
427
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    - accessType 'free': always granted
    - accessType missing or one of self.authgroups: granted to any user
      whose name is not "Anonymous User"
    - anything else: logged as unknown and denied
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
447
448
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Read digilib's dirInfo for ``path`` and store the page count.

    @param path: image directory path
    @param docinfo: dict to update (a new dict is used when None)
    @param cut: number of trailing path components to remove from path first
    @return: docinfo with 'numPages' set to the first //dir/size value of
        the dirInfo XML, or 0 when there is none
    @raise IOError: when the dirInfo document cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the format string needs a placeholder for the value; passing it as a
    # logging argument also avoids formatting when debug is off
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s", sizes)

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
478
def getIndexMetaPath(self,url):
    """Return the /mpiwg/online/... path contained in ``url``.

    The url must contain "experimental/" or "permanent/"; the result is
    "/mpiwg/online/<experimental|permanent>/<rest of url>". When neither
    occurs, "" is returned.
    """
    # the pattern needs a real alternation and ".*" wildcards to match paths
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
487
488
489
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at ``url``.

    A real URL (http:// or https://) is returned unchanged. Anything else
    is treated as an archive path: "/mpiwg/online" is removed, the digilib
    Texter servlet URL is prepended and "/index.meta" is appended unless
    the path already ends with "index.meta".
    """
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
505
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for ``url`` and return it parsed.

    The document URL is determined with getIndexMetaUrl.
    @raise IOError: when the document cannot be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    content = getHttpData(metaUrl)
    if content is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return Parse(content)
518
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at ``url`` and return it parsed.

    A real URL (http:// or https://) is fetched directly; anything else is
    treated as an archive path that is read through the digilib Texter
    servlet after removing "/mpiwg/online".
    @raise IOError: when the document cannot be read
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return Parse(txt)
537
538
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from an index.meta document in docinfo.

    The document is taken from ``dom`` or, when that is None, fetched for
    ``path`` shortened by ``cut`` components. docinfo['accessType'] becomes
    the type attribute of access-conditions/access; for the types 'group'
    and 'institution' the lower-cased text of the access name element is
    used instead. Without an access element the value is None.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        metaPath = path
        for _ in range(cut):
            metaPath = getParentDir(metaPath)
        dom = self.getDomFromIndexMeta(metaPath)

    accessType = None
    typeAttrs = dom.xpath("//access-conditions/access/@type")
    if typeAttrs and (len(typeAttrs) > 0):
        accessType = typeAttrs[0].value
        if accessType in ['group', 'institution']:
            nameNode = dom.xpath("//access-conditions/access/name")[0]
            accessType = getTextFromNode(nameNode).lower()

    docinfo['accessType'] = accessType
    return docinfo
561
562
563	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
564	"""Collect bibliographical info from an index.meta document into docinfo.

The document is taken from ``dom`` or, when that is None, fetched with
getDomFromIndexMeta for ``path`` shortened by ``cut`` components.
Depending on what the document and the metadata mapping provide, the keys
'indexMetaPath', 'bib', 'bib_type', 'author', 'title', 'year' and 'lang'
are written to docinfo, which is returned.
"""
# NOTE(review): this listing has lost its indentation; restore the nesting
# of the statements below from the repository copy before changing them.
565	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
566
567	if docinfo is None:
568	docinfo = {}
569
570	if dom is None:
571	for x in range(cut):
572	path=getParentDir(path)
573	dom = self.getDomFromIndexMeta(path)
574
575	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
576
577	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
578	# put in all raw bib fields as dict "bib"
579	bib = dom.xpath("//bib/*")
580	if bib and len(bib)>0:
581	bibinfo = {}
582	for e in bib:
583	bibinfo[e.localName] = getTextFromNode(e)
584	docinfo['bib'] = bibinfo
585
586	# extract some fields (author, title, year) according to their mapping
# the mapping comes from self.metadata.main.meta.bib, an object outside this file
587	metaData=self.metadata.main.meta.bib
588	bibtype=dom.xpath("//bib/@type")
589	if bibtype and (len(bibtype)>0):
590	bibtype=bibtype[0].value
591	else:
592	bibtype="generic"
593
594	bibtype=bibtype.replace("-"," ") # index.meta types use "-" where the mapping uses " " (original note: "not wrong! ROC")
595	docinfo['bib_type'] = bibtype
596	bibmap=metaData.generateMappingForType(bibtype)
597	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
598	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
599	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
600	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
# each field is best-effort: any failure while reading it is silently ignored
601	try:
602	docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
603	except: pass
604	try:
605	docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
606	except: pass
607	try:
608	docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
609	except: pass
610	logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
# the language falls back to '' when it cannot be read
611	try:
612	docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
613	except:
614	docinfo['lang']=''
615
616	return docinfo
617
618
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of /resource/name from an index.meta document as docinfo['name'].

    The document is taken from ``dom`` or, when that is None, fetched for
    ``path`` shortened by ``cut`` components.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        metaPath = path
        for _ in range(cut):
            metaPath = getParentDir(metaPath)
        dom = self.getDomFromIndexMeta(metaPath)

    nameNode = dom.xpath("/resource/name")[0]
    docinfo['name'] = getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
632
633	def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
634	"""Fill docinfo from the texttool section of an index.meta document.

The document is taken from ``dom`` or fetched with getDomFromIndexMeta for
``url``. Reads the archive name and path, the image directory, the viewer
URL prefix, the old-style text URL and the new-style text-url-path, then
adds bibliographical, name, presentation and access information through
the other get...FromIndexMeta helpers. Raises IOError when no archive path
can be determined. Returns docinfo.
"""
# NOTE(review): this listing has lost its indentation; restore the nesting
# of the statements below from the repository copy before changing them.
635	logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
636	if docinfo is None:
637	docinfo = {}
638	if docinfo.get('lang', None) is None:
639	docinfo['lang'] = '' # default: no language set
640	if dom is None:
641	dom = self.getDomFromIndexMeta(url)
642
643	archivePath = None
644	archiveName = None
645
646	archiveNames = dom.xpath("//resource/name")
647	if archiveNames and (len(archiveNames) > 0):
648	archiveName = getTextFromNode(archiveNames[0])
649	else:
650	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
651
652	archivePaths = dom.xpath("//resource/archive-path")
653	if archivePaths and (len(archivePaths) > 0):
654	archivePath = getTextFromNode(archivePaths[0])
655	# clean up archive path
656	if archivePath[0] != '/':
657	archivePath = '/' + archivePath
658	if archiveName and (not archivePath.endswith(archiveName)):
659	archivePath += "/" + archiveName
660	else:
661	# try to get archive-path from url
662	logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
663	if (not url.startswith('http')):
664	archivePath = url.replace('index.meta', '')
665
666	if archivePath is None:
667	# we balk without archive-path
668	raise IOError("Missing archive-path (for text-tool) in %s" % (url))
669
670	imageDirs = dom.xpath("//texttool/image")
671	if imageDirs and (len(imageDirs) > 0):
672	imageDir = getTextFromNode(imageDirs[0])
673
674	else:
675	# we balk with no image tag / not necessary anymore because textmode is now standard
676	#raise IOError("No text-tool info in %s"%(url))
677	imageDir = ""
678	#xquery="//pb"
679	docinfo['imagePath'] = "" # no images
680	docinfo['imageURL'] = ""
681
682	if imageDir and archivePath:
683	#print "image: ", imageDir, " archivepath: ", archivePath
684	imageDir = os.path.join(archivePath, imageDir)
685	imageDir = imageDir.replace("/mpiwg/online", '')
686	docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
687	docinfo['imagePath'] = imageDir
688
689	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
690
691	viewerUrls = dom.xpath("//texttool/digiliburlprefix")
692	if viewerUrls and (len(viewerUrls) > 0):
693	viewerUrl = getTextFromNode(viewerUrls[0])
694	docinfo['viewerURL'] = viewerUrl
695
696	# old style text URL
697	textUrls = dom.xpath("//texttool/text")
698	if textUrls and (len(textUrls) > 0):
699	textUrl = getTextFromNode(textUrls[0])
700	if urlparse.urlparse(textUrl)[0] == "": # no scheme, i.e. not a URL
701	textUrl = os.path.join(archivePath, textUrl)
702	# fix URLs starting with /mpiwg/online
703	if textUrl.startswith("/mpiwg/online"):
704	textUrl = textUrl.replace("/mpiwg/online", '', 1)
705
706	docinfo['textURL'] = textUrl
707
708	# new style text-url-path
709	textUrls = dom.xpath("//texttool/text-url-path")
710	if textUrls and (len(textUrls) > 0):
711	textUrl = getTextFromNode(textUrls[0])
712	docinfo['textURLPath'] = textUrl
713	if not docinfo['imagePath']:
714	# text-only, no page images
715	docinfo = self.getNumTextPages(docinfo)
716
717	presentationUrls = dom.xpath("//texttool/presentation")
718	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from the bib tag
719	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
720
721
722	if presentationUrls and (len(presentationUrls) > 0): # override these with the presentation information
723	# the presentation URL is built by replacing index.meta in the metadata URL
724	# with the relative path to the presentation info
725	presentationPath = getTextFromNode(presentationUrls[0])
726	if url.endswith("index.meta"):
727	presentationUrl = url.replace('index.meta', presentationPath)
728	else:
729	presentationUrl = url + "/" + presentationPath
730
731	docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
732
733	docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
734
735	return docinfo
736
737
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Copy author, title and year from the presentation info document into docinfo.

    @param url: location of the presentation info document (fetched with
        getPresentationInfoXML)
    @param docinfo: dict to update (a new dict is used when None)
    @param dom: ignored -- the presentation document is always fetched;
        kept so existing callers keep working
    @return: docinfo; 'author', 'title' and 'year' are set from the first
        //author, //title and //date element, and a field without a
        matching element is left untouched
    """
    if docinfo is None:
        docinfo = {}

    presentation = self.getPresentationInfoXML(url)
    fields = (('author', "//author"), ('title', "//title"), ('year', "//date"))
    for key, query in fields:
        nodes = presentation.xpath(query)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
755
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document addressed by its image directory.

    ``path`` is the path to the images; the index.meta file is assumed to
    be one level higher. Sets 'imagePath' and 'imageURL', adds the
    directory info from digilib and the bibliographical and access
    information from index.meta.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the scaler URL uses the path shortened by `cut` components
    scalerPath = imagePath
    for _ in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta is expected one level above the image directory
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
776
777
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, built according to ``mode``.

    A docinfo stored in the session is reused when its mode and url match.
    Otherwise a new one is built ('texttool', 'imagepath' or 'filepath')
    and stored in the session.
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))

    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        isCurrent = (cached is not None
                     and cached.get('mode') == mode
                     and cached.get('url') == url)
        if isCurrent:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
804
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the current request.

    @param current: current page number
    @param start: first page of the thumbnail group; by default the first
        page of the group that contains ``current``
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo dict of the document, may be None
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']; also selects the
        'tocSize_<tocMode>' entry of docinfo used to limit 'tocPN'

    Further values (query, textPN, tocPageSize, ...) are copied from the
    request with their defaults.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    request = self.REQUEST
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','regPlusNorm')
    pageinfo['optionToggle'] = request.get('optionToggle','')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # docinfo is optional, so check it before looking for the cached toc size
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # a page size of 0 (or less) from the request cannot be used to
        # compute a page count; leave tocPN as requested in that case
        if tocPageSize > 0:
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            # never point past the last toc page
            pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
856
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store the viewer configuration submitted by the config form.

    ``authgroups`` is a comma-separated string; it is stored as a list of
    stripped, lower-cased names. When ``RESPONSE`` is given the browser is
    redirected to 'manage_main'.
    """
    groups = [name.strip().lower() for name in authgroups.split(',')]

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
866
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
871
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    When ``RESPONSE`` is given the browser is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
879
880	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass used as template for the document viewer."""
    meta_type = "DocumentViewer Template"
884
885
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
890
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate initialised from zpt/viewer_main.zpt.

    @param id: id of the new template object
    @param title: optional title for the template
    @param text: not used; the content always comes from the product's
        zpt/viewer_main.zpt file
    @param REQUEST: when given, the browser is redirected to the new
        object's manage_main; when None (programmatic call) no redirect
        is attempted
    @param submit: not used
    @return: ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leak the file handle
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
910
911
912

Note: See TracBrowser for help on using the repository browser.

Download in other formats: