Context Navigation

source: documentViewer/documentViewer.py @ 461:8732f15df5f3

elementtree

Last change on this file since 461:8732f15df5f3 was 461:8732f15df5f3, checked in by casties, 13 years ago
more renovation
File size: 34.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt,method,txt2):
    """Log txt followed by txt2 at INFO level on the root logger.

    The method argument is accepted but not used.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return node serialised as XML by ElementTree's tostring().

    The encoding argument is not passed on to ElementTree.
    """
    # The former 4Suite implementation printed the node into a StringIO
    # stream with Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding).
    return ET.tostring(node)
39
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict
    with the raw header ('ua'), the flags 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac' and 'staticHTML' (always False), and the version
    strings 'versFirefox', 'versIE', 'versSafariChrome' and 'versOpera'
    (empty string when nothing was detected).
    """
    bt = {}
    # NOTE(review): if the header is missing and get_header returns None
    # (TODO confirm), the string.find calls below fail.
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox']=""
    bt['versIE']=""
    bt['versSafariChrome']=""
    bt['versOpera']=""

    # isN4 is only evaluated when the user agent does not contain 'MSIE'
    if string.find(ua, 'MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
    # Safari or Chrome identification
    # Each detection block is best effort: any error (e.g. an IndexError from
    # an unexpected user agent layout) is swallowed by the bare except.
    try:
        nav = ua[string.find(ua, '('):]
        nav1=ua[string.find(ua,')'):]
        nav2=nav1[string.find(nav1,'('):]
        nav3=nav2[string.find(nav2,')'):]
        ie = string.split(nav, "; ")[1]
        ie1 =string.split(nav1, " ")[2]
        ie2 =string.split(nav3, " ")[1]
        ie3 =string.split(nav3, " ")[2]
        if string.find(ie3, "Safari") >-1:
            bt['versSafariChrome']=string.split(ie2, "/")[1]
    except: pass
    # IE identification
    try:
        nav = ua[string.find(ua, '('):]
        ie = string.split(nav, "; ")[1]
        if string.find(ie, "MSIE") > -1:
            bt['versIE'] = string.split(ie, " ")[1]
    except:pass
    # Firefox identification
    # NOTE(review): ie1 is only assigned in the Safari/Chrome block above; if
    # that block failed before the assignment, the NameError raised here is
    # swallowed and versFirefox stays empty.
    try:
        nav = ua[string.find(ua, '('):]
        nav1=ua[string.find(ua,')'):]
        if string.find(ie1, "Firefox") >-1:
            nav5= string.split(ie1, "/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            # only the first three characters of the version are kept
            bt['versFirefox']=nav5[0:3]
    except:pass
    #Opera identification
    try:
        if string.find(ua,"Opera") >-1:
            nav = ua[string.find(ua, '('):]
            nav1=nav[string.find(nav,')'):]
            bt['versOpera']=string.split(nav1,"/")[2]
    except:pass

    bt['isMac'] = string.find(ua, 'Macintosh') > -1
    bt['isWin'] = string.find(ua, 'Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
104
def normalizeBibField(bt, underscore=True):
    """Return bt normalised for looking up bib type mappings.

    The value is stripped, spaces become dashes and the result is
    lower-cased.  If underscore is true, underscores become dashes too.
    """
    normalized = bt.strip().replace(' ', '-').lower()
    if not underscore:
        return normalized
    return normalized.replace('_', '-')
112
def getBibdataFromDom(dom):
    """Return a dict with the contents of the first meta/bib element in dom.

    The normalised value of the bib element's type attribute is stored under
    '@type' (only when the attribute is present).  The text of every child
    element is stored under the child's normalised tag name.  Returns an
    empty dict when dom contains no meta/bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # A bib element without type attribute used to raise AttributeError
    # inside normalizeBibField(None); callers only test '@type' for truth.
    bibtype = bib.get('type')
    if bibtype is not None:
        bibinfo['@type'] = normalizeBibField(bibtype)

    # put all subelements in dict
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo
126
127
128	##
129	## documentViewer class
130	##
class documentViewer(Folder):
    """Zope Folder subclass that provides the document viewer pages and their helper methods."""
    # Zope meta type name of this class
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # the management tabs of Folder plus a "main config" tab that opens
    # changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    # (page templates loaded from the zpt/ and css/ directories of this package)
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
160
161
162	def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
163	"""init document viewer"""
164	self.id=id
165	self.title=title
166	self.thumbcols = thumbcols
167	self.thumbrows = thumbrows
168	# authgroups is list of authorized groups (delimited by ,)
169	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
170	# create template folder so we can always use template.something
171
172	templateFolder = Folder('template')
173	#self['template'] = templateFolder # Zope-2.12 style
174	self._setObject('template',templateFolder) # old style
175	try:
176	import MpdlXmlTextServer
177	textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
178	#templateFolder['fulltextclient'] = xmlRpcClient
179	templateFolder._setObject('fulltextclient',textServer)
180	except Exception, e:
181	logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
182	try:
183	from Products.zogiLib.zogiLib import zogiLib
184	zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
185	#templateFolder['zogilib'] = zogilib
186	templateFolder._setObject('zogilib',zogilib)
187	except Exception, e:
188	logging.error("Unable to create zogiLib for zogilib: "+str(e))
189
190
191	# proxy text server methods to fulltextclient
192	def getTextPage(self, **args):
193	"""get page"""
194	return self.template.fulltextclient.getTextPage(**args)
195
196	def getOrigPages(self, **args):
197	"""get page"""
198	return self.template.fulltextclient.getOrigPages(**args)
199
200	def getOrigPagesNorm(self, **args):
201	"""get page"""
202	return self.template.fulltextclient.getOrigPagesNorm(**args)
203
204	def getQuery(self, **args):
205	"""get query in search"""
206	return self.template.fulltextclient.getQuery(**args)
207
208	def getSearch(self, **args):
209	"""get search"""
210	return self.template.fulltextclient.getSearch(**args)
211
212	def getGisPlaces(self, **args):
213	"""get gis places"""
214	return self.template.fulltextclient.getGisPlaces(**args)
215
216	def getAllGisPlaces(self, **args):
217	"""get all gis places """
218	return self.template.fulltextclient.getAllGisPlaces(**args)
219
220	def getTranslate(self, **args):
221	"""get translate"""
222	return self.template.fulltextclient.getTranslate(**args)
223
224	def getLemma(self, **args):
225	"""get lemma"""
226	return self.template.fulltextclient.getLemma(**args)
227
228	def getLemmaQuery(self, **args):
229	"""get query"""
230	return self.template.fulltextclient.getLemmaQuery(**args)
231
232	def getLex(self, **args):
233	"""get lex"""
234	return self.template.fulltextclient.getLex(**args)
235
236	def getToc(self, **args):
237	"""get toc"""
238	return self.template.fulltextclient.getToc(**args)
239
240	def getTocPage(self, **args):
241	"""get tocpage"""
242	return self.template.fulltextclient.getTocPage(**args)
243
244
245	security.declareProtected('View','thumbs_rss')
246	def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
247	'''
248	view it
249	@param mode: defines how to access the document behind url
250	@param url: url which contains display information
251	@param viewMode: if images display images, if text display text, default is images (text,images or auto)
252
253	'''
254	logging.debug("HHHHHHHHHHHHHH:load the rss")
255	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
256
257	if not hasattr(self, 'template'):
258	# create template folder if it doesn't exist
259	self.manage_addFolder('template')
260
261	if not self.digilibBaseUrl:
262	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
263
264	docinfo = self.getDocinfo(mode=mode,url=url)
265	#pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
266	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
267	''' ZDES '''
268	pt = getattr(self.template, 'thumbs_main_rss')
269
270	if viewMode=="auto": # automodus gewaehlt
271	if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
272	viewMode="text"
273	else:
274	viewMode="images"
275
276	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
277
278	security.declareProtected('View','index_html')
279	def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
280	'''
281	view it
282	@param mode: defines how to access the document behind url
283	@param url: url which contains display information
284	@param viewMode: if images display images, if text display text, default is auto (text,images or auto)
285	@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
286	@param characterNormalization type of text display (reg, norm, none)
287	@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
288	'''
289
290	logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
291
292	if not hasattr(self, 'template'):
293	# this won't work
294	logging.error("template folder missing!")
295	return "ERROR: template folder missing!"
296
297	if not getattr(self, 'digilibBaseUrl', None):
298	self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
299
300	docinfo = self.getDocinfo(mode=mode,url=url)
301
302	if tocMode != "thumbs":
303	# get table of contents
304	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
305
306	# auto viewMode: text_dict if text else images
307	if viewMode=="auto":
308	if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
309	#texturl gesetzt und textViewer konfiguriert
310	viewMode="text_dict"
311	else:
312	viewMode="images"
313
314	pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
315
316	if viewMode != 'images' and docinfo.get('textURLPath', None):
317	# get full text page
318	page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
319	pageinfo['textPage'] = page
320
321	# get template /template/viewer_main
322	pt = getattr(self.template, 'viewer_main')
323	# and execute with parameters
324	return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
325
326	def generateMarks(self,mk):
327	ret=""
328	if mk is None:
329	return ""
330	if not isinstance(mk, list):
331	mk=[mk]
332	for m in mk:
333	ret+="mk=%s"%m
334	return ret
335
336
337	def getBrowser(self):
338	"""getBrowser the version of browser """
339	bt = browserCheck(self)
340	logging.debug("BROWSER VERSION: %s"%(bt))
341	return bt
342
343	def findDigilibUrl(self):
344	"""try to get the digilib URL from zogilib"""
345	url = self.template.zogilib.getDLBaseUrl()
346	return url
347
348	def getDocumentViewerURL(self):
349	"""returns the URL of this instance"""
350	return self.absolute_url()
351
352	def getStyle(self, idx, selected, style=""):
353	"""returns a string with the given style and append 'sel' if path == selected."""
354	#logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
355	if idx == selected:
356	return style + 'sel'
357	else:
358	return style
359
360	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
361	"""returns URL to documentviewer with parameter param set to val or from dict params"""
362	# copy existing request params
363	urlParams=self.REQUEST.form.copy()
364	# change single param
365	if param is not None:
366	if val is None:
367	if urlParams.has_key(param):
368	del urlParams[param]
369	else:
370	urlParams[param] = str(val)
371
372	# change more params
373	if params is not None:
374	for k in params.keys():
375	v = params[k]
376	if v is None:
377	# val=None removes param
378	if urlParams.has_key(k):
379	del urlParams[k]
380
381	else:
382	urlParams[k] = v
383
384	# FIXME: does this belong here?
385	if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
386	urlParams["mode"] = "imagepath"
387	urlParams["url"] = getParentDir(urlParams["url"])
388
389	# quote values and assemble into query string (not escaping '/')
390	ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
391	#ps = urllib.urlencode(urlParams)
392	if baseUrl is None:
393	baseUrl = self.REQUEST['URL1']
394
395	url = "%s?%s"%(baseUrl, ps)
396	return url
397
398
399	def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
400	"""link to documentviewer with parameter param set to val"""
401	return self.getLink(param, val, params, baseUrl, '&')
402
403	def getInfo_xml(self,url,mode):
404	"""returns info about the document as XML"""
405
406	if not self.digilibBaseUrl:
407	self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
408
409	docinfo = self.getDocinfo(mode=mode,url=url)
410	pt = getattr(self.template, 'info_xml')
411	return pt(docinfo=docinfo)
412
413	def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
414	"""returns new option state"""
415	if not self.REQUEST.SESSION.has_key(optionName):
416	# not in session -- initial
417	opt = {'lastState': newState, 'state': initialState}
418	else:
419	opt = self.REQUEST.SESSION.get(optionName)
420	if opt['lastState'] != newState:
421	# state in session has changed -- toggle
422	opt['state'] = not opt['state']
423	opt['lastState'] = newState
424
425	self.REQUEST.SESSION[optionName] = opt
426	return opt['state']
427
428	def isAccessible(self, docinfo):
429	"""returns if access to the resource is granted"""
430	access = docinfo.get('accessType', None)
431	logging.debug("documentViewer (accessOK) access type %s"%access)
432	if access is not None and access == 'free':
433	logging.debug("documentViewer (accessOK) access is free")
434	return True
435	elif access is None or access in self.authgroups:
436	# only local access -- only logged in users
437	user = getSecurityManager().getUser()
438	logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
439	if user is not None:
440	#print "user: ", user
441	return (user.getUserName() != "Anonymous User")
442	else:
443	return False
444
445	logging.error("documentViewer (accessOK) unknown access type %s"%access)
446	return False
447
448
449	def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
450	"""gibt param von dlInfo aus"""
451	if docinfo is None:
452	docinfo = {}
453
454	for x in range(cut):
455	path=getParentDir(path)
456
457	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
458
459	logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
460
461	txt = getHttpData(infoUrl)
462	if txt is None:
463	raise IOError("Unable to get dir-info from %s"%(infoUrl))
464
465	dom = ET.fromstring(txt)
466	#dom = Parse(txt)
467	size=getText(dom.find("size"))
468	#sizes=dom.xpath("//dir/size")
469	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
470
471	if size:
472	docinfo['numPages'] = int(size)
473	else:
474	docinfo['numPages'] = 0
475
476	# TODO: produce and keep list of image names and numbers
477
478	return docinfo
479
480	def getIndexMetaPath(self,url):
481	"""gib nur den Pfad zurueck"""
482	regexp = re.compile(r".(experimental\|permanent)/(.)")
483	regpath = regexp.match(url)
484	if (regpath==None):
485	return ""
486	logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
487	return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
488
489
490
491	def getIndexMetaUrl(self,url):
492	"""returns utr of index.meta document at url"""
493
494	metaUrl = None
495	if url.startswith("http://"):
496	# real URL
497	metaUrl = url
498	else:
499	# online path
500	server=self.digilibBaseUrl+"/servlet/Texter?fn="
501	metaUrl=server+url.replace("/mpiwg/online","")
502	if not metaUrl.endswith("index.meta"):
503	metaUrl += "/index.meta"
504
505	return metaUrl
506
507	def getDomFromIndexMeta(self, url):
508	"""get dom from index meta"""
509	dom = None
510	metaUrl = self.getIndexMetaUrl(url)
511
512	logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
513	txt=getHttpData(metaUrl)
514	if txt is None:
515	raise IOError("Unable to read index meta from %s"%(url))
516
517	dom = ET.fromstring(txt)
518	#dom = Parse(txt)
519	return dom
520
521	def getPresentationInfoXML(self, url):
522	"""returns dom of info.xml document at url"""
523	dom = None
524	metaUrl = None
525	if url.startswith("http://"):
526	# real URL
527	metaUrl = url
528	else:
529	# online path
530	server=self.digilibBaseUrl+"/servlet/Texter?fn="
531	metaUrl=server+url.replace("/mpiwg/online","")
532
533	txt=getHttpData(metaUrl)
534	if txt is None:
535	raise IOError("Unable to read infoXMLfrom %s"%(url))
536
537	dom = ET.fromstring(txt)
538	#dom = Parse(txt)
539	return dom
540
541
542	def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
543	"""gets authorization info from the index.meta file at path or given by dom"""
544	logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
545
546	access = None
547
548	if docinfo is None:
549	docinfo = {}
550
551	if dom is None:
552	for x in range(cut):
553	path=getParentDir(path)
554	dom = self.getDomFromIndexMeta(path)
555
556	acc = dom.find(".//access-conditions/access")
557	if acc is not None:
558	acctype = acc.get('type')
559	#acctype = dom.xpath("//access-conditions/access/@type")
560	if acctype:
561	access=acctype
562	if access in ['group', 'institution']:
563	access = dom.find(".//access-conditions/access/name").text.lower()
564
565	docinfo['accessType'] = access
566	return docinfo
567
568
569	def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
570	"""gets bibliographical info from the index.meta file at path or given by dom"""
571	logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
572
573	if docinfo is None:
574	docinfo = {}
575
576	if dom is None:
577	for x in range(cut):
578	path=getParentDir(path)
579	dom = self.getDomFromIndexMeta(path)
580
581	docinfo['indexMetaPath']=self.getIndexMetaPath(path);
582
583	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
584	# put all raw bib fields in dict "bib"
585	bib = getBibdataFromDom(dom)
586	docinfo['bib'] = bib
587	bibtype = bib.get('@type', None)
588	docinfo['bib_type'] = bibtype
589	if bibtype:
590	# also store standard mapped metadata for convenience
591	try:
592	stdbib = self.metadata.getStdMappedHash(bib)
593	docinfo['std_bib'] = stdbib
594	docinfo['author'] = stdbib['author']
595	docinfo['title'] = stdbib['title']
596	docinfo['year'] = stdbib['year']
597	except:
598	pass
599
600	return docinfo
601
602
603	# TODO: is this needed?
604	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
605	"""gets name info from the index.meta file at path or given by dom"""
606	if docinfo is None:
607	docinfo = {}
608
609	if dom is None:
610	for x in range(cut):
611	path=getParentDir(path)
612	dom = self.getDomFromIndexMeta(path)
613
614	docinfo['name']=getText(dom.find("name"))
615	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
616	return docinfo
617
    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        @param url: URL or online path of the index.meta file
        @param dom: parsed index.meta; loaded from url when None
        @param docinfo: dict to update; a new dict is created when None

        Fills docinfo with image, text, bibliographical, name and access
        information and returns it.  Raises IOError when no archive path can
        be determined.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveName = getText(dom.find("name"))
        if not archiveName:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePath = getText(dom.find("archive-path"))
        if archivePath:
            # clean up archive path
            if archivePath[0] != '/':
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        # NOTE(review): this only triggers if getText returned None above and
        # url starts with 'http' -- confirm what getText returns for a
        # missing element.
        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDir = getText(dom.find(".//texttool/image"))

        if not imageDir:
            # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            # image directory relative to the archive path, without the
            # /mpiwg/online prefix
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            # adds numPages to docinfo
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
        if viewerUrl:
            docinfo['viewerURL'] = viewerUrl

        # old style text URL
        textUrl = getText(dom.find(".//texttool/text"))
        if textUrl:
            if urlparse.urlparse(textUrl)[0] == "": # not a URL (no scheme)
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrl = getText(dom.find(".//texttool/text-url-path"))
        if textUrl:
            docinfo['textURLPath'] = textUrl
            # the part of the path before the first '.'
            textUrlkurz = string.split(textUrl, ".")[0]
            docinfo['textURLPathkurz'] = textUrlkurz
            #if not docinfo['imagePath']:
                # text-only, no page images
                #docinfo = self.getNumTextPages(docinfo)


        presentationUrl = getText(dom.find(".//texttool/presentation"))
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
        # TODO: is this needed here?
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


        if presentationUrl: # override the bib info with the presentation information
            # the presentation URL is built by replacing index.meta in the
            # metadata URL with the relative path of the presentation info
            presentationPath = presentationUrl
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo
717
718
719	def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
720	"""gets the bibliographical information from the preseantion entry in texttools
721	"""
722	dom=self.getPresentationInfoXML(url)
723	docinfo['author']=getText(dom.find(".//author"))
724	docinfo['title']=getText(dom.find(".//title"))
725	docinfo['year']=getText(dom.find(".//date"))
726	return docinfo
727
728	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
729	"""path ist the path to the images it assumes that the index.meta file is one level higher."""
730	logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
731	if docinfo is None:
732	docinfo = {}
733	path=path.replace("/mpiwg/online","")
734	docinfo['imagePath'] = path
735	docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
736
737	pathorig=path
738	for x in range(cut):
739	path=getParentDir(path)
740	logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
741	imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
742	docinfo['imageURL'] = imageUrl
743
744	#TODO: use getDocinfoFromIndexMeta
745	#path ist the path to the images it assumes that the index.meta file is one level higher.
746	docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
747	docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
748	return docinfo
749
750
751	def getDocinfo(self, mode, url):
752	"""returns docinfo depending on mode"""
753	logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
754	# look for cached docinfo in session
755	if self.REQUEST.SESSION.has_key('docinfo'):
756	docinfo = self.REQUEST.SESSION['docinfo']
757	# check if its still current
758	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
759	logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
760	return docinfo
761
762	# new docinfo
763	docinfo = {'mode': mode, 'url': url}
764	# add self url
765	docinfo['viewerUrl'] = self.getDocumentViewerURL()
766	if mode=="texttool":
767	# index.meta with texttool information
768	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
769	elif mode=="imagepath":
770	# folder with images, index.meta optional
771	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
772	elif mode=="filepath":
773	# filename
774	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
775	else:
776	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
777	raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
778
779	logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
780	#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
781	# store in session
782	self.REQUEST.SESSION['docinfo'] = docinfo
783	return docinfo
784
785	def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
786	"""returns pageinfo with the given parameters"""
787	pageinfo = {}
788	current = getInt(current)
789
790	pageinfo['current'] = current
791	rows = int(rows or self.thumbrows)
792	pageinfo['rows'] = rows
793	cols = int(cols or self.thumbcols)
794	pageinfo['cols'] = cols
795	grpsize = cols * rows
796	pageinfo['groupsize'] = grpsize
797	# what does this do?
798	start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
799	# int(current / grpsize) * grpsize +1))
800	pageinfo['start'] = start
801	pageinfo['end'] = start + grpsize
802	if (docinfo is not None) and ('numPages' in docinfo):
803	np = int(docinfo['numPages'])
804	pageinfo['end'] = min(pageinfo['end'], np)
805	pageinfo['numgroups'] = int(np / grpsize)
806	if np % grpsize > 0:
807	pageinfo['numgroups'] += 1
808
809	pageinfo['viewMode'] = viewMode
810	pageinfo['tocMode'] = tocMode
811	pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
812	#pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
813	pageinfo['query'] = self.REQUEST.get('query','')
814	pageinfo['queryType'] = self.REQUEST.get('queryType','')
815	pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
816	pageinfo['textPN'] = self.REQUEST.get('textPN','1')
817	pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
818	pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
819	pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
820	pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
821	# WTF?:
822	toc = int(pageinfo['tocPN'])
823	pageinfo['textPages'] =int(toc)
824
825	# What does this do?
826	if 'tocSize_%s'%tocMode in docinfo:
827	tocSize = int(docinfo['tocSize_%s'%tocMode])
828	tocPageSize = int(pageinfo['tocPageSize'])
829	# cached toc
830	if tocSize%tocPageSize>0:
831	tocPages=tocSize/tocPageSize+1
832	else:
833	tocPages=tocSize/tocPageSize
834
835	pageinfo['tocPN'] = min(tocPages,toc)
836
837	pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
838	pageinfo['sn'] =self.REQUEST.get('sn','')
839	return pageinfo
840
841
842	def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
843	"""init document viewer"""
844	self.title=title
845	self.digilibBaseUrl = digilibBaseUrl
846	self.thumbrows = thumbrows
847	self.thumbcols = thumbcols
848	self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
849	if RESPONSE is not None:
850	RESPONSE.redirect('manage_main')
851
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
856
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
864
865	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    # Zope meta type name of this class
    meta_type="DocumentViewer Template"
869
870
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
875
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the content of zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the new template
    @param text: not used
    @param REQUEST: current request; when given, the browser is redirected
        to the manage_main page of the new template
    @param submit: not used
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and close the file explicitly instead of
    # leaving the open handle to the garbage collector
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code); there is nothing to
    # redirect then, and the lookups below used to fail on None
    if REQUEST is None:
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
895
896
897

Note: See TracBrowser for help on using the repository browser.

Download in other formats: