Context Navigation

source: documentViewer/documentViewer.py @ 460:76bc2317146f

elementtree

Last change on this file since 460:76bc2317146f was 460:76bc2317146f, checked in by casties, 13 years ago
more renovation
File size: 34.2 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    The ``method`` argument is accepted for call compatibility but is
    not used by this function.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of an ElementTree node.

    NOTE: ``encoding`` is accepted but currently not passed on;
    ``ET.tostring`` is called with its default encoding.
    """
    # The earlier 4Suite implementation printed the node into a StringIO
    # stream (Ft.Xml.Domlette.Print) using the requested encoding.
    return ET.tostring(node)
39
def browserCheck(self):
    """Inspect the User-Agent header of the request and classify the browser.

    @param self: object providing ``REQUEST.get_header``
    @return: dict with the keys 'ua' (raw header value), 'isIE', 'isN4',
        'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' (booleans) and
        'versIE', 'versFirefox', 'versSafariChrome', 'versOpera'
        (version strings, "" when not detected)
    """
    raw_ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the viewer
    ua = raw_ua or ""

    bt = {
        'ua': raw_ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # slices shared by the detectors below: everything from the first '('
    # and everything from the first ')'
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification: look at the tokens that follow the
    # second parenthesised group
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification: second '; '-separated entry of the first group
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'.
    # The token is computed here so that this detector does not depend on
    # how far the Safari detection above got.
    try:
        ff = nav1.split(" ")[2]
        if "Firefox" in ff:
            vers = ff.split("/")[1]
            logging.debug("FIREFOX: %s"%(vers))
            bt['versFirefox'] = vers[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            tail = nav[nav.find(')'):]
            bt['versOpera'] = tail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentDir(path):
    """Return path with its last '/'-separated component removed."""
    parent, _sep, _last = path.rpartition('/')
    return parent
104
def normalizeBibtype(bt):
    """Return the normalised bib type used for looking up mappings.

    Surrounding whitespace is removed, inner blanks become '-' and the
    result is lower-cased.

    @param bt: raw type string; None (e.g. a bib element without type
        attribute) is treated as an empty type
    @return: normalised type string, '' for None
    """
    if bt is None:
        return ''
    return bt.strip().replace(' ', '-').lower()
109
def getBibdataFromDom(dom):
    """Collect the contents of the meta/bib element of dom in a dict.

    The normalised value of the element's 'type' attribute is stored
    under '@type'; every child element is stored under its tag name with
    the text returned by getText(). Without a bib element the dict is empty.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # the bib type goes into the pseudo-field @type
    bibinfo['@type'] = normalizeBibtype(bib.get('type'))
    # all sub-elements become plain dict entries
    for child in bib:
        bibinfo[child.tag] = getText(child)

    return bibinfo
123
124	##
125	## documentViewer class
126	##
127	class documentViewer(Folder):
128	"""document viewer"""
129	meta_type="Document viewer"
130
131	security=ClassSecurityInfo()
132	manage_options=Folder.manage_options+(
133	{'label':'main config','action':'changeDocumentViewerForm'},
134	)
135
136	# templates and forms
137	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
138	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
139	toc_text = PageTemplateFile('zpt/toc_text', globals())
140	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
141	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
142	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
143	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
144	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
145	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
146	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
147	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
148	head_main = PageTemplateFile('zpt/head_main', globals())
149	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
150	info_xml = PageTemplateFile('zpt/info_xml', globals())
151
152
153	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
154	security.declareProtected('View management screens','changeDocumentViewerForm')
155	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
156
157
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """init document viewer

    Stores the configuration and creates the sub-folder 'template' with
    the helper objects 'fulltextclient' and 'zogilib'. Failure to create
    a helper is logged and does not abort the construction.

    @param id: id of the new viewer object
    @param imageScalerUrl: handed to zogiLib as dlServerURL
    @param textServerName: handed to MpdlXmlTextServer as serverName
    @param title: title of the viewer
    @param digilibBaseUrl: base URL of the digilib server (may be None)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: authorized groups, delimited by ','
    """
    self.id = id
    self.title = title
    # remember the configured digilib URL: thumbs_rss() and getInfo_xml()
    # read self.digilibBaseUrl and only fall back to findDigilibUrl()
    # when it is empty
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder) # old style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))
185
186
187	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """proxy: forward the call to template.fulltextclient.getTextPage"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
191
def getOrigPages(self, **args):
    """proxy: forward the call to template.fulltextclient.getOrigPages"""
    client = self.template.fulltextclient
    return client.getOrigPages(**args)
195
def getOrigPagesNorm(self, **args):
    """proxy: forward the call to template.fulltextclient.getOrigPagesNorm"""
    client = self.template.fulltextclient
    return client.getOrigPagesNorm(**args)
199
def getQuery(self, **args):
    """proxy: forward the call to template.fulltextclient.getQuery"""
    client = self.template.fulltextclient
    return client.getQuery(**args)
203
def getSearch(self, **args):
    """proxy: forward the call to template.fulltextclient.getSearch"""
    client = self.template.fulltextclient
    return client.getSearch(**args)
207
def getGisPlaces(self, **args):
    """proxy: forward the call to template.fulltextclient.getGisPlaces"""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)
211
def getAllGisPlaces(self, **args):
    """proxy: forward the call to template.fulltextclient.getAllGisPlaces"""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)
215
def getTranslate(self, **args):
    """proxy: forward the call to template.fulltextclient.getTranslate"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)
219
def getLemma(self, **args):
    """proxy: forward the call to template.fulltextclient.getLemma"""
    client = self.template.fulltextclient
    return client.getLemma(**args)
223
def getLemmaQuery(self, **args):
    """proxy: forward the call to template.fulltextclient.getLemmaQuery"""
    client = self.template.fulltextclient
    return client.getLemmaQuery(**args)
227
def getLex(self, **args):
    """proxy: forward the call to template.fulltextclient.getLex"""
    client = self.template.fulltextclient
    return client.getLex(**args)
231
def getToc(self, **args):
    """proxy: forward the call to template.fulltextclient.getToc"""
    client = self.template.fulltextclient
    return client.getToc(**args)
235
def getTocPage(self, **args):
    """proxy: forward the call to template.fulltextclient.getTocPage"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
239
240
241	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    '''
    render the thumbnail overview with the template 'thumbs_main_rss'
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group (handed to getPageinfo)
    @param pn: current page number (handed to getPageinfo)
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may be missing altogether, so read it defensively
    # (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: text if a text URL is known, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)
273
274	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", tocMode="thumbs", start=None, pn=1, mk=None):
    '''
    main view: render the document with the template 'viewer_main'
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (handed to getPageinfo)
    @param pn: current page number
    @param mk: mark or list of marks, handed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    templateFolderExists = hasattr(self, 'template')
    if not templateFolderExists:
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    configuredUrl = getattr(self, 'digilibBaseUrl', None)
    if not configuredUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)

    # every toc mode except thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # auto viewMode: text_dict if a text URL is set, else images
    if viewMode == "auto":
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    # fetch the full text page for all non-image modes
    if viewMode != 'images' and docinfo.get('textURLPath', None):
        pageinfo['textPage'] = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)

    # get template /template/viewer_main and execute it with parameters
    viewerTemplate = getattr(self.template, 'viewer_main')
    return viewerTemplate(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
321
def generateMarks(self, mk):
    # Build the query-string fragment for the marks in mk.
    #   mk: None, a single mark or a list of marks
    #   returns "" for None, otherwise one "mk=<mark>" entry per mark,
    #   joined with '&' so that several marks stay separate parameters
    # (documented with comments only: the method has no docstring and
    # adding one would make it publishable through the web in Zope)
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s" % m for m in mk])
331
332
def getBrowser(self):
    """returns the browser information dict computed by browserCheck (and logs it)"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
338
def findDigilibUrl(self):
    """returns the digilib base URL reported by template.zogilib"""
    return self.template.zogilib.getDLBaseUrl()
343
def getDocumentViewerURL(self):
    """returns the absolute URL of this viewer instance"""
    viewerUrl = self.absolute_url()
    return viewerUrl
347
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended when idx equals selected"""
    if not (idx == selected):
        return style
    return style + 'sel'
355
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    The link starts from a copy of the current request parameters.

    @param param: name of a single parameter to change
    @param val: new value for param; None removes the parameter
    @param params: dict of further parameters; a value of None removes
        the parameter
    @param baseUrl: URL in front of the query string, default REQUEST['URL1']
    @param paramSep: separator between the parameters
    @return: baseUrl + '?' + query string
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # if the first call used filepath mode, switch to imagepath now
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/').
    # Values are stringified here so that numbers can be passed in params;
    # list values (a parameter given several times in the request) are
    # written as one entry per item.
    pairs = []
    for k, v in urlParams.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            pairs.append("%s=%s" % (k, urllib.quote_plus(str(item), '/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s" % (baseUrl, ps)
393
394
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """returns the result of getLink called with '&' as parameter separator"""
    separator = '&'
    return self.getLink(param, val, params, baseUrl, separator)
398
def getInfo_xml(self, url, mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of the template 'info_xml' called with the docinfo
    """
    # the attribute may be missing altogether, so read it defensively
    # (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
408
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """returns the state of the option optionName kept in the session

    The state starts as initialState and is inverted every time newState
    differs from the newState seen by the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
423
def isAccessible(self, docinfo):
    """returns True if access to the resource described by docinfo is granted

    Access type 'free' is always granted. A missing access type or one
    of self.authgroups is granted to every user except "Anonymous User".
    Any other access type is logged as error and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.error("documentViewer (accessOK) unknown access type %s"%access)
    return False
443
444
def getDirinfoFromDigilib(self, path, docinfo=None, cut=0):
    """reads the directory info for path from digilib and stores the number of pages

    @param path: image directory path
    @param docinfo: dict to update (a new dict is used if None)
    @param cut: number of trailing path components to remove first
    @return: docinfo with 'numPages' set (0 if digilib reports no size)
    @raise IOError: if no data could be read from digilib
    """
    if docinfo is None:
        docinfo = {}

    # shorten the path by `cut` components
    for _ in range(cut):
        path = getParentDir(path)

    infoUrl = "".join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = ET.fromstring(txt)
    size = getText(dom.find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
475
def getIndexMetaPath(self, url):
    """returns only the path part below /mpiwg/online of url

    @param url: URL or path containing 'experimental/' or 'permanent/'
    @return: "/mpiwg/online/<experimental|permanent>/<rest of url>",
        or "" if url contains neither section
    """
    # anything, then the section name, a slash and the rest of the path
    regexp = re.compile(r".*(experimental|permanent)/(.*)")
    regpath = regexp.match(url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
484
485
486
def getIndexMetaUrl(self, url):
    """returns URL of index.meta document at url

    @param url: real URL (http:// or https://), returned unchanged, or an
        online path, which is resolved through the digilib Texter servlet
    @return: URL of the index.meta document
    """
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl + "/servlet/Texter?fn="
    metaUrl = server + url.replace("/mpiwg/online", "")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
502
def getDomFromIndexMeta(self, url):
    """returns the parsed index.meta document for url

    @raise IOError: if the document could not be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is not None:
        return ET.fromstring(txt)

    raise IOError("Unable to read index meta from %s"%(url))
516
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    @param url: real URL (http:// or https://), used unchanged, or an
        online path, which is resolved through the digilib Texter servlet
    @return: parsed document (ElementTree element)
    @raise IOError: if the document could not be read
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl + "/servlet/Texter?fn="
        metaUrl = server + url.replace("/mpiwg/online", "")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXML from %s"%(url))

    return ET.fromstring(txt)
536
537
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    @param path: path of the index.meta (only used if dom is None)
    @param docinfo: dict to update (a new dict is used if None)
    @param dom: already parsed index.meta
    @param cut: number of trailing path components to remove before
        loading the index.meta
    @return: docinfo with 'accessType' set to None (no access info), the
        access type, or for the types 'group' and 'institution' the
        lower-cased name of the group/institution
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                # use the name of the group instead of the generic type;
                # without a usable name the generic type is kept
                nameElem = dom.find(".//access-conditions/access/name")
                name = None
                if nameElem is not None and nameElem.text:
                    name = nameElem.text.strip().lower()
                if name:
                    access = name

    docinfo['accessType'] = access
    return docinfo
563
564
def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    @param path: path of the index.meta
    @param docinfo: dict to update (a new dict is used if None)
    @param dom: already parsed index.meta
    @param cut: number of trailing path components to remove before
        loading the index.meta (only used if dom is None)
    @return: docinfo with 'indexMetaPath', 'bib' and 'bib_type' set; if
        the standard mapping succeeds also 'std_bib', 'author', 'title'
        and 'year'
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # put all raw bib fields in dict "bib"
    bib = getBibdataFromDom(dom)
    docinfo['bib'] = bib
    bibtype = bib.get('@type', None)
    docinfo['bib_type'] = bibtype
    if bibtype:
        # also store standard mapped metadata for convenience
        # (best effort: a failing mapping is logged and otherwise ignored)
        try:
            stdbib = self.metadata.getStdMappedHash(bib)
            docinfo['std_bib'] = stdbib
            docinfo['author'] = stdbib['author']
            docinfo['title'] = stdbib['title']
            docinfo['year'] = stdbib['year']
        except Exception as e:
            logging.debug("documentViewer (getbibinfofromindexmeta) no standard bib mapping: %s"%(e))

    return docinfo
597
598
599	# TODO: is this needed?
def getNameFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """stores the text of the 'name' element of the index.meta in docinfo['name']

    The index.meta is given by dom or loaded from path shortened by
    `cut` components.
    """
    result = docinfo
    if result is None:
        result = {}

    if dom is None:
        for _ in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    nameElem = dom.find("name")
    result['name'] = getText(nameElem)
    logging.debug("documentViewer docinfo[name] %s"%result['name'])
    return result
613
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """fills docinfo from the texttool tag of the index.meta

    @param url: URL or path of the index.meta
    @param dom: already parsed index.meta (loaded from url if None)
    @param docinfo: dict to update (a new dict is used if None)
    @return: docinfo with image, text, bibliographical and access info
    @raise IOError: if no archive-path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        # default: no language set
        docinfo['lang'] = ''
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    resourceName = getText(dom.find("name"))
    if not resourceName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    resourcePath = getText(dom.find("archive-path"))
    if resourcePath:
        # clean up archive path
        if resourcePath[0] != '/':
            resourcePath = '/' + resourcePath
        if resourceName and (not resourcePath.endswith(resourceName)):
            resourcePath += "/" + resourceName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            resourcePath = url.replace('index.meta', '')

    if resourcePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = getText(dom.find(".//texttool/image"))
    if not imageDir:
        # no image tag: not an error anymore because text mode is standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""
    elif resourcePath:
        imageDir = os.path.join(resourcePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    oldTextUrl = getText(dom.find(".//texttool/text"))
    if oldTextUrl:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme: path relative to the archive path
            oldTextUrl = os.path.join(resourcePath, oldTextUrl)
            # fix URLs starting with /mpiwg/online
            if oldTextUrl.startswith("/mpiwg/online"):
                oldTextUrl = oldTextUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = getText(dom.find(".//texttool/text-url-path"))
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath
        # short form: everything before the first '.'
        docinfo['textURLPathkurz'] = textUrlPath.split(".")[0]

    presentationPath = getText(dom.find(".//texttool/presentation"))
    # get info from bib tag
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationPath:
        # overwrite the bib info with the presentation information.
        # The presentation URL is the index.meta URL with 'index.meta'
        # replaced by the relative path of the presentation info.
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get access info
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    return docinfo
713
714
def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    @param url: URL or path of the presentation info document
    @param docinfo: dict to update (a new dict is used if None)
    @param dom: not used; the presentation document is always loaded from url
    @return: docinfo with 'author', 'title' and 'year' taken from the
        author, title and date elements of the presentation document
    """
    if docinfo is None:
        docinfo = {}

    presentation = self.getPresentationInfoXML(url)
    docinfo['author'] = getText(presentation.find(".//author"))
    docinfo['title'] = getText(presentation.find(".//title"))
    docinfo['year'] = getText(presentation.find(".//date"))
    return docinfo
723
def getDocinfoFromImagePath(self, path, docinfo=None, cut=0):
    """fills docinfo for a directory of images

    path is the path to the images; the index.meta file is assumed to be
    one level higher.

    @param path: image path ('/mpiwg/online' is removed)
    @param docinfo: dict to update (a new dict is used if None)
    @param cut: number of trailing path components to remove to get the
        image directory
    @return: docinfo with image, bibliographical and access info
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    fullPath = path.replace("/mpiwg/online", "")
    docinfo['imagePath'] = fullPath
    docinfo = self.getDirinfoFromDigilib(fullPath, docinfo=docinfo, cut=cut)

    # directory path: fullPath shortened by `cut` components
    dirPath = fullPath
    for _ in range(cut):
        dirPath = getParentDir(dirPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+dirPath)
    docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + dirPath

    #TODO: use getDocinfoFromIndexMeta
    # the index.meta is expected one level above the image directory
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(fullPath, docinfo=docinfo, cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(fullPath, docinfo=docinfo, cut=metaCut)
    return docinfo
745
746
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo stored in the session is reused if it was created for the
    same mode and url; otherwise a new one is built and stored.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url or path of the document
    @raise ValueError: for an unknown mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo including the URL of this viewer
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()

    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        # folder with images, index.meta optional
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        # filename
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
780
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail group; by default the first
        page of the group that contains current
    @param rows: thumbnail rows (default self.thumbrows)
    @param cols: thumbnail columns (default self.thumbcols)
    @param docinfo: docinfo of the document (may be None)
    @param viewMode: stored in pageinfo
    @param tocMode: stored in pageinfo, selects the cached toc size
    @return: dict with paging information and the display/search
        parameters read from the request
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: number of the first page of the group of grpsize
    # pages that contains current (page numbers start at 1)
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] = self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    # requested toc page as number
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # with a cached toc size the toc page number is limited to the last
    # toc page; docinfo is optional, so it is checked for None first
    tocSizeKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1

        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = self.REQUEST.get('searchPN','1')
    pageinfo['sn'] = self.REQUEST.get('sn','')
    return pageinfo
836
837
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """changes the configuration of the document viewer

    Stores the given values and, if RESPONSE is given, redirects to
    'manage_main'. authgroups is a ','-delimited list of group names.
    """
    groups = [name.strip().lower() for name in authgroups.split(',')]

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
847
def manage_AddDocumentViewerForm(self):
    """renders the form for adding a document viewer"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
852
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """creates a new document viewer with the given id in this container

    Redirects to 'manage_main' if RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
860
861	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class for the document viewer (a ZopePageTemplate with its own meta type)"""
    meta_type = "DocumentViewer Template"
865
866
def manage_addDocumentViewerTemplateForm(self):
    """renders the form for adding a document viewer template"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
871
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the template
    @param text: not used
    @param REQUEST: if given, the browser is redirected to the
        management screen of the new template
    @param submit: not used
    @return: ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template, making sure the file is closed again
    templatePath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    with open(templatePath, 'r') as templateFile:
        txt = templateFile.read()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    # the redirect is only possible when called with a request
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
891
892
893

Note: See TracBrowser for help on using the repository browser.

Download in other formats: