Context Navigation

source: documentViewer/documentViewer.py @ 462:0d378e8ebcc3

elementtree

Last change on this file since 462:0d378e8ebcc3 was 462:0d378e8ebcc3, checked in by casties, 13 years ago
accommodate new MetaDataFolder?
File size: 34.4 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    method is accepted for compatibility with existing callers but is not
    evaluated.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialisation of node as produced by ElementTree.

    encoding is not handed on to the serialiser.
    """
    # The former 4Suite implementation printed the node into a cStringIO
    # stream with the given encoding and returned the stream content.
    return ET.tostring(node)
39
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    Returns a dict with the keys
      'ua'                the raw header value
      'isIE', 'isN4'      MSIE / "Mozilla/4." (only tested if not MSIE)
      'versIE', 'versFirefox', 'versSafariChrome', 'versOpera'
                          version strings, "" if not recognised
      'isMac', 'isWin', 'isIEWin', 'isIEMac'
      'staticHTML'        always False

    A request whose User-Agent value is None or empty yields the default
    values instead of raising. Each browser is recognised independently of
    the others.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }
    # parse an empty string when no header value is available
    agent = ua or ""

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # Rest of the header starting at the first '(' and at the first ')'.
    # str.find returns -1 when nothing is found; the slice then consists of
    # the last character only and the token lookups below fail cleanly.
    nav = agent[agent.find('('):]
    nav1 = agent[agent.find(')'):]

    # Safari or Chrome identification
    try:
        # skip the second parenthesised group, e.g. "(KHTML, like Gecko)"
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # header does not have the expected shape
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'
    try:
        product = nav1.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept, e.g. "3.6"
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in agent:
            tail = nav[nav.find(')'):]
            bt['versOpera'] = tail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
104
def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type or field name for mapping lookups.

    Surrounding whitespace is removed, the name is lower-cased and spaces
    become '-'. With underscore set, '_' becomes '-' as well.
    """
    normalized = bt.strip().lower().replace(' ', '-')
    return normalized.replace('_', '-') if underscore else normalized
112
def getBibdataFromDom(dom):
    """Return a dict with all elements of the meta/bib tag in dom.

    The normalised value of the bib tag's type attribute is stored under
    '@type'; the text of every sub-element is stored under its normalised
    tag name. Without a meta/bib tag the dict is empty.
    """
    bib = dom.find(".//meta/bib")
    if bib is None:
        return {}

    # put type in @type
    bibinfo = {'@type': normalizeBibField(bib.get('type'))}
    # put all subelements in dict
    for child in bib:
        bibinfo[normalizeBibField(child.tag)] = getText(child)

    return bibinfo
126
127
128	##
129	## documentViewer class
130	##
class documentViewer(Folder):
    """document viewer

    Folder subclass whose methods collect document and page information
    (docinfo, pageinfo) and render it with the page templates below.
    """
    meta_type="Document viewer"

    # security declarations for this class are registered on this object
    security=ClassSecurityInfo()
    # management tabs of Folder plus one tab for the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
160
161
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores the settings and creates the sub-folder 'template' with the
    helper objects 'fulltextclient' (MpdlXmlTextServer) and 'zogilib'
    (zogiLib). A helper that cannot be created is logged and skipped.

    @param id: id of the viewer object
    @param imageScalerUrl: handed to zogiLib as dlServerURL
    @param textServerName: handed to MpdlXmlTextServer as serverName
    @param title: title of the viewer object
    @param digilibBaseUrl: base URL of the digilib server (may be None)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: authorized groups, delimited by ','
    """
    self.id=id
    self.title=title
    # Store the URL so that methods reading self.digilibBaseUrl always find
    # the attribute; while it is empty they fall back to findDigilibUrl().
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        # sys.exc_info() gives the exception without version-specific syntax
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
189
190
191	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward the call to template.fulltextclient.getTextPage and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
195
def getOrigPages(self, **args):
    """Forward the call to template.fulltextclient.getOrigPages and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
199
def getOrigPagesNorm(self, **args):
    """Forward the call to template.fulltextclient.getOrigPagesNorm and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
203
def getQuery(self, **args):
    """Forward the call to template.fulltextclient.getQuery (query in search) and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
207
def getSearch(self, **args):
    """Forward the call to template.fulltextclient.getSearch and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
211
def getGisPlaces(self, **args):
    """Forward the call to template.fulltextclient.getGisPlaces and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
215
def getAllGisPlaces(self, **args):
    """Forward the call to template.fulltextclient.getAllGisPlaces and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
219
def getTranslate(self, **args):
    """Forward the call to template.fulltextclient.getTranslate and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
223
def getLemma(self, **args):
    """Forward the call to template.fulltextclient.getLemma and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
227
def getLemmaQuery(self, **args):
    """Forward the call to template.fulltextclient.getLemmaQuery and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
231
def getLex(self, **args):
    """Forward the call to template.fulltextclient.getLex and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
235
def getToc(self, **args):
    """Forward the call to template.fulltextclient.getToc and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
239
def getTocPage(self, **args):
    """Forward the call to template.fulltextclient.getTocPage and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
243
244
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group, handed to getPageinfo
    @param pn: current page number, handed to getPageinfo

    Returns the result of the template 'thumbs_main_rss' of the template folder.
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute may never have been set on this
    # instance (index_html guards the access in the same way)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if "textURL" in docinfo or docinfo.get('textURLPath',None): # text URL set and text viewer configured
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
277
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    view it
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group, handed to getPageinfo
    @param pn: current page number
    @param mk: mark or list of marks, handed to the template via generateMarks

    Returns the result of the template 'viewer_main' of the template folder.
    '''

    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        digilibUrl = self.findDigilibUrl()
        if not digilibUrl:
            digilibUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = digilibUrl

    docinfo = self.getDocinfo(mode=mode,url=url)

    # any toc mode other than thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # auto viewMode: text_dict if the document has text, else images
    if viewMode=="auto":
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    # the full text page is only fetched for text views of documents with a text path
    if viewMode != 'images' and docinfo.get('textURLPath', None):
        pageinfo['textPage'] = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)

    # run the template /template/viewer_main with the collected parameters
    viewerTemplate = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return viewerTemplate(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=marks)
325
def generateMarks(self,mk):
    # Returns the concatenation of "mk=<value>" for the given mark or list
    # of marks and "" for None.
    # NOTE(review): documented with comments only -- the method has no
    # docstring in the original and adding one presumably makes it
    # publishable through the web in Zope 2; confirm before adding one.
    # NOTE(review): several marks are joined without a separator
    # ("mk=amk=b"); verify against the templates using the value.
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
335
336
def getBrowser(self):
    """Return the browser information dict computed by browserCheck for this request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
342
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
347
def getDocumentViewerURL(self):
    """Return the URL of this instance as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
351
def getStyle(self, idx, selected, style=""):
    """Return style with 'sel' appended if idx equals selected, else style unchanged."""
    return style + 'sel' if idx == selected else style
359
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    The query string starts from the parameters of the current request
    (self.REQUEST.form). A value of None removes the parameter.

    @param param: name of a single parameter to change
    @param val: new value for param
    @param params: dict of further parameters to change
    @param baseUrl: URL in front of the query string, default REQUEST['URL1']
    @param paramSep: separator between the parameters of the query string

    Values that are not strings are converted with str(); a list or tuple
    value produces one name=value pair per item.
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # if the first call used mode filepath, change it to imagepath now
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    stringTypes = (type(''), type(u''))
    pairs = []
    for k, v in urlParams.items():
        if isinstance(v, (list, tuple)):
            # repeated request parameter: one pair per value
            values = v
        else:
            values = [v]
        for item in values:
            if not isinstance(item, stringTypes):
                # quote_plus only accepts strings
                item = str(item)
            pairs.append("%s=%s"%(k, urllib.quote_plus(item, '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    url = "%s?%s"%(baseUrl, ps)
    return url
397
398
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """link to documentviewer with parameter param set to val

    Same as getLink, but the parameters of the query string are separated
    by the HTML entity for the ampersand instead of a plain '&', so that
    the result can be put into HTML markup as it is.
    """
    return self.getLink(param, val, params, baseUrl, '&amp;')
402
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url

    Returns the result of the template 'info_xml' of the template folder
    called with the docinfo for mode and url.
    """
    # getattr with default: the attribute may never have been set on this
    # instance (index_html guards the access in the same way)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
412
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of the option optionName kept in the session.

    The first call stores initialState. Afterwards the state is inverted
    whenever newState differs from the newState of the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
427
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted. No access type, or one contained
    in self.authgroups, is granted to every user whose name is not
    "Anonymous User". Any other access type is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.error("documentViewer (accessOK) unknown access type %s"%access)
    return False
447
448
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages of the digilib directory path in docinfo.

    The directory information is requested from dirInfo-xml.jsp of the
    digilib server; path is shortened by cut components before.
    Returns docinfo with 'numPages' set (0 if no size is reported).
    Raises IOError if the server delivers no data.
    """
    if docinfo is None:
        docinfo = {}

    for _ in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
479
def getIndexMetaPath(self,url):
    """Return only the path below /mpiwg/online that url refers to.

    url has to contain the component 'experimental/' or 'permanent/'; the
    result is "/mpiwg/online/" followed by that component and everything
    after it. Any other url yields "".
    """
    # '.*' in front because match() is anchored at the start of url
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
488
489
490
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned as it is. Anything else is
    taken as online path and turned into a request to the Texter servlet
    of the digilib server, with "/index.meta" appended if missing.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = server+url.replace("/mpiwg/online","")
    if metaUrl.endswith("index.meta"):
        return metaUrl
    return metaUrl + "/index.meta"
506
def getDomFromIndexMeta(self, url):
    """Return the parsed index.meta document for url.

    Raises IOError if the document cannot be read.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return ET.fromstring(txt)
520
def getPresentationInfoXML(self, url):
    """Return the parsed info.xml document at url.

    A url starting with "http://" is used as it is, anything else is read
    as online path through the Texter servlet of the digilib server.
    Raises IOError if the document cannot be read.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return ET.fromstring(txt)
540
541
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    Stores the access type under docinfo['accessType'] and returns docinfo:
    the type attribute of access-conditions/access, or None without it.
    For the types 'group' and 'institution' the lower-cased content of the
    name element replaces the type. If that element is missing or empty the
    type itself is kept instead of raising.

    @param path: path of the index.meta file, only used without dom
    @param docinfo: dict to extend, a new dict is created if None
    @param dom: parsed index.meta document
    @param cut: number of components to remove from path before loading
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                name = dom.find(".//access-conditions/access/name")
                if name is not None and name.text:
                    access = name.text.lower()
                else:
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
567
568
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    Sets docinfo['indexMetaPath'] and, with the help of the MetaDataFolder
    'metadata', docinfo['bib'], 'bib_type', 'creator', 'title' and 'date'.
    Without a MetaDataFolder an error is logged and only 'indexMetaPath'
    is set.

    @param path: path of the index.meta file
    @param docinfo: dict to extend, a new dict is created if None
    @param dom: parsed index.meta document, loaded from path if None
    @param cut: number of components to remove from path before loading
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    # try to get MetaDataFolder; the default keeps a missing folder from
    # raising, so that the error branch below is reached
    metadata = getattr(self, 'metadata', None)
    if metadata is not None:
        # put all raw bib fields in dict "bib"
        bib = metadata.getBibdataFromDom(dom)
        docinfo['bib'] = bib
        bibtype = bib.get('@type', None)
        docinfo['bib_type'] = bibtype
        # also store DC metadata for convenience
        dc = metadata.getDCMappedData(bib)
        docinfo['creator'] = dc.get('creator',None)
        docinfo['title'] = dc.get('title',None)
        docinfo['date'] = dc.get('date',None)
    else:
        logging.error("MetaDataFolder 'metadata' not found!")

    return docinfo
602
603
604	# TODO: is this needed?
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the text of the name element of an index.meta file in docinfo['name'].

    The document is given by dom or loaded from path after removing cut
    components from it. Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        metaPath = path
        for _ in range(cut):
            metaPath = getParentDir(metaPath)
        dom = self.getDomFromIndexMeta(metaPath)

    name = getText(dom.find("name"))
    docinfo['name'] = name
    logging.debug("documentViewer docinfo[name] %s"%name)
    return docinfo
618
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    Fills docinfo from the index.meta document at url (or given by dom):
    image path and URL, number of pages, viewer URL, text URLs and,
    through the other get...FromIndexMeta methods, bibliographical data,
    name and access type.

    @param url: url or online path of the index.meta document
    @param dom: parsed index.meta document, loaded from url if None
    @param docinfo: dict to extend, a new dict is created if None

    Returns docinfo. Raises IOError if no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveName = getText(dom.find("name"))
    if not archiveName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePath = getText(dom.find("archive-path"))
    if archivePath:
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = getText(dom.find(".//texttool/image"))

    if not imageDir:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        # image directory is relative to the archive path; digilib paths
        # do not contain the /mpiwg/online prefix
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrl = getText(dom.find(".//texttool/text"))
    if textUrl:
        if urlparse.urlparse(textUrl)[0] == "": # no scheme, i.e. not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrl = getText(dom.find(".//texttool/text-url-path"))
    if textUrl:
        docinfo['textURLPath'] = textUrl
        # short form: everything in front of the first '.'
        textUrlkurz = string.split(textUrl, ".")[0]
        docinfo['textURLPathkurz'] = textUrlkurz
        #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)


    presentationUrl = getText(dom.find(".//texttool/presentation"))
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


    if presentationUrl: # override these with the presentation information
        # the presentation URL is obtained by replacing index.meta in the
        # URL of the metadata with the relative path of the presentation info
        presentationPath = presentationUrl
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
718
719
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    Reads the presentation document at url and stores the text of its
    author, title and date elements under docinfo['author'], 'title' and
    'year'.

    @param url: url or online path of the presentation info document
    @param docinfo: dict to extend, a new dict is created if None
    @param dom: not used; the document is always loaded from url
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    docinfo['author']=getText(dom.find(".//author"))
    docinfo['title']=getText(dom.find(".//title"))
    docinfo['year']=getText(dom.find(".//date"))
    return docinfo
728
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for the image directory path.

    path is the path to the images; the index.meta file is assumed to be
    one level higher. cut components are removed from path for the digilib
    directory info and the image URL.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath, docinfo=docinfo, cut=cut)

    scalerPath = imagePath
    for _ in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    #TODO: use getDocinfoFromIndexMeta
    # one more component is cut because index.meta lies one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    return docinfo
750
751
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, built according to mode.

    A docinfo kept in the session is reused if it was built for the same
    mode and url. A newly built docinfo is stored in the session.
    Raises ValueError for a mode other than 'texttool', 'imagepath' or
    'filepath'.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        docinfo = session['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo including the url of this viewer
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()

    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        # folder with images, index.meta optional
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        # filename
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
785
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail group, default is the first
        page of the group that contains current
    @param rows: thumbnail rows, default self.thumbrows
    @param cols: thumbnail columns, default self.thumbcols
    @param docinfo: docinfo of the document (may be None)
    @param viewMode: stored as pageinfo['viewMode']
    @param tocMode: stored as pageinfo['tocMode']

    Query and table-of-contents settings are read from self.REQUEST.
    """
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: number of the group containing current (rounded up)
    # times the group size, minus (group size - 1)
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    # the requested toc page number is also stored as 'textPages'
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # limit the toc page number to the number of toc pages if the size of
    # the table of contents is known (docinfo may be None)
    tocSizeKey = 'tocSize_%s'%tocMode
    if docinfo is not None and tocSizeKey in docinfo:
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # number of toc pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1

        pageinfo['tocPN'] = min(tocPages,toc)

    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
841
842
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    authgroups is a ','-delimited list of group names. With a RESPONSE the
    browser is redirected to 'manage_main' afterwards.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
852
def manage_AddDocumentViewerForm(self):
    """Return the rendered form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
857
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id in self.

    With a RESPONSE the browser is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
865
866	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer

    ZopePageTemplate subclass with its own meta type; instances are created
    by manage_addDocumentViewerTemplate.
    """
    meta_type="DocumentViewer Template"
870
871
def manage_addDocumentViewerTemplateForm(self):
    """Return the rendered form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
876
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id in self and fills it
    with the content of zpt/viewer_main.zpt of this package. text and
    submit are not evaluated.

    With a REQUEST the browser is redirected to the manage_main page of the
    new template. Without one (call from code) no redirect takes place.
    Returns '' in both cases.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
896
897
898

Note: See TracBrowser for help on using the repository browser.

Download in other formats: