Context Navigation

source: documentViewer/documentViewer.py @ 464:19bd41d95f62

elementtree

Last change on this file since 464:19bd41d95f62 was 464:19bd41d95f62, checked in by casties, 13 years ago
first version with new getdocinfo
File size: 41.4 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log txt followed by txt2 at INFO level.

    The method argument is accepted for compatibility but is not used.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as an XML string.

    The encoding argument is not passed on to ElementTree; it is a
    leftover of the former 4Suite (Ft.Xml.Domlette.Print) implementation.
    """
    return ET.tostring(node)
39
def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    Returns a dict with these keys:
      'ua'                -- the raw header value (may be None)
      'isIE', 'isN4'      -- MSIE resp. Netscape 4 ('Mozilla/4.' without MSIE)
      'isMac', 'isWin'    -- platform flags
      'isIEWin', 'isIEMac'
      'versIE', 'versFirefox', 'versSafariChrome', 'versOpera'
                          -- version strings, "" when not detected
      'staticHTML'        -- always False

    A request without User-Agent header is treated like an empty one
    instead of raising. Each browser is detected independently, so a
    failure to parse one pattern no longer influences the others.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header must not break the string operations below
    ua = rawUa or ""

    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = 'MSIE' in ua
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in ua)
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # str.find() returns -1 when the parenthesis is missing; the slice then
    # starts at the last character, exactly as in the original parsing
    afterOpen = ua[ua.find('('):]
    afterClose = ua[ua.find(')'):]

    # Safari or Chrome: look at the tokens after the second (...) group
    try:
        secondGroup = afterClose[afterClose.find('('):]
        tail = secondGroup[secondGroup.find(')'):].split(" ")
        if "Safari" in tail[2]:
            bt['versSafariChrome'] = tail[1].split("/")[1]
    except IndexError:
        pass

    # IE: second "; "-separated entry inside the first (...) group
    try:
        detail = afterOpen.split("; ")[1]
        if "MSIE" in detail:
            bt['versIE'] = detail.split(" ")[1]
    except IndexError:
        pass

    # Firefox: third blank-separated token after the first ')'
    try:
        product = afterClose.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera
    try:
        if "Opera" in ua:
            operaTail = afterOpen[afterOpen.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed.

    Trailing slashes of path are ignored.
    """
    segments = path.rstrip('/').split('/')
    remaining = segments[:-cnt]
    return '/'.join(remaining)
107
108
109	##
110	## documentViewer class
111	##
112	class documentViewer(Folder):
113	"""document viewer"""
114	meta_type="Document viewer"
115
116	security=ClassSecurityInfo()
117	manage_options=Folder.manage_options+(
118	{'label':'main config','action':'changeDocumentViewerForm'},
119	)
120
121	metadataService = None
122	"""MetaDataFolder instance"""
123
124	# templates and forms
125	viewer_main = PageTemplateFile('zpt/viewer_main', globals())
126	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
127	toc_text = PageTemplateFile('zpt/toc_text', globals())
128	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
129	page_main_images = PageTemplateFile('zpt/page_main_images', globals())
130	page_main_double = PageTemplateFile('zpt/page_main_double', globals())
131	page_main_text = PageTemplateFile('zpt/page_main_text', globals())
132	page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
133	page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
134	page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
135	page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
136	head_main = PageTemplateFile('zpt/head_main', globals())
137	docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
138	info_xml = PageTemplateFile('zpt/info_xml', globals())
139
140
141	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
142
143
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    @param id: Zope id of this object
    @param imageScalerUrl: URL passed to zogiLib as dlServerURL
    @param textServerName: server name passed to MpdlXmlTextServer
    @param title: Zope title
    @param digilibBaseUrl: base URL of digilib (may be None; it is then
        looked up when the viewer is first used)
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups

    Failures to create the helper objects are logged, not raised.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # keep the configured digilib URL; it used to be silently dropped
    self.digilibBaseUrl = digilibBaseUrl
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
178
179
180	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward all keyword arguments to fulltextclient.getTextPage and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getTextPage(**args)
184
def getOrigPages(self, **args):
    """Forward all keyword arguments to fulltextclient.getOrigPages and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getOrigPages(**args)
188
def getOrigPagesNorm(self, **args):
    """Forward all keyword arguments to fulltextclient.getOrigPagesNorm and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getOrigPagesNorm(**args)
192
def getQuery(self, **args):
    """Forward all keyword arguments to fulltextclient.getQuery and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getQuery(**args)
196
def getSearch(self, **args):
    """Forward all keyword arguments to fulltextclient.getSearch and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getSearch(**args)
200
def getGisPlaces(self, **args):
    """Forward all keyword arguments to fulltextclient.getGisPlaces and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getGisPlaces(**args)
204
def getAllGisPlaces(self, **args):
    """Forward all keyword arguments to fulltextclient.getAllGisPlaces and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getAllGisPlaces(**args)
208
def getTranslate(self, **args):
    """Forward all keyword arguments to fulltextclient.getTranslate and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getTranslate(**args)
212
def getLemma(self, **args):
    """Forward all keyword arguments to fulltextclient.getLemma and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLemma(**args)
216
def getLemmaQuery(self, **args):
    """Forward all keyword arguments to fulltextclient.getLemmaQuery and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLemmaQuery(**args)
220
def getLex(self, **args):
    """Forward all keyword arguments to fulltextclient.getLex and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getLex(**args)
224
def getToc(self, **args):
    """Forward all keyword arguments to fulltextclient.getToc and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getToc(**args)
228
def getTocPage(self, **args):
    """Forward all keyword arguments to fulltextclient.getTocPage and return its result."""
    textClient = self.template.fulltextclient
    return textClient.getTocPage(**args)
232
233
234	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail overview as RSS using the template 'thumbs_main_rss'.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text, images or auto)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may not exist at all, so never access it directly
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: use text when a text URL is set, images otherwise
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
266
267	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    """
    Render the main viewer page using the template 'viewer_main'.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number
    @param mk: mark or list of marks (passed through generateMarks)
    @return: the rendered page, or an error string if the template folder is missing
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # any tocMode other than thumbs needs the table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # auto viewMode: text_dict if the document has text, else images
    if viewMode == "auto":
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

    # fetch the full text page for all text view modes
    if viewMode != 'images' and docinfo.get('textURLPath', None):
        pageinfo['textPage'] = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)

    # get template /template/viewer_main and execute it with parameters
    template = getattr(self.template, 'viewer_main')
    return template(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
312
def generateMarks(self,mk):
    """Return a query-string fragment with one 'mk=...' entry per mark.

    @param mk: None, a single mark, or a list of marks
    @return: "" for None, otherwise the entries joined by '&'
        (previously several marks were concatenated without a separator)
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
322
323
def getBrowser(self):
    """Return the browser description dict computed by browserCheck for the current request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
329
def findDigilibUrl(self):
    """Return the digilib base URL as reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
334
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
338
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
346
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer built from the current request parameters.

    @param param: name of a single parameter to change
    @param val: new value for param; None removes the parameter
    @param params: dict of further parameters to change; a value of None
        removes the parameter
    @param baseUrl: URL to prepend; defaults to REQUEST['URL1']
    @param paramSep: separator between the parameters of the query string
    @return: baseUrl + '?' + query string (values quoted, '/' kept)
    """
    # work on a copy of the existing request params
    urlParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                # stringify like the single-param case, quote_plus needs strings
                urlParams[k] = str(v)

    # FIXME: does this belong here?
    # a first call with mode=filepath is turned into imagepath for the links
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentPath(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
384
385
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Like getLink, but with the parameters separated by the HTML-escaped '&amp;'.

    For use where the link is inserted into markup without further escaping.
    """
    return self.getLink(param, val, params, baseUrl, '&amp;')
389
def getInfo_xml(self,url,mode):
    """Return info about the document as XML, rendered by the template 'info_xml'.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # the attribute may not exist at all, so never access it directly
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
399
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of a toggle option kept in the session.

    The option is stored under optionName as a dict with the keys
    'lastState' and 'state'. On first use the state is initialState;
    afterwards it flips whenever newState differs from the stored
    'lastState'.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
414
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type or one
    listed in self.authgroups is granted to every user except
    "Anonymous User". Any other access type is logged and denied.
    """
    accessType = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%accessType)

    if accessType == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if accessType is not None and accessType not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%accessType)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
434
435
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages reported by digilib's dirInfo in docinfo['numPages'].

    @param path: image directory path handed to digilib
    @param docinfo: dict to update (a new one is created if None)
    @param cut: number of trailing path segments to strip from path first
    @return: docinfo
    @raise IOError: if the dirInfo request returns no data
    """
    if docinfo is None:
        docinfo = {}

    for _ in range(cut):
        path = getParentPath(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    # an empty size counts as zero pages
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
466
def getIndexMetaPath(self,url):
    """Return the online path ("/mpiwg/online/...") for url.

    The path is built from the "experimental" or "permanent" segment of
    url and everything that follows it.

    @return: the path, or "" if url contains neither segment
    """
    # the former pattern escaped the alternation and lacked the wildcards,
    # so it could never match a real path
    regpath = re.match(r".*(experimental|permanent)/(.*)", url)
    if regpath is None:
        return ""
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
    return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
475
476
477
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is returned unchanged. Anything else is
    treated as an online path and turned into a request to digilib's
    Texter servlet, with "/index.meta" appended if it is missing.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    texter = self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl = texter+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"
    return metaUrl
493
def getDomFromIndexMeta(self, url):
    """Fetch the index.meta document for url and return it parsed as an ElementTree element.

    @raise IOError: if the document cannot be read
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return ET.fromstring(txt)
507
def getPresentationInfoXML(self, url):
    """Fetch the info.xml document at url and return it parsed as an ElementTree element.

    A url starting with "http://" is fetched as is; anything else is
    treated as an online path and fetched through digilib's Texter servlet.

    @raise IOError: if the document cannot be read
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        texter = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = texter+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return ET.fromstring(txt)
527
528
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the authorization info from index.meta into docinfo['accessType'].

    @param path: path of the document (used to load index.meta if dom is None)
    @param docinfo: dict to update (a new one is created if None)
    @param dom: already parsed index.meta; loaded from path if None
    @param cut: number of trailing path segments to strip before loading
    @return: docinfo; 'accessType' is the type attribute of the first
        access element, the lower-cased name for the types 'group' and
        'institution', or None if there is no access information
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentPath(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                nameNode = dom.find(".//access-conditions/access/name")
                # without a usable name the plain type is kept instead of failing
                if nameNode is not None and nameNode.text:
                    access = nameNode.text.lower()

    docinfo['accessType'] = access
    return docinfo
554
555
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the bibliographical info from index.meta into docinfo.

    @param path: path of the document (used to load index.meta if dom is None)
    @param docinfo: dict to update (a new one is created if None)
    @param dom: already parsed index.meta; loaded from path if None
    @param cut: number of trailing path segments to strip before loading
    @return: docinfo with 'indexMetaPath' and, if a metadata service is
        available, 'bib', 'bib_type', 'creator', 'title' and 'date'
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            # was a call to the undefined name getParentDir
            path = getParentPath(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    if self.metadataService is not None:
        # put all raw bib fields in dict "bib"
        bib = self.metadataService.getBibData(dom=dom)
        docinfo['bib'] = bib
        docinfo['bib_type'] = bib.get('@type', None)
        # also store DC metadata for convenience
        dc = self.metadataService.getDCMappedData(bib)
        docinfo['creator'] = dc.get('creator',None)
        docinfo['title'] = dc.get('title',None)
        docinfo['date'] = dc.get('date',None)
    else:
        logging.error("MetadataService not found!")
    return docinfo
585
586
587	# TODO: is this needed?
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Read the text of the name element of index.meta into docinfo['name'].

    @param path: path of the document (used to load index.meta if dom is None)
    @param docinfo: dict to update (a new one is created if None)
    @param dom: already parsed index.meta; loaded from path if None
    @param cut: number of trailing path segments to strip before loading
    @return: docinfo
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _ in range(cut):
            path = getParentPath(path)
        dom = self.getDomFromIndexMeta(path)

    resourceName = getText(dom.find("name"))
    docinfo['name'] = resourceName
    logging.debug("documentViewer docinfo[name] %s"%resourceName)
    return docinfo
601
602
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Fill docinfo from the texttool section of the index.meta at url.

    @param url: path or URL of the document or its index.meta
    @param dom: already parsed index.meta; loaded from url if None
    @param docinfo: dict to update (a new one is created if None)
    @return: docinfo with image, text, bibliographic and access information
    @raise IOError: if no archive path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    # NOTE(review): this uses self.metadata while other methods use
    # self.metadataService -- confirm both refer to the same object
    texttool = self.metadata.getTexttoolData(dom=dom)

    archivePath = None
    archiveName = None

    archiveName = getText(dom.find("name"))
    if not archiveName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePath = getText(dom.find("archive-path"))
    if archivePath:
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = texttool.get('image', None)

    if not imageDir:
        # we balk with no image tag / not necessary anymore because textmode is now standard
        #raise IOError("No text-tool info in %s"%(url))
        imageDir = ""
        #xquery="//pb"
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        #print "image: ", imageDir, " archivepath: ", archivePath
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        # ask digilib for the number of pages in the image directory
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = texttool.get('digiliburlprefix', None)
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl:
        if urlparse.urlparse(textUrl)[0] == "": # no scheme, so not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrl = texttool.get('text-url-path', None)
    if textUrl:
        docinfo['textURLPath'] = textUrl
        # short form: everything before the first "."
        textUrlkurz = string.split(textUrl, ".")[0]
        docinfo['textURLPathkurz'] = textUrlkurz
        #if not docinfo['imagePath']:
            # text-only, no page images
            #docinfo = self.getNumTextPages(docinfo)

    # get bib info
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    # TODO: what to do with presentation?
    presentationUrl = texttool.get('presentation', None)
    if presentationUrl: # override the bib info with the presentation information
        # the presentation URL is obtained by replacing index.meta in the
        # metadata URL with the relative path to the presentation info
        presentationPath = presentationUrl
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get authorization
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
705
706
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Read the bibliographical information from the presentation entry in texttool.

    @param url: URL or online path of the presentation info.xml
    @param docinfo: dict to update (a new one is created if None)
    @param dom: ignored; the presentation document is always loaded from url
    @return: docinfo with 'author', 'title' and 'year' set
    """
    if docinfo is None:
        # the default used to fail on the item assignments below
        docinfo = {}
    presentationDom = self.getPresentationInfoXML(url)
    docinfo['author'] = getText(presentationDom.find(".//author"))
    docinfo['title'] = getText(presentationDom.find(".//title"))
    docinfo['year'] = getText(presentationDom.find(".//date"))
    return docinfo
715
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for a directory of images.

    path is the path to the images; the index.meta file is assumed to be
    one level higher.

    @param path: image path (a "/mpiwg/online" part is removed)
    @param docinfo: dict to update (a new one is created if None)
    @param cut: number of trailing path segments to strip for the image URL
    @return: docinfo with image, bibliographic and access information
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    # the shortened path is only used for the image URL
    shortPath = path
    for _ in range(cut):
        shortPath = getParentPath(shortPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+shortPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+shortPath

    #TODO: use getDocinfoFromIndexMeta
    # index.meta is one level above the images, hence cut+1
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    return docinfo
737
738
def OLDgetDocinfo(self, mode, url):
    """Return the docinfo dict for url, depending on mode (old implementation).

    A docinfo cached in the session is reused if its mode and url match;
    otherwise a new one is built and stored in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url which contains display information
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))

    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including the URL of this viewer
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()

    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        # folder with images, index.meta optional
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        # filename
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
772
773
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, depending on mode.

    A docinfo cached in the session is reused if its mode and url match;
    otherwise a new one is built from index.meta (via the metadata
    service) and digilib, and stored in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url which contains display information
    @raise IOError: if mode is 'texttool' and no index.meta is found
    @raise ValueError: for any other mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None:
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

    # image path
    if mode != 'texttool':
        # override image path from texttool
        docinfo['imagePath'] = url

    # number of images from digilib
    if docinfo.get('imagePath', None):
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
        docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
853
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName' and 'documentPath'. The path is the archive-path
    of the resource (made absolute and ending in the document name); if
    there is no archive-path, the document URL from docinfo is used unless
    it is an http URL.

    @param docinfo: dict to update
    @param resource: dict with the keys 'name' and 'archive-path'
    @return: docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl',
        # the former spelling 'documentURL' is still accepted
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    docinfo['documentPath'] = docPath
    return docinfo
875
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Depending on the entries of texttool and on docinfo['documentPath']
    this sets 'imagePath', 'textURL', 'textURLPath' and 'presentationPath'.

    @param docinfo: dict to update
    @param texttool: dict with the texttool entries
    @return: docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no scheme, so it is a path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)
        # fix URLs starting with /mpiwg/online
        docinfo['textURL'] = oldTextUrl.replace('/mpiwg/online', '', 1)

    # new style text-url-path
    newTextUrl = texttool.get('text-url-path', None)
    if newTextUrl:
        docinfo['textURLPath'] = newTextUrl

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        docinfo['presentationPath'] = os.path.join(docPath, presentation)

    return docinfo
911
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw fields as 'bib', the '@type' entry as 'bibType' and
    the DC-mapped 'creator', 'title' and 'date' for convenience.

    @return: docinfo
    """
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, None)
    return docinfo
924
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    acc is expected to be a mapping with acc['@attr']['type'] and, for the
    types 'group' and 'institution', acc['name']. For those two types the
    lower-cased name is stored as docinfo['accessType'], otherwise the type
    itself. Missing or malformed access data leaves docinfo unchanged.
    Returns the (modified) docinfo.
    """
    #TODO: also read resource type
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError):
        # best effort: incomplete access data must not break docinfo creation
        pass

    return docinfo
941
def getDocinfoFromDigilib(self, docinfo, path):
    """reads the number of pages for path from digilib into docinfo

    Requests dirInfo-xml.jsp below self.digilibBaseUrl for the directory
    path and stores the content of the "size" element as
    docinfo['numPages'] (0 when it is empty). When the request returns no
    data an error is logged and docinfo is returned unchanged.
    """
    infoUrl = ''.join((self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path))
    # fetch data
    response = getHttpData(infoUrl)
    if response:
        root = ET.fromstring(response)
        size = getText(root.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s", size)
        numPages = 0
        if size:
            numPages = int(size)
        docinfo['numPages'] = numPages
        # TODO: produce and keep list of image names and numbers
    else:
        logging.error("Unable to get dir-info from %s", infoUrl)

    return docinfo
960
961
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a dict with the current page, the thumbnail grid and the
    search/TOC settings read from self.REQUEST.

    current    -- current page number (converted with getInt)
    start      -- first page of the thumbnail group (see default below)
    rows, cols -- thumbnail grid size, falling back to self.thumbrows and
                  self.thumbcols
    docinfo    -- optional dict; 'numPages' and 'tocSize_<tocMode>' are
                  used when present
    viewMode, tocMode -- stored unchanged in the result

    Raises ValueError when the request parameter tocPN is not a number.
    """
    request = self.REQUEST
    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default for start: first page of the group of grpsize pages that
    # contains current, e.g. current=11, grpsize=10 -> 11
    # (presumably getInt uses the default when start is not a number - confirm)
    groupStart = math.ceil(float(current) / float(grpsize)) * grpsize - (grpsize - 1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of thumbnail groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    #pageinfo['optionToggle'] = request.get('optionToggle','1')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    # requested TOC page as a number; also stored under 'textPages'
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # when the TOC size for this tocMode is known, limit tocPN to the
    # last TOC page; docinfo is optional, so guard against None here too
    tocKey = 'tocSize_%s' % tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # number of TOC pages, rounded up
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1

        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
1017
1018
# require the 'View management screens' permission for changeDocumentViewerForm
# (presumably security is the class's ClassSecurityInfo - confirm)
security.declareProtected('View management screens','changeDocumentViewerForm')
# page template zpt/changeDocumentViewer, published as changeDocumentViewerForm
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1021
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """init document viewer

    Stores the given settings on the viewer. authgroups is a
    comma-separated string that is kept as a list of stripped, lower-cased
    names. The attribute 'metadata' is looked up and kept as
    metadataService; a failing lookup is only logged. Redirects to
    manage_main when RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
1037
def manage_AddDocumentViewerForm(self):
    """add the viewer form

    Renders the page template zpt/addDocumentViewer in the context of self
    and returns the result.
    """
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = formTemplate.__of__(self)
    return boundTemplate()
1042
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer

    Creates a documentViewer with the given id and settings and stores it
    in self under that id. Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
1050
1051	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer

    ZopePageTemplate subclass that differs only in its meta_type.
    """
    # type name of this class (presumably the name shown in Zope's
    # management interface - confirm)
    meta_type="DocumentViewer Template"
1055
1056
def manage_addDocumentViewerTemplateForm(self):
    """Form for adding

    Renders the page template zpt/addDocumentViewerTemplate in the context
    of self and returns the result.
    """
    formTemplate = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    boundTemplate = formTemplate.__of__(self)
    return boundTemplate()
1061
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id in self and fills it
    with the content of zpt/viewer_main.zpt from this package. The title is
    set when given. The parameters text and submit are accepted but not used.
    When REQUEST is given, redirects to the manage_main page of the new
    template; without REQUEST (call from code) no redirect takes place.
    Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templatePath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')

    return ''
1081
1082
1083

Note: See TracBrowser for help on using the repository browser.

Download in other formats: