Context Navigation

source: documentViewer/documentViewer.py @ 561:9255acc4518d

Last change on this file since 561:9255acc4518d was 561:9255acc4518d, checked in by casties, 12 years ago
CLOSED - # 256: display texts from different backends (sandbox) https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/ticket/256
File size: 41.6 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node to its XML string form.

    The encoding argument is accepted but not passed on; the node is
    serialized with ElementTree's default settings.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a metadata node.

    Dict nodes yield their '@text' value (None if the key is missing);
    any other value is handed back unchanged.
    """
    return node.get('@text', None) if isinstance(node, dict) else node
42
def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    Returns a dict with the raw header ('ua'), the boolean flags 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML', and the
    version strings 'versIE', 'versFirefox', 'versSafariChrome' and
    'versOpera', which stay empty when the browser is not recognized.

    A request without a User-Agent header yields all-False flags and empty
    versions instead of raising. Each browser is detected independently of
    the others, so a failed Safari parse no longer disables the Firefox check.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # work on a string even if the client sent no User-Agent header
    agent = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in agent:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in agent

    # header text starting at the first '(' and at the first ')'
    # (when find() returns -1 only the last character is left, as before)
    fromParen = agent[agent.find('('):]
    afterParen = agent[agent.find(')'):]

    # Safari or Chrome identification: ") Version/x Safari/y" follows the
    # second bracket group
    try:
        secondGroup = afterParen[afterParen.find('('):]
        tail = secondGroup[secondGroup.find(')'):].split(" ")
        if 'Safari' in tail[2]:
            bt['versSafariChrome'] = tail[1].split("/")[1]
    except IndexError:
        # header does not have the expected shape -> not Safari/Chrome
        pass

    # IE identification: second "; "-separated token inside the brackets
    try:
        token = fromParen.split("; ")[1]
        if 'MSIE' in token:
            bt['versIE'] = token.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third space-separated token after the brackets
    try:
        product = afterParen.split(" ")[2]
        if 'Firefox' in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept (e.g. "10.")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if 'Opera' in agent:
            operaTail = fromParen[fromParen.find(')'):]
            bt['versOpera'] = operaTail.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed first. A cnt of zero (or less) returns the
    path itself, because a slice ending at -0 would otherwise drop every
    segment.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
110
111	##
112	## documentViewer class
113	##
114	class documentViewer(Folder):
115	"""document viewer"""
# Zope meta_type string of this class
meta_type="Document viewer"

# security declarations object used by the declareProtected() calls below
security=ClassSecurityInfo()
# management tabs: everything Folder offers plus a 'Configuration' tab
# that points to changeDocumentViewerForm
manage_options=Folder.manage_options+(
    {'label':'Configuration','action':'changeDocumentViewerForm'},
    )

# class-level default; __init__ tries to replace it with the attribute
# named 'metadata'
metadataService = None
"""MetaDataFolder instance"""


#
# templates and forms
#
# viewMode templates (index_html looks up 'viewer_<viewMode>' on self.template)
viewer_text = PageTemplateFile('zpt/viewer_text', globals())
viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
viewer_images = PageTemplateFile('zpt/viewer_images', globals())
viewer_index = PageTemplateFile('zpt/viewer_index', globals())
viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
# available layer types per viewMode (annotator not default);
# None means the mode has no builtin layers
builtinLayers = {'text': ['dict','search','gis'],
                 'xml': None, 'images': None, 'index': ['extended']}
# NOTE: this is the very same dict object as builtinLayers, not a copy
availableLayers = builtinLayers;
# layer templates, named layer_<mode>_<layer>
layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
# table-of-contents templates, one per tocMode
toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
toc_text = PageTemplateFile('zpt/toc_text', globals())
toc_figures = PageTemplateFile('zpt/toc_figures', globals())
toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
toc_none = PageTemplateFile('zpt/toc_none', globals())
# other templates and static resources
common_template = PageTemplateFile('zpt/common_template', globals())
info_xml = PageTemplateFile('zpt/info_xml', globals())
docuviewer_css = ImageFile('css/docuviewer.css',globals())
# make docuviewer_css refreshable for development
docuviewer_css.index_html = refreshingImageFileIndexHtml
docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
# make docuviewer_ie_css refreshable for development
docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
jquery_js = ImageFile('js/jquery.js',globals())
163
164
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and the thumbnail grid size, parses authgroups (a
    comma-separated string) into a list of lower-case group names and creates
    the 'template' sub-folder. Creating the 'fulltextclient' and 'zogilib'
    helpers and looking up the 'metadata' attribute are best effort: failures
    are logged and do not abort construction. digilibBaseUrl is only stored
    when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    tplFolder = Folder('template')
    self['template'] = tplFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        tplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    # zogilib instance
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    # assume MetaDataFolder instance is called metadata
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
202
203
204	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the result of getTextPage() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
208
def getSearchResults(self, **args):
    """Return the result of getSearchResults() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
212
def getResultsPage(self, **args):
    """Return the result of getResultsPage() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
216
def getTextInfo(self, **args):
    """Return the result of getTextInfo() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
220
def getToc(self, **args):
    """Return the result of getToc() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
224
def getTocPage(self, **args):
    """Return the result of getTocPage() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
228
def getRepositoryType(self, **args):
    """Return the result of getRepositoryType() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
232
def getTextDownloadUrl(self, **args):
    """Return the result of getTextDownloadUrl() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getTextDownloadUrl(**args)
236
def getPlacesOnPage(self, **args):
    """Return the result of getPlacesOnPage() on template.fulltextclient.

    All keyword arguments are passed through unchanged.
    """
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
240
241	# Thumb list for CoolIris Plugin
242	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
243	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """
    render the thumbnail list through the template 'thumbs_main_rss'

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: result of calling the template with docinfo, pageinfo and viewMode
    """
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when it was given, so it may be
    # missing altogether -- use getattr like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
273
274
275	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    show page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current
    @return: output of the template 'viewer_<viewMode>', or an error string
        when the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    # default tocMode: mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy name for text with the dict layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text, images otherwise
        # (docinfo.get('textURL', None) not implemented yet)
        if docinfo.get('textURLPath', None):
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"
        else:
            viewMode = "images"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # the template is /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
340
def getAvailableLayers(self):
    """Return the availableLayers dict (layer lists keyed by viewMode)."""
    layers = self.availableLayers
    return layers
344
def getBrowser(self):
    """Return the browser info dict produced by browserCheck() and log it."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
350
def findDigilibUrl(self):
    """Return whatever template.zogilib.getDLBaseUrl() reports as digilib URL."""
    return self.template.zogilib.getDLBaseUrl()
355
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Build a digilib Scaler URL.

    When docinfo carries an 'imageURL' that URL is the base; otherwise the
    base is built from self.digilibBaseUrl and fn (falling back to docinfo's
    'imagePath' when fn is None). '&pn=' is added only for a truthy pn;
    '&dw=' and '&dh=' are always appended.
    """
    haveDocinfo = docinfo is not None
    url = docinfo.get('imageURL', None) if haveDocinfo else None

    if url is None:
        # no ready-made URL: assemble one from base URL and file name
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        url = base + "fn=%s"%fn

    if pn:
        url = url + "&pn=%s"%pn

    return url + "&dw=%s&dh=%s"%(dw,dh)
374
def getDocumentViewerURL(self):
    """Return the URL of this instance as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
378
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
386
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Starts from a copy of the request's form parameters, then applies
    param=val and the entries of dict params. A value of None deletes the key.
    val is stored as str(val); values from params are stored as given.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry (or '' if all entries are empty); any other value
    leaves lists untouched."""
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys);
        # iterate over a snapshot because values are replaced on the way
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry; a list of empty entries used
                # to raise IndexError here
                newParams[k] = entries[0] if entries else ''

    return newParams
425
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a document viewer URL built from the parameters of getParams().

    Every value is passed through utf8ify() and quote_plus() (leaving '/'
    unescaped); the key=value pairs are joined with paramSep and appended
    after '?' to baseUrl, which defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
436
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the same arguments with paramSep fixed to '&'."""
    # NOTE(review): the separator equals getLink's default, so this looks like
    # a plain alias -- confirm that an HTML-escaped separator is not intended
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
440
441
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    newLayerString is parsed as JSON and accepted when the result contains
    the keys 'text' and 'images'; otherwise an error is logged and layers are
    autodetected.

    Autodetection starts from a copy of builtinLayers (the lists are copied
    too, so the builtin defaults are never modified) and adds one layer for
    every id in self.template of the form layer_{m}_{l}, meaning layer l in
    mode m. The layer name l may itself contain underscores."""
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON, or JSON that is not a container
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
475
def getAvailableLayersJson(self):
    """Return the availableLayers dict serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
479
480
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Gets docinfo for mode and url and renders it with the template 'info_xml'.
    """
    # digilibBaseUrl is only set by __init__ when it was given, so it may be
    # missing altogether -- use getattr like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
489
def getAuthenticatedUser(self, anon=None):
    """Return the current user object, or anon for Zope's anonymous user.

    anon is also returned when the security manager reports no user at all.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
497
def isAccessible(self, docinfo):
    """Tell whether access to the resource described by docinfo is granted.

    'free' access is always granted. A missing access type, or one that is
    listed in self.authgroups, is granted to authenticated users only.
    Any other access type is logged as unknown and refused.
    """
    accessType = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%accessType)
    if accessType == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if accessType is not None and accessType not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%accessType)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
514
515
def getUserinfo(self):
    """Return the userinfo dict kept in the session.

    When the session has no 'userinfo' entry yet, a new empty dict is stored
    there and returned.
    """
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # nothing cached: store a fresh dict in the session
    userinfo = {}
    session['userinfo'] = userinfo
    return userinfo
529
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    The docinfo dict is cached in the session: a cached docinfo is returned
    as-is when its 'mode' and 'url' match the arguments. Otherwise a new
    docinfo is assembled from index.meta (through self.metadataService), the
    text server and digilib, stored in the session and returned.

    @param mode: 'texttool', 'imagepath' or 'filepath'
    @param url: document location; its meaning depends on mode (see below)
    @param tocMode: passed to getTextInfo as mode
    @raise IOError: mode is 'texttool' and no index.meta DOM was found
    @raise ValueError: mode is none of the three known modes
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current, i.e. made for the same mode and url
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    # NOTE(review): assumes self.digilibBaseUrl has been set by now -- __init__
    # only sets it when a value was given
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imagePath'] = url
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + url
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents (only if the root element is 'resource')
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
            docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages again: still no images
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
660
661
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the document name if it does not already end with it).
    Without an archive-path the document URL stored in docinfo is used as
    path, unless it is an http: URL. A leading '/mpiwg/online' is removed
    from the resulting path. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the former key 'documentURL' is still accepted)
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
686
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    texttool may be a dict or a list of dicts (the first one is used).
    Fills 'imagePath', 'textURL', 'textURLPath' (plus 'textURLRepository'),
    'pageFlow', 'oddPage', 'titlePage' and 'presentationUrl' as far as the
    corresponding texttool entries exist. Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    docPath = getMDText(docinfo.get('documentPath', None))

    # image dir (relative to the document path)
    imageDir = getMDText(texttool.get('image', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme -> path relative to the document path
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNodes = texttool.get('text-url-path', None)
    if not isinstance(textUrlNodes, list):
        textUrlNodes = [textUrlNodes]

    for tun in textUrlNodes:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes can carry attributes; getMDText also accepts
        # plain values, which have no get()
        if isinstance(tun, dict):
            textUrlAtts = tun.get('@attr')
        else:
            textUrlAtts = None

        if textUrlAtts and 'repository' in textUrlAtts:
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
749
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Copy bibliographic data into docinfo and return docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    'creator', 'title' and 'date' fields from the metadata service's DC
    mapping of bib (empty string when a field is missing). bibx is not used.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, '')

    return docinfo
763
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the 'type' attribute of acc; for the types
    'group' and 'institution' the lower-cased acc['name'] is used instead.
    Incomplete or malformed access data leaves docinfo unchanged.
    Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError, IndexError):
        # best effort: missing attributes or unexpected structure
        logging.debug("getDocinfoFromAccess: unable to read access type from %s"%repr(acc))

    return docinfo
781
def getDocinfoFromDigilib(self, docinfo, path):
    """Ask digilib's dirInfo-xml.jsp about path and store the page count.

    docinfo['numPages'] becomes the integer value of the <size> element of
    the response, or 0 when that element has no text. When no data could be
    fetched an error is logged and docinfo is returned unchanged.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    sizeText = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%sizeText)
    docinfo['numPages'] = int(sizeText) if sizeText else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
800
801
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml named by docinfo['presentationUrl'] -- directly for
    http(s) URLs, through digilib's Texter servlet for online paths -- and
    stores the text of its first author, title and date elements as
    'creator', 'title' and 'date'. When there is no URL or no data could be
    read, an error is logged and docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if not txt:
        # None or an empty response: nothing to parse
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
830
831
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    Builds a new dict describing the current page: view mode and layers,
    page number, thumbnail grid geometry and batch, toc/search page sizes,
    query and highlighting parameters (the latter read from the request).

    @param current: current page number (stored as 'current' and 'pn')
    @param start: first page of the thumbnail group; derived from current if empty
    @param rows: thumbnail rows, defaults to self.thumbrows
    @param cols: thumbnail columns, defaults to self.thumbcols
    @param docinfo: document info dict; 'numPages' may be updated from 'numTextPages'
    @param userinfo: not used here
    @param viewLayer: layer name, comma-separated string or list of names
    @return: the pageinfo dict
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        seen = set()
        # keeps the first occurrence of every non-empty name; seen.add()
        # returns None, so "not seen.add(l)" is always true and just records l
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    # number of thumbnails on one screen
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    # NOTE(review): docinfo defaults to None but is used unconditionally here
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, counting a partial last group
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # the return value is not used; the call receives pageinfo and docinfo
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
917
918
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict with array of page informations for one screenfull of thumbnails

    start -- index of the first page of the requested screen
    rows, cols -- size of the thumbnail grid; one screen holds rows*cols slots
    pageFlowLtr -- if False every row is filled from right to left
    pageZero -- if True the first screen starts with an extra slot 0
                in front of page 1
    minIdx, maxIdx -- smallest and largest valid page index; slots outside
                      this range get idx None. maxIdx=0 means "not given",
                      start + rows*cols is used instead.

    The returned dict has the keys
    'batches' -- list of {'start':, 'end':} for every screen
    'pages' -- list of rows, each a list of {'idx': index or None}
    'prevStart', 'nextStart' -- start index of the neighbouring screen or None
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # index of the very first slot: slot 0 exists only with pageZero
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # number of slots between ofs and maxIdx (inclusive). Slot 0 has to be
    # counted too, otherwise the last pages end up in no batch at all.
    numSlots = maxIdx - ofs + 1
    nb = int(math.ceil(numSlots / float(grpsize)))

    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min((i + 1) * grpsize + ofs - 1, maxIdx)
        batches.append({'start': s, 'end': e})

    batch['batches'] = batches

    # index of the first slot on this screen
    if pageZero and start == 1:
        # correct beginning: the first screen starts with slot 0
        first = 0
    else:
        first = start

    pages = []
    idx = first
    for r in range(rows):
        row = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                page = {'idx': idx}
            else:
                # slot outside the valid range stays empty
                page = {'idx': None}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                # right-to-left: newest page goes to the front of the row
                row.insert(0, page)

        pages.append(row)

    batch['pages'] = pages

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # the next screen begins right after the last slot shown here, i.e.
    # relative to first (not start) so no page is skipped after slot 0
    if first + grpsize <= maxIdx:
        batch['nextStart'] = first + grpsize
    else:
        batch['nextStart'] = None

    return batch
976
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start -- index of the first element of the requested screen
    size -- number of elements per screen
    end -- index of the last element; 0 means "not given" and
           start + size is used instead
    data -- object with a get(key, default) method holding the elements
    fullData -- if True data is looked up by absolute index (start, start+1,
                ...), otherwise by position on this screen (0, 1, ...)

    The returned dict has the keys
    'batches' -- list of {'start':, 'end':} for every screen
    'this' -- list of the elements on this screen
    'prevStart', 'nextStart' -- start index of the neighbouring screen or None
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch (end itself is a valid index)
    this = []
    for pos, i in enumerate(range(start, min(start + size, end + 1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                d = data.get(pos, None)
        else:
            # NOTE(review): without data the list holds i+1 for index i;
            # kept as is, confirm that callers rely on this
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # "<=" because element number end exists; with "<" a last batch of
    # exactly one element could never be reached
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s", start, size, end, repr(batch))
    return batch
1021
1022
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {name:, id:} on the annotation server for the user

    user -- user name put into the "user" query parameter
    annotationServerUrl -- base URL of the annotation server

    Returns a list of dicts with the keys 'id', 'name' and 'uri'. The list
    is empty if the server sent no data, the answer is not a JSON object
    or it has no 'rows' member.
    """
    groups = []
    # NOTE(review): user is put into the query string unescaped; names with
    # characters like '&' or ' ' will produce a broken URL - confirm the
    # expected type of user before adding URL quoting here.
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    # the request is best-effort (noExceptions), so a broken answer must
    # not raise either
    try:
        res = json.loads(data)
    except ValueError as e:
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        logging.error("getAnnotatorGroupsForUser: unexpected answer from %s"%groupsUrl)
        return groups

    rows = res.get('rows', None)
    if rows is None:
        return groups

    for r in rows:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1037
1038
# restrict 'changeDocumentViewerForm' to the permission 'View management screens'
# (security is presumably the ClassSecurityInfo of the class - defined outside this excerpt)
security.declareProtected('View management screens','changeDocumentViewerForm')
# form built from the page template file zpt/changeDocumentViewer
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1041
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer

    title -- new title of the viewer
    digilibBaseUrl -- stored as self.digilibBaseUrl
    thumbrows, thumbcols -- stored as self.thumbrows and self.thumbcols
    authgroups -- comma separated list of group names; stored as list of
                  stripped, lower-case names in self.authgroups
    availableLayers -- handed on to self.setAvailableLayers()
    RESPONSE -- if given, redirects to 'manage_main' when done
    """
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # best effort: keep the old metadataService and just report the problem
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1059
def manage_AddDocumentViewerForm(self):
    """render the form for adding a new viewer"""
    # load the template and wrap it in the context of self before calling it
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundTemplate = formTemplate.__of__(self)
    return boundTemplate()
1064
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """create a documentViewer with the given id and add it to self"""
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    # without a RESPONSE there is nothing to redirect
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: