Context Navigation

source: documentViewer/documentViewer.py @ 564:31f562fa7214

Last change on this file since 564:31f562fa7214 was 564:31f562fa7214, checked in by casties, 12 years ago
First version of MpiwgXmlTextServer.
File size: 41.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the XML as a string.

    The encoding argument is accepted (it was used by the former
    4Suite-based implementation) but is not handed on to ElementTree.
    """
    return ET.tostring(node)
35
def getMDText(node):
    """Return the '@text' entry of a MetaDataProvider metadata node.

    A dict node yields its '@text' value (None when the key is absent);
    anything that is not a dict is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    self has to provide REQUEST.get_header(). Returns a dict with the keys
    'ua' (raw header value), 'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin',
    'isIEMac', 'staticHTML' and the version strings 'versFirefox', 'versIE',
    'versSafariChrome' and 'versOpera' ("" when not detected).

    A missing header is treated like an empty User-Agent string instead of
    raising. Each detection step only depends on the tokens it really needs.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # the header may be absent: parse an empty string but report the raw value
    uaStr = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in uaStr:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in uaStr

    # rest of the UA string starting at the first '(' resp. the first ')'
    # (find() returning -1 deliberately keeps the original "last character"
    # slice, which simply leads to no match below)
    nav = uaStr[uaStr.find('('):]
    nav1 = uaStr[uaStr.find(')'):]

    # Safari or Chrome identification: tokens after the second parenthesis
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # UA string does not have the expected shape
        pass

    # IE identification: second "; "-separated entry inside the parenthesis
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ')'
    # (computed here; it used to rely on a variable left over from the
    # Safari step, which was undefined whenever that step failed early)
    try:
        ffToken = nav1.split(" ")[2]
        if "Firefox" in ffToken:
            nav5 = ffToken.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            bt['versFirefox'] = nav5[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in uaStr:
            bt['versOpera'] = nav[nav.find(')'):].split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in uaStr
    bt['isWin'] = 'Windows' in uaStr
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    A trailing '/' is removed first. With cnt <= 0 the (stripped) path is
    returned unchanged; if cnt exceeds the number of segments the result
    is the empty string.
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice ending at -0 would be empty and wipe out the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
110
111	##
112	## documentViewer class
113	##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    # the Folder's management tabs plus a 'Configuration' tab
    manage_options=Folder.manage_options+(
        {'label':'Configuration','action':'changeDocumentViewerForm'},
        )

    # class-level default; __init__ tries to replace it with the object
    # found under the name 'metadata'
    metadataService = None
    """MetaDataFolder instance"""


    #
    # templates and forms
    #
    # viewMode templates (index_html looks them up as viewer_<viewMode>)
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # available layer types per viewMode (annotator not default);
    # None means no layers are listed for that mode
    builtinLayers = {'text': ['dict','search','gis'],
                     'xml': None, 'images': None, 'index': ['extended']}
    # NOTE: this binds the very same dict object as builtinLayers (no copy)
    availableLayers = builtinLayers;
    # layer templates (named layer_<viewMode>_<layer>)
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css',globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css',globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js',globals())
163
164
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer and its 'template' sub-folder.

    Creation of the full text client and of zogilib is best effort:
    failures are logged and otherwise ignored.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups arrives as a comma-delimited string of authorized groups
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # the template folder is always created so template.something works
    tmplFolder = Folder('template')
    self['template'] = tmplFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        tmplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib for the image display
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # the MetaDataFolder instance is assumed to be called 'metadata'
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    # digilibBaseUrl stays unset when no value was given
    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
202
203
204	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
208
def getSearchResults(self, **args):
    """Load the list of search results (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
212
def getResultsPage(self, **args):
    """Return one page of the search results (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
216
def getTextInfo(self, **args):
    """Return document info from the text server (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
220
def getToc(self, **args):
    """Load the table of contents (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
224
def getTocPage(self, **args):
    """Return one page of the table of contents (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
228
def getRepositoryType(self, **args):
    """Return the repository type reported by template.fulltextclient."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
232
def getTextDownloadUrl(self, **args):
    """returns the result of the text server's getTextDownloadUrl (presumably the URL for downloading the text)"""
    # plain proxy: all keyword arguments are handed on to template.fulltextclient
    return self.template.fulltextclient.getTextDownloadUrl(**args)
236
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page (delegated to template.fulltextclient)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
240
241	# Thumb list for CoolIris Plugin
242	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
243	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list with the thumbs_main_rss template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' if the docinfo has a text URL and 'images' otherwise
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as current page
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ only sets digilibBaseUrl when a value was given, so the
    # attribute may be missing entirely (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
273
274
275	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    Show a page of the document with the template viewer_<viewMode>.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        fallbackUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
        self.digilibBaseUrl = self.findDigilibUrl() or fallbackUrl

    # default tocMode: mode=filepath should not have toc-thumbs
    if tocMode is None:
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy value: text with dict layer
        viewMode = "text"
        viewLayer = "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        # (docinfo 'textURL' is not evaluated here)
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode and execute with parameters
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    # TODO: error page?
    msg = "No template for viewMode=%s!"%viewMode
    logging.error(msg)
    return msg
340
def getAvailableLayers(self):
    """Return the dict that maps each viewMode to its list of available layers."""
    layers = self.availableLayers
    return layers
344
def getBrowser(self):
    """Return the browser type and version info from browserCheck (also logged at debug level)."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
350
def findDigilibUrl(self):
    """Ask template.zogilib for the digilib base URL and return it."""
    return self.template.zogilib.getDLBaseUrl()
355
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with the given parameters.

    docinfo['imageURL'] is used as the base when present; otherwise the URL
    is built from digilibBaseUrl and fn (fn defaults to docinfo['imagePath']
    if a docinfo is given). pn is only added when it is truthy.
    """
    haveDocinfo = docinfo is not None
    scalerUrl = docinfo.get('imageURL', None) if haveDocinfo else None

    if scalerUrl is None:
        # no ready-made image URL: assemble it from base URL and file name
        baseUrl = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')

        scalerUrl = baseUrl + "fn=%s"%fn

    pieces = [scalerUrl]
    if pn:
        pieces.append("&pn=%s"%pn)

    pieces.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(pieces)
374
def getDocumentViewerURL(self):
    """Return the URL of this instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
378
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
386
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry ('' if all entries are empty); any other value leaves
    lists untouched."""
    # copy existing request params so the request itself is not changed
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # gives '' (like 'comma') instead of raising IndexError
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
425
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with param set to val and/or the entries of dict params.

    The query string is built from getParams(); values are URL-quoted
    (without escaping '/') and joined with paramSep. baseUrl defaults to
    the URL of this instance.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, paramSep.join(pairs))
436
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val (calls getLink with an explicit paramSep)"""
    # NOTE(review): with paramSep='&' this is identical to getLink's default;
    # the method name suggests the separator was meant to be the HTML entity
    # for '&' -- confirm against the repository version of this file
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
440
441
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    newLayerString is parsed as JSON and accepted if it contains the keys
    'text' and 'images'. Otherwise (or if newLayerString is None) the
    layers are autodetected: starting from builtinLayers, every template
    id of the form layer_{m}_{l} adds layer l to mode m."""
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON or not a container: autodetect instead
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; the lists are copied as well so that
    # adding layers never changes the class-level builtinLayers
    layers = {}
    for (m, ls) in self.builtinLayers.items():
        if ls is None:
            layers[m] = None
        else:
            layers[m] = list(ls)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # split into exactly 'layer', mode and layer name
        parts = t.split('_', 2)
        if len(parts) != 3 or not parts[1] or not parts[2]:
            continue

        (x, m, l) = parts
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
475
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
479
480
def getInfo_xml(self,url,mode):
    """returns info about the document as XML (rendered by the info_xml template)"""
    # __init__ only sets digilibBaseUrl when a value was given, so the
    # attribute may be missing entirely (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
489
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon for no user or Zope's "Anonymous User"."""
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
497
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    'free' is always accessible; a missing access type or one listed in
    authgroups requires an authenticated user; anything else is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
514
515
def getUserinfo(self):
    """Return the userinfo dict kept in the session (an empty one is created and stored if missing)."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    # look for cached userinfo in session
    if session.has_key('userinfo'):
        # check if its still current?
        return session['userinfo']

    # nothing cached: store a fresh dict in the session
    fresh = {}
    session['userinfo'] = fresh
    return fresh
529
def getDocinfo(self, mode, url, tocMode=None):
    """returns docinfo depending on mode

    docinfo is a dict that is cached in the session under 'docinfo' and
    reused as long as mode and url are unchanged.

    @param mode: 'texttool', 'imagepath' or 'filepath'
    @param url: document location; its meaning depends on mode
    @param tocMode: accepted but not used in this method
    Raises IOError if no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode."""
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        docinfo = self.REQUEST.SESSION['docinfo']
        # check if its still current
        if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
            return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    # add self url
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + url
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents (only if the root element is 'resource')
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = self.metadataService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode='pages', docinfo=docinfo)

        # bib info
        bib = self.metadataService.getBibData(dom=metaDom)
        if bib:
            # save extended version as 'bibx' TODO: ugly
            bibx = self.metadataService.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)
        else:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = self.metadataService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = self.metadataService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = self.metadataService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = self.metadataService.getDRI(dom=metaDom, type='escidoc')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # check numPages (0 or missing: ask digilib for the number of images)
    if docinfo.get('numPages', 0) == 0:
        # number of images from digilib
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imgpath
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']

    # check numPages again: fall back to the number of text pages
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('numTextPages', 0) > 0:
            # replace with numTextPages (text-only?)
            docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
661
662
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from the resource's 'name' and
    docinfo['documentPath'] from its 'archive-path'. Without an
    archive-path, a non-http docinfo['documentUrl'] is used as path
    (with a leading '/mpiwg/online' removed). Returns docinfo."""
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under the key
        # 'documentUrl' (the former lookup of 'documentURL' raised KeyError)
        docUrl = docinfo.get('documentUrl', None)
        if docUrl is not None and not docUrl.startswith('http:'):
            # fix URLs starting with /mpiwg/online
            docPath = docUrl.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
687
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Sets 'imagePath', 'textURL', 'textURLPath' (plus 'textURLRepository'),
    'pageFlow', 'oddPage', 'titlePage' and 'presentationUrl' as far as the
    texttool data provides them. Returns docinfo."""
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: treat as path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if textUrl:
            # only dict nodes can carry attributes; a plain string node
            # has no .get() and is treated as "no repository attribute"
            textUrlAtts = None
            if isinstance(tun, dict):
                textUrlAtts = tun.get('@attr')

            if (textUrlAtts and 'repository' in textUrlAtts):
                textRepo = textUrlAtts['repository']
                # use matching repository
                if self.getRepositoryType() == textRepo:
                    docinfo['textURLPath'] = textUrl
                    docinfo['textURLRepository'] = textRepo
                    break

            else:
                # no repo attribute - use always
                docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
750
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Store the bib data in docinfo and return docinfo.

    Sets 'bib' (raw fields), 'bibType' (the '@type' entry) and, from the
    metadata service's DC mapping, 'creator', 'title' and 'date'.
    bibx is accepted but not used here.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for key in ('creator', 'title', 'date'):
        docinfo[key] = dc.get(key,'')

    return docinfo
764
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the access type (acc['@attr']['type']);
    for the types 'group' and 'institution' the lower-cased acc['name'] is
    used instead. Incomplete or unexpected access data leaves docinfo
    unchanged. Returns docinfo."""
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing keys or unexpected value types are not fatal
        logging.debug("getDocinfoFromAccess: no usable access type (%s)"%repr(e))

    return docinfo
782
def getDocinfoFromDigilib(self, docinfo, path):
    # Ask digilib's dirInfo-xml.jsp for the directory 'path' and store the
    # content of its 'size' element as docinfo['numPages'] (0 if empty).
    # docinfo is returned unchanged when no data could be fetched.
    # (documented with comments only: the method has no docstring)
    infoUrl = "%s/dirInfo-xml.jsp?mo=dir&fn=%s"%(self.digilibBaseUrl, path)
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(txt).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
801
802
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Read DC-like bibliographical information from the presentation info.xml into docinfo.

    The file is located by docinfo['presentationUrl']; 'creator', 'title'
    and 'date' are taken from its author, title and date elements.
    docinfo is returned unchanged if there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through digilib's Texter servlet
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
831
832
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """returns pageinfo with the given parameters

    pageinfo is a new dict with the view settings, the thumbnail grid
    (rows, cols, group size, page batch) and the search and highlighting
    parameters taken from the request.

    @param current: current page number (converted with getInt)
    @param start: first page of the thumbnail group
    @param rows, cols: thumbnail grid size, default thumbrows/thumbcols
    @param docinfo: document info dict; is read here and 'numPages' may be updated
    @param userinfo: accepted but not used in this method
    @param viewLayer: layer name, comma-separated string or list of layers
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers
        # (seen.add() returns None, so "not seen.add(l)" records l and is always true)
        seen = set()
        viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)]
        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        #create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # the default is the first page of the group that contains current
    # (presumably getInt uses it when start is empty -- verify in SrvTxtUtils)
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    # int(current / grpsize) * grpsize +1))
    pageinfo['start'] = start
    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, rounded up
    pageinfo['numgroups'] = int(np / grpsize)
    if np % grpsize > 0:
        pageinfo['numgroups'] += 1

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        # the return value is not used here
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
918
919
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns dict describing one screenful of thumbnails.

    start: index of the first page of the current screen
    rows, cols: dimensions of the thumbnail grid (one batch holds rows*cols pages)
    pageFlowLtr: if false every row is filled from right to left
    pageZero: if true the batches begin at index 0 instead of 1 and the
        screen for start=1 begins counting at index 0
    minIdx, maxIdx: range of valid page indexes; maxIdx=0 is replaced
        by start + rows*cols

    The result has the keys
    'batches': list of {'start':, 'end':} for every batch,
    'pages': list of rows, each a list of {'idx':} cells where idx is
        None for cells outside minIdx..maxIdx,
    'prevStart', 'nextStart': start index of the neighbouring batch or None.
    """
    groupSize = rows * cols
    if maxIdx == 0:
        maxIdx = start + groupSize

    # batches begin at 0 instead of 1 when a zeroth page is shown
    shift = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(groupSize)))
    allBatches = [
        {'start': n * groupSize + shift,
         'end': min((n + 1) * groupSize + shift - 1, maxIdx)}
        for n in range(numBatches)
    ]

    # with a zeroth page the first screen starts counting at 0
    pageIdx = 0 if (pageZero and start == 1) else start
    grid = []
    for rowNum in range(rows):
        cells = []
        for colNum in range(cols):
            inRange = minIdx <= pageIdx <= maxIdx
            cells.append({'idx': pageIdx if inRange else None})
            pageIdx += 1

        if not pageFlowLtr:
            # right-to-left flow: mirror the row
            cells.reverse()

        grid.append(cells)

    return {
        'batches': allBatches,
        'pages': grid,
        'prevStart': max(start - groupSize, 1) if start > 1 else None,
        'nextStart': start + groupSize if start + groupSize <= maxIdx else None,
    }
977
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    start: index of the first element of the current batch
    size: number of elements per batch
    end: last index (inclusive); end=0 is replaced by start + size
    data: optional dict of elements; if empty or None the batch
        elements are the numbers index+1
    fullData: if true data is looked up by absolute index,
        otherwise by position inside the current batch (starting at 0)

    The result has the keys
    'batches': list of {'start':, 'end':} for every batch,
    'this': list of the elements of the current batch,
    'prevStart', 'nextStart': start index of the neighbouring batch or None.
    """
    batch = {}
    if end == 0:
        # no end given: use start + size
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch
    this = []
    for pos, idx in enumerate(range(start, min(start + size, end + 1))):
        if data:
            # absolute index for full data, position in batch otherwise
            d = data.get(idx if fullData else pos, None)
        else:
            d = idx + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a next batch exists as soon as its first
    # index does not exceed end (a batch may hold a single element)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%r", start, size, end, batch)
    return batch
1022
1023
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """returns list of groups {id:, name:, uri:} on the annotation server for the user.

    The list is empty if the server returns no data or the JSON
    response has no 'rows' member.
    """
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    response = getHttpData(url=groupsUrl, noExceptions=True)
    if not response:
        return []

    entries = json.loads(response).get('rows', None)
    if entries is None:
        return []

    # keep only the members we are interested in, missing ones become None
    wanted = ('id', 'name', 'uri')
    return [dict((key, entry.get(key, None)) for key in wanted) for entry in entries]
1038
1039
# protect the configuration form with the 'View management screens' permission
security.declareProtected('View management screens','changeDocumentViewerForm')
# configuration form, page template loaded from zpt/changeDocumentViewer
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
1042
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """init document viewer.

    title: title of the viewer
    digilibBaseUrl: stored unchanged as self.digilibBaseUrl
    thumbrows, thumbcols: default size of the thumbnail grid
    authgroups: comma separated list of group names; stored as list of
        stripped, lower-cased names
    availableLayers: passed on to setAvailableLayers()
    RESPONSE: if given, redirects to 'manage_main' when done
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        # a missing metadata folder is only logged, the remaining settings are still applied
        logging.error("Unable to find MetaDataFolder 'metadata': %s", e)

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
1060
def manage_AddDocumentViewerForm(self):
    """returns the rendered form for adding a document viewer"""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template around self (via __of__) and call the result
    return template.__of__(self)()
1065
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """creates a documentViewer with the given id and adds it to self.

    Redirects to 'manage_main' if a RESPONSE is given.
    """
    viewer = documentViewer(id, title=title, imageScalerUrl=imageScalerUrl, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: