Context Navigation

source: documentViewer/documentViewer.py @ 567:8b1e20bf300d

Last change on this file since 567:8b1e20bf300d was 566:4a31608f8b0e, checked in by casties, 12 years ago
more new MpiwgXmlTextServer.
File size: 42.0 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from Products.MetaDataProvider import MetaDataFolder
22
23	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
24
def serializeNode(node, encoding="utf-8"):
    """Serialize an ElementTree node and return the XML as a string.

    The ``encoding`` argument is accepted but not handed to
    ``ET.tostring``, so the ElementTree default applies.
    """
    # former 4Suite implementation, kept for reference:
    #   stream = cStringIO.StringIO()
    #   Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    #   s = stream.getvalue()
    #   stream.close()
    return ET.tostring(node)
35
def getMDText(node):
    """Return the text of a MetaDataProvider metadata node.

    Dict nodes yield their '@text' entry (None when absent); any other
    value is handed back unchanged.
    """
    if not isinstance(node, dict):
        return node

    return node.get('@text', None)
42
def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    Returns a dict with the keys 'ua' (raw header value), 'isIE', 'isN4',
    'versFirefox', 'versIE', 'versSafariChrome', 'versOpera', 'isMac',
    'isWin', 'isIEWin', 'isIEMac' and 'staticHTML'. Version entries stay ""
    when the respective browser is not recognized.

    A request without User-Agent header is parsed like an empty string
    instead of raising. Each browser is identified independently of the
    others.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # header may be missing (None); all parsing works on a string
    uaStr = ua or ""

    def tail(s, marker):
        # part of s starting at the first marker; as s[s.find(marker):]
        # does, this yields only the last character when marker is absent
        return s[s.find(marker):]

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = 'MSIE' in uaStr
    # Netscape 4 is only considered when the browser is not IE
    bt['isN4'] = (not bt['isIE']) and ('Mozilla/4.' in uaStr)
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    # text following the first closing parenthesis, e.g. ") Gecko/... Firefox/10.0"
    afterParen = tail(uaStr, ')')

    # Safari or Chrome identification:
    # look at the words after the second parenthesized group
    try:
        words = tail(tail(afterParen, '('), ')').split(" ")
        if "Safari" in words[2]:
            bt['versSafariChrome'] = words[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated token inside the parentheses
    try:
        token = tail(uaStr, '(').split("; ")[1]
        if "MSIE" in token:
            bt['versIE'] = token.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third word after the closing parenthesis
    try:
        product = afterParen.split(" ")[2]
        if "Firefox" in product:
            ffVersion = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification
    if "Opera" in uaStr:
        try:
            bt['versOpera'] = tail(tail(uaStr, '('), ')').split("/")[2]
        except IndexError:
            pass

    bt['isMac'] = 'Macintosh' in uaStr
    bt['isWin'] = 'Windows' in uaStr
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
103
def getParentPath(path, cnt=1):
    """Return path shortened by its last cnt segments.

    Trailing slashes are removed first. A cnt larger than the number of
    segments yields ''. A cnt of 0 (or less) returns the path without
    shortening; the previous slice [0:-0] returned '' for cnt=0.
    """
    # make sure path doesn't end with /
    parts = path.rstrip('/').split('/')
    # number of leading segments to keep
    keep = min(max(len(parts) - cnt, 0), len(parts))
    return '/'.join(parts[:keep])
110
111	##
112	## documentViewer class
113	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (None by default)
    metadataService = None

    #
    # templates and forms
    #
    # one page template per viewMode
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())
    viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
    # layer types available per viewMode (annotator is not a default)
    builtinLayers = {
        'text': ['dict', 'search', 'gis'],
        'xml': None,
        'images': None,
        'index': ['extended'],
    }
    # NOTE: this is the same dict object as builtinLayers, not a copy
    availableLayers = builtinLayers
    # layer templates, named layer_{viewMode}_{layer}
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
    layer_text_pundit = PageTemplateFile('zpt/layer_text_pundit', globals())
    layer_index_extended = PageTemplateFile('zpt/layer_index_extended', globals())
    # table-of-contents templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())
    # other templates and static resources
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make docuviewer_css refreshable for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    docuviewer_ie_css = ImageFile('css/docuviewer_ie.css', globals())
    # make docuviewer_ie_css refreshable for development
    docuviewer_ie_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
163
164
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title and the thumbnail grid size, turns the
    comma-delimited authgroups string into a list of lower-cased names,
    creates the 'template' folder and tries to put a text server
    ('fulltextclient') and a zogiLib ('zogilib') into it. Failures of
    these optional parts are logged and otherwise ignored. The digilib
    URL attributes are only set when digilibBaseUrl is given.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    tmpl = Folder('template')
    self['template'] = tmpl  # Zope-2.12 style

    try:
        import MpdlXmlTextServer
        server = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        tmpl['fulltextclient'] = server
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zl = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tmpl['zogilib'] = zl
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is None:
        return

    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'
204
205
206	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy for fulltextclient.getTextPage: returns full text content of page."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
210
def getSearchResults(self, **args):
    """Proxy for fulltextclient.getSearchResults: loads list of search results and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
214
def getResultsPage(self, **args):
    """Proxy for fulltextclient.getResultsPage: returns one page of the search results."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
218
def getTextInfo(self, **args):
    """Proxy for fulltextclient.getTextInfo: returns document info from the text server."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
222
def getToc(self, **args):
    """Proxy for fulltextclient.getToc: loads table of contents and stores XML in docinfo."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
226
def getTocPage(self, **args):
    """Proxy for fulltextclient.getTocPage: returns one page of the table of contents."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
230
def getRepositoryType(self, **args):
    """Proxy for fulltextclient.getRepositoryType: get repository type."""
    textServer = self.template.fulltextclient
    return textServer.getRepositoryType(**args)
234
def getTextDownloadUrl(self, **args):
    """Proxy for fulltextclient.getTextDownloadUrl.

    All keyword arguments are passed through unchanged and the text
    server's result is returned.
    NOTE(review): presumably this is the download URL of the text --
    confirm against the text server class. The former docstring was a
    copy of the one on getPlacesOnPage.
    """
    return self.template.fulltextclient.getTextDownloadUrl(**args)
238
def getPlacesOnPage(self, **args):
    """Proxy for fulltextclient.getPlacesOnPage: get list of gis places on one page."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
242
243	# Thumb list for CoolIris Plugin
244	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
245	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbnail list through the thumbs_main_rss template.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; with 'auto' the result is
        'text' when docinfo has a 'textURL' key or a non-empty
        'textURLPath', otherwise 'images'
    @param start: handed to getPageinfo as start
    @param pn: handed to getPageinfo as current
    '''
    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may not exist at all (__init__ only sets it when a
    # base URL was given), so look it up defensively like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # automatic mode: text if a text URL is configured, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
275
276
277	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode=None,start=1,pn=1):
    """
    Show one page of the document in the viewer.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. layer 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: handed to getPageinfo as start
    @param pn: handed to getPageinfo as current

    Returns the rendered template viewer_{viewMode}, or an error string
    when the template folder or the template is missing.
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    if tocMode is None:
        # mode=filepath should not have toc-thumbs
        tocMode = "none" if mode == "filepath" else "thumbs"

    # docinfo: information about document (cached)
    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)
    # userinfo: user settings (cached)
    userinfo = self.getUserinfo()

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        if not docinfo.get('textURLPath', None):
            viewMode = "images"
        else:
            # docinfo.get('textURL', None) not implemented yet
            viewMode = "text"
            if viewLayer is None and 'viewLayer' not in userinfo:
                # use layer dict as default
                viewLayer = "dict"

    # save viewLayer in userinfo
    userinfo['viewLayer'] = viewLayer

    # pageinfo: information about page (not cached)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, userinfo=userinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode and execute with parameters
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
342
def getAvailableLayers(self):
    """Return the availableLayers dict (list of available layers per viewMode)."""
    layers = self.availableLayers
    return layers
346
def getBrowser(self):
    """Return the browser information dict that browserCheck builds for the current request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
352
def findDigilibUrl(self):
    """Ask the zogilib object in the template folder for the digilib base URL."""
    return self.template.zogilib.getDLBaseUrl()
357
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a URL to the digilib Scaler with parameters.

    docinfo['imageURL'] is used as base when present. Otherwise the URL
    is built from self.digilibScalerUrl and fn (or docinfo['imagePath']
    when fn is None). pn is appended when given, dw and dh always.

    The query string of the fallback URL is now introduced by '?'; it
    used to be glued to the servlet URL without any separator. A
    configured Scaler URL that already ends in '?' or '&', or already
    carries a query, is extended correctly.
    """
    url = None
    if docinfo is not None:
        url = docinfo.get('imageURL', None)

    if url is None:
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base = self.digilibScalerUrl
        # pick the separator that keeps the resulting URL well-formed
        if base.endswith('?') or base.endswith('&'):
            sep = ''
        elif '?' in base:
            sep = '&'
        else:
            sep = '?'

        url = "%s%sfn=%s"%(base, sep, fn)

    if pn:
        url += "&pn=%s"%pn

    url += "&dw=%s&dh=%s"%(dw,dh)
    return url
376
def getDocumentViewerURL(self):
    """Return the URL of this instance as given by absolute_url()."""
    viewerUrl = self.absolute_url()
    return viewerUrl
380
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style

    return style + 'sel'
388
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request's form parameters, then applies
    param=val (val is stored as str) and every entry of the dict params.
    A value of None deletes the key.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry, or '' when all entries are empty (this case used to
    raise IndexError). Any other value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry, if there is one
                newParams[k] = nonEmpty[0] if nonEmpty else ''

    return newParams
427
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with parameter param set to val or parameters from dict params.

    The parameters come from getParams; values are passed through utf8ify
    and quote_plus (leaving '/' unescaped) and joined with paramSep.
    baseUrl defaults to the URL of this instance.
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, ps)
438
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer with parameter param set to val, using '&amp;' between parameters.

    Same as getLink, but the parameter separator is the HTML-escaped
    ampersand so the result can be embedded in HTML/XML markup. The
    separator used to be a plain '&', which made this method identical
    to getLink with its default separator.
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
442
443
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers to newLayerString or autodetect available layers.

    newLayerString is parsed as JSON and accepted when it is a dict with
    the keys 'text' and 'images'. Otherwise an error is logged and the
    layers are autodetected: a copy of builtinLayers is extended by all
    entries of the template folder named layer_{m}_{l} (layer l in
    mode m).

    Fixes over the previous version: detected layers are really appended
    (append() was called without argument and the error swallowed), modes
    whose builtin layer list is None can receive layers, and the lists of
    the class-level builtinLayers are no longer modified in place.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
        except (TypeError, ValueError):
            layers = None

        if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
            self.availableLayers = layers
            return

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists so they are not shared
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        parts = t.split('_')
        if len(parts) != 3:
            # not of the form layer_{m}_{l}
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
477
def getAvailableLayersJson(self):
    """Return the availableLayers dict serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
481
482
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    Gets the docinfo for mode and url and renders it with the info_xml
    template. A missing or empty digilibBaseUrl is filled in first from
    findDigilibUrl() or a default URL.
    """
    # the attribute may not exist at all (__init__ only sets it when a
    # base URL was given), so look it up defensively like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
491
def getAuthenticatedUser(self, anon=None):
    """Return the authenticated user object, or anon when there is none.

    A user whose name is "Anonymous User" (Zope's anonymous user) counts
    as not authenticated.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
499
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    accessType 'free' is always accessible. A missing accessType or one
    listed in self.authgroups requires an authenticated user. Any other
    accessType is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
516
517
def getUserinfo(self):
    """Return the userinfo dict kept in the session, creating an empty one when the session has none."""
    logging.debug("getUserinfo")
    session = self.REQUEST.SESSION
    if not session.has_key('userinfo'):
        # nothing cached yet: store a fresh dict in the session
        userinfo = {}
        session['userinfo'] = userinfo
        return userinfo

    # cached userinfo (no check whether it is still current)
    return session['userinfo']
531
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url, depending on mode.

    A docinfo stored in the session is reused when its 'mode' and 'url'
    match. Otherwise a new docinfo is assembled from the index.meta
    metadata (resource, texttool, bib, access, attribution, copyright,
    DRI) and from digilib (number of pages), stored in the session and
    returned.

    mode has to be one of 'texttool' (url points to the document dir or
    index.meta), 'imagepath' (url points to a folder with images) or
    'filepath' (url points to an image file).

    Raises IOError when no index.meta is found for mode 'texttool' and
    ValueError for an unknown mode. tocMode is not used here.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if it's still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including the URLs of this viewer and of digilib
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    docinfo['digilibScalerUrl'] = self.digilibScalerUrl
    docinfo['digilibViewerUrl'] = self.digilibViewerUrl

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, url)
        docinfo['numPages'] = 1
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        md = self.metadataService
        # document directory name and path
        resource = md.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = md.getTexttoolData(dom=metaDom, recursive=1, all=True)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=None, docinfo=docinfo)

        # bib info
        bib = md.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            # save extended version as 'bibx' TODO: ugly
            bibx = md.getBibData(dom=metaDom, all=True, recursive=1)
            if len(bibx) == 1:
                # unwrap list if possible
                bibx = bibx[0]

            docinfo['bibx'] = bibx
            docinfo = self.getDocinfoFromBib(docinfo, bib, bibx)

        # auth info
        access = md.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = md.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = md.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

        # DRI (permanent ID)
        dri = md.getDRI(dom=metaDom, type='escidoc')
        if dri:
            logging.debug("getDRI: dri=%s"%repr(dri))
            docinfo['DRI'] = dri

    # image path
    if mode != 'texttool':
        # override image path from texttool with url parameter TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # no page count yet: ask digilib for the number of images
    if docinfo.get('numPages', 0) == 0:
        if docinfo.get('imagePath', None):
            imgpath = docinfo['imagePath'].replace('/mpiwg/online/', '', 1)
            docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, imgpath)
            docinfo = self.getDocinfoFromDigilib(docinfo, imgpath)
        else:
            # imagePath still missing? try "./pageimg"
            imgPath = os.path.join(docUrl, 'pageimg')
            docinfo = self.getDocinfoFromDigilib(docinfo, imgPath)
            if docinfo.get('numPages', 0) > 0:
                # there are pages
                docinfo['imagePath'] = imgPath
                docinfo['imageURL'] = "%s?fn=%s"%(self.digilibScalerUrl, docinfo['imagePath'])

    # still no page count: replace with numTextPages (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
665
666
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource['name'] and
    docinfo['documentPath'] from resource['archive-path'] (made absolute
    and ending in the document name). Without archive-path the document
    URL stored in docinfo is used, unless it starts with 'http:'. A
    leading '/mpiwg/online' is removed from the path. documentPath is
    None (or empty) when no path could be determined.

    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo stores it under 'documentUrl';
        # the old lookup of 'documentURL' raised KeyError. The old key is
        # still honoured in case a caller sets it.
        docUrl = docinfo.get('documentUrl') or docinfo.get('documentURL')
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
691
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    texttool may be a dict or a list of dicts (the first one is used).
    Sets, depending on the available entries: 'imagePath', 'textURL'
    (old style text URL), 'textURLPath' and 'textURLRepository' (new
    style text-url-path), 'pageFlow', 'oddPage', 'titlePage' and
    'presentationUrl'.

    text-url-path nodes given as plain strings (without attributes) are
    handled like nodes without 'repository' attribute; they used to
    raise AttributeError.

    Returns docinfo.
    """
    logging.debug("texttool=%s"%repr(texttool))
    # unpack list if necessary
    if isinstance(texttool, list):
        texttool = texttool[0]

    # image dir
    imageDir = getMDText(texttool.get('image', None))
    docPath = getMDText(docinfo.get('documentPath', None))
    if imageDir and docPath:
        imageDir = os.path.join(docPath, imageDir)
        imageDir = imageDir.replace('/mpiwg/online', '', 1)
        docinfo['imagePath'] = imageDir

    # old style text URL
    textUrl = getMDText(texttool.get('text', None))
    if textUrl and docPath:
        if urlparse.urlparse(textUrl)[0] == "":
            # no URL scheme: path relative to the document
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path (can be more than one with "repository" attribute)
    textUrlNode = texttool.get('text-url-path', None)
    if not isinstance(textUrlNode, list):
        textUrlNode = [textUrlNode]

    for tun in textUrlNode:
        textUrl = getMDText(tun)
        if not textUrl:
            continue

        # only dict nodes can carry attributes
        textUrlAtts = tun.get('@attr') if isinstance(tun, dict) else None
        if textUrlAtts and 'repository' in textUrlAtts:
            textRepo = textUrlAtts['repository']
            # use matching repository
            if self.getRepositoryType() == textRepo:
                docinfo['textURLPath'] = textUrl
                docinfo['textURLRepository'] = textRepo
                break

        else:
            # no repo attribute - use always
            docinfo['textURLPath'] = textUrl

    # page flow
    docinfo['pageFlow'] = getMDText(texttool.get('page-flow', 'ltr'))

    # odd pages are left
    docinfo['oddPage'] = getMDText(texttool.get('odd-scan-position', 'left'))

    # number of title page (default 1)
    docinfo['titlePage'] = getMDText(texttool.get('title-scan-no', 1))

    # old presentation stuff
    presentation = getMDText(texttool.get('presentation', None))
    if presentation and docPath:
        if presentation.startswith('http:'):
            docinfo['presentationUrl'] = presentation
        else:
            docinfo['presentationUrl'] = os.path.join(docPath, presentation)

    return docinfo
754
def getDocinfoFromBib(self, docinfo, bib, bibx=None):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType' and the
    DC-mapped 'creator', 'title' and 'date' ('' when missing). bibx is
    accepted but not used here. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, '')

    return docinfo
768
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    docinfo['accessType'] is set to the 'type' attribute of acc; for the
    types 'group' and 'institution' the lower-cased acc['name'] is used
    instead. docinfo is left unchanged when the type is empty or acc
    does not have the expected structure. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError):
        # incomplete or unexpectedly shaped access data is ignored, but
        # only these lookup errors -- anything else should surface
        logging.debug("getDocinfoFromAccess: unable to read access type")

    return docinfo
786
def getDocinfoFromDigilib(self, docinfo, path):
    # Asks digilib's dirInfo-xml.jsp for the directory path and stores the
    # content of the <size> element as docinfo['numPages'] (0 when empty).
    # docinfo is returned unchanged when no data could be fetched.
    # NOTE(review): the original has no docstring; under Zope's publisher a
    # docstring would presumably make this method reachable by URL, so the
    # documentation is kept in comments -- confirm intent.
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    txt = getHttpData(infoUrl)
    if txt:
        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        docinfo['numPages'] = int(size) if size else 0
        # TODO: produce and keep list of image names and numbers
        return docinfo

    logging.error("Unable to get dir-info from %s"%(infoUrl))
    return docinfo
805
806
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml given by docinfo['presentationUrl'] (a real
    "http://" URL, or an online path that is fetched through digilib's
    Texter servlet) and stores the text of its first author, title and
    date elements as 'creator', 'title' and 'date'.

    docinfo is returned unchanged when there is no presentationUrl or no
    data could be read. An empty response is treated like a missing one
    now; it used to be passed on to the XML parser.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
835
836
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, userinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    pageinfo holds the view settings (viewMode, viewLayer(s), tocMode),
    the current page and thumbnail grid (current/pn, rows, cols,
    groupsize, start, numgroups, pageZero, pageBatch) and request
    parameters for text display, search and highlighting.

    viewLayer may be a comma-separated string or a list. rows and cols
    default to self.thumbrows and self.thumbcols. docinfo['numPages'] is
    set from 'numTextPages' when it is 0. When the request has a 'query'
    the search results are loaded through getSearchResults. userinfo is
    not used here.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if not isinstance(viewLayer, list):
        # create list
        pageinfo['viewLayers'] = [viewLayer]
    else:
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save (unique) list in viewLayers, keeping the first occurrence
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
    # number of thumbnail groups, counting a partly filled last group
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
922
923
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    Arguments:
    start -- index of the first page of the requested screen.
    rows, cols -- grid dimensions; one screen holds rows*cols cells.
    pageFlowLtr -- if false, every row is filled from right to left.
    pageZero -- if true the grid is counted from cell 0 instead of 1, so
        that an empty cell precedes the first page.
    minIdx, maxIdx -- smallest and largest valid page index. A maxIdx of 0
        is replaced by start + rows*cols.

    Returned keys:
    'batches' -- list of {'start':, 'end':} dicts, one per screen.
    'pages' -- list of rows; each row is a list of {'idx': n} dicts where n
        is None for cells outside minIdx..maxIdx.
    'prevStart', 'nextStart' -- start index of the neighbouring screen, or
        None if there is none.
    """
    batch = {}
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # index of the very first grid cell: 0 when a zeroth page is inserted
    if pageZero:
        ofs = 0
    else:
        ofs = 1

    # Count the cells ofs..maxIdx (inclusive). Counting only maxIdx cells
    # would drop the last page from the list whenever pageZero adds cell 0
    # and maxIdx is a multiple of grpsize.
    nb = int(math.ceil((maxIdx - ofs + 1) / float(grpsize)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * grpsize + ofs
        e = min(s + grpsize - 1, maxIdx)
        batches.append({'start': s, 'end': e})

    batch['batches'] = batches

    # first cell actually shown on this screen
    if pageZero and start == 1:
        # correct beginning: show the empty zeroth cell before page 1
        first = 0
    else:
        first = start

    pages = []
    idx = first
    for r in range(rows):
        row = []
        for c in range(cols):
            if idx < minIdx or idx > maxIdx:
                # cell outside the document stays empty
                page = {'idx': None}
            else:
                page = {'idx': idx}

            idx += 1
            if pageFlowLtr:
                row.append(page)
            else:
                # right-to-left flow: newest cell goes to the front
                row.insert(0, page)

        pages.append(row)

    if start > 1:
        batch['prevStart'] = max(start - grpsize, 1)
    else:
        batch['prevStart'] = None

    # The next screen starts right after the last cell shown here. Using
    # 'first' instead of 'start' keeps a page from being skipped when the
    # screen was shifted to begin at cell 0.
    if first + grpsize <= maxIdx:
        batch['nextStart'] = first + grpsize
    else:
        batch['nextStart'] = None

    batch['pages'] = pages
    return batch
981
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict with information for one screenful of data.

    Arguments:
    start -- index of the first element of this batch.
    size -- number of elements per batch.
    end -- index of the last element (inclusive). An end of 0 is replaced
        by start + size.
    data -- optional mapping the elements are looked up in with get().
        If it is empty or None the elements are the numbers i+1 for every
        index i of the batch.
    fullData -- if true, data is keyed by the absolute index; otherwise it
        is keyed by the position inside this batch, counted from 0.

    Returned keys:
    'batches' -- list of {'start':, 'end':} dicts, one per batch.
    'this' -- list of the elements of this batch.
    'prevStart', 'nextStart' -- start index of the neighbouring batch, or
        None if there is none.
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batches = []
    for i in range(nb):
        s = i * size + 1
        e = min((i + 1) * size, end)
        batches.append({'start': s, 'end': e})

    batch['batches'] = batches
    # list of elements in this batch
    this = []
    j = 0
    for i in range(start, min(start + size, end + 1)):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                # data only holds this batch, keyed from 0
                d = data.get(j, None)
                j += 1

        else:
            d = i + 1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so a following batch exists as soon as its first
    # index is not beyond end (matches 'batches' above and getPageBatch)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s", start, size, end, repr(batch))
    return batch
1026
1027
def getAnnotatorGroupsForUser(self, user, annotationServerUrl="http://tuxserve03.mpiwg-berlin.mpg.de/AnnotationManager"):
    """Return the list of groups on the annotation server for the user.

    Fetches <annotationServerUrl>/annotator/groups?user=<user> and reads the
    'rows' member of the JSON answer. Every row becomes a dict
    {'id':, 'name':, 'uri':}; members missing from a row are None.

    The lookup is best-effort: an empty list is returned when no data is
    received, when the answer is not valid JSON, when it is not a JSON
    object or when it has no 'rows'.
    """
    groups = []
    groupsUrl = "%s/annotator/groups?user=%s"%(annotationServerUrl,user)
    # noExceptions=True: presumably yields a false value instead of raising
    # on HTTP errors -- confirm in SrvTxtUtils.getHttpData
    data = getHttpData(url=groupsUrl, noExceptions=True)
    if not data:
        return groups

    try:
        res = json.loads(data)
    except ValueError as e:
        # a broken answer must not take the whole page down
        logging.error("getAnnotatorGroupsForUser: invalid JSON from %s: %s"%(groupsUrl, e))
        return groups

    if not isinstance(res, dict):
        return groups

    for r in res.get('rows', None) or []:
        groups.append({'id': r.get('id', None), 'name': r.get('name', None), 'uri': r.get('uri', None)})

    return groups
1042
1043
# The configuration form is only available with the
# 'View management screens' permission.
security.declareProtected(
    'View management screens',
    'changeDocumentViewerForm',
)
# page template with the form for changing the viewer settings
changeDocumentViewerForm = PageTemplateFile(
    'zpt/changeDocumentViewer',
    globals(),
)
1046
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Store new settings on the document viewer.

    Arguments:
    title -- new title.
    digilibBaseUrl -- base URL of digilib; the scaler and viewer URLs are
        built by appending to it. NOTE: it has to be a string, the default
        None raises a TypeError in the concatenation.
    thumbrows, thumbcols -- stored as thumbrows and thumbcols.
    authgroups -- comma separated group names; stored as a list of
        stripped, lower-cased names.
    availableLayers -- handed on to setAvailableLayers().
    RESPONSE -- if given, it is redirected to 'manage_main' at the end.
    """
    self.title = title

    # digilib URLs are all derived from the base URL
    self.digilibBaseUrl = digilibBaseUrl
    self.digilibScalerUrl = digilibBaseUrl + '/servlet/Scaler'
    self.digilibViewerUrl = digilibBaseUrl + '/jquery/digilib.html'

    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groupNames = []
    for name in authgroups.split(','):
        groupNames.append(name.strip().lower())
    self.authgroups = groupNames

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = self.metadata
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
1066
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    template = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of self before rendering it
    return template.__of__(self)()
1071
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and store it in self under the given id.

    imageScalerUrl, textServerName and title are passed to the
    documentViewer constructor. If RESPONSE is given it is redirected to
    'manage_main' afterwards.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: