Context Navigation

source: documentViewer/documentViewer.py @ 530:5c7433c2515c

Last change on this file since 530:5c7433c2515c was 530:5c7433c2515c, checked in by casties, 12 years ago
fix problems with texttool/text
File size: 37.3 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element `node` serialized as XML.

    `encoding` is kept in the signature (the former 4Suite based
    implementation used it) but it is not handed to ElementTree, so
    the default encoding of ET.tostring() applies.
    """
    return ET.tostring(node)
33
def browserCheck(self):
    """Analyse the User-Agent header of the current request.

    Returns a dict with the raw header value ('ua'), the boolean flags
    'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and
    'staticHTML' (always False) and the version strings 'versIE',
    'versFirefox', 'versSafariChrome' and 'versOpera', which stay ""
    when the respective browser was not recognised.
    """
    rawUa = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header must not break the check
    ua = rawUa or ""
    bt = {}
    bt['ua'] = rawUa
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # rest of the header starting at the first "(" resp. the first ")"
    # (str.find() returns -1 when absent, leaving just the last character)
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Every detection step is best effort: a header that does not have
    # the expected number of tokens raises IndexError and is skipped.

    # Safari or Chrome identification: "... (KHTML, like Gecko) <name>/<version> Safari/<build>"
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: "(compatible; MSIE <version>; ...)"
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first ")".
    # The token is computed here instead of being taken over from the
    # Safari/Chrome step, so this step no longer depends on how far
    # that step got.
    try:
        product = nav1.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if "Opera" in ua:
            afterParen = nav[nav.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
94
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are ignored; segments are separated by '/'.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
101
102	##
103	## documentViewer class
104	##
105	class documentViewer(Folder):
106	"""document viewer"""
107	meta_type="Document viewer"
108
109	security=ClassSecurityInfo()
110	manage_options=Folder.manage_options+(
111	{'label':'Configuration','action':'changeDocumentViewerForm'},
112	)
113
114	metadataService = None
115	"""MetaDataFolder instance"""
116
117
118	#
119	# templates and forms
120	#
121	# viewMode templates
122	viewer_text = PageTemplateFile('zpt/viewer_text', globals())
123	viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
124	viewer_images = PageTemplateFile('zpt/viewer_images', globals())
125	viewer_index = PageTemplateFile('zpt/viewer_index', globals())
126	viewer_thumbs = PageTemplateFile('zpt/viewer_thumbs', globals())
127	# available layer types
128	builtinLayers = {'text': ['dict','search','gis','annotator'],
129	'xml': None, 'images': None, 'index': None}
130	availableLayers = builtinLayers;
131	# layer templates
132	layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
133	layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
134	layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
135	layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())
136	# toc templates
137	toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
138	toc_text = PageTemplateFile('zpt/toc_text', globals())
139	toc_figures = PageTemplateFile('zpt/toc_figures', globals())
140	toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
141	toc_none = PageTemplateFile('zpt/toc_none', globals())
142	# other templates
143	common_template = PageTemplateFile('zpt/common_template', globals())
144	info_xml = PageTemplateFile('zpt/info_xml', globals())
145	docuviewer_css = ImageFile('css/docuviewer.css',globals())
146	# make ImageFile better for development
147	docuviewer_css.index_html = refreshingImageFileIndexHtml
148	jquery_js = ImageFile('js/jquery.js',globals())
149
150
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer with its 'template' sub-folder.

    id and title are stored on the instance, thumbcols and thumbrows
    describe the thumbnail grid and authgroups is a comma separated
    list of group names (stored stripped and lower-cased).
    textServerName and imageScalerUrl are handed to the
    MpdlXmlTextServer and zogiLib helper objects that are created
    inside the template folder; a failure to create either one is
    only logged. digilibBaseUrl is stored only when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # list of authorized groups
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # the template folder always exists so template.something can be used
    tmplFolder = Folder('template')
    self['template'] = tmplFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        tmplFolder['fulltextclient'] = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        tmplFolder['zogilib'] = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # the MetaDataFolder instance is expected under the name 'metadata'
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
188
189
190	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Delegate to getTextPage() of template/fulltextclient (full text of a page)."""
    client = self.template.fulltextclient
    return client.getTextPage(**args)
194
def getSearchResults(self, **args):
    """Delegate to getSearchResults() of template/fulltextclient (loads the search results)."""
    client = self.template.fulltextclient
    return client.getSearchResults(**args)
198
def getResultsPage(self, **args):
    """Delegate to getResultsPage() of template/fulltextclient (one page of search results)."""
    client = self.template.fulltextclient
    return client.getResultsPage(**args)
202
def getTextInfo(self, **args):
    """Delegate to getTextInfo() of template/fulltextclient (document info from the text server)."""
    client = self.template.fulltextclient
    return client.getTextInfo(**args)
206
def getToc(self, **args):
    """Delegate to getToc() of template/fulltextclient (loads the table of contents)."""
    client = self.template.fulltextclient
    return client.getToc(**args)
210
def getTocPage(self, **args):
    """Delegate to getTocPage() of template/fulltextclient (one page of the table of contents)."""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
214
def getPlacesOnPage(self, **args):
    """Delegate to getPlacesOnPage() of template/fulltextclient (gis places on one page)."""
    client = self.template.fulltextclient
    return client.getPlacesOnPage(**args)
218
219	#WTF?
220	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
221	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the RSS thumbnail view of a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text'
        when docinfo has a 'textURL' or 'textURLPath', else 'images'
    @param start: start page handed to getPageinfo()
    @param pn: current page number handed to getPageinfo()
    @return: result of the page template template/thumbs_main_rss
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may not exist at all on this instance, so a plain
    # self.digilibBaseUrl could raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # automatic mode: text if a text URL is known, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath',None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
253
254
255	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    Render one page of a document with the template viewer_<viewMode>.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images', 'text' or 'xml'; 'auto' (default) picks 'text' when docinfo has a 'textURLPath' and 'images' otherwise
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: start page handed to getPageinfo()
    @param pn: current page number handed to getPageinfo()
    @return: the rendered template or an error message string
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    if viewMode == "text_dict":
        # legacy name for text with dictionary layer
        viewMode, viewLayer = "text", "dict"
    elif viewMode == "auto":
        # text if there is a text (only textURLPath is checked), else images
        if docinfo.get('textURLPath', None):
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # look up template/viewer_$viewMode and render it
    viewerTemplate = getattr(self.template, 'viewer_%s'%viewMode, None)
    if viewerTemplate is not None:
        return viewerTemplate(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
304
305	#WTF?
def generateMarks(self,mk):
    """Return the concatenation of "mk=<value>" for every mark in mk.

    mk may be None (gives ""), a single value or a list of values.
    The pieces are joined without any separator.
    """
    if mk is None:
        return ""

    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
315
316
def getAvailableLayers(self):
    """Return the availableLayers dict (list of layers per viewMode)."""
    layers = self.availableLayers
    return layers
320
def getBrowser(self):
    """Return the browser information dict produced by browserCheck()."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
326
def findDigilibUrl(self):
    """Return the digilib base URL as reported by template/zogilib."""
    return self.template.zogilib.getDLBaseUrl()
331
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return a digilib Scaler URL with size parameters.

    The 'imageURL' of docinfo is used as base when present. Otherwise
    the URL is built from digilibBaseUrl and fn, where a missing fn
    is taken from the 'imagePath' of docinfo. pn is appended only
    when it is true; dw and dh are always appended.
    """
    base = None
    if docinfo is not None:
        base = docinfo.get('imageURL', None)

    if base is None:
        base = "%s/servlet/Scaler?"%self.digilibBaseUrl
        if fn is None and docinfo is not None:
            fn = docinfo.get('imagePath','')

        base += "fn=%s"%fn

    pageParam = ""
    if pn:
        pageParam = "&pn=%s"%pn

    return base + pageParam + "&dw=%s&dh=%s"%(dw,dh)
350
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
354
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
362
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the form parameters of the current request
    and applies param=val (val is stored as str) and then every entry
    of the dict params (values stored unchanged). A value of None
    removes the key.

    duplicates controls what happens to list values (coming from
    repeated keys): 'comma' joins the non-empty entries with ',',
    'first' keeps the first non-empty entry (or '' if all entries are
    empty), any other true value leaves lists untouched.
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty
                # entries gives '' like the 'comma' mode does
                if entries:
                    newParams[k] = entries[0]
                else:
                    newParams[k] = ''

    return newParams
401
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the viewer with the parameters from getParams().

    param, val, params and duplicates are passed on to getParams().
    The values are utf-8 encoded and URL-quoted (keeping '/') and the
    pairs are joined with paramSep. baseUrl defaults to
    getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (key, value) in urlParams.items():
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
412
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return getLink() for the given arguments with paramSep '&'.

    NOTE(review): the separator passed here equals the default of
    getLink(); the method name suggests an HTML-escaped separator may
    have been intended -- confirm against the templates using it.
    """
    linkArgs = {'param': param, 'val': val, 'params': params, 'baseUrl': baseUrl, 'duplicates': duplicates}
    return self.getLink(paramSep='&', **linkArgs)
416
417
def setAvailableLayers(self, newLayerString=None):
    """Set availableLayers from newLayerString or by autodetection.

    newLayerString is parsed as JSON and accepted when the result
    contains the keys 'text' and 'images'. Otherwise (or when it is
    None) the layers are autodetected: starting from a copy of
    builtinLayers, every entry of the template folder named
    layer_{m}_{l} adds layer l to mode m.
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return

        except (ValueError, TypeError):
            # not valid JSON or not a container -> autodetect below
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; the per-mode lists are copied too so
    # that appending below never changes builtinLayers itself
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # maxsplit=2 keeps underscores inside the layer name
        parts = t.split('_', 2)
        if len(parts) != 3:
            continue

        (m, l) = parts[1:]
        if layers.get(m) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)

    self.availableLayers = layers
451
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
455
456
def getInfo_xml(self,url,mode):
    """Return info about the document as XML.

    url and mode are passed on to getDocinfo(); the resulting docinfo
    is rendered with the page template template/info_xml.
    """
    # the attribute may not exist at all on this instance, so a plain
    # self.digilibBaseUrl could raise AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
465
def getAuthenticatedUser(self, anon=None):
    """Return the user object of the current security manager.

    If there is no user or the user name is "Anonymous User" the
    value of anon is returned instead.
    """
    user = getSecurityManager().getUser()
    if user is None or user.getUserName() == "Anonymous User":
        return anon

    return user
473
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    The 'accessType' of docinfo decides: 'free' is always granted; a
    missing access type or one listed in authgroups is granted when
    getAuthenticatedUser() returns a user; everything else is denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = self.getAuthenticatedUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    return (user is not None)
490
491
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url.

    mode selects how url is interpreted: 'texttool' (document
    directory or index.meta), 'imagepath' (folder with images) or
    'filepath' (image file). tocMode is passed on to getTextInfo()
    when the document has a 'textURLPath'. The result is stored in
    the session under 'docinfo' and returned from there as long as
    mode and url stay the same.

    Raises IOError when no index.meta is found in mode 'texttool'
    and ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # reuse the docinfo from the session if it is still current
    if session.has_key('docinfo'):
        cached = session['docinfo']
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl
    metadata = self.metadataService
    # get index.meta DOM
    docUrl = None
    metaDom = None
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = metadata.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional;
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional;
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl
    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resourceData = metadata.getResourceData(dom=metaDom)
        if resourceData:
            docinfo = self.getDocinfoFromResource(docinfo, resourceData)

        # texttool info
        texttoolData = metadata.getTexttoolData(dom=metaDom)
        if texttoolData:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info, falling back to the presentation info.xml
        bibData = metadata.getBibData(dom=metaDom)
        if bibData:
            docinfo = self.getDocinfoFromBib(docinfo, bibData)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        accessData = metadata.getAccessData(dom=metaDom)
        if accessData:
            docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # attribution info
        attributionData = metadata.getAttributionData(dom=metaDom)
        if attributionData:
            logging.debug("getDocinfo: attribution=%s"%repr(attributionData))
            docinfo['attribution'] = attributionData

        # copyright info
        copyrightData = metadata.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    # without image pages use the number of text pages (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
598
599
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Stores the 'name' of resource as 'documentName' and derives
    'documentPath' from its 'archive-path' (made absolute, with the
    document name appended when missing). Without an archive path the
    'documentUrl' of docinfo is used unless it is a http(s) URL. A
    leading '/mpiwg/online' is removed from the path.
    Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath (getDocinfo stores it under the key
        # 'documentUrl')
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
624
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' and 'presentationUrl' (only when
    docinfo has a 'documentPath' to resolve relative values against),
    'textURLPath', 'pageFlow' (default 'ltr'), 'oddPage' (default
    'left') and 'titlePage' (default 0). Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme -> path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
668
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    The raw bib dict is stored as 'bib' and its '@type' as 'bibType'.
    'creator', 'title' and 'date' are taken from the DC mapping
    returned by metadataService.getDCMappedData(bib) (None when
    missing). Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # all raw bib fields
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, None)

    return docinfo
682
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    The access type is acc['@attr']['type']; for the types 'group'
    and 'institution' the lower-cased acc['name'] is used instead.
    A non-empty result is stored as 'accessType'. When acc does not
    have the expected structure docinfo is left unchanged.
    Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        access = acc['@attr']['type']
        if access:
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError):
        # incomplete or unexpectedly shaped access data: best effort,
        # docinfo stays as it is
        pass

    return docinfo
700
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of images in the directory path from digilib.

    Fetches dirInfo-xml.jsp below digilibBaseUrl and stores the
    content of its "size" element as int in docinfo['numPages']
    (0 when empty). When nothing could be fetched docinfo is returned
    unchanged. Returns docinfo.
    """
    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    # fetch data
    response = getHttpData(infoUrl)
    if not response:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    size = getText(ET.fromstring(response).find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    numPages = 0
    if size:
        numPages = int(size)

    docinfo['numPages'] = numPages
    # TODO: produce and keep list of image names and numbers
    return docinfo
719
720
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Read DC-like bibliographical information from the presentation info.xml.

    The file is located by docinfo['presentationUrl']: a value
    starting with "http://" is fetched directly, anything else
    through the Texter servlet below digilibBaseUrl. The contents of
    the first author, title and date elements are stored as
    'creator', 'title' and 'date'. Without URL or data docinfo is
    returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl+"/servlet/Texter?fn="+url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))

    return docinfo
749
750
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current is the current page number and start the first page of
    the thumbnail batch (derived from current when empty). rows and
    cols default to thumbrows and thumbcols. viewLayer may be a
    comma separated string or a list; it is stored as string
    ('viewLayer') and as list without duplicates ('viewLayers').
    Further values are read from docinfo and from the request; when
    the request has a 'query', getSearchResults() is called.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    request = self.REQUEST
    pageinfo = {}
    pageinfo['viewMode'] = viewMode
    # split viewLayer if necessary
    if isinstance(viewLayer,basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # keep non-empty entries once, in their original order
        seen = set()
        viewLayers = []
        for layer in viewLayer:
            if layer and layer not in seen:
                seen.add(layer)
                viewLayers.append(layer)

        pageinfo['viewLayers'] = viewLayers
        # stringify viewLayer
        viewLayer = ','.join(viewLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start
    # get number of pages
    numPages = int(docinfo.get('numPages', 0))
    if numPages == 0:
        # try numTextPages
        numPages = docinfo.get('numTextPages', 0)
        if numPages != 0:
            docinfo['numPages'] = numPages

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    numGroups = int(numPages / grpsize)
    if numPages % grpsize > 0:
        # partially filled last group
        numGroups += 1

    pageinfo['numgroups'] = numGroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=numPages)
    # more page parameters
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        origNumber = docinfo['pageNumbers'].get(current, None)
        if origNumber is not None:
            pageinfo['pageNumberOrig'] = origNumber['no']
            pageinfo['pageNumberOrigNorm'] = origNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(request.get('resultPageSize', 10))
    query = request.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = request.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(request.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = request.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = request.get('highlightElement', '')
        pageinfo['highlightElementPos'] = request.get('highlightElementPos', '')

    return pageinfo
836
837
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict describing one screenful of thumbnails.

    'batches' lists start and end of every group of rows*cols pages
    up to maxIdx, 'pages' is a list of rows of {'idx': n} cells
    (n is None outside minIdx..maxIdx; rows are reversed unless
    pageFlowLtr), 'prevStart' and 'nextStart' are the start pages
    of the neighbouring groups or None. With pageZero the numbering
    of the groups starts at 0 instead of 1. maxIdx=0 means
    start + rows*cols.
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # start and end points of all batches
    firstIdx = 0 if pageZero else 1
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    batches = [{'start': n * grpsize + firstIdx,
                'end': min((n + 1) * grpsize + firstIdx - 1, maxIdx)}
               for n in range(numBatches)]

    # the first screen starts with the zeroth page if there is one
    idx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        cells = []
        for c in range(cols):
            if minIdx <= idx <= maxIdx:
                cells.append({'idx': idx})
            else:
                cells.append({'idx': None})

            idx += 1

        if not pageFlowLtr:
            # right-to-left: highest page first
            cells.reverse()

        pages.append(cells)

    prevStart = max(start - grpsize, 1) if start > 1 else None
    nextStart = start + grpsize if start + grpsize <= maxIdx else None
    return {'batches': batches, 'pages': pages, 'prevStart': prevStart, 'nextStart': nextStart}
895
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """Return a dict describing one screenful of data.

    'batches' lists start and end of every group of size elements up
    to end, 'this' holds the elements of the current batch and
    'prevStart' / 'nextStart' the start of the neighbouring batches
    or None. With fullData the elements are looked up in data by
    their absolute index, otherwise by a counter starting at 0;
    without data the element for index i is i+1. end=0 means
    start + size.
    """
    if end == 0:
        end = start + size

    # start and end points of all batches
    numBatches = int(math.ceil(end / float(size)))
    batches = [{'start': n * size + 1, 'end': min((n + 1) * size, end)}
               for n in range(numBatches)]

    # elements in this batch
    this = []
    for (pos, i) in enumerate(range(start, min(start+size, end+1))):
        if not data:
            this.append(i+1)
        elif fullData:
            this.append(data.get(i, None))
        else:
            # data only contains this batch, counted from 0
            this.append(data.get(pos, None))

    batch = {'batches': batches, 'this': this}
    batch['prevStart'] = max(start - size, 1) if start > 1 else None
    batch['nextStart'] = start + size if start + size < end else None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
940
941
942	security.declareProtected('View management screens','changeDocumentViewerForm')
943	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
944
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of this document viewer.

    Stores title, digilibBaseUrl, thumbrows and thumbcols, splits
    the comma separated authgroups into a list of stripped,
    lower-cased names, looks up the 'metadata' object again and
    hands availableLayers to setAvailableLayers(). When RESPONSE is
    given it is redirected to 'manage_main'.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]
    # the MetaDataFolder instance is expected under the name 'metadata'
    try:
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
962
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    formTemplate = PageTemplateFile('zpt/addDocumentViewer', globals())
    return formTemplate.__of__(self)()
967
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and add it to self under the given id.

    When RESPONSE is given it is redirected to 'manage_main'.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: