Context Navigation

source: documentViewer/documentViewer.py @ 526:3f375a048402

Last change on this file since 526:3f375a048402 was 526:3f375a048402, checked in by casties, 12 years ago
moved search and dict into separate layers. removed search_template. added tocMode=concordance. fixed bug with paging tocs.
File size: 37.0 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from App.ImageFile import ImageFile
5	from AccessControl import ClassSecurityInfo
6	from AccessControl import getSecurityManager
7	from Globals import package_home
8
9	import xml.etree.ElementTree as ET
10
11	import os
12	import sys
13	import urllib
14	import logging
15	import math
16	import urlparse
17	import re
18	import string
19	import json
20
21	from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
22
def serializeNode(node, encoding="utf-8"):
    """Return the XML serialization of an ElementTree node.

    @param node: ElementTree element to serialize
    @param encoding: accepted for backward compatibility only; it is not
        passed on, ET.tostring() is called with its default encoding
    """
    # NOTE: a former 4Suite (Domlette.Print) implementation honoured
    # `encoding`; the ElementTree call below does not.
    return ET.tostring(node)
33
def browserCheck(self):
    """Inspect the User-Agent header of the request and return browser info.

    @param self: object providing REQUEST.get_header()
    @return: dict with the keys 'ua' (raw header value), 'isIE', 'isN4',
        'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML' (booleans) and
        'versFirefox', 'versIE', 'versSafariChrome', 'versOpera' (version
        strings, "" when not detected)

    A missing User-Agent header is treated like an empty one instead of
    raising; 'ua' then still holds the raw (None) value.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # requests without a User-Agent header must not break the detection
    uaText = ua or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in uaText:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in uaText

    # str.find() returns -1 on a miss; the slices then only keep the last
    # character, which simply makes the token lookups below fail harmlessly
    nav = uaText[uaText.find('('):]
    nav1 = uaText[uaText.find(')'):]

    # Safari or Chrome identification:
    # "...) AppleWebKit/x (KHTML, like Gecko) <Product>/<version> Safari/y"
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        tokens = nav3.split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification: "(compatible; MSIE <version>; ...)"
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: "...) Gecko/<date> Firefox/<version>"
    # (the product token is computed here instead of being borrowed from
    # the Safari detection above)
    try:
        product = nav1.split(" ")[2]
        if "Firefox" in product:
            ffVersion = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(ffVersion))
            # only the first three characters of the version are kept
            bt['versFirefox'] = ffVersion[0:3]
    except IndexError:
        pass

    # Opera identification: version is the third '/'-separated part after ')'
    try:
        if "Opera" in uaText:
            rest = nav[nav.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in uaText
    bt['isWin'] = 'Windows' in uaText
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
94
def getParentPath(path, cnt=1):
    """Return path with its last cnt '/'-separated segments removed.

    Trailing slashes are ignored, e.g. '/a/b/c/' with cnt=1 gives '/a/b'.
    """
    segments = path.rstrip('/').split('/')
    return '/'.join(segments[:-cnt])
101
102	##
103	## documentViewer class
104	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a 'Configuration' tab to the management tabs inherited from Folder
    manage_options = Folder.manage_options + (
        {'label': 'Configuration', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance (assigned when the viewer is configured)
    metadataService = None

    #
    # templates and forms
    #
    # viewMode templates
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_xml = PageTemplateFile('zpt/viewer_xml', globals())
    viewer_images = PageTemplateFile('zpt/viewer_images', globals())
    viewer_index = PageTemplateFile('zpt/viewer_index', globals())

    # available layer types: viewMode -> list of layer names (or None)
    builtinLayers = {
        'text': ['dict', 'search', 'gis', 'annotator'],
        'xml': None,
        'images': None,
        'index': None,
    }
    # NOTE: initially this is the very same dict object as builtinLayers
    availableLayers = builtinLayers

    # layer templates
    layer_text_dict = PageTemplateFile('zpt/layer_text_dict', globals())
    layer_text_search = PageTemplateFile('zpt/layer_text_search', globals())
    layer_text_annotator = PageTemplateFile('zpt/layer_text_annotator', globals())
    layer_text_gis = PageTemplateFile('zpt/layer_text_gis', globals())

    # toc templates
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    toc_concordance = PageTemplateFile('zpt/toc_concordance', globals())
    toc_none = PageTemplateFile('zpt/toc_none', globals())

    # other templates
    common_template = PageTemplateFile('zpt/common_template', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    docuviewer_css = ImageFile('css/docuviewer.css', globals())
    # make ImageFile better for development
    docuviewer_css.index_html = refreshingImageFileIndexHtml
    jquery_js = ImageFile('js/jquery.js', globals())
148
149
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialize the document viewer and its 'template' sub-folder.

    @param id: id of the new viewer object
    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to MpdlXmlTextServer as serverName
    @param title: title of the viewer
    @param digilibBaseUrl: base URL of digilib; the attribute is only set
        when a value is given
    @param thumbcols: number of thumbnail columns
    @param thumbrows: number of thumbnail rows
    @param authgroups: comma-separated list of authorized groups

    Failures while creating the text server, the zogiLib object or while
    looking up the 'metadata' folder are logged and otherwise ignored.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is a list of authorized groups (delimited by ,),
    # normalized to stripped lower case names
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self['template'] = templateFolder # Zope-2.12 style

    # full text client
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        templateFolder['fulltextclient'] = textServer
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    # zogilib image viewer
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder['zogilib'] = zogilib
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    # metadata service
    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
187
188
189	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the full text content of a page (proxy for template.fulltextclient.getTextPage)."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
193
def getSearchResults(self, **args):
    """Load the list of search results (proxy for template.fulltextclient.getSearchResults)."""
    textServer = self.template.fulltextclient
    return textServer.getSearchResults(**args)
197
def getResultsPage(self, **args):
    """Return one page of the search results (proxy for template.fulltextclient.getResultsPage)."""
    textServer = self.template.fulltextclient
    return textServer.getResultsPage(**args)
201
def getTextInfo(self, **args):
    """Return document info from the text server (proxy for template.fulltextclient.getTextInfo)."""
    textServer = self.template.fulltextclient
    return textServer.getTextInfo(**args)
205
def getToc(self, **args):
    """Load the table of contents (proxy for template.fulltextclient.getToc)."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
209
def getTocPage(self, **args):
    """Return one page of the table of contents (proxy for template.fulltextclient.getTocPage)."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
213
def getPlacesOnPage(self, **args):
    """Return the list of GIS places on one page (proxy for template.fulltextclient.getPlacesOnPage)."""
    textServer = self.template.fulltextclient
    return textServer.getPlacesOnPage(**args)
217
218	#WTF?
219	thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
220	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """
    render the document with the template/thumbs_main_rss template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images', 'text' or 'auto' (default); 'auto' selects
        'text' if the docinfo has a text URL and 'images' otherwise
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo as current
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl is only set by __init__ when a value was given, so the
    # attribute may be missing entirely (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # auto mode: text if a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
252
253
254	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto'
    @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo as current
    @return: output of template/viewer_$viewMode or an error string if the
        template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn))

    # nothing works without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode)

    if viewMode == "text_dict":
        # legacy name for text with dict layer
        viewMode = "text"
        viewLayer = "dict"

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text"
            if viewLayer is None:
                viewLayer = "dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is None:
        logging.error("No template for viewMode=%s!"%viewMode)
        # TODO: error page?
        return "No template for viewMode=%s!"%viewMode

    # and execute with parameters
    return pt(docinfo=docinfo, pageinfo=pageinfo)
302
303	#WTF?
def generateMarks(self,mk):
    """Return the concatenation of "mk=<m>" for every mark in mk.

    mk may be None (result is ""), a single value or a list of values.
    NOTE(review): the parts are joined without any separator -- confirm
    with the callers whether that is intended.
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
313
314
def getAvailableLayers(self):
    """Return the availableLayers dict (list of layer names per viewMode)."""
    layers = self.availableLayers
    return layers
318
def getBrowser(self):
    """Return the browser info dict computed by browserCheck() for the current request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
324
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib (getDLBaseUrl)."""
    return self.template.zogilib.getDLBaseUrl()
329
def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None):
    """Return the URL of the digilib Scaler with parameters.

    @param fn: image path; only used when docinfo provides no 'imageURL'
    @param pn: page number, appended as pn= when true
    @param dw: destination width
    @param dh: destination height
    @param docinfo: optional docinfo dict; its 'imageURL' is used as base
        URL if present, otherwise its 'imagePath' replaces a missing fn
    """
    haveDocinfo = docinfo is not None
    if haveDocinfo:
        url = docinfo.get('imageURL', None)
    else:
        url = None

    if url is None:
        # build the Scaler URL from the digilib base URL and the file name
        if fn is None and haveDocinfo:
            fn = docinfo.get('imagePath','')
        url = "%s/servlet/Scaler?"%self.digilibBaseUrl
        url += "fn=%s"%fn

    parts = [url]
    if pn:
        parts.append("&pn=%s"%pn)

    parts.append("&dw=%s&dh=%s"%(dw,dh))
    return ''.join(parts)
348
def getDocumentViewerURL(self):
    """Return the URL of this viewer instance (its absolute_url())."""
    viewerUrl = self.absolute_url()
    return viewerUrl
352
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
360
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes the parameters of the current request (REQUEST.form, which is
    copied and not modified) and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to change
    @param val: new value for param (stored as str), None removes param
    @param params: dict of more parameters to change (values stored as
        given), a value of None removes the key
    @param duplicates: how to collapse list values (coming from duplicate
        keys): 'comma' joins the non-empty entries with ',', 'first' takes
        the first non-empty entry ('' if there is none); any other value
        leaves lists untouched
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # gives '' (like 'comma' does) instead of an IndexError
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
399
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return a URL to the documentviewer with changed parameters.

    The parameters come from getParams(param, val, params, duplicates).

    @param baseUrl: URL to append the query string to; defaults to
        getDocumentViewerURL()
    @param paramSep: separator between the key=value pairs
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (key, value) in urlParams.items():
        # quote the value but leave '/' unescaped
        pairs.append("%s=%s"%(key, urllib.quote_plus(utf8ify(value), '/')))

    query = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    return "%s?%s"%(baseUrl, query)
410
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return a link to the documentviewer like getLink(), for use in HTML.

    Same as getLink() but the parameters are separated by the
    HTML-escaped ampersand '&amp;' instead of a plain '&'.
    NOTE(review): assumes callers insert the result into markup without
    further escaping -- confirm against the templates.
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
414
415
def setAvailableLayers(self, newLayerString=None):
    """sets availableLayers to newLayerString or tries to autodetect available layers.

    newLayerString is parsed as JSON and accepted if it is an object with
    at least the keys 'text' and 'images'. Otherwise (or if it is None)
    the layers are autodetected: starting with a copy of builtinLayers,
    every template id of the form layer_{m}_{l} adds layer l to mode m
    (l may itself contain underscores).
    """
    if newLayerString is not None:
        try:
            layers = json.loads(newLayerString)
            if isinstance(layers, dict) and 'text' in layers and 'images' in layers:
                self.availableLayers = layers
                return
        except (ValueError, TypeError):
            # not valid JSON -> fall through to autodetection
            pass

        logging.error("invalid layers=%s! autodetecting..."%repr(newLayerString))

    # start with builtin layers; copy the lists too so that the class-level
    # builtinLayers are never modified
    layers = {}
    for (mode, names) in self.builtinLayers.items():
        if names is None:
            layers[mode] = None
        else:
            layers[mode] = list(names)

    self.availableLayers = layers
    # add layers from templates
    for t in self.template:
        if not t.startswith('layer_'):
            continue

        # layer_{m}_{l}: split at the first two underscores only
        parts = t.split('_', 2)
        if len(parts) != 3:
            continue

        (x, m, l) = parts
        if not m or not l:
            continue

        if layers.get(m, None) is None:
            # mode m doesn't exist or has no layers yet -> new list
            layers[m] = [l]
        elif l not in layers[m]:
            # m exists -> append
            layers[m].append(l)
449
def getAvailableLayersJson(self):
    """Return availableLayers serialized as a JSON string."""
    layers = self.availableLayers
    return json.dumps(layers)
453
454
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of template/info_xml called with the docinfo
    """
    # digilibBaseUrl is only set by __init__ when a value was given, so the
    # attribute may be missing entirely (same guard as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
463
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    docinfo['accessType'] 'free' always grants access. A missing access
    type or one listed in self.authgroups grants access to every user
    whose user name is not "Anonymous User". All other access types are
    logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
484
485
486
def getDocinfo(self, mode, url, tocMode=None):
    """Return the docinfo dict for the document behind url.

    A docinfo stored in the session for the same mode and url is reused.
    Otherwise a new docinfo is assembled from index.meta (via
    self.metadataService), the text server and digilib, stored in the
    session under 'docinfo' and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to an image file)
    @param url: url or path of the document
    @param tocMode: passed on to getTextInfo as mode
    @raise IOError: if no index.meta is found for mode 'texttool'
    @raise ValueError: if mode is unknown
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session and check if it is still current
    cached = self.REQUEST.SESSION.get('docinfo', None)
    if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
        logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
        return cached

    # new docinfo including the URL of this viewer
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
        'digilibBaseUrl': self.digilibBaseUrl,
    }
    meta = self.metadataService

    # get index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = meta.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode in ("imagepath", "filepath"):
        # index.meta is optional here and assumed in the parent dir of the
        # image folder (imagepath) or two path segments above the image
        # file (filepath)
        if mode == "imagepath":
            docUrl = getParentPath(url)
        else:
            docUrl = getParentPath(url, 2)

        metaDom = meta.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resource = meta.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = meta.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
            # document info (including toc) from full text
            if docinfo.get('textURLPath', None):
                docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo)

        # bib info, with info.xml as fallback
        bib = meta.getBibData(dom=metaDom)
        if bib:
            docinfo = self.getDocinfoFromBib(docinfo, bib)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        access = meta.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = meta.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = meta.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url TODO: how about mode=auto?
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    # no image pages: fall back to the number of text pages (text-only?)
    if docinfo.get('numPages', 0) == 0 and docinfo.get('numTextPages', 0) > 0:
        docinfo['numPages'] = docinfo['numTextPages']

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
593
594
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] and docinfo['documentPath'].

    @param docinfo: docinfo dict (modified and returned)
    @param resource: dict with the optional keys 'name' and 'archive-path'
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath. getDocinfo() stores it as 'documentUrl';
        # the spelling 'documentURL' that was read here before is still
        # accepted as a fallback.
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
619
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Copy the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' and 'presentationUrl' (each only if the
    texttool value and docinfo['documentPath'] exist), 'textURLPath' (if
    given) and always 'pageFlow', 'oddPage' and 'titlePage'.

    @param docinfo: docinfo dict (modified and returned)
    @param texttool: dict with the texttool data
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        if urlparse.urlparse(oldTextUrl)[0] == "":
            # no URL scheme -> path relative to the document path
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow, position of odd pages and
    # number of title page (0: not defined)
    for (key, attr, default) in (('pageFlow', 'page-flow', 'ltr'),
                                 ('oddPage', 'odd-scan-position', 'left'),
                                 ('titlePage', 'title-scan-no', 0)):
        docinfo[key] = texttool.get(attr, default)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
663
def getDocinfoFromBib(self, docinfo, bib):
    """Copy the contents of the bib element into docinfo.

    Stores the raw bib fields as docinfo['bib'], the value of bib['@type']
    as 'bibType' and, for convenience, 'creator', 'title' and 'date' from
    the DC mapping provided by self.metadataService.getDCMappedData().
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
677
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to acc['@attr']['type'] or, for the types
    'group' and 'institution', to the lower case acc['name']. Access data
    that does not have this structure leaves docinfo unchanged.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (LookupError, TypeError, AttributeError) as e:
        # incomplete or malformed access data is ignored (best effort)
        logging.debug("getDocinfoFromAccess: unable to read access data: %s"%repr(e))

    return docinfo
695
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages for the image directory path from digilib.

    Fetches dirInfo-xml.jsp below self.digilibBaseUrl and stores the
    content of its "size" element as integer in docinfo['numPages'] (0 if
    it is empty). docinfo is returned unchanged if nothing could be
    fetched.
    """
    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dirInfo = ET.fromstring(txt)
    size = getText(dirInfo.find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
714
715
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Reads the info.xml given by docinfo['presentationUrl'] (directly if it
    is a http:// URL, otherwise through the digilib Texter servlet) and
    sets 'creator', 'title' and 'date' from its author, title and date
    elements. docinfo is returned unchanged if there is no presentation
    URL or if nothing could be fetched.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    # an empty response can't be parsed either (same check as in
    # getDocinfoFromDigilib)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
744
745
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None):
    """Return the pageinfo dict for the given parameters and the current request.

    @param current: current page number
    @param start: first page of the thumbnail batch; if empty, the start
        of the batch around current is used
    @param rows: thumbnail rows, defaults to self.thumbrows
    @param cols: thumbnail columns, defaults to self.thumbcols
    @param docinfo: docinfo dict of the document; 'numPages' is set from
        'numTextPages' here if it is 0
    @param viewMode: stored as pageinfo['viewMode']
    @param viewLayer: layer name, comma-separated layer names or list of
        layer names
    @param tocMode: stored as pageinfo['tocMode']

    Further settings (tocPageSize, characterNormalization, resultPageSize,
    query, queryType, resultStart, highlightQuery, highlightElement,
    highlightElementPos) are read from self.REQUEST. If there is a query,
    getSearchResults() is called with it.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode))
    pageinfo = {'viewMode': viewMode}

    # split viewLayer if necessary
    if isinstance(viewLayer, basestring):
        viewLayer = viewLayer.split(',')

    if isinstance(viewLayer, list):
        logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer)
        # save list of unique non-empty layers (first occurrence wins)
        uniqueLayers = []
        for layer in viewLayer:
            if layer and layer not in uniqueLayers:
                uniqueLayers.append(layer)

        pageinfo['viewLayers'] = uniqueLayers
        # stringify viewLayer
        viewLayer = ','.join(uniqueLayers)
    else:
        # create list
        pageinfo['viewLayers'] = [viewLayer]

    pageinfo['viewLayer'] = viewLayer
    pageinfo['tocMode'] = tocMode

    # TODO: unify current and pn!
    current = getInt(current)
    pageinfo['current'] = current
    pageinfo['pn'] = current

    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    pageinfo['rows'] = rows
    pageinfo['cols'] = cols
    pageinfo['groupsize'] = grpsize

    # if start is empty use the start of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start

    # get number of pages
    np = int(docinfo.get('numPages', 0))
    if np == 0:
        # try numTextPages
        np = docinfo.get('numTextPages', 0)
        if np != 0:
            docinfo['numPages'] = np

    # cache table of contents
    pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))

    # number of groups, counting a partially filled last group
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # more page parameters
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    if docinfo.get('pageNumbers'):
        # get original page numbers
        pageNumber = docinfo['pageNumbers'].get(current, None)
        if pageNumber is not None:
            pageinfo['pageNumberOrig'] = pageNumber['no']
            pageinfo['pageNumberOrigNorm'] = pageNumber['non']

    # cache search results
    pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
    query = self.REQUEST.get('query',None)
    pageinfo['query'] = query
    if query:
        queryType = self.REQUEST.get('queryType', 'fulltextMorph')
        pageinfo['queryType'] = queryType
        pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
        self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)

    # highlighting
    highlightQuery = self.REQUEST.get('highlightQuery', None)
    if highlightQuery:
        pageinfo['highlightQuery'] = highlightQuery
        pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
        pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')

    return pageinfo
831
832
def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a dict with the page information for one screenful of thumbnails.

    @param start: index of the first page of this screen
    @param rows: number of thumbnail rows
    @param cols: number of thumbnail columns
    @param pageFlowLtr: pages in a row run left-to-right if true, else
        right-to-left
    @param pageZero: if true the batches start at index 0 instead of 1 and
        a screen starting at 1 begins with an empty cell
    @param minIdx: smallest valid page index
    @param maxIdx: largest valid page index; 0 means start + rows*cols
    @return: dict with 'batches' (list of {'start','end'} of all batches),
        'pages' (rows of {'idx': n}, or {'idx': None} for cells outside
        minIdx..maxIdx), 'prevStart' and 'nextStart' (None if there is no
        previous/next screen)
    """
    grpsize = rows * cols
    if maxIdx == 0:
        maxIdx = start + grpsize

    # list of all batch start and end points
    numBatches = int(math.ceil(maxIdx / float(grpsize)))
    ofs = 0 if pageZero else 1
    batches = [{'start': i * grpsize + ofs,
                'end': min((i + 1) * grpsize + ofs - 1, maxIdx)}
               for i in range(numBatches)]

    # with a zeroth page the first screen begins one cell earlier
    idx = 0 if (pageZero and start == 1) else start
    pages = []
    for r in range(rows):
        cells = []
        for n in range(idx, idx + cols):
            if minIdx <= n <= maxIdx:
                cells.append({'idx': n})
            else:
                cells.append({'idx': None})

        idx += cols
        if not pageFlowLtr:
            # right-to-left: first page of the row goes last
            cells.reverse()

        pages.append(cells)

    prevStart = max(start - grpsize, 1) if start > 1 else None
    nextStart = start + grpsize if start + grpsize <= maxIdx else None
    return {
        'batches': batches,
        'pages': pages,
        'prevStart': prevStart,
        'nextStart': nextStart,
    }
890
def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
    """returns dict with information for one screenfull of data.

    @param start: index of the first element of this batch
    @param size: number of elements per batch
    @param end: index of the last element (inclusive); 0 means start + size
    @param data: optional dict of elements; if empty, i+1 is used as
        element for index i
    @param fullData: if true data is indexed by absolute position,
        otherwise by position inside this batch (starting at 0)
    @return: dict with 'batches' (list of {'start','end'} of all batches),
        'this' (list of elements in this batch), 'prevStart' and
        'nextStart' (None if there is no previous/next batch)
    """
    batch = {}
    if end == 0:
        end = start + size

    nb = int(math.ceil(end / float(size)))
    # list of all batch start and end points
    batch['batches'] = [{'start': i * size + 1, 'end': min((i + 1) * size, end)}
                        for i in range(nb)]

    # list of elements in this batch
    this = []
    for (j, i) in enumerate(range(start, min(start+size, end+1))):
        if data:
            if fullData:
                d = data.get(i, None)
            else:
                # data only holds this batch
                d = data.get(j, None)

        else:
            # NOTE(review): the element is i+1, not i -- confirm that this
            # is what callers without data expect
            d = i+1

        this.append(d)

    batch['this'] = this
    if start > 1:
        batch['prevStart'] = max(start - size, 1)
    else:
        batch['prevStart'] = None

    # end is inclusive, so there is a next batch even if it only consists
    # of the element at end (same condition as in getPageBatch)
    if start + size <= end:
        batch['nextStart'] = start + size
    else:
        batch['nextStart'] = None

    logging.debug("getBatch start=%s size=%s end=%s batch=%s"%(start,size,end,repr(batch)))
    return batch
935
936
937	security.declareProtected('View management screens','changeDocumentViewerForm')
938	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
939
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',availableLayers=None,RESPONSE=None):
    """Change the configuration of the document viewer.

    @param title: new title
    @param digilibBaseUrl: new base URL of digilib
    @param thumbrows: number of thumbnail rows
    @param thumbcols: number of thumbnail columns
    @param authgroups: comma-separated list of authorized groups
    @param availableLayers: passed to setAvailableLayers()
    @param RESPONSE: if given, redirected to 'manage_main'

    NOTE(review): the defaults for thumbrows/thumbcols (2/5) are swapped
    compared to __init__ (5/2) -- confirm which is intended.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # normalize group names to stripped lower case
    self.authgroups = [grp.strip().lower() for grp in authgroups.split(',')]

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    self.setAvailableLayers(availableLayers)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
957
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    return addForm.__of__(self)()
962
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    @param id: id of the new viewer
    @param imageScalerUrl: passed to the documentViewer constructor
    @param textServerName: passed to the documentViewer constructor
    @param title: title of the new viewer
    @param RESPONSE: if given, redirected to 'manage_main'
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')

Note: See TracBrowser for help on using the repository browser.

Download in other formats: