Context Navigation

source: documentViewer/documentViewer.py @ 478:cd37d6f8d5e8

elementtree

Last change on this file since 478:cd37d6f8d5e8 was 478:cd37d6f8d5e8, checked in by casties, 13 years ago
more cleanup
File size: 32.6 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log ``txt`` followed by ``txt2`` at INFO level on the root logger.

    ``method`` is accepted for call compatibility only; it is not used.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return ``node`` serialized as XML by ``ET.tostring``.

    ``encoding`` is a leftover of the former 4Suite based implementation and
    is not passed on to ElementTree.
    """
    return ET.tostring(node)
39
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the User-Agent header from ``self.REQUEST`` and returns a dict with
    the keys ``ua`` (the raw header value), the flags ``isIE``, ``isN4``,
    ``isMac``, ``isWin``, ``isIEWin``, ``isIEMac``, ``staticHTML`` (always
    False) and the version strings ``versIE``, ``versFirefox``,
    ``versSafariChrome`` and ``versOpera`` ("" when not recognised).

    Every browser is detected independently of the others, and a request
    without a User-Agent header yields a dict with all flags False.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # guard against a missing header (ua is None): parse an empty string
    agent = ua or ""

    isIE = 'MSIE' in agent
    bt = {
        'ua': ua,
        'isIE': isIE,
        # Netscape 4 is only considered when the agent is not IE
        'isN4': (not isIE) and ('Mozilla/4.' in agent),
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    # find() returns -1 when the character is missing, so these slices fall
    # back to the last character of the agent instead of raising
    fromOpen = agent[agent.find('('):]
    fromClose = agent[agent.find(')'):]

    # Safari or Chrome identification: the two tokens after the second ')'
    try:
        secondGroup = fromClose[fromClose.find('('):]
        tokens = secondGroup[secondGroup.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry inside the parentheses
    try:
        entry = fromOpen.split("; ")[1]
        if "MSIE" in entry:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: second token after the first ')'.
    # The token is looked up here and no longer taken from a variable that
    # only existed when the Safari detection had progressed far enough.
    try:
        token = fromClose.split(" ")[2]
        if "Firefox" in token:
            version = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification: text after the second '/' behind the first ')'
    try:
        if "Opera" in agent:
            afterParen = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = afterParen.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in agent
    bt['isWin'] = 'Windows' in agent
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentPath(path, cnt=1):
    """Return ``path`` without its last ``cnt`` '/'-separated segments.

    Trailing slashes are removed before the segments are counted.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[0:-cnt]
    return '/'.join(kept)
107
108
109	##
110	## documentViewer class
111	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # Folder's management tabs plus one tab for the configuration form
    manage_options = Folder.manage_options + (
        {'label':'main config','action':'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
142
143
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """init document viewer

    Creates the 'template' sub-folder and tries to add a 'fulltextclient'
    (MpdlXmlTextServer) and a 'zogilib' (zogiLib) object to it.  A failure to
    create either helper, or to find the 'metadata' object, is logged and
    otherwise ignored.  ``digilibBaseUrl`` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template', templateFolder) # old style

    # the active exception is read through sys.exc_info() in the handlers below
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient', textServer)
    except Exception:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib', zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(sys.exc_info()[1]))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
181
182
183	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Return the result of fulltextclient.getTextPage called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
187
def getOrigPages(self, **args):
    """Return the result of fulltextclient.getOrigPages called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
191
def getOrigPagesNorm(self, **args):
    """Return the result of fulltextclient.getOrigPagesNorm called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
195
def getQuery(self, **args):
    """Return the result of fulltextclient.getQuery (query in search) for the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
199
def getSearch(self, **args):
    """Return the result of fulltextclient.getSearch called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
203
def getGisPlaces(self, **args):
    """Return the result of fulltextclient.getGisPlaces called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
207
def getAllGisPlaces(self, **args):
    """Return the result of fulltextclient.getAllGisPlaces called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
211
def getWordInfo(self, **args):
    """Return the result of fulltextclient.getWordInfo (translation) for the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getWordInfo(**args)
215
def getLemma(self, **args):
    """Return the result of fulltextclient.getLemma called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
219
def getLemmaQuery(self, **args):
    """Return the result of fulltextclient.getLemmaQuery called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
223
def getLex(self, **args):
    """Return the result of fulltextclient.getLex called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
227
def getToc(self, **args):
    """Return the result of fulltextclient.getToc called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
231
def getTocPage(self, **args):
    """Return the result of fulltextclient.getTocPage called with the given keyword arguments."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
235
236
237	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template of the template folder.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images', 'text' or 'auto' (default); 'auto' becomes 'text'
        when docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: result of calling the template with docinfo, pageinfo and viewMode
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may not exist at all (it is only assigned when a value
    # was configured), so read it with a default like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        # text URL is set and a text viewer is configured
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
269
270
271	security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed to getPageinfo as start
    @param pn: passed to getPageinfo as the current page
    @return: output of the template 'viewer_<viewMode>' from the template
        folder, or an error message string when the folder or template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    if not hasattr(self, 'template'):
        # this won't work
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    configuredUrl = getattr(self, 'digilibBaseUrl', None)
    if not configuredUrl:
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode, viewType = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"

    # stringify viewType
    if isinstance(viewType, list):
        logging.debug("index_html: viewType is list:%s"%viewType)
        nonEmpty = [t for t in viewType if t]
        viewType = ','.join(nonEmpty)

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
327
def generateMarks(self,mk):
    # Returns the concatenation of "mk=<value>" for every mark in mk; mk may
    # be None (gives ""), a single value or a list of values.  The parts are
    # joined without any separator.
    # NOTE(review): kept without a docstring like the original -- in Zope a
    # docstring may make a method publishable; confirm before adding one.
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    return "".join(["mk=%s"%m for m in marks])
337
338
def getBrowser(self):
    """Return the browser information dict that browserCheck computes for this object's request."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
344
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
349
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance (self.absolute_url())."""
    ownUrl = self.absolute_url()
    return ownUrl
353
def getStyle(self, idx, selected, style=""):
    """Return ``style`` with 'sel' appended when ``idx`` equals ``selected``, else ``style`` unchanged."""
    return (style + 'sel') if idx == selected else style
361
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """returns dict with URL parameters.

    Takes URL parameters and additionally param=val or dict params.
    Deletes key if value is None.

    @param param: name of a single parameter to set (to str(val)) or remove
    @param val: value for param; None removes param
    @param params: dict of further parameters; values are stored unchanged,
        None removes the key
    @param duplicates: how list values (coming from duplicate keys) are
        flattened: 'comma' joins the non-empty entries with ',', 'first'
        takes the first non-empty entry ('' if there is none); any other
        true value leaves lists untouched
    @return: a new dict; the request's form is not modified
    """
    # copy existing request params
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for (k, v) in list(newParams.items()):
            if not isinstance(v, list):
                continue

            entries = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(entries)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # used to raise IndexError and now gives '' like 'comma' does
                if entries:
                    newParams[k] = entries[0]
                else:
                    newParams[k] = ''

    return newParams
400
401	def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
402	"""returns URL to documentviewer with parameter param set to val or from dict params"""
403	urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
404	# quote values and assemble into query string (not escaping '/')
405	ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
406	if baseUrl is None:
407	baseUrl = self.getDocumentViewerURL()
408
409	url = "%s?%s"%(baseUrl, ps)
410	return url
411
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """link to documentviewer with parameter param set to val

    Calls getLink with the separator '&'.
    """
    # NOTE(review): the separator equals getLink's default, so this currently
    # returns the same as getLink; the method name suggests an HTML-escaped
    # ampersand may have been intended -- confirm against the repository.
    linkArgs = {
        'param': param,
        'val': val,
        'params': params,
        'baseUrl': baseUrl,
        'paramSep': '&',
        'duplicates': duplicates,
    }
    return self.getLink(**linkArgs)
415
416
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: output of the 'info_xml' template called with the docinfo
    """
    # the attribute may not exist at all (it is only assigned when a value
    # was configured), so read it with a default like index_html does
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
425
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    Access type 'free' is always granted.  A missing access type or one that
    is listed in self.authgroups is granted to any user whose name is not
    "Anonymous User".  Every other access type is refused and logged.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.error("documentViewer (accessOK) unknown access type %s"%access)
    return False
446
447
448
def getDocinfo(self, mode, url):
    """returns docinfo depending on mode

    A docinfo stored in the session under 'docinfo' is reused when its 'mode'
    and 'url' match.  Otherwise a new dict is built from the index.meta data
    delivered by self.metadataService and from digilib, stored in the
    session and returned.

    @param mode: 'texttool' (url points to document dir or index.meta),
        'imagepath' (url points to folder with images) or 'filepath'
        (url points to image file)
    @param url: location of the document, interpreted according to mode
    @raise IOError: no index.meta found for mode 'texttool'
    @raise ValueError: unknown mode
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including self url
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
        'digilibBaseUrl': self.digilibBaseUrl,
    }

    # get index.meta DOM
    if mode=="texttool":
        # url points to document dir or index.meta
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode=="imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode=="filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        service = self.metadataService

        # document directory name and path
        resource = service.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = service.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info
        bib = service.getBibData(dom=metaDom)
        if not bib:
            # no bib - try info.xml
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = service.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = service.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyrightData = service.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
548
def getDocinfoFromResource(self, docinfo, resource):
    """reads contents of resource element into docinfo

    Sets docinfo['documentName'] from the resource's 'name' and
    docinfo['documentPath'] from its 'archive-path' (made absolute and
    extended by the name if it does not end with it).  Without an
    archive-path the document URL in docinfo is used, unless it starts with
    'http:'.  A leading '/mpiwg/online' is removed from the path.

    @param docinfo: dict to update; it is also the return value
    @param resource: dict-like resource data ('name', 'archive-path')
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath.  getDocinfo stores it under 'documentUrl';
        # the former lookup of 'documentURL' raised KeyError.  The old
        # spelling is still accepted as a fallback.
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
573
def getDocinfoFromTexttool(self, docinfo, texttool):
    """reads contents of texttool element into docinfo

    Fills 'imagePath', 'textURL', 'textURLPath', 'pageFlow', 'oddPage',
    'titlePage' and 'presentationUrl' from the texttool data.  Relative
    entries are joined to docinfo['documentPath'], so 'imagePath', 'textURL'
    and 'presentationUrl' are only set when a document path is known.

    @param docinfo: dict to update; it is also the return value
    @param texttool: dict-like texttool data
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl and docPath:
        scheme = urlparse.urlparse(textUrl)[0]
        if scheme == "":
            # not a URL
            textUrl = os.path.join(docPath, textUrl)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)
        docinfo['presentationUrl'] = presentation

    return docinfo
617
def getDocinfoFromBib(self, docinfo, bib):
    """reads contents of bib element into docinfo

    Stores the raw bib data under 'bib', its '@type' under 'bibType' and the
    'creator', 'title' and 'date' entries of the DC mapping delivered by
    self.metadataService.getDCMappedData (None when missing).
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)
    return docinfo
631
def getDocinfoFromAccess(self, docinfo, acc):
    """reads contents of access element into docinfo

    Sets docinfo['accessType'] to the access type found in
    acc['@attr']['type'].  For the types 'group' and 'institution' the
    lower-cased acc['name'] is stored instead.  Missing or malformed access
    data leaves docinfo unchanged.

    @param docinfo: dict to update; it is also the return value
    @param acc: access data as nested dict
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError):
        # best effort: incomplete access data is not an error, but only the
        # lookup failures are ignored (no longer KeyboardInterrupt etc.)
        logging.debug("getDocinfoFromAccess: incomplete access data, accessType not set")

    return docinfo
649
def getDocinfoFromDigilib(self, docinfo, path):
    # Fetches digilib's dirInfo-xml.jsp for path and stores the content of
    # its <size> element as int in docinfo['numPages'] (0 when it is empty).
    # docinfo is returned unchanged when no data could be fetched.
    # NOTE(review): kept without a docstring like the original -- in Zope a
    # docstring may make a method publishable; confirm before adding one.
    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    # fetch data
    txt = getHttpData(infoUrl)
    if txt:
        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        numPages = 0
        if size:
            numPages = int(size)
        docinfo['numPages'] = numPages
    else:
        logging.error("Unable to get dir-info from %s"%(infoUrl))

    # TODO: produce and keep list of image names and numbers
    return docinfo
668
669
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """gets DC-like bibliographical information from the presentation entry in texttools

    Loads the XML behind docinfo['presentationUrl'] (directly for http://
    URLs, otherwise through digilib's Texter servlet) and copies the text of
    its first author, title and date elements to 'creator', 'title' and
    'date'.  docinfo is returned unchanged when there is no URL or no data.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for (key, xpath) in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))
    return docinfo
698
699
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """returns pageinfo with the given parameters

    The result dict holds the view settings, the thumbnail grid geometry
    (rows and cols default to self.thumbrows and self.thumbcols), the page
    list from getPageList and the search/toc parameters read from
    self.REQUEST.  When docinfo has no page count but a 'textURLPath', the
    text page is fetched with getTextPage and kept under 'textPage'.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
    request = self.REQUEST

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the first page of the group around current
    firstOfGroup = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=firstOfGroup)
    pn = request.get('pn','1')

    pageinfo = {
        'viewMode': viewMode,
        'viewType': viewType,
        'tocMode': tocMode,
        'current': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
        'pn': pn,
    }

    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # cache text page as well
        pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        # partly filled last group
        numgroups += 1
    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageList'] = self.getPageList(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # parameters copied from the request as they are
    for (key, default) in (('characterNormalization','reg'), ('query',''), ('queryType',''), ('querySearch', 'fulltext'), ('highlightQuery','')):
        pageinfo[key] = request.get(key, default)

    # numeric parameters from the request
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(request.get('searchPN','1'))

    # limit tocPN
    tocKey = 'tocSize_%s'%tocMode
    if tocKey in docinfo:
        # cached toc
        tocSize = docinfo[tocKey]
        tocPageSize = pageinfo['tocPageSize']
        tocPages = tocSize/tocPageSize
        if tocSize%tocPageSize>0:
            tocPages = tocPages+1

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
764
765
def getPageList(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """returns array of page informations for one screenfull of thumbnails

    The result is a list of ``rows`` lists with ``cols`` dicts {'idx': n}
    each, numbered consecutively from ``start`` (from 0 when ``pageZero`` is
    set and start is 1).  Numbers outside minIdx..maxIdx give {'idx': None}.
    A maxIdx of 0 is replaced by start + rows * cols.  Rows are reversed
    when ``pageFlowLtr`` is false.
    """
    if maxIdx == 0:
        maxIdx = start + rows * cols

    # correct beginning for the zeroth page
    first = start
    if pageZero and start == 1:
        first = 0

    pages = []
    for r in range(rows):
        cells = []
        for c in range(cols):
            number = first + r * cols + c
            if minIdx <= number <= maxIdx:
                cells.append({'idx':number})
            else:
                cells.append({'idx':None})

        if not pageFlowLtr:
            # right-to-left: first page of the row goes to the right
            cells.reverse()

        pages.append(cells)

    logging.debug("getPageList returns=%s"%(pages))
    return pages
796
797
798	security.declareProtected('View management screens','changeDocumentViewerForm')
799	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
800
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """change the settings of the document viewer

    Stores title, digilibBaseUrl, thumbrows, thumbcols and the list of
    authgroups (comma separated, stripped and lower-cased) and looks up the
    'metadata' object again.  Redirects to 'manage_main' when a RESPONSE is
    given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = [group.strip().lower() for group in authgroups.split(',')]

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception:
        # the active exception is read through sys.exc_info()
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(sys.exc_info()[1]))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
816
def manage_AddDocumentViewerForm(self):
    """Render the form 'zpt/addDocumentViewer' for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
821
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
829
830	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta_type for document viewer templates"""
    meta_type = "DocumentViewer Template"
834
835
def manage_addDocumentViewerTemplateForm(self):
    """Render the form 'zpt/addDocumentViewerTemplate' for adding a viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
840
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged 'zpt/viewer_main.zpt'.

    ``text`` and ``submit`` are accepted but not used; the template content
    always comes from the file in the package directory.  With a REQUEST the
    browser is redirected to the new object's 'manage_main'; without one
    (call from code) the object is only created.  Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        # close the file even when reading fails
        txt = templateFile.read()
    finally:
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # nothing to redirect; the former code failed here with REQUEST=None
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
860
861
862

Note: See TracBrowser for help on using the repository browser.

Download in other formats: