Context Navigation

source: documentViewer/documentViewer.py @ 477:17f0290b2327

elementtree

Last change on this file since 477:17f0290b2327 was 477:17f0290b2327, checked in by casties, 13 years ago
small fixes.
File size: 31.7 KB

Line
1	from OFS.Folder import Folder
2	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4	from AccessControl import ClassSecurityInfo
5	from AccessControl import getSecurityManager
6	from Globals import package_home
7
8	#from Ft.Xml import EMPTY_NAMESPACE, Parse
9	#import Ft.Xml.Domlette
10
11	import xml.etree.ElementTree as ET
12
13	import os.path
14	import sys
15	import urllib
16	import logging
17	import math
18	import urlparse
19	import re
20	import string
21
22	from SrvTxtUtils import getInt, getText, getHttpData
23
def logger(txt, method, txt2):
    """Log the concatenation of ``txt`` and ``txt2`` at INFO level.

    ``method`` is accepted for compatibility with existing callers but is
    not used.
    """
    message = txt + txt2
    logging.info(message)
27
28
def serializeNode(node, encoding="utf-8"):
    """Return the ElementTree element ``node`` serialized as XML.

    ``encoding`` is not passed on: the element is serialized with the
    default settings of ``ET.tostring``.
    """
    # The former 4Suite implementation printed the node into a string stream
    # using the requested encoding; the ElementTree version does not.
    xml_text = ET.tostring(node)
    return xml_text
39
def browserCheck(self):
    """Inspect the request's User-Agent header and describe the browser.

    Returns a dict with the raw user agent ('ua'), the boolean flags 'isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' and 'staticHTML' (always
    False) and the version strings 'versIE', 'versFirefox',
    'versSafariChrome' and 'versOpera', which stay empty when the respective
    browser is not recognized.
    """
    # a request without User-Agent header yields None, which would break
    # every string operation below
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # NOTE: str.find() returns -1 when nothing is found, so these slices then
    # start at the last character; the detections below simply find no match.
    after_open = ua[ua.find('('):]    # user agent from the first '(' on
    after_close = ua[ua.find(')'):]   # user agent from the first ')' on

    # Safari or Chrome identification: after the second parenthesised group
    # the version is taken from the product token preceding "Safari/..."
    try:
        second_group = after_close[after_close.find('('):]
        tokens = second_group[second_group.find(')'):].split(" ")
        if "Safari" in tokens[2]:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry of the first group
    try:
        candidate = after_open.split("; ")[1]
        if "MSIE" in candidate:
            bt['versIE'] = candidate.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: second product token after the first group.
    # The token is computed here so that the result does not depend on how
    # far the Safari detection above got.
    try:
        product = after_close.split(" ")[2]
        if "Firefox" in product:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are reported (e.g. "3.6")
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera identification: text after the second '/' behind the first group
    try:
        if "Opera" in ua:
            after_group = after_open[after_open.find(')'):]
            bt['versOpera'] = after_group.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100
def getParentPath(path, cnt=1):
    """Return ``path`` with its last ``cnt`` '/'-separated segments removed.

    Trailing slashes are ignored when counting segments.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
107
108
109	##
110	## documentViewer class
111	##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # add a "main config" tab to the standard Folder management tabs
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
142
143
def __init__(self, id, imageScalerUrl=None, textServerName=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=5, authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores id, title, thumbnail grid size and the authorized groups, creates
    the 'template' sub-folder and tries to add a 'fulltextclient' text server
    and a 'zogilib' object to it. Failing to create either of them, or to
    find an object called 'metadata', is logged and otherwise ignored.
    ``digilibBaseUrl`` is only stored when it is not None.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is one string of authorized group names delimited by ','
    self.authgroups = [name.strip().lower() for name in authgroups.split(',')]

    # create template folder so we can always use template.something
    tmpl = Folder('template')
    # old style; Zope-2.12 style would be self['template'] = tmpl
    self._setObject('template', tmpl)

    try:
        import MpdlXmlTextServer
        server = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient', serverName=textServerName)
        tmpl._setObject('fulltextclient', server)
    except Exception as err:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(err))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        scaler = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        tmpl._setObject('zogilib', scaler)
    except Exception as err:
        logging.error("Unable to create zogiLib for zogilib: "+str(err))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
181
182
183	# proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """get a text page from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTextPage(**args)

def getOrigPages(self, **args):
    """get original pages from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getOrigPages(**args)

def getOrigPagesNorm(self, **args):
    """get normalized original pages from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getOrigPagesNorm(**args)

def getQuery(self, **args):
    """get query in search from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getQuery(**args)

def getSearch(self, **args):
    """get search from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getSearch(**args)

def getGisPlaces(self, **args):
    """get gis places from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getGisPlaces(**args)

def getAllGisPlaces(self, **args):
    """get all gis places from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getAllGisPlaces(**args)

def getTranslate(self, **args):
    """get translate from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTranslate(**args)

def getLemma(self, **args):
    """get lemma from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getLemma(**args)

def getLemmaQuery(self, **args):
    """get lemma query from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getLemmaQuery(**args)

def getLex(self, **args):
    """get lex from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getLex(**args)

def getToc(self, **args):
    """get toc from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getToc(**args)

def getTocPage(self, **args):
    """get toc page from template.fulltextclient"""
    client = self.template.fulltextclient
    return client.getTocPage(**args)
235
236
237	security.declareProtected('View','thumbs_rss')
def thumbs_rss(self, mode, url, viewMode="auto", start=None, pn=1):
    """Render the RSS thumbnail view of a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'images' or 'text'; 'auto' (default) selects 'text' when
        docinfo has a text URL and 'images' otherwise
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @return: result of the template 'thumbs_main_rss' in the template folder
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # __init__ only assigns digilibBaseUrl when a value was given, so look it
    # up defensively (as index_html does) instead of raising AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode == "auto":
        # automatic mode: text view when a text URL is set, images otherwise
        if "textURL" in docinfo or docinfo.get('textURLPath', None):
            viewMode = "text"
        else:
            viewMode = "images"

    return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)
269
270
271	security.declareProtected('View','index_html')
def index_html(self, url, mode="texttool", viewMode="auto", viewType=None, tocMode="thumbs", start=1, pn=1):
    """
    view page
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: result of the template 'viewer_<viewMode>' from the template
        folder, or an error string if the folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy name for text view with dictionary sub-type
        viewMode, viewType = "text", "dict"
    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode, viewType = "text", "dict"
        else:
            viewMode = "images"

    # a list of view types becomes a comma separated string
    if isinstance(viewType, list):
        viewType = ','.join([t for t in viewType if t])

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # the template is /template/viewer_$viewMode
    pt = getattr(self.template, 'viewer_%s'%viewMode, None)
    if pt is None:
        # TODO: error page?
        message = "No template for viewMode=%s!"%viewMode
        logging.error(message)
        return message

    return pt(docinfo=docinfo, pageinfo=pageinfo)
326
def generateMarks(self, mk):
    """Return the marks in ``mk`` as concatenated 'mk=<value>' strings.

    ``mk`` may be None (gives an empty string), a single value or a list of
    values. The entries are joined without any separator.
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
336
337
def getBrowser(self):
    """Return the browser description dict produced by browserCheck."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
343
def findDigilibUrl(self):
    """Try to get the digilib base URL from the 'zogilib' object.

    Returns the result of template.zogilib.getDLBaseUrl(), or None when the
    template folder or its 'zogilib' object does not exist (its creation in
    __init__ may have failed), so callers can fall back to a default URL.
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        logging.error("findDigilibUrl: no zogilib object in template folder")
        return None

    return zogilib.getDLBaseUrl()
348
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
352
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    if idx != selected:
        return style
    return style + 'sel'
360
def getParams(self, param=None, val=None, params=None):
    """Return a dict of URL parameters based on the current request.

    Starts from a copy of the request's form parameters, then applies the
    single ``param``/``val`` pair (the value is stored as ``str(val)``) and
    the entries of the dict ``params`` (values are stored unchanged).
    A value of None removes the key.
    """
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is not None:
            newParams[param] = str(val)
        else:
            newParams.pop(param, None)

    # change more params
    if params is not None:
        for key, value in params.items():
            if value is not None:
                newParams[key] = value
            else:
                # None removes the parameter
                newParams.pop(key, None)

    return newParams
389
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the document viewer with updated query parameters.

    The parameters are those returned by getParams(param, val, params).
    Values are quoted with urllib.quote_plus (leaving '/' unescaped), keys
    are used as they are, and the pairs are joined with ``paramSep``.
    ``baseUrl`` defaults to the URL of this viewer instance.
    """
    def quoted(value):
        # quote_plus works on byte strings: pass those through unchanged and
        # turn everything else into UTF-8 (u"%s" % x is unicode(x)), so that
        # non-ASCII values no longer raise KeyError/UnicodeDecodeError
        if not isinstance(value, str):
            value = (u"%s" % (value,)).encode('utf-8')
        return urllib.quote_plus(value, '/')

    urlParams = self.getParams(param=param, val=val, params=params)
    ps = paramSep.join(["%s=%s"%(k, quoted(v)) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    url = "%s?%s"%(baseUrl, ps)
    return url
400
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return the getLink() URL with '&amp;' as parameter separator.

    For use where the URL is embedded in HTML/XML markup without further
    escaping.
    """
    # NOTE(review): the separator used to be a plain '&', which made this
    # method identical to getLink(); the method name indicates the escaped
    # entity -- confirm against the templates that call it.
    return self.getLink(param, val, params, baseUrl, '&amp;')
404
405
def getInfo_xml(self, url, mode):
    """Return info about the document as XML.

    Renders the template 'info_xml' from the template folder with the
    docinfo for ``mode`` and ``url``.
    """
    # __init__ only assigns digilibBaseUrl when a value was given, so look it
    # up defensively (as index_html does) instead of raising AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode, url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
414
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    Access type 'free' is always granted. A missing access type or one of the
    configured authgroups is granted to every user except "Anonymous User".
    Any other access type is logged as unknown and denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
435
436
437
def getDocinfo(self, mode, url):
    """Return the docinfo dict for the document at ``url``.

    ``mode`` is one of 'texttool' (url points to the document directory or
    its index.meta), 'imagepath' (url points to a folder of images) or
    'filepath' (url points to an image file). The docinfo is assembled from
    the index.meta data provided by the metadata service and from digilib,
    and is cached in the session; a cached docinfo with the same mode and url
    is returned directly.

    Raises IOError if no index.meta is found in mode 'texttool' and
    ValueError for an unknown mode.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo with the URLs of this viewer and of digilib
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    metadata = self.metadataService
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = metadata.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode in ("imagepath", "filepath"):
        # index.meta is optional here and assumed in the parent dir of the
        # image folder (imagepath) or two path segments above the image
        # file (filepath)
        levels = 1 if mode == "imagepath" else 2
        docUrl = getParentPath(url, levels)
        metaDom = metadata.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    if metaDom is not None and metaDom.tag == 'resource':
        # document directory name and path
        resourceData = metadata.getResourceData(dom=metaDom)
        if resourceData:
            docinfo = self.getDocinfoFromResource(docinfo, resourceData)

        # texttool info
        texttoolData = metadata.getTexttoolData(dom=metaDom)
        if texttoolData:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttoolData)

        # bib info, falling back to the presentation info.xml
        bibData = metadata.getBibData(dom=metaDom)
        if bibData:
            docinfo = self.getDocinfoFromBib(docinfo, bibData)
        else:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)

        # auth info
        accessData = metadata.getAccessData(dom=metaDom)
        if accessData:
            docinfo = self.getDocinfoFromAccess(docinfo, accessData)

        # attribution info
        attributionData = metadata.getAttributionData(dom=metaDom)
        if attributionData:
            logging.debug("getDocinfo: attribution=%s"%repr(attributionData))
            docinfo['attribution'] = attributionData

        # copyright info
        copyrightData = metadata.getCopyrightData(dom=metaDom)
        if copyrightData:
            logging.debug("getDocinfo: copyright=%s"%repr(copyrightData))
            docinfo['copyright'] = copyrightData

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
537
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets 'documentName' from the resource's 'name' and 'documentPath' from
    its 'archive-path' (made absolute and extended by the name if it does
    not end with it). Without an archive-path the document URL from docinfo
    is used unless it is an http: URL. A leading '/mpiwg/online' is removed
    from the path. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl'
        # (the former lookup of 'documentURL' always raised KeyError)
        docUrl = docinfo.get('documentUrl', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
562
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL' and 'presentationUrl' (each only if the
    texttool entry and docinfo's 'documentPath' are present), 'textURLPath'
    (if present) and 'pageFlow', 'oddPage' and 'titlePage' (with the
    defaults 'ltr', 'left' and 0). Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL: treat as path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
606
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib fields as 'bib', the '@type' entry as 'bibType' and
    'creator', 'title' and 'date' from the metadata service's DC mapping of
    bib. Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
620
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets 'accessType' to the type in acc['@attr']['type'], or to the
    lower-cased acc['name'] for the types 'group' and 'institution'.
    Incomplete access data leaves docinfo unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # missing or malformed access data is not an error: leave docinfo
        # without 'accessType'
        logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
638
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages for the image directory ``path`` from digilib.

    Requests dirInfo-xml.jsp from the digilib server and stores the content
    of its 'size' element as int in docinfo['numPages'] (0 if it is empty).
    If nothing can be fetched an error is logged and docinfo is returned
    unchanged.
    """
    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    # fetch data
    txt = getHttpData(infoUrl)
    if txt:
        dom = ET.fromstring(txt)
        size = getText(dom.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s"%size)
        docinfo['numPages'] = int(size) if size else 0
    else:
        logging.error("Unable to get dir-info from %s"%(infoUrl))

    # TODO: produce and keep list of image names and numbers
    return docinfo
657
658
def getDocinfoFromPresentationInfoXml(self, docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the info.xml given by docinfo['presentationUrl'] (directly for
    http/https URLs, otherwise through digilib's Texter servlet) and stores
    the text of its first 'author', 'title' and 'date' elements as 'creator',
    'title' and 'date'. If there is no URL or nothing can be read an error
    is logged and docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path: fetch through digilib
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    # an empty response can not be parsed either (same check as in
    # getDocinfoFromDigilib)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
687
688
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    Combines the view parameters, the thumbnail grid (``rows`` and ``cols``
    default to the viewer's thumbrows and thumbcols), the list of pages for
    the current thumbnail group and the search and table-of-contents
    parameters read from the request. ``docinfo`` is required: it supplies
    the number of pages and the page layout.
    """
    request = self.REQUEST

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use the first page of the group around current
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pn = request.get('pn','1')

    pageinfo = {
        'viewMode': viewMode,
        'viewType': viewType,
        'tocMode': tocMode,
        'current': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
        'pn': pn,
    }

    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # (the text page is cached in pageinfo as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    # number of thumbnail groups, counting a partly filled last group
    numgroups, remainder = divmod(np, grpsize)
    if remainder > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageList'] = self.getPageList(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    # search and toc parameters from the request
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(request.get('searchPN','1'))

    # limit tocPN to the number of pages of a cached toc
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        tocSize = docinfo[tocSizeKey]
        tocPageSize = pageinfo['tocPageSize']
        if tocSize%tocPageSize>0:
            tocPages = tocSize/tocPageSize+1
        else:
            tocPages = tocSize/tocPageSize

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
752
753
def getPageList(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return the page numbers for one screenful of thumbnails.

    The result is a list of ``rows`` lists with ``cols`` dicts {'idx': n},
    numbered consecutively from ``start``; 'idx' is None for numbers outside
    minIdx..maxIdx. With ``pageZero`` a group starting at 1 begins at 0
    instead. Rows are reversed unless ``pageFlowLtr``. A ``maxIdx`` of 0
    means start + rows * cols.
    """
    if maxIdx == 0:
        maxIdx = start + rows * cols

    # correct beginning: the zeroth page shifts the first group by one
    idx = 0 if (pageZero and start == 1) else start

    pages = []
    for _ in range(rows):
        cells = []
        for _ in range(cols):
            number = idx if minIdx <= idx <= maxIdx else None
            cells.append({'idx': number})
            idx += 1

        if not pageFlowLtr:
            # right-to-left page flow
            cells.reverse()

        pages.append(cells)

    logging.debug("getPageList returns=%s"%(pages))
    return pages
784
785
786	security.declareProtected('View management screens','changeDocumentViewerForm')
787	changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
788
def changeDocumentViewer(self, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=5, authgroups='mpiwg', RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilib base URL, thumbnail grid size and the authorized
    groups (one string delimited by ','), looks up the object called
    'metadata' again and redirects to 'manage_main' if a RESPONSE is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as err:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(err))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
804
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering
    return form.__of__(self)()
809
def manage_AddDocumentViewer(self, id, imageScalerUrl="", textServerName="", title="", RESPONSE=None):
    """Add a new document viewer with the given id to this container.

    Redirects to 'manage_main' if a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
817
818	## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer (a ZopePageTemplate with its own meta type)."""

    meta_type = "DocumentViewer Template"
822
823
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering
    return form.__of__(self)()
828
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the product's viewer_main.zpt.

    The new template gets the content of 'zpt/viewer_main.zpt' from the
    package directory and, if given, ``title``. ``text`` and ``submit`` are
    accepted but not used. With a REQUEST the browser is redirected to the
    management screen of the new template. Returns an empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    # close the file again instead of leaving the handle to the garbage
    # collector
    with open(templatePath, 'r') as templateFile:
        txt = templateFile.read()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
848
849
850

Note: See TracBrowser for help on using the repository browser.

Download in other formats: