comparison documentViewer.py @ 2:8cc283757c39 modularisierung

New Version
author abukhman
date Mon, 14 Jun 2010 12:50:06 +0200
parents 96f74b2bab24
children e9085ba2bb51
comparison
equal deleted inserted replaced
1:312446f900da 2:8cc283757c39
1
2 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"
3 1
4 from OFS.Folder import Folder 2 from OFS.Folder import Folder
5 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
6 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5 from Products.PythonScripts.standard import url_quote
7 from AccessControl import ClassSecurityInfo 6 from AccessControl import ClassSecurityInfo
7 from AccessControl import getSecurityManager
8 from Globals import package_home 8 from Globals import package_home
9 9
10 from Ft.Xml.Domlette import NonvalidatingReader 10 from Ft.Xml.Domlette import NonvalidatingReader
11 from Ft.Xml.Domlette import PrettyPrint, Print 11 from Ft.Xml.Domlette import PrettyPrint, Print
12 from Ft.Xml import EMPTY_NAMESPACE 12 from Ft.Xml import EMPTY_NAMESPACE, Parse
13
14 from xml.dom.minidom import parse, parseString
15
16 from extraFunction import *
17
13 18
14 import Ft.Xml.XPath 19 import Ft.Xml.XPath
15 20 import cStringIO
21 import xmlrpclib
16 import os.path 22 import os.path
23 import sys
17 import cgi 24 import cgi
18 import urllib 25 import urllib
26 import logging
27 import math
28
29 import urlparse
30 from types import *
31
def logger(txt, method, txt2):
    """Write txt immediately followed by txt2 to the root logger.

    Callers in this file pass a logging level constant (logging.INFO,
    logging.WARNING, logging.ERROR) as ``method``.  An integer ``method``
    is used as the level of the record; anything else falls back to INFO,
    the level every message was logged at before.
    """
    level = logging.INFO
    if isinstance(method, int):
        level = method
    # the two parts are joined without a separator, as before
    logging.log(level, txt + txt2)
35
36
def getInt(number, default=0):
    """Return number converted to int, or int(default) if it cannot be converted.

    Only conversion failures are handled (wrong type, unparsable string,
    infinite float); other exceptions such as KeyboardInterrupt propagate.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
19 43
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    None yields the empty string.  Only immediate TEXT_NODE children are
    used; text inside nested elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
27 54
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string in the given encoding."""
    out = cStringIO.StringIO()
    # Print (Ft.Xml.Domlette) writes the serialization into the buffer
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
62
63
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
67
68
28 import socket 69 import socket
29 70
def urlopen(url,timeout=2):
    """Open url with urllib using a temporary socket default timeout.

    The process-wide socket default timeout is set to ``timeout`` seconds
    for the duration of the call and set to 5 seconds afterwards, also when
    opening the URL raises.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # previously skipped on errors, leaving the short timeout in place
        socket.setdefaulttimeout(5)
36 77
37 def getParamFromDigilib(path,param): 78
38 """gibt param von dlInfo aus""" 79 ##
39 imageUrl=genericDigilib+"/dlInfo-xml.jsp?fn="+path 80 ## documentViewer class
40 81 ##
41 try: 82 class documentViewer(Folder, extraFunction):
42 dom = NonvalidatingReader.parseUri(imageUrl)
43 except:
44 return None
45
46
47 params=dom.xpath("//document-parameters/parameter[@name='%s']/@value"%param)
48
49 if params:
50 return params[0].value
51
52 def parseUrlTextTool(url):
53 """parse index meta"""
54
55 try:
56 dom = NonvalidatingReader.parseUri(url)
57 except:
58 zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])
59 return (None,None,None)
60
61 archivePaths=dom.xpath("//resource/archive-path")
62
63 if archivePaths and (len(archivePaths)>0):
64 archivePath=getTextFromNode(archivePaths[0])
65 else:
66 archivePath=None
67
68
69 images=dom.xpath("//texttool/image")
70
71 if images and (len(images)>0):
72 image=getTextFromNode(images[0])
73 else:
74 image=None
75
76 if image and archivePath:
77 image=os.path.join(archivePath,image)
78 image=image.replace("/mpiwg/online",'')
79 pt=getParamFromDigilib(image,'pt')
80
81 else:
82 image=None
83
84 viewerUrls=dom.xpath("//texttool/digiliburlprefix")
85
86 if viewerUrls and (len(viewerUrls)>0):
87 viewerUrl=getTextFromNode(viewerUrls[0])
88 else:
89 viewerUrl=None
90
91
92 textUrls=dom.xpath("//texttool/text")
93
94 if textUrls and (len(textUrls)>0):
95 textUrl=getTextFromNode(textUrls[0])
96 else:
97 textUrl=None
98 return viewerUrl,(image,pt),textUrl
99
100
101 class documentViewer(ZopePageTemplate):
102 """document viewer""" 83 """document viewer"""
103 84 #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
85
104 meta_type="Document viewer" 86 meta_type="Document viewer"
105 87
106 security=ClassSecurityInfo() 88 security=ClassSecurityInfo()
107 manage_options=ZopePageTemplate.manage_options+( 89 manage_options=Folder.manage_options+(
108 {'label':'main config','action':'changeDocumentViewerForm'}, 90 {'label':'main config','action':'changeDocumentViewerForm'},
109 ) 91 )
110 92
111 _default_content_fn = os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt') 93 # templates and forms
112 94 viewer_main = PageTemplateFile('zpt/viewer_main', globals())
113 def __init__(self,id,imageViewerUrl,title=""): 95 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
96 toc_text = PageTemplateFile('zpt/toc_text', globals())
97 toc_figures = PageTemplateFile('zpt/toc_figures', globals())
98 page_main_images = PageTemplateFile('zpt/page_main_images', globals())
99 page_main_text = PageTemplateFile('zpt/page_main_text', globals())
100 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
101 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
102 head_main = PageTemplateFile('zpt/head_main', globals())
103 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
104 info_xml = PageTemplateFile('zpt/info_xml', globals())
105
106
107 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
108 security.declareProtected('View management screens','changeDocumentViewerForm')
109 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
110
111
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    Stores id, title, digilib base URL, thumbnail grid size and the list of
    authorized groups, then creates the 'template' sub-folder holding the
    'fulltextclient' and 'zogilib' helper objects.  Failure to create either
    helper is logged and does not abort construction.

    @param imageScalerUrl: passed to zogiLib as dlServerURL
    @param textServerName: passed to XMLRpcServerProxy as serverUrl
    @param authgroups: comma separated list of authorized group names
    """
    self.id=id
    self.title=title
    # other methods read self.digilibBaseUrl, so the argument must be kept;
    # a false value makes them look the URL up from zogilib
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    #self['template'] = templateFolder # Zope-2.12 style
    self._setObject('template',templateFolder) # old style
    try:
        from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
        xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
        #templateFolder['fulltextclient'] = xmlRpcClient
        templateFolder._setObject('fulltextclient',xmlRpcClient)
    except Exception:
        logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        #templateFolder['zogilib'] = zogilib
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
139
140
141 security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for the document behind url.
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: passed on to getPageinfo
    @param pn: current page number, passed on to getPageinfo
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may be missing altogether, same lookup as index_html
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # automatic mode selected
        # text mode needs a text URL and a configured text viewer;
        # textViewerUrl is not set anywhere in the visible code, so treat
        # a missing attribute as "not configured" instead of failing
        if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
124 171
172 security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
    '''
    Render the viewer_main template for the document behind url.
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # nothing can be rendered without the template folder
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every tocMode except "thumbs" needs the table of contents
    wantsToc = (tocMode != "thumbs")
    if wantsToc:
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    # automatic mode: text view when a text URL is known, images otherwise
    if viewMode == "auto":
        viewMode = "images"
        if docinfo.get("textURL",''):
            viewMode = "text_dict"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    page = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return page(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
125 210
def generateMarks(self,mk):
    """Return the given mark(s) as 'mk=...' query parameters.

    @param mk: None, a single mark value, or a list/tuple of mark values
    @return: "" for None or an empty list, otherwise one 'mk=value' pair
             per mark, joined with '&' so that several marks stay separate
             parameters (they used to be run together without separator)
    """
    if mk is None:
        return ""
    if not isinstance(mk, (list, tuple)):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
220
221
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
226
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
230
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
238
def getLink(self,param=None,val=None):
    """Return a link to this viewer built from the current request's form.

    With param given, that parameter is set to str(val), or removed when
    val is None.  A request in mode "filepath" is rewritten to mode
    "imagepath" with url replaced by its parent directory.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # a first call with mode filepath is turned into imagepath from now on
    if params.get("mode", None) == "filepath":
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    query = "&".join(pairs)
    return self.REQUEST['URL1']+"?"+query
257
def getLinkAmp(self,param=None,val=None):
    """Return a link to this viewer built from the current request's form.

    With param given, that parameter is set to str(val), or removed when
    val is None.  Unlike getLink, the mode/url parameters are not rewritten.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is not None:
            params[param] = str(val)
        elif param in params:
            del params[param]

    # quote values and assemble into query string
    logging.info("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        pairs.append("%s=%s"%(k,urllib.quote(v)))
    query = "&".join(pairs)
    return self.REQUEST['URL1']+"?"+query
273
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered by the info_xml template).

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # the attribute may be missing altogether, same lookup as index_html
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
283
284
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    'free' access is always granted.  A missing access type, or one listed
    in self.authgroups, is granted to every user whose name is not
    "Anonymous User".  Any other access type is denied.
    """
    access = docinfo.get('accessType', None)
    logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)

    if access == 'free':
        logger("documentViewer (accessOK)", logging.INFO, "access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
    return False
303
304
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Store the number of pages reported by digilib's dirInfo in docinfo.

    @param path: image path below the digilib base directory
    @param docinfo: dict to update (a new dict is created when None)
    @param cut: number of trailing path components to remove before asking
    @return: docinfo with 'numPages' set (0 when digilib reports no size)
    @raise IOError: when dirInfo cannot be read and parsed in three tries
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # read the text first and parse it afterwards instead of
            # NonvalidatingReader.parseUri(infoUrl)
            conn = urllib.urlopen(infoUrl)
            try:
                txt = conn.read()
            finally:
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # retry on errors, but let KeyboardInterrupt/SystemExit through
            logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # no attempt succeeded
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes=dom.xpath("//dir/size")
    # the format string had no placeholder, so the sizes were never logged
    logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
341
342
def getIndexMeta(self, url):
    """Return the parsed dom of the index.meta document at url.

    A url starting with "http://" is used as is.  Anything else is treated
    as an online path: "/mpiwg/online" is removed, the path is requested
    through the digilib Texter servlet and "/index.meta" is appended unless
    the result already ends with "index.meta".

    @raise IOError: when the document cannot be read and parsed in three tries
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"
    logging.debug("METAURL: %s"%metaUrl)
    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the encoding
            # errors seen with NonvalidatingReader.parseUri(metaUrl)
            conn = urllib.urlopen(metaUrl)
            try:
                txt = conn.read()
            finally:
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # retry on errors, but let KeyboardInterrupt/SystemExit through
            logger("ERROR documentViewer (getIndexMeta)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
372
def getPresentationInfoXML(self, url):
    """Return the parsed dom of the info.xml document at url.

    A url starting with "http://" is used as is.  Anything else is treated
    as an online path: "/mpiwg/online" is removed and the path is requested
    through the digilib Texter servlet.

    @raise IOError: when the document cannot be read and parsed in three tries
    """
    num_retries = 3
    dom = None
    metaUrl = None
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    for cnt in range(num_retries):
        try:
            # reading the text and parsing it separately avoids the encoding
            # errors seen with NonvalidatingReader.parseUri(metaUrl)
            conn = urllib.urlopen(metaUrl)
            try:
                txt = conn.read()
            finally:
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            # retry on errors, but let KeyboardInterrupt/SystemExit through
            logger("ERROR documentViewer (getPresentationInfoXML)", logging.ERROR,"%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read infoXML from %s"%(url))

    return dom
400
401
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Store the access type from index.meta in docinfo['accessType'].

    @param path: path of the index.meta file, only read when dom is None
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: already parsed index.meta; when None it is loaded from path
    @param cut: number of trailing path components to remove before loading
    @return: docinfo; 'accessType' is None when no access type is given,
             the lower-cased access name for types 'group' and
             'institution', and the type itself otherwise
    """
    logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # a missing name used to raise IndexError; keep the bare
                # type instead so the caller sees an access type it can judge
                logging.warning("documentViewer (getauthinfofromindexmeta) access type %s without name in: %s"%(access,path))

    docinfo['accessType'] = access
    return docinfo
424
425
# Collects bibliographical data from an index.meta dom into docinfo:
#   'bib'      - dict of all child elements of <bib> (local name -> text)
#   'bib_type' - value of bib/@type with "-" replaced by " ", default "generic"
#   'author', 'title', 'year' - read through the field mapping for the bib type
#   'lang'     - text of bib/lang, '' when it cannot be read
# path is only used (shortened by `cut` components) when no dom is passed in.
# Returns the updated docinfo (a new dict when docinfo is None).
426 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
427 """gets bibliographical info from the index.meta file at path or given by dom"""
428 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
429
430 if docinfo is None:
431 docinfo = {}
432
433 if dom is None:
434 for x in range(cut):
435 path=getParentDir(path)
436 dom = self.getIndexMeta(path)
437
438 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
439 # put in all raw bib fields as dict "bib"
440 bib = dom.xpath("//bib/*")
441 if bib and len(bib)>0:
442 bibinfo = {}
443 for e in bib:
444 bibinfo[e.localName] = getTextFromNode(e)
445 docinfo['bib'] = bibinfo
446
447 # extract some fields (author, title, year) according to their mapping
# self.metadata is not defined in the visible code -- presumably provided by
# the surrounding Zope site; confirm before relying on it
448 metaData=self.metadata.main.meta.bib
449 bibtype=dom.xpath("//bib/@type")
450 if bibtype and (len(bibtype)>0):
451 bibtype=bibtype[0].value
452 else:
453 bibtype="generic"
454
455 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
456 docinfo['bib_type'] = bibtype
457 bibmap=metaData.generateMappingForType(bibtype)
458 # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
459 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
# each field is best effort: any error while reading it is ignored and the
# key is simply left out of docinfo
460 try:
461 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
462 except: pass
463 try:
464 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
465 except: pass
466 try:
467 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
468 except: pass
469 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
# NOTE(review): indentation is lost in this listing, so it cannot be told
# whether the lang lookup below runs only when a mapping was found or always
470 try:
471 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
472 except:
473 docinfo['lang']=''
474
475 return docinfo
476
477
# Fills docinfo from the resource and texttool elements of an index.meta
# document (loaded from url when no dom is given).  Keys written here:
# 'lang' (default ''), 'imagePath', 'imageURL', 'viewerURL', 'textURL',
# 'textURLPath'; the bib, presentation and access helpers called at the end
# add their own keys.  Raises IOError when no archive path can be determined.
478 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
479 """parse texttool tag in index meta"""
480 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
481 if docinfo is None:
482 docinfo = {}
483 if docinfo.get('lang', None) is None:
484 docinfo['lang'] = '' # default: no language set
485 if dom is None:
486 dom = self.getIndexMeta(url)
487
488 archivePath = None
489 archiveName = None
490
491 archiveNames = dom.xpath("//resource/name")
492 if archiveNames and (len(archiveNames) > 0):
493 archiveName = getTextFromNode(archiveNames[0])
494 else:
495 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))
496
497 archivePaths = dom.xpath("//resource/archive-path")
498 if archivePaths and (len(archivePaths) > 0):
499 archivePath = getTextFromNode(archivePaths[0])
500 # clean up archive path
# NOTE(review): archivePath[0] raises IndexError when the archive-path
# element has no text
501 if archivePath[0] != '/':
502 archivePath = '/' + archivePath
503 if archiveName and (not archivePath.endswith(archiveName)):
504 archivePath += "/" + archiveName
505 else:
506 # try to get archive-path from url
507 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
508 if (not url.startswith('http')):
509 archivePath = url.replace('index.meta', '')
510
511 if archivePath is None:
512 # we balk without archive-path
513 raise IOError("Missing archive-path (for text-tool) in %s" % (url))
514
515 imageDirs = dom.xpath("//texttool/image")
516 if imageDirs and (len(imageDirs) > 0):
517 imageDir = getTextFromNode(imageDirs[0])
518
519 else:
520 # we balk with no image tag / not necessary anymore because textmode is now standard
521 #raise IOError("No text-tool info in %s"%(url))
522 imageDir = ""
523 #xquery="//pb"
524 docinfo['imagePath'] = "" # no images
525 docinfo['imageURL'] = ""
526
# with an image directory: ask digilib for the page count and record where
# the images live
527 if imageDir and archivePath:
528 #print "image: ", imageDir, " archivepath: ", archivePath
529 imageDir = os.path.join(archivePath, imageDir)
530 imageDir = imageDir.replace("/mpiwg/online", '')
531 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
532 docinfo['imagePath'] = imageDir
533
534 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
535
536 viewerUrls = dom.xpath("//texttool/digiliburlprefix")
537 if viewerUrls and (len(viewerUrls) > 0):
538 viewerUrl = getTextFromNode(viewerUrls[0])
539 docinfo['viewerURL'] = viewerUrl
540
541 textUrls = dom.xpath("//texttool/text")
542 if textUrls and (len(textUrls) > 0):
543 textUrl = getTextFromNode(textUrls[0])
544 if urlparse.urlparse(textUrl)[0] == "": #no scheme, so not a URL
545 textUrl = os.path.join(archivePath, textUrl)
546 # fix URLs starting with /mpiwg/online
547 if textUrl.startswith("/mpiwg/online"):
548 textUrl = textUrl.replace("/mpiwg/online", '', 1)
549
550 docinfo['textURL'] = textUrl
551
552 textUrls = dom.xpath("//texttool/text-url-path")
553 if textUrls and (len(textUrls) > 0):
554 textUrl = getTextFromNode(textUrls[0])
555 docinfo['textURLPath'] = textUrl
# NOTE(review): indentation is lost in this listing; the check below
# presumably belongs to the text-url-path branch above -- confirm.
# docinfo['imagePath'] is only assigned in the two image branches above, so
# an <image> element with empty text leaves the key unset and this lookup
# raises KeyError unless the caller already supplied it.
# getNumPages is not defined in the visible code (presumably inherited).
556 if not docinfo['imagePath']:
557 # text-only, no page images
558 docinfo = self.getNumPages(docinfo) #for now simply set to one; navigation via the thumbs does not work then
559
560 presentationUrls = dom.xpath("//texttool/presentation")
561 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
562
563 if presentationUrls and (len(presentationUrls) > 0): # override these with the presentation information
564 # the presentation url is the url used for the metadata with index.meta
565 # replaced by the relative path to the presentation info
566 presentationPath = getTextFromNode(presentationUrls[0])
567 if url.endswith("index.meta"):
568 presentationUrl = url.replace('index.meta', presentationPath)
569 else:
570 presentationUrl = url + "/" + presentationPath
571
572 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
573
574 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
575
576 return docinfo
577
578
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Take author, title and year from the presentation info XML at url.

    @param url: location of the presentation info document
    @param docinfo: dict to update (a new dict is created when None)
    @param dom: not used; the presentation document is always loaded from url
    @return: docinfo; 'author', 'title' and 'year' are set from the first
             author, title and date element, a key whose element is missing
             keeps its previous value
    """
    if docinfo is None:
        # previously every assignment failed silently and None was returned
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    for (key, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
        nodes = dom.xpath(xpath)
        if nodes:
            docinfo[key] = getTextFromNode(nodes[0])
    return docinfo
596
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document addressed by its image path.

    path is the path to the images; the index.meta file is assumed to be
    one level higher.

    @param path: image path, "/mpiwg/online" is removed
    @param docinfo: dict to update (a new dict is created when None)
    @param cut: number of trailing path components to remove to get from
                path to the image directory
    @return: docinfo with 'imagePath' (the path as given, not shortened),
             'imageURL' and the keys added by the dirinfo, bib and auth helpers
    """
    logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    path=path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    pathorig=path
    for x in range(cut):
        path=getParentDir(path)
    # plain trace output, so it belongs at debug level, not error level
    logging.debug("PATH:"+path)
    imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    docinfo['imageURL'] = imageUrl

    # index.meta is one level above the image directory, hence cut+1
    docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    return docinfo
617
618
def getDocinfo(self, mode, url):
    """Return docinfo for url, read according to mode.

    The docinfo stored in the session is reused when it was built for the
    same mode and url; otherwise a new one is built and stored there.

    @raise ValueError: when mode is not 'texttool', 'imagepath' or 'filepath'
    """
    logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
644
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return a pageinfo dict describing the current page and navigation state.

    @param current: current page number (anything getInt accepts)
    @param start: first page of the thumbnail group; defaults to the first
                  page of the group that contains current
    @param rows: thumbnail rows, defaults to self.thumbrows
    @param cols: thumbnail columns, defaults to self.thumbcols
    @param docinfo: document info; 'numPages' and 'tocSize_<tocMode>' are
                    used when present, may be None
    @param viewMode: stored unchanged in pageinfo
    @param tocMode: stored unchanged in pageinfo
    The query, search and toc paging values are taken from self.REQUEST.
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start is the first page of the group of grpsize pages that
    # contains the current page
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    request = self.REQUEST
    pageinfo['query'] = request.get('query',' ')
    pageinfo['queryType'] = request.get('queryType',' ')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # docinfo defaults to None, which must not be searched for the toc size
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        # cached toc: limit the toc page number to the number of toc pages
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)
    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
691
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store the viewer configuration and redirect to manage_main.

    @param authgroups: comma separated list of authorized group names,
                       stored stripped and lower-cased
    @param RESPONSE: when given, it is redirected to 'manage_main'
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
133 701
134
135 def imageLink(self,nr):
136 """link hinter den images"""
137 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
138 params={}
139 for x in paramsTmp.iteritems():
140 params[x[0]]=x[1][0]
141
142 params['pn']=nr
143 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
144 return newUrl
145
146
147 def thumbruler(self,cols,rows,start,maximum):
148 """ruler for thumbs"""
149 ret=""
150 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
151 params={}
152 for x in paramsTmp.iteritems():
153
154 if not x[0]=="start":
155 params[x[0]]=x[1][0]
156
157 newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params)
158 if start>0:
159 newStart=max(start-cols*rows,0)
160 params['start']=newStart
161 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
162 ret+="""<a href="%s">prev</a>"""%newUrl
163
164
165 ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value" """%newUrlSelect
166 nr,rest=divmod(maximum,cols*rows)
167 if rest > 0:
168 nr+=1
169 for i in range(nr):
170 nr=i*cols*rows
171
172 if (start >= nr) and (start < nr+cols*rows):
173 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr)
174 else:
175 ret+="""<option value="%s">%s</option>"""%(nr,nr)
176 ret+="</select>"
177
178 if start<maximum:
179 newStart=min(start+cols*rows,maximum)
180 params['start']=newStart
181 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
182 ret+="""<a href="%s">next</a>"""%newUrl
183
184 return ret
185
186 def textToolThumb(self,url,start=0):
187 """understands the texttool format
188 @param url: url to index.meta with texttool tag
189 """
190 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
191
192 imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%imagepath[0]
193
194 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','thumbs.zpt')).__of__(self)
195 return pt(imageUrl=imageUrl,pt=imagepath[1],start=start)
196
197 def text(self,mode,url,pn):
198 """give text"""
199 if mode=="texttool": #index.meta with texttool information
200 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
201
202 print textpath
203 try:
204 dom = NonvalidatingReader.parseUri(textpath)
205 except:
206 return None
207
208 list=[]
209 nodes=dom.xpath("//pb")
210
211 node=nodes[int(pn)-1]
212
213 p=node
214
215 while p.tagName!="p":
216 p=p.parentNode
217
218
219 endNode=nodes[int(pn)]
220
221
222 e=endNode
223
224 while e.tagName!="p":
225 e=e.parentNode
226
227
228 next=node.parentNode
229
230 #sammle s
231 while next and (next!=endNode.parentNode):
232 list.append(next)
233 next=next.nextSibling
234 list.append(endNode.parentNode)
235
236 if p==e:# beide im selben paragraphen
237
238 else:
239 next=p
240 while next!=e:
241 print next,e
242 list.append(next)
243 next=next.nextSibling
244
245 for x in list:
246 PrettyPrint(x)
247
248 return list
249
250 def image(self,mode,url,pn):
251 """give image out"""
252 if mode=="texttool": #index.meta with texttool information
253 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
254 url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0])
255 ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url
256 return url
257
258 def thumbs(self,mode,url,start):
259 """give thumbs out"""
260 if mode=="texttool": #index.meta with texttool information
261 return self.textToolThumb(url,int(start))
262
263 security.declareProtected('View','index_html')
264
265
266 def index_html(self,mode,url,start=0,pn=0):
267 '''
268 view it
269 @param mode: defines which type of document is behind url
270 @param url: url which contains display information
271 '''
272
273
274 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')).__of__(self)
275 return pt(mode=mode,url=url,start=start,pn=pn)
276
277
278
279 # security.declareProtected('View management screens','renameImageForm')
280
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return form()
285 706
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer with the given id in this container.

    When RESPONSE is given, it is redirected to 'manage_main' afterwards.
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
293 714
715 ## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """ZopePageTemplate subclass with its own meta_type for document viewer templates"""
    meta_type="DocumentViewer Template"
720
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return form()
725
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    The text and submit arguments are accepted but not used.

    @param id: id of the new template object
    @param title: optional title for the template
    @param REQUEST: when given, the response is redirected to the new
                    object's manage_main; without it nothing is redirected
    @return: always ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file explicitly instead of leaving it to garbage collection
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)
    if REQUEST is None:
        # called from code: there is no response to redirect
        return ''
    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
745
746
747