comparison documentViewer.py @ 495:ede0c93de798 metalify-1

update branch to latest version of HEAD (with modularisierung branch)
author casties
date Thu, 17 Jun 2010 19:35:24 +0200
parents 96f74b2bab24
children f83ffab77502
comparison
equal deleted inserted replaced
1:312446f900da 495:ede0c93de798
1
2 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"
3 1
4 from OFS.Folder import Folder 2 from OFS.Folder import Folder
5 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
6 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
7 from AccessControl import ClassSecurityInfo 5 from AccessControl import ClassSecurityInfo
6 from AccessControl import getSecurityManager
8 from Globals import package_home 7 from Globals import package_home
9 8
10 from Ft.Xml.Domlette import NonvalidatingReader 9 from Ft.Xml import EMPTY_NAMESPACE, Parse
11 from Ft.Xml.Domlette import PrettyPrint, Print 10 from Ft.Xml.Domlette import PrettyPrint, Print
12 from Ft.Xml import EMPTY_NAMESPACE
13
14 import Ft.Xml.XPath
15
16 import os.path 11 import os.path
17 import cgi 12 import sys
18 import urllib 13 import urllib
14 import urllib2
15 import logging
16 import math
17 import urlparse
18 import cStringIO
19
def logger(txt, method, txt2):
    """Log txt + txt2 through the standard logging module.

    @param txt: first part of the message (callers pass a subsystem label)
    @param method: severity; a numeric logging level (e.g. logging.INFO)
        is honoured, any other value falls back to INFO
    @param txt2: second part of the message, appended to txt without
        a separator
    """
    if isinstance(method, int):
        level = method
    else:
        # callers that pass something else keep the old INFO behaviour
        level = logging.INFO
    logging.log(level, txt + txt2)
23
24
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    @param number: value to convert (string, number, None, ...)
    @param default: fallback used when number cannot be converted;
        it is passed through int() as well
    @return: an int
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems trigger the fallback; things like
        # KeyboardInterrupt are no longer swallowed
        return int(default)
19 31
def getTextFromNode(nodename):
    """Return the concatenated data of all direct text-node children.

    @param nodename: a DOM node, or None
    @return: the joined text; "" if nodename is None or has no text children
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
27 42
def serializeNode(node, encoding='utf-8'):
    """Serialize node to XML and return it as a string.

    @param node: the DOM node handed to Print
    @param encoding: output encoding handed to Print
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
35 return ret 50
36 51
def getParentDir(path):
    """Return path with its last '/'-separated component removed."""
    components = path.split('/')
    return '/'.join(components[:-1])
40 55
41 try: 56
42 dom = NonvalidatingReader.parseUri(imageUrl) 57 def getHttpData(url, data=None, num_tries=3, timeout=10):
43 except: 58 """returns result from url+data HTTP request"""
44 return None 59 # we do GET (by appending data to url)
45 60 if isinstance(data, str) or isinstance(data, unicode):
46 61 # if data is string then append
47 params=dom.xpath("//document-parameters/parameter[@name='%s']/@value"%param) 62 url = "%s?%s"%(url,data)
48 63 elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
49 if params: 64 # urlencode
50 return params[0].value 65 url = "%s?%s"%(url,urllib.urlencode(data))
51 66
52 def parseUrlTextTool(url): 67 response = None
53 """parse index meta""" 68 errmsg = None
54 69 for cnt in range(num_tries):
55 try: 70 try:
56 dom = NonvalidatingReader.parseUri(url) 71 logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
57 except: 72 if sys.version_info < (2, 6):
58 zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) 73 # set timeout on socket -- ugly :-(
59 return (None,None,None) 74 import socket
60 75 socket.setdefaulttimeout(float(timeout))
61 archivePaths=dom.xpath("//resource/archive-path") 76 response = urllib2.urlopen(url)
62 77 else:
63 if archivePaths and (len(archivePaths)>0): 78 response = urllib2.urlopen(url,timeout=float(timeout))
64 archivePath=getTextFromNode(archivePaths[0]) 79 # check result?
65 else: 80 break
66 archivePath=None 81 except urllib2.HTTPError, e:
67 82 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
68 83 errmsg = str(e)
69 images=dom.xpath("//texttool/image") 84 # stop trying
70 85 break
71 if images and (len(images)>0): 86 except urllib2.URLError, e:
72 image=getTextFromNode(images[0]) 87 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
73 else: 88 errmsg = str(e)
74 image=None 89 # stop trying
75 90 #break
76 if image and archivePath: 91
77 image=os.path.join(archivePath,image) 92 if response is not None:
78 image=image.replace("/mpiwg/online",'') 93 data = response.read()
79 pt=getParamFromDigilib(image,'pt') 94 response.close()
80 95 return data
81 else: 96
82 image=None 97 raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
83 98 #return None
84 viewerUrls=dom.xpath("//texttool/digiliburlprefix") 99
85 100
86 if viewerUrls and (len(viewerUrls)>0): 101
87 viewerUrl=getTextFromNode(viewerUrls[0]) 102 ##
88 else: 103 ## documentViewer class
89 viewerUrl=None 104 ##
90 105 class documentViewer(Folder):
91
92 textUrls=dom.xpath("//texttool/text")
93
94 if textUrls and (len(textUrls)>0):
95 textUrl=getTextFromNode(textUrls[0])
96 else:
97 textUrl=None
98 return viewerUrl,(image,pt),textUrl
99
100
101 class documentViewer(ZopePageTemplate):
102 """document viewer""" 106 """document viewer"""
103
104 meta_type="Document viewer" 107 meta_type="Document viewer"
105 108
106 security=ClassSecurityInfo() 109 security=ClassSecurityInfo()
107 manage_options=ZopePageTemplate.manage_options+( 110 manage_options=Folder.manage_options+(
108 {'label':'main config','action':'changeDocumentViewerForm'}, 111 {'label':'main config','action':'changeDocumentViewerForm'},
109 ) 112 )
110 113
111 _default_content_fn = os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt') 114 # templates and forms
112 115 viewer_main = PageTemplateFile('zpt/viewer_main', globals())
113 def __init__(self,id,imageViewerUrl,title=""): 116 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
117 toc_text = PageTemplateFile('zpt/toc_text', globals())
118 toc_figures = PageTemplateFile('zpt/toc_figures', globals())
119 page_main_images = PageTemplateFile('zpt/page_main_images', globals())
120 page_main_text = PageTemplateFile('zpt/page_main_text', globals())
121 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
122 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
123 head_main = PageTemplateFile('zpt/head_main', globals())
124 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
125 info_xml = PageTemplateFile('zpt/info_xml', globals())
126
127
128 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
129 security.declareProtected('View management screens','changeDocumentViewerForm')
130 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
131
132
133 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
114 """init document viewer""" 134 """init document viewer"""
115 self.id=id 135 self.id=id
116 self.title=title 136 self.title=title
117 self.imageViewerUrl=imageViewerUrl 137 self.thumbcols = thumbcols
118 138 self.thumbrows = thumbrows
119 security.declareProtected('View management screens','changeDocumentViewerForm') 139 # authgroups is list of authorized groups (delimited by ,)
120 def changeDocumentViewerForm(self): 140 self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
121 """change it""" 141 # create template folder so we can always use template.something
122 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeDocumentViewer.zpt')).__of__(self) 142
123 return pt() 143 templateFolder = Folder('template')
144 #self['template'] = templateFolder # Zope-2.12 style
145 self._setObject('template',templateFolder) # old style
146 try:
147 import MpdlXmlTextServer
148 textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
149 #templateFolder['fulltextclient'] = xmlRpcClient
150 templateFolder._setObject('fulltextclient',textServer)
151 except Exception, e:
152 logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
153 try:
154 from Products.zogiLib.zogiLib import zogiLib
155 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
156 #templateFolder['zogilib'] = zogilib
157 templateFolder._setObject('zogilib',zogilib)
158 except Exception, e:
159 logging.error("Unable to create zogiLib for zogilib: "+str(e))
160
161
# proxy text server methods to fulltextclient
#
# All proxies forward positional as well as keyword arguments, so that
# calls like self.getNumPages(docinfo) reach the text server unchanged.
def getTextPage(self, *args, **kwargs):
    """get page (forwarded to template.fulltextclient.getTextPage)"""
    return self.template.fulltextclient.getTextPage(*args, **kwargs)

def getQuery(self, *args, **kwargs):
    """get query (forwarded to template.fulltextclient.getQuery)"""
    return self.template.fulltextclient.getQuery(*args, **kwargs)

def getSearch(self, *args, **kwargs):
    """get search (forwarded to template.fulltextclient.getSearch)"""
    return self.template.fulltextclient.getSearch(*args, **kwargs)

def getNumPages(self, *args, **kwargs):
    """get numpages (forwarded to template.fulltextclient.getNumPages)"""
    return self.template.fulltextclient.getNumPages(*args, **kwargs)

def getTranslate(self, *args, **kwargs):
    """get translate (forwarded to template.fulltextclient.getTranslate)"""
    return self.template.fulltextclient.getTranslate(*args, **kwargs)

def getLemma(self, *args, **kwargs):
    """get lemma (forwarded to template.fulltextclient.getLemma)"""
    return self.template.fulltextclient.getLemma(*args, **kwargs)

def getToc(self, *args, **kwargs):
    """get toc (forwarded to template.fulltextclient.getToc)"""
    return self.template.fulltextclient.getToc(*args, **kwargs)

def getTocPage(self, *args, **kwargs):
    """get tocpage (forwarded to template.fulltextclient.getTocPage)"""
    return self.template.fulltextclient.getTocPage(*args, **kwargs)
194
195
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    render the document through the thumbs_main_rss template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: first page of the thumbnail group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if ('textURL' in docinfo) or ('textURLPath' in docinfo):
            # a text URL is set
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
124 226
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
    '''
    main view: render the document through the viewer_main template
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    # without the template folder nothing can be rendered
    if not hasattr(self, 'template'):
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    # every tocMode except "thumbs" needs a table of contents
    if tocMode != "thumbs":
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # auto mode: prefer text when the document has a text URL
        hasText = docinfo.has_key('textURL') or docinfo.has_key('textURLPath')
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    template = getattr(self.template, 'viewer_main')
    marks = self.generateMarks(mk)
    return template(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=marks)
125 264
def generateMarks(self,mk):
    """Return the given mark(s) as a string of "mk=<value>" entries.

    @param mk: None, a single mark or a list of marks
    @return: "" for None, otherwise one "mk=%s" entry per mark
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk=[mk]
    # NOTE(review): the entries are concatenated without a separator,
    # exactly as before -- confirm against the template using this value
    return "".join(["mk=%s"%m for m in mk])
274
275
def findDigilibUrl(self):
    """try to get the digilib URL from zogilib

    @return: the result of template.zogilib.getDLBaseUrl(), or None when
        there is no template folder or no zogilib object in it, so that
        callers can fall back with "findDigilibUrl() or default"
    """
    template = getattr(self, 'template', None)
    zogilib = getattr(template, 'zogilib', None)
    if zogilib is None:
        return None
    return zogilib.getDLBaseUrl()
280
def getDocumentViewerURL(self):
    """Return what absolute_url() reports for this instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
284
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    if idx != selected:
        return style
    return style + 'sel'
292
def getLink(self,param=None,val=None):
    """link to documentviewer with parameter param set to val

    Starts from the parameters of the current request. A list-valued
    parameter is written as one key=value pair per item.

    @param param: name of the parameter to change (None: change nothing)
    @param val: new value; None removes the parameter from the link
    @return: REQUEST['URL1'] plus the query string, joined with "&"
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            if param in params:
                del params[param]
        else:
            params[param] = str(val)

    if params.get("mode", None) == "filepath":
        # if the first call used mode=filepath, switch to imagepath now
        params["mode"] = "imagepath"
        params["url"] = getParentDir(params["url"])

    # quote values and assemble into query string
    pairs = []
    for (k, v) in params.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            if isinstance(item, unicode):
                # urllib.quote cannot handle non-ASCII unicode objects
                item = item.encode('utf-8')
            pairs.append("%s=%s"%(k,urllib.quote(str(item))))
    url=self.REQUEST['URL1']+"?"+"&".join(pairs)
    return url
311
def getLinkAmp(self,param=None,val=None):
    """link to documentviewer with parameter param set to val

    Starts from the parameters of the current request and joins them
    with "&amp;". A list-valued parameter is written as one key=value
    pair per item.

    @param param: name of the parameter to change (None: change nothing)
    @param val: new value; None removes the parameter from the link
    @return: REQUEST['URL1'] plus the query string
    """
    params=self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            if param in params:
                del params[param]
        else:
            params[param] = str(val)

    # quote values and assemble into query string
    logging.debug("XYXXXXX: %s"%repr(params.items()))
    pairs = []
    for (k, v) in params.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            if isinstance(item, unicode):
                # urllib.quote cannot handle non-ASCII unicode objects
                item = item.encode('utf-8')
            pairs.append("%s=%s"%(k,urllib.quote(str(item))))
    url=self.REQUEST['URL1']+"?"+"&amp;".join(pairs)
    return url
327
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: the rendered info_xml template
    """
    # getattr: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
337
338
def isAccessible(self, docinfo):
    """Tell whether the current user may access the resource.

    @param docinfo: dict; its 'accessType' entry decides
    @return: True for access type 'free'; for a missing access type or
        one listed in self.authgroups, True only if the current user is
        not "Anonymous User"; False for every other access type
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is None or access in self.authgroups:
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return (user.getUserName() != "Anonymous User")

    logging.debug("documentViewer (accessOK) unknown access type %s"%access)
    return False
357
358
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp about path and store the page count.

    @param path: image directory path
    @param docinfo: dict to update (a new one is created if None)
    @param cut: number of trailing path components to remove first
    @return: docinfo with 'numPages' set (0 if no size was reported or
        the reported size is not a number)
    @raise IOError: if the directory info cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    dom = Parse(txt)
    sizes=dom.xpath("//dir/size")
    # the placeholder was missing, so the size never showed up in the log
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%sizes)

    if sizes:
        # getInt: a blank or malformed size counts as 0 pages
        docinfo['numPages'] = getInt(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    # TODO: produce and keep list of image names and numbers

    return docinfo
388
389
def getIndexMeta(self, url):
    """returns dom of index.meta document at url

    @param url: an http(s) URL, or an online path that is fetched through
        the Texter servlet below self.digilibBaseUrl
    @raise IOError: if the document cannot be read
    """
    dom = None
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    dom = Parse(txt)
    return dom
411
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    @param url: an http(s) URL, or an online path that is fetched through
        the Texter servlet below self.digilibBaseUrl
    @raise IOError: if the document cannot be read
    """
    dom = None
    metaUrl = None
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    dom = Parse(txt)
    return dom
430
431
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    @param path: path of the index.meta (only read when dom is None)
    @param docinfo: dict to update (a new one is created if None)
    @param dom: already parsed index.meta
    @param cut: number of trailing path components to remove from path
        before index.meta is read
    @return: docinfo with 'accessType' set: None if there are no access
        conditions, the lower-cased name for 'group' and 'institution'
        access, otherwise the access type itself
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getIndexMeta(path)

    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access=acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            if names:
                access = getTextFromNode(names[0]).lower()
            else:
                # no name given: keep the bare type instead of failing
                logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
454
455
456 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
457 """gets bibliographical info from the index.meta file at path or given by dom"""
458 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
459
460 if docinfo is None:
461 docinfo = {}
462
463 if dom is None:
464 for x in range(cut):
465 path=getParentDir(path)
466 dom = self.getIndexMeta(path)
467
468 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
469 # put in all raw bib fields as dict "bib"
470 bib = dom.xpath("//bib/*")
471 if bib and len(bib)>0:
472 bibinfo = {}
473 for e in bib:
474 bibinfo[e.localName] = getTextFromNode(e)
475 docinfo['bib'] = bibinfo
476
477 # extract some fields (author, title, year) according to their mapping
478 metaData=self.metadata.main.meta.bib
479 bibtype=dom.xpath("//bib/@type")
480 if bibtype and (len(bibtype)>0):
481 bibtype=bibtype[0].value
482 else:
483 bibtype="generic"
484
485 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
486 docinfo['bib_type'] = bibtype
487 bibmap=metaData.generateMappingForType(bibtype)
488 # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
489 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
490 try:
491 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
492 except: pass
493 try:
494 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
495 except: pass
496 try:
497 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
498 except: pass
499 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
500 try:
501 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
502 except:
503 docinfo['lang']=''
504
505 return docinfo
506
507
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    @param url: url or path of the index.meta
    @param dom: already parsed index.meta (read from url if None)
    @param docinfo: dict to update (a new one is created if None)
    @return: docinfo with image, text, viewer, bibliographic and access
        information as far as the index.meta provides it
    @raise IOError: if no archive path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getIndexMeta(url)

    archivePath = None
    archiveName = None

    archiveNames = dom.xpath("//resource/name")
    if archiveNames:
        archiveName = getTextFromNode(archiveNames[0])
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePaths = dom.xpath("//resource/archive-path")
    if archivePaths:
        archivePath = getTextFromNode(archivePaths[0])
        # clean up archive path (startswith also copes with an empty path)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = ""
    imageDirs = dom.xpath("//texttool/image")
    if imageDirs:
        imageDir = getTextFromNode(imageDirs[0])

    if not imageDir:
        # no (or an empty) image tag is not an error because text mode
        # is the standard now; record that there are no images
        docinfo['imagePath'] = ""
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir

        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrls = dom.xpath("//texttool/digiliburlprefix")
    if viewerUrls:
        viewerUrl = getTextFromNode(viewerUrls[0])
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrls = dom.xpath("//texttool/text")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        if urlparse.urlparse(textUrl)[0] == "": # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrls = dom.xpath("//texttool/text-url-path")
    if textUrls:
        textUrl = getTextFromNode(textUrls[0])
        docinfo['textURLPath'] = textUrl
        if not docinfo.get('imagePath'):
            # text-only, no page images
            # NOTE(review): docinfo is passed positionally -- verify that
            # getNumPages accepts positional arguments
            docinfo = self.getNumPages(docinfo)

    presentationUrls = dom.xpath("//texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

    if presentationUrls:
        # presentation info overrides the bib info; its URL is the
        # index.meta URL with "index.meta" replaced by the relative
        # path of the presentation file
        presentationPath = getTextFromNode(presentationUrls[0])
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
609
610
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    @param url: url or path of the presentation info document
    @param docinfo: dict to update (a new one is created if None)
    @param dom: not used; the presentation document is always read from url
    @return: docinfo with 'author', 'title' and 'year' set for every
        //author, //title and //date element found
    """
    if docinfo is None:
        docinfo = {}
    dom=self.getPresentationInfoXML(url)
    fields = (('author', "//author"), ('title', "//title"), ('year', "//date"))
    for (key, expr) in fields:
        nodes = dom.xpath(expr)
        if nodes:
            # a missing element leaves the existing value untouched
            docinfo[key]=getTextFromNode(nodes[0])
    return docinfo
628
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Collect docinfo for a directory of page images.

    The index.meta file is assumed to be one level above the images.

    @param path: path to the images
    @param docinfo: dict to update (a new one is created if None)
    @param cut: number of trailing path components to remove from path
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}
    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # shorten the path for the scaler URL by cut components
    scalerPath = imagePath
    for x in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+scalerPath

    # index.meta sits one level above the images, hence cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=cut+1)
    return docinfo
649
650
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, read according to mode.

    A docinfo stored in the session is reused when its mode and url
    match; a freshly built one is stored in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: url which contains display information
    @raise ValueError: for any other mode
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode=="texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode=="imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode=="filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
676
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """returns pageinfo with the given parameters

    @param current: current page number
    @param start: first page of the thumbnail group (default: start of
        the group that contains current)
    @param rows: thumbnail rows (default: self.thumbrows)
    @param cols: thumbnail columns (default: self.thumbcols)
    @param docinfo: docinfo dict; 'numPages' and 'tocSize_<tocMode>' are
        used when present
    @param viewMode: stored in pageinfo as it is
    @param tocMode: stored in pageinfo as it is
    @return: dict with paging information and the query parameters read
        from self.REQUEST
    """
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1
    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['query'] = self.REQUEST.get('query',' ')
    pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
    pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int (pageinfo['tocPN'])
    pageinfo['textPages'] =int (toc)

    tocKey = 'tocSize_%s'%tocMode
    # docinfo defaults to None, so it has to be checked before "in"
    if (docinfo is not None) and (tocKey in docinfo):
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # cached toc: limit tocPN to the last toc page
        if tocPageSize > 0:
            tocPages = tocSize // tocPageSize
            if tocSize % tocPageSize > 0:
                tocPages += 1
            pageinfo['tocPN'] = min (tocPages,toc)
    pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
    pageinfo['sn'] =self.REQUEST.get('sn','')
    return pageinfo
723
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new settings for this viewer.

    @param title: new title
    @param digilibBaseUrl: new digilib base URL
    @param thumbrows: new number of thumbnail rows
    @param thumbcols: new number of thumbnail columns
    @param authgroups: comma separated names of authorized groups
    @param RESPONSE: if given, it is redirected to 'manage_main'
    """
    self.title=title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    # group names are kept stripped and lower-cased
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
133 733
134
# Present only on the left-hand (older revision) side of this comparison.
# Builds a link to the current URL with the request's query parameters
# (first value of each) and 'pn' set to nr.
# The docstring below is German for "link behind the images".
135 def imageLink(self,nr):
136 """link hinter den images"""
137 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
138 params={}
139 for x in paramsTmp.iteritems():
# parse_qs yields a list per parameter; only the first value is kept
140 params[x[0]]=x[1][0]
141
142 params['pn']=nr
143 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
144 return newUrl
145
146
# Present only on the left-hand (older revision) side of this comparison.
# Returns an HTML fragment for paging through thumbnails: a "prev" link,
# a <select> with one option per group of cols*rows thumbnails, and a
# "next" link. The links repeat the request's query parameters with a
# new 'start' value.
147 def thumbruler(self,cols,rows,start,maximum):
148 """ruler for thumbs"""
149 ret=""
150 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
151 params={}
152 for x in paramsTmp.iteritems():
153
# copy every parameter except 'start' (first value only)
154 if not x[0]=="start":
155 params[x[0]]=x[1][0]
156
157 newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params)
158 if start>0:
159 newStart=max(start-cols*rows,0)
160 params['start']=newStart
161 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
162 ret+="""<a href="%s">prev</a>"""%newUrl
163
164
# NOTE(review): the opening <select ...> tag written here has no closing ">"
165 ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value" """%newUrlSelect
# number of groups, rounded up
166 nr,rest=divmod(maximum,cols*rows)
167 if rest > 0:
168 nr+=1
169 for i in range(nr):
# nr is reused here as the start value of group i
170 nr=i*cols*rows
171
172 if (start >= nr) and (start < nr+cols*rows):
173 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr)
174 else:
175 ret+="""<option value="%s">%s</option>"""%(nr,nr)
176 ret+="</select>"
177
178 if start<maximum:
179 newStart=min(start+cols*rows,maximum)
180 params['start']=newStart
181 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params)
182 ret+="""<a href="%s">next</a>"""%newUrl
183
184 return ret
185
# Present only on the left-hand (older revision) side of this comparison.
# Renders zpt/thumbs.zpt for the document described by the index.meta at
# url; the image URL is built from genericDigilib and the image path
# returned by parseUrlTextTool.
186 def textToolThumb(self,url,start=0):
187 """understands the texttool format
188 @param url: url to index.meta with texttool tag
189 """
# imagepath is the (image, pt) pair returned by parseUrlTextTool
190 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
191
192 imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%imagepath[0]
193
194 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','thumbs.zpt')).__of__(self)
195 return pt(imageUrl=imageUrl,pt=imagepath[1],start=start)
196
# Present only on the left-hand (older revision) side of this comparison.
# Collects the nodes between the pb element number pn and the following
# pb element of the text document and returns them as a list.
# NOTE(review): textpath is only assigned when mode is "texttool"; the
# nesting of the remaining statements cannot be seen in this listing.
197 def text(self,mode,url,pn):
198 """give text"""
199 if mode=="texttool": #index.meta with texttool information
200 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
201
202 print textpath
203 try:
204 dom = NonvalidatingReader.parseUri(textpath)
205 except:
206 return None
207
208 list=[]
209 nodes=dom.xpath("//pb")
210
# pb element that starts page pn (pn counts from 1)
211 node=nodes[int(pn)-1]
212
213 p=node
214
# walk up to the enclosing p element
215 while p.tagName!="p":
216 p=p.parentNode
217
218
# pb element that starts the following page
219 endNode=nodes[int(pn)]
220
221
222 e=endNode
223
224 while e.tagName!="p":
225 e=e.parentNode
226
227
228 next=node.parentNode
229
# "sammle s" is German for "collect s"
230 #sammle s
231 while next and (next!=endNode.parentNode):
232 list.append(next)
233 next=next.nextSibling
234 list.append(endNode.parentNode)
235
# "beide im selben paragraphen" is German for "both in the same paragraph"
# NOTE(review): this "if" has no statement before the "else"
236 if p==e:# beide im selben paragraphen
237
238 else:
239 next=p
240 while next!=e:
241 print next,e
242 list.append(next)
243 next=next.nextSibling
244
245 for x in list:
246 PrettyPrint(x)
247
248 return list
249
# Present only on the left-hand (older revision) side of this comparison.
# Builds the viewer URL for page pn of the document at url and returns it.
250 def image(self,mode,url,pn):
251 """give image out"""
252 if mode=="texttool": #index.meta with texttool information
253 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
254 url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0])
# NOTE(review): the iframe markup in ret is built but url is returned
255 ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url
256 return url
257
# Present only on the left-hand (older revision) side of this comparison.
# For mode "texttool" the thumbnails are delegated to textToolThumb with
# start converted to int; no other mode is handled here.
258 def thumbs(self,mode,url,start):
259 """give thumbs out"""
260 if mode=="texttool": #index.meta with texttool information
261 return self.textToolThumb(url,int(start))
262
# Present only on the left-hand (older revision) side of this comparison.
# Renders zpt/documentViewer_template.zpt, passing mode, url, start and
# pn through to the template; access is declared for the 'View' permission.
263 security.declareProtected('View','index_html')
264
265
266 def index_html(self,mode,url,start=0,pn=0):
267 '''
268 view it
269 @param mode: defines which type of document is behind url
270 @param url: url which contains display information
271 '''
272
273
274 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')).__of__(self)
275 return pt(mode=mode,url=url,start=start,pn=pn)
276
277
278
279 # security.declareProtected('View management screens','renameImageForm')
280
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering
    return form.__of__(self)()
285 738
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    @param RESPONSE: if given, it is redirected to 'manage_main'
    """
    viewer = documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
293 746
##
## DocumentViewerTemplate class
##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""

    # name under which this class is listed in Zope
    meta_type = "DocumentViewer Template"
751
752
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering
    return form.__of__(self)()
757
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a Page Template filled with the content of zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the new template
    @param text: not used
    @param REQUEST: if given, the response is redirected to the
        manage_main page of the new template
    @param submit: not used
    @return: ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        # don't leave the file handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is no response to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
777
778
779