comparison documentViewer.py @ 50:6c0f20cecc60

added evaluation of the presentation/info.xml in texttools
author dwinter
date Thu, 11 Jan 2007 13:03:17 +0100
parents a10fff6199b0
children c5d3aabbf61b
comparison
equal deleted inserted replaced
49:a10fff6199b0 50:6c0f20cecc60
15 15
16 import os.path 16 import os.path
17 import sys 17 import sys
18 import cgi 18 import cgi
19 import urllib 19 import urllib
20 import logging
20 import zLOG 21 import zLOG
21 import urlparse 22 import urlparse
22 23
23 def getInt(number, default=0): 24 def getInt(number, default=0):
24 """returns always an int (0 in case of problems)""" 25 """returns always an int (0 in case of problems)"""
57 ## 58 ##
58 ## documentViewer class 59 ## documentViewer class
59 ## 60 ##
60 class documentViewer(Folder): 61 class documentViewer(Folder):
61 """document viewer""" 62 """document viewer"""
62 textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?" 63 #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
63 64
64 meta_type="Document viewer" 65 meta_type="Document viewer"
65 66
66 security=ClassSecurityInfo() 67 security=ClassSecurityInfo()
67 manage_options=Folder.manage_options+( 68 manage_options=Folder.manage_options+(
225 except: 226 except:
226 zLOG.LOG("ERROR documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) 227 zLOG.LOG("ERROR documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])
227 228
228 if dom is None: 229 if dom is None:
229 raise IOError("Unable to read index meta from %s"%(url)) 230 raise IOError("Unable to read index meta from %s"%(url))
231
232 return dom
233
234 def getPresentationInfoXML(self, url):
235 """returns dom of info.xml document at url"""
236 num_retries = 3
237 dom = None
238 metaUrl = None
239 if url.startswith("http://"):
240 # real URL
241 metaUrl = url
242 else:
243 # online path
244 server=self.digilibBaseUrl+"/servlet/Texter?fn="
245 metaUrl=server+url.replace("/mpiwg/online","")
246
247
248 for cnt in range(num_retries):
249 try:
250 # patch dirk encoding fehler treten dann nicht mehr auf
251 # dom = NonvalidatingReader.parseUri(metaUrl)
252 txt=urllib.urlopen(metaUrl).read()
253 dom = Parse(txt)
254 break
255 except:
256 zLOG.LOG("ERROR documentViewer (getPresentationInfoXML)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])
257
258 if dom is None:
259 raise IOError("Unable to read infoXMLfrom %s"%(url))
230 260
231 return dom 261 return dom
232 262
233 263
234 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None): 264 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
343 if urlparse.urlparse(textUrl)[0]=="": #keine url 373 if urlparse.urlparse(textUrl)[0]=="": #keine url
344 textUrl=os.path.join(archivePath,textUrl) 374 textUrl=os.path.join(archivePath,textUrl)
345 375
346 docinfo['textURL'] = textUrl 376 docinfo['textURL'] = textUrl
347 377
348 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) 378
379 presentationUrls=dom.xpath("//texttool/presentation")
380 if presentationUrls and (len(presentationUrls)>0):
381 # presentation url ergiebt sich ersetzen von index.meta in der url der für die Metadaten
382 # durch den relativen Pfad auf die presentation infos
383 presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))
384
385 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)
386 else:
387 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
349 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) 388 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
350 return docinfo 389 return docinfo
351 390
352 391
392 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
393 """gets the bibliographical information from the preseantion entry in texttools
394 """
395 dom=self.getPresentationInfoXML(url)
396 docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
397 docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
398 docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
399 return docinfo
400
353 def getDocinfoFromImagePath(self,path,docinfo=None): 401 def getDocinfoFromImagePath(self,path,docinfo=None):
354 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 402 """path ist the path to the images it assumes that the index.meta file is one level higher."""
355 zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path)) 403 zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
356 if docinfo is None: 404 if docinfo is None:
357 docinfo = {} 405 docinfo = {}
368 416
369 def getDocinfo(self, mode, url): 417 def getDocinfo(self, mode, url):
370 """returns docinfo depending on mode""" 418 """returns docinfo depending on mode"""
371 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url)) 419 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
372 # look for cached docinfo in session 420 # look for cached docinfo in session
373 if self.REQUEST.SESSION.has_key('docinfo'): 421 # XXXX Sesion abgeschaltet
422 if self.REQUEST.SESSION.has_key('docinfo_XX'):
374 docinfo = self.REQUEST.SESSION['docinfo'] 423 docinfo = self.REQUEST.SESSION['docinfo']
375 # check if its still current 424 # check if its still current
376 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 425 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
377 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo) 426 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)
378 return docinfo 427 return docinfo
469 # 518 #
470 519
471 def findDigilibUrl(self): 520 def findDigilibUrl(self):
472 """try to get the digilib URL from zogilib""" 521 """try to get the digilib URL from zogilib"""
473 url = self.imageViewerUrl[:-1] + "/getScalerUrl" 522 url = self.imageViewerUrl[:-1] + "/getScalerUrl"
474 print urlparse.urlparse(url)[0] 523 #print urlparse.urlparse(url)[0]
475 print urlparse.urljoin(self.absolute_url(),url) 524 #print urlparse.urljoin(self.absolute_url(),url)
525 logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
526 logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))
527
476 try: 528 try:
477 if urlparse.urlparse(url)[0]=='': #relative path 529 if urlparse.urlparse(url)[0]=='': #relative path
478 url=urlparse.urljoin(self.absolute_url()+"/",url) 530 url=urlparse.urljoin(self.absolute_url()+"/",url)
479 531
480 scaler = urlopen(url).read() 532 scaler = urlopen(url).read()