Changeset 465:224aad394350 in documentViewer for documentViewer.py


Ignore:
Timestamp:
Jul 29, 2011, 6:36:04 PM (13 years ago)
Author:
casties
Branch:
elementtree
Message:

really works with new getDocinfo

File:
1 edited

Legend:

Unmodified
Added
Removed
  • documentViewer.py

    r464 r465  
    264264               
    265265        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
     266
    266267 
    267268    security.declareProtected('View','index_html')
     
    294295        if viewMode=="auto":
    295296            if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
    296                 #texturl gesetzt und textViewer konfiguriert
    297297                viewMode="text_dict"
    298298            else:
     
    390390    def getInfo_xml(self,url,mode):
    391391        """returns info about the document as XML"""
    392 
    393392        if not self.digilibBaseUrl:
    394393            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
     
    398397        return pt(docinfo=docinfo)
    399398
    400     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    401         """returns new option state"""
    402         if not self.REQUEST.SESSION.has_key(optionName):
    403             # not in session -- initial
    404             opt = {'lastState': newState, 'state': initialState}
    405         else:
    406             opt = self.REQUEST.SESSION.get(optionName)
    407             if opt['lastState'] != newState:
    408                 # state in session has changed -- toggle
    409                 opt['state'] = not opt['state']
    410                 opt['lastState'] = newState
    411        
    412         self.REQUEST.SESSION[optionName] = opt
    413         return opt['state']
    414    
    415399    def isAccessible(self, docinfo):
    416400        """returns if access to the resource is granted"""
    417401        access = docinfo.get('accessType', None)
    418402        logging.debug("documentViewer (accessOK) access type %s"%access)
    419         if access is not None and access == 'free':
     403        if access == 'free':
    420404            logging.debug("documentViewer (accessOK) access is free")
    421405            return True
     406       
    422407        elif access is None or access in self.authgroups:
    423408            # only local access -- only logged in users
     
    433418        return False
    434419   
    435                
    436     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    437         """gibt param von dlInfo aus"""
    438         if docinfo is None:
    439             docinfo = {}
    440        
    441         for x in range(cut):
    442             path=getParentPath(path)
    443        
    444         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    445    
    446         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
    447        
    448         txt = getHttpData(infoUrl)
    449         if txt is None:
    450             raise IOError("Unable to get dir-info from %s"%(infoUrl))
    451 
    452         dom = ET.fromstring(txt)
    453         #dom = Parse(txt)
    454         size=getText(dom.find("size"))
    455         #sizes=dom.xpath("//dir/size")
    456         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
    457        
    458         if size:
    459             docinfo['numPages'] = int(size)
    460         else:
    461             docinfo['numPages'] = 0
    462            
    463         # TODO: produce and keep list of image names and numbers
    464                        
    465         return docinfo
    466    
    467     def getIndexMetaPath(self,url):
    468         """gib nur den Pfad zurueck"""
    469         regexp = re.compile(r".*(experimental|permanent)/(.*)")
    470         regpath = regexp.match(url)
    471         if (regpath==None):
    472             return ""
    473         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))           
    474         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
    475      
    476    
    477    
    478     def getIndexMetaUrl(self,url):
    479         """returns utr  of index.meta document at url"""
    480      
    481         metaUrl = None
    482         if url.startswith("http://"):
    483             # real URL
    484             metaUrl = url
    485         else:
    486             # online path
    487             server=self.digilibBaseUrl+"/servlet/Texter?fn="
    488             metaUrl=server+url.replace("/mpiwg/online","")
    489             if not metaUrl.endswith("index.meta"):
    490                 metaUrl += "/index.meta"
    491        
    492         return metaUrl
    493    
    494     def getDomFromIndexMeta(self, url):
    495         """get dom from index meta"""
    496         dom = None
    497         metaUrl = self.getIndexMetaUrl(url)
    498                
    499         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
    500         txt=getHttpData(metaUrl)
    501         if txt is None:
    502             raise IOError("Unable to read index meta from %s"%(url))
    503        
    504         dom = ET.fromstring(txt)
    505         #dom = Parse(txt)
    506         return dom
    507    
    508     def getPresentationInfoXML(self, url):
    509         """returns dom of info.xml document at url"""
    510         dom = None
    511         metaUrl = None
    512         if url.startswith("http://"):
    513             # real URL
    514             metaUrl = url
    515         else:
    516             # online path
    517             server=self.digilibBaseUrl+"/servlet/Texter?fn="
    518             metaUrl=server+url.replace("/mpiwg/online","")
    519        
    520         txt=getHttpData(metaUrl)
    521         if txt is None:
    522             raise IOError("Unable to read infoXMLfrom %s"%(url))
    523            
    524         dom = ET.fromstring(txt)
    525         #dom = Parse(txt)
    526         return dom
    527                        
    528        
    529     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    530         """gets authorization info from the index.meta file at path or given by dom"""
    531         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
    532        
    533         access = None
    534        
    535         if docinfo is None:
    536             docinfo = {}
    537            
    538         if dom is None:
    539             for x in range(cut):
    540                 path=getParentPath(path)
    541             dom = self.getDomFromIndexMeta(path)
    542        
    543         acc = dom.find(".//access-conditions/access")
    544         if acc is not None:
    545             acctype = acc.get('type')
    546             #acctype = dom.xpath("//access-conditions/access/@type")
    547             if acctype:
    548                 access=acctype
    549                 if access in ['group', 'institution']:
    550                     access = dom.find(".//access-conditions/access/name").text.lower()
    551            
    552         docinfo['accessType'] = access
    553         return docinfo
    554    
    555        
    556     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    557         """gets bibliographical info from the index.meta file at path or given by dom"""
    558         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
    559        
    560         if docinfo is None:
    561             docinfo = {}
    562        
    563         if dom is None:
    564             for x in range(cut):
    565                 path=getParentDir(path)
    566             dom = self.getDomFromIndexMeta(path)
    567        
    568         docinfo['indexMetaPath']=self.getIndexMetaPath(path);
    569        
    570         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    571         if self.metadataService is not None:
    572             # put all raw bib fields in dict "bib"
    573             bib = self.metadataService.getBibData(dom=dom)
    574             docinfo['bib'] = bib
    575             bibtype = bib.get('@type', None)
    576             docinfo['bib_type'] = bibtype
    577             # also store DC metadata for convenience
    578             dc = self.metadataService.getDCMappedData(bib)
    579             docinfo['creator'] = dc.get('creator',None)
    580             docinfo['title'] = dc.get('title',None)
    581             docinfo['date'] = dc.get('date',None)
    582         else:
    583             logging.error("MetadataService not found!")
    584         return docinfo
    585    
    586    
    587     # TODO: is this needed?
    588     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    589         """gets name info from the index.meta file at path or given by dom"""
    590         if docinfo is None:
    591             docinfo = {}
    592        
    593         if dom is None:
    594             for x in range(cut):
    595                 path=getParentPath(path)
    596             dom = self.getDomFromIndexMeta(path)
    597 
    598         docinfo['name']=getText(dom.find("name"))
    599         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    600         return docinfo
    601 
    602    
    603     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    604         """parse texttool tag in index meta"""
    605         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    606         if docinfo is None:
    607            docinfo = {}
    608         if docinfo.get('lang', None) is None:
    609             docinfo['lang'] = '' # default keine Sprache gesetzt
    610         if dom is None:
    611             dom = self.getDomFromIndexMeta(url)
    612            
    613         texttool = self.metadata.getTexttoolData(dom=dom)
    614        
    615         archivePath = None
    616         archiveName = None
    617    
    618         archiveName = getText(dom.find("name"))
    619         if not archiveName:
    620             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
    621        
    622         archivePath = getText(dom.find("archive-path"))
    623         if archivePath:
    624             # clean up archive path
    625             if archivePath[0] != '/':
    626                 archivePath = '/' + archivePath
    627             if archiveName and (not archivePath.endswith(archiveName)):
    628                 archivePath += "/" + archiveName
    629         else:
    630             # try to get archive-path from url
    631             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
    632             if (not url.startswith('http')):
    633                 archivePath = url.replace('index.meta', '')
    634                
    635         if archivePath is None:
    636             # we balk without archive-path
    637             raise IOError("Missing archive-path (for text-tool) in %s" % (url))
    638        
    639         imageDir = texttool.get('image', None)
    640            
    641         if not imageDir:
    642             # we balk with no image tag / not necessary anymore because textmode is now standard
    643             #raise IOError("No text-tool info in %s"%(url))
    644             imageDir = ""
    645             #xquery="//pb" 
    646             docinfo['imagePath'] = "" # keine Bilder
    647             docinfo['imageURL'] = ""
    648            
    649         if imageDir and archivePath:
    650             #print "image: ", imageDir, " archivepath: ", archivePath
    651             imageDir = os.path.join(archivePath, imageDir)
    652             imageDir = imageDir.replace("/mpiwg/online", '')
    653             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
    654             docinfo['imagePath'] = imageDir
    655            
    656             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
    657            
    658         viewerUrl = texttool.get('digiliburlprefix', None)
    659         if viewerUrl:
    660             docinfo['viewerURL'] = viewerUrl
    661        
    662         # old style text URL
    663         textUrl = texttool.get('text', None)
    664         if textUrl:
    665             if urlparse.urlparse(textUrl)[0] == "": #keine url
    666                 textUrl = os.path.join(archivePath, textUrl)
    667             # fix URLs starting with /mpiwg/online
    668             if textUrl.startswith("/mpiwg/online"):
    669                 textUrl = textUrl.replace("/mpiwg/online", '', 1)
    670            
    671             docinfo['textURL'] = textUrl
    672    
    673         # new style text-url-path
    674         textUrl = texttool.get('text-url-path', None)
    675         if textUrl:
    676             docinfo['textURLPath'] = textUrl
    677             textUrlkurz = string.split(textUrl, ".")[0]
    678             docinfo['textURLPathkurz'] = textUrlkurz
    679             #if not docinfo['imagePath']:
    680                 # text-only, no page images
    681                 #docinfo = self.getNumTextPages(docinfo)
    682                  
    683         # get bib info
    684         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
    685         # TODO: is this needed here?
    686         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
    687        
    688         # TODO: what to do with presentation?
    689         presentationUrl = texttool.get('presentation', None)
    690         if presentationUrl: # ueberschreibe diese durch presentation informationen
    691              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
    692              # durch den relativen Pfad auf die presentation infos
    693             presentationPath = presentationUrl
    694             if url.endswith("index.meta"):
    695                 presentationUrl = url.replace('index.meta', presentationPath)
    696             else:
    697                 presentationUrl = url + "/" + presentationPath
    698                
    699             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
    700    
    701         # get authorization
    702         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
    703        
    704         return docinfo
    705    
    706    
    707     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    708         """gets the bibliographical information from the preseantion entry in texttools
    709         """
    710         dom=self.getPresentationInfoXML(url)
    711         docinfo['author']=getText(dom.find(".//author"))
    712         docinfo['title']=getText(dom.find(".//title"))
    713         docinfo['year']=getText(dom.find(".//date"))
    714         return docinfo
    715    
    716     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    717         """path ist the path to the images it assumes that the index.meta file is one level higher."""
    718         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    719         if docinfo is None:
    720             docinfo = {}
    721         path=path.replace("/mpiwg/online","")
    722         docinfo['imagePath'] = path
    723         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
    724        
    725         pathorig=path
    726         for x in range(cut):       
    727                 path=getParentPath(path)
    728         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
    729         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
    730         docinfo['imageURL'] = imageUrl
    731        
    732         #TODO: use getDocinfoFromIndexMeta
    733         #path ist the path to the images it assumes that the index.meta file is one level higher.
    734         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    735         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
    736         return docinfo
    737    
    738    
    739     def OLDgetDocinfo(self, mode, url):
    740         """returns docinfo depending on mode"""
    741         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    742         # look for cached docinfo in session
    743         if self.REQUEST.SESSION.has_key('docinfo'):
    744             docinfo = self.REQUEST.SESSION['docinfo']
    745             # check if its still current
    746             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
    747                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
    748                 return docinfo
    749            
    750         # new docinfo
    751         docinfo = {'mode': mode, 'url': url}
    752         # add self url
    753         docinfo['viewerUrl'] = self.getDocumentViewerURL()
    754         if mode=="texttool":
    755             # index.meta with texttool information
    756             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    757         elif mode=="imagepath":
    758             # folder with images, index.meta optional
    759             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    760         elif mode=="filepath":
    761             # filename
    762             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    763         else:
    764             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
    765             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
    766                
    767         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    768         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    769         # store in session
    770         self.REQUEST.SESSION['docinfo'] = docinfo
    771         return docinfo
    772420
    773421
     
    830478            if bib:
    831479                docinfo = self.getDocinfoFromBib(docinfo, bib)
     480            else:
     481                # no bib - try info.xml
     482                docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
    832483               
    833484            # auth info
     
    839490        if mode != 'texttool':
    840491            # override image path from texttool
    841             docinfo['imagePath'] = url
     492            docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
    842493
    843494        # number of images from digilib
     
    870521            if not docUrl.startswith('http:'):
    871522                docPath = docUrl
    872                
     523        if docPath:
     524            # fix URLs starting with /mpiwg/online
     525            docPath = docPath.replace('/mpiwg/online', '', 1)
     526
    873527        docinfo['documentPath'] = docPath
    874528        return docinfo
     
    890544            if urlparse.urlparse(textUrl)[0] == "": #keine url
    891545                textUrl = os.path.join(docPath, textUrl)
    892                 # fix URLs starting with /mpiwg/online
    893                 textUrl = textUrl.replace('/mpiwg/online', '', 1)
    894546           
    895547            docinfo['textURL'] = textUrl
     
    906558        presentation = texttool.get('presentation', None)
    907559        if presentation and docPath:
    908             docinfo['presentationPath'] = os.path.join(docPath, presentation)
     560            if presentation.startswith('http:'):
     561                docinfo['presentationUrl'] = presentation
     562            else:
     563                docinfo['presentationUrl'] = os.path.join(docPath, presentation)
    909564           
    910565        return docinfo
     
    912567    def getDocinfoFromBib(self, docinfo, bib):
    913568        """reads contents of bib element into docinfo"""
     569        logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    914570        # put all raw bib fields in dict "bib"
    915571        docinfo['bib'] = bib
     
    926582        """reads contents of access element into docinfo"""
    927583        #TODO: also read resource type
     584        logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    928585        try:
    929             acctype = accc['@attr']['type']
     586            acctype = acc['@attr']['type']
    930587            if acctype:
    931588                access=acctype
     
    960617           
    961618           
     619    def getDocinfoFromPresentationInfoXml(self,docinfo):
     620        """gets DC-like bibliographical information from the presentation entry in texttools"""
     621        url = docinfo.get('presentationUrl', None)
     622        if not url:
     623            logging.error("getDocinfoFromPresentation: no URL!")
     624            return docinfo
     625       
     626        dom = None
     627        metaUrl = None
     628        if url.startswith("http://"):
     629            # real URL
     630            metaUrl = url
     631        else:
     632            # online path
     633           
     634            server=self.digilibBaseUrl+"/servlet/Texter?fn="
     635            metaUrl=server+url
     636       
     637        txt=getHttpData(metaUrl)
     638        if txt is None:
     639            logging.error("Unable to read info.xml from %s"%(url))
     640            return docinfo
     641           
     642        dom = ET.fromstring(txt)
     643        docinfo['creator']=getText(dom.find(".//author"))
     644        docinfo['title']=getText(dom.find(".//title"))
     645        docinfo['date']=getText(dom.find(".//date"))
     646        return docinfo
     647   
     648
    962649    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    963650        """returns pageinfo with the given parameters"""
Note: See TracChangeset for help on using the changeset viewer.