Diff for /documentViewer/documentViewer.py between versions 1.175.2.11 and 1.175.2.12

version 1.175.2.11, 2011/07/29 16:27:24 version 1.175.2.12, 2011/07/29 18:36:04
Line 264  class documentViewer(Folder): Line 264  class documentViewer(Folder):
                                 
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         """          """
Line 293  class documentViewer(Folder): Line 294  class documentViewer(Folder):
         # auto viewMode: text_dict if text else images          # auto viewMode: text_dict if text else images
         if viewMode=="auto":           if viewMode=="auto": 
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                 #texturl gesetzt und textViewer konfiguriert  
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 389  class documentViewer(Folder): Line 389  class documentViewer(Folder):
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                   
Line 397  class documentViewer(Folder): Line 396  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):  
         """returns new option state"""  
         if not self.REQUEST.SESSION.has_key(optionName):  
             # not in session -- initial  
             opt = {'lastState': newState, 'state': initialState}  
         else:  
             opt = self.REQUEST.SESSION.get(optionName)  
             if opt['lastState'] != newState:  
                 # state in session has changed -- toggle  
                 opt['state'] = not opt['state']  
                 opt['lastState'] = newState  
           
         self.REQUEST.SESSION[optionName] = opt  
         return opt['state']  
       
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logging.debug("documentViewer (accessOK) access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access == 'free':
             logging.debug("documentViewer (accessOK) access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
           
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
Line 433  class documentViewer(Folder): Line 418  class documentViewer(Folder):
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
           
         for x in range(cut):  
             path=getParentPath(path)  
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path  
       
         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))  
           
         txt = getHttpData(infoUrl)  
         if txt is None:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))  
   
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         size=getText(dom.find("size"))  
         #sizes=dom.xpath("//dir/size")  
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)  
           
         if size:  
             docinfo['numPages'] = int(size)  
         else:  
             docinfo['numPages'] = 0  
               
         # TODO: produce and keep list of image names and numbers  
                           
         return docinfo  
       
     def getIndexMetaPath(self,url):  
         """gib nur den Pfad zurueck"""  
         regexp = re.compile(r".*(experimental|permanent)/(.*)")  
         regpath = regexp.match(url)  
         if (regpath==None):  
             return ""  
         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))              
         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))  
        
       
       
     def getIndexMetaUrl(self,url):  
         """returns utr  of index.meta document at url"""  
         
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
             if not metaUrl.endswith("index.meta"):  
                 metaUrl += "/index.meta"  
           
         return metaUrl  
       
     def getDomFromIndexMeta(self, url):  
         """get dom from index meta"""  
         dom = None  
         metaUrl = self.getIndexMetaUrl(url)  
                   
         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)  
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read index meta from %s"%(url))  
           
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
       
     def getPresentationInfoXML(self, url):  
         """returns dom of info.xml document at url"""  
         dom = None  
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
           
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))  
               
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
                           
           
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets authorization info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))  
           
         access = None  
           
         if docinfo is None:  
             docinfo = {}  
               
         if dom is None:  
             for x in range(cut):  
                 path=getParentPath(path)  
             dom = self.getDomFromIndexMeta(path)  
          
         acc = dom.find(".//access-conditions/access")  
         if acc is not None:  
             acctype = acc.get('type')  
             #acctype = dom.xpath("//access-conditions/access/@type")  
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:  
                     access = dom.find(".//access-conditions/access/name").text.lower()  
               
         docinfo['accessType'] = access  
         return docinfo  
       
           
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets bibliographical info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))  
           
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
           
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);  
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         if self.metadataService is not None:  
             # put all raw bib fields in dict "bib"  
             bib = self.metadataService.getBibData(dom=dom)  
             docinfo['bib'] = bib  
             bibtype = bib.get('@type', None)  
             docinfo['bib_type'] = bibtype  
             # also store DC metadata for convenience  
             dc = self.metadataService.getDCMappedData(bib)  
             docinfo['creator'] = dc.get('creator',None)  
             docinfo['title'] = dc.get('title',None)  
             docinfo['date'] = dc.get('date',None)  
         else:  
             logging.error("MetadataService not found!")  
         return docinfo  
       
       
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets name info from the index.meta file at path or given by dom"""  
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentPath(path)  
             dom = self.getDomFromIndexMeta(path)  
   
         docinfo['name']=getText(dom.find("name"))  
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])  
         return docinfo  
   
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """Fill docinfo from the texttool information of an index.meta document.

         url is the location of the index.meta document; it is loaded with
         getDomFromIndexMeta unless dom is given. Depending on what the
         document contains, the following docinfo keys are set here: 'lang'
         (defaults to ''), 'imagePath', 'imageURL', 'viewerURL', 'textURL',
         'textURLPath' and 'textURLPathkurz'. Afterwards bib info, name,
         presentation info (if the texttool data has a 'presentation' entry)
         and access info are added by the corresponding sibling methods.

         Returns docinfo (a new dict if none was passed in).
         Raises IOError if no archive path can be determined.
         """
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:
            docinfo = {}
         if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default: no language set
         if dom is None:
             dom = self.getDomFromIndexMeta(url)

         # NOTE(review): the sibling methods use self.metadataService --
         # confirm that self.metadata is the intended attribute here
         texttool = self.metadata.getTexttoolData(dom=dom)

         archivePath = None
         archiveName = None

         archiveName = getText(dom.find("name"))
         if not archiveName:
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

         archivePath = getText(dom.find("archive-path"))
         if archivePath:
             # clean up archive path
             if archivePath[0] != '/':
                 archivePath = '/' + archivePath
             if archiveName and (not archivePath.endswith(archiveName)):
                 archivePath += "/" + archiveName
         else:
             # try to get archive-path from url
             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')

         # NOTE(review): this only triggers if getText returned None above;
         # presumably an empty string would pass through -- confirm what
         # getText returns for a missing element
         if archivePath is None:
             # we balk without archive-path
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))

         imageDir = texttool.get('image', None)

         if not imageDir:
             # we balk with no image tag / not necessary anymore because textmode is now standard
             #raise IOError("No text-tool info in %s"%(url))
             imageDir = ""
             #xquery="//pb"
             docinfo['imagePath'] = "" # no images
             docinfo['imageURL'] = ""

         if imageDir and archivePath:
             #print "image: ", imageDir, " archivepath: ", archivePath
             imageDir = os.path.join(archivePath, imageDir)
             imageDir = imageDir.replace("/mpiwg/online", '')
             # adds docinfo['numPages'] for the image directory
             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
             docinfo['imagePath'] = imageDir

             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

         viewerUrl = texttool.get('digiliburlprefix', None)
         if viewerUrl:
             docinfo['viewerURL'] = viewerUrl

         # old style text URL
         textUrl = texttool.get('text', None)
         if textUrl:
             if urlparse.urlparse(textUrl)[0] == "": # no scheme, i.e. not a URL
                 textUrl = os.path.join(archivePath, textUrl)
             # fix URLs starting with /mpiwg/online
             if textUrl.startswith("/mpiwg/online"):
                 textUrl = textUrl.replace("/mpiwg/online", '', 1)

             docinfo['textURL'] = textUrl

         # new style text-url-path
         textUrl = texttool.get('text-url-path', None)
         if textUrl:
             docinfo['textURLPath'] = textUrl
             # short form: everything before the first "."
             textUrlkurz = string.split(textUrl, ".")[0]
             docinfo['textURLPathkurz'] = textUrlkurz
             #if not docinfo['imagePath']:
                 # text-only, no page images
                 #docinfo = self.getNumTextPages(docinfo)

         # get bib info
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info from bib tag
         # TODO: is this needed here?
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

         # TODO: what to do with presentation?
         presentationUrl = texttool.get('presentation', None)
         if presentationUrl: # overwrite these with the presentation information
             # the presentation URL is obtained by replacing index.meta in the
             # metadata URL with the relative path to the presentation info
             presentationPath = presentationUrl
             if url.endswith("index.meta"):
                 presentationUrl = url.replace('index.meta', presentationPath)
             else:
                 presentationUrl = url + "/" + presentationPath

             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

         # get authorization
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info

         return docinfo
      
      
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):  
         """gets the bibliographical information from the preseantion entry in texttools  
         """  
         dom=self.getPresentationInfoXML(url)  
         docinfo['author']=getText(dom.find(".//author"))  
         docinfo['title']=getText(dom.find(".//title"))  
         docinfo['year']=getText(dom.find(".//date"))  
         return docinfo  
       
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):  
         """path ist the path to the images it assumes that the index.meta file is one level higher."""  
         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))  
         if docinfo is None:  
             docinfo = {}  
         path=path.replace("/mpiwg/online","")  
         docinfo['imagePath'] = path  
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)  
           
         pathorig=path  
         for x in range(cut):         
                 path=getParentPath(path)  
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
           
         #TODO: use getDocinfoFromIndexMeta  
         #path ist the path to the images it assumes that the index.meta file is one level higher.  
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         return docinfo  
       
       
     def OLDgetDocinfo(self, mode, url):  
         """returns docinfo depending on mode"""  
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))  
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo  
               
         # new docinfo  
         docinfo = {'mode': mode, 'url': url}  
         # add self url  
         docinfo['viewerUrl'] = self.getDocumentViewerURL()  
         if mode=="texttool":   
             # index.meta with texttool information  
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)  
         elif mode=="imagepath":  
             # folder with images, index.meta optional  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)  
         elif mode=="filepath":  
             # filename  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)  
         else:  
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)  
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         # store in session  
         self.REQUEST.SESSION['docinfo'] = docinfo  
         return docinfo  
   
   
     def getDocinfo(self, mode, url):      def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""          """returns docinfo depending on mode"""
Line 829  class documentViewer(Folder): Line 477  class documentViewer(Folder):
             bib = self.metadataService.getBibData(dom=metaDom)              bib = self.metadataService.getBibData(dom=metaDom)
             if bib:              if bib:
                 docinfo = self.getDocinfoFromBib(docinfo, bib)                  docinfo = self.getDocinfoFromBib(docinfo, bib)
               else:
                   # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                                   
             # auth info              # auth info
             access = self.metadataService.getAccessData(dom=metaDom)              access = self.metadataService.getAccessData(dom=metaDom)
Line 838  class documentViewer(Folder): Line 489  class documentViewer(Folder):
         # image path          # image path
         if mode != 'texttool':          if mode != 'texttool':
             # override image path from texttool              # override image path from texttool
             docinfo['imagePath'] = url              docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
         # number of images from digilib          # number of images from digilib
         if docinfo.get('imagePath', None):          if docinfo.get('imagePath', None):
Line 869  class documentViewer(Folder): Line 520  class documentViewer(Folder):
             docUrl = docinfo['documentURL']              docUrl = docinfo['documentURL']
             if not docUrl.startswith('http:'):              if not docUrl.startswith('http:'):
                 docPath = docUrl                  docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                                   
         docinfo['documentPath'] = docPath          docinfo['documentPath'] = docPath
         return docinfo          return docinfo
Line 889  class documentViewer(Folder): Line 543  class documentViewer(Folder):
         if textUrl and docPath:          if textUrl and docPath:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(docPath, textUrl)                   textUrl = os.path.join(docPath, textUrl) 
                 # fix URLs starting with /mpiwg/online  
                 textUrl = textUrl.replace('/mpiwg/online', '', 1)  
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
Line 905  class documentViewer(Folder): Line 557  class documentViewer(Folder):
         # old presentation stuff          # old presentation stuff
         presentation = texttool.get('presentation', None)          presentation = texttool.get('presentation', None)
         if presentation and docPath:          if presentation and docPath:
             docinfo['presentationPath'] = os.path.join(docPath, presentation)              if presentation.startswith('http:'):
                   docinfo['presentationUrl'] = presentation
               else:
                   docinfo['presentationUrl'] = os.path.join(docPath, presentation)
                           
         return docinfo          return docinfo
   
     def getDocinfoFromBib(self, docinfo, bib):      def getDocinfoFromBib(self, docinfo, bib):
         """reads contents of bib element into docinfo"""          """reads contents of bib element into docinfo"""
           logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
         # put all raw bib fields in dict "bib"          # put all raw bib fields in dict "bib"
         docinfo['bib'] = bib          docinfo['bib'] = bib
         bibtype = bib.get('@type', None)          bibtype = bib.get('@type', None)
Line 925  class documentViewer(Folder): Line 581  class documentViewer(Folder):
     def getDocinfoFromAccess(self, docinfo, acc):      def getDocinfoFromAccess(self, docinfo, acc):
         """reads contents of access element into docinfo"""          """reads contents of access element into docinfo"""
         #TODO: also read resource type          #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
         try:          try:
             acctype = accc['@attr']['type']              acctype = acc['@attr']['type']
             if acctype:              if acctype:
                 access=acctype                  access=acctype
                 if access in ['group', 'institution']:                  if access in ['group', 'institution']:
Line 959  class documentViewer(Folder): Line 616  class documentViewer(Folder):
         return docinfo          return docinfo
                           
                           
       def getDocinfoFromPresentationInfoXml(self,docinfo):
           """gets DC-like bibliographical information from the presentation entry in texttools"""
           url = docinfo.get('presentationUrl', None)
           if not url:
               logging.error("getDocinfoFromPresentation: no URL!")
               return docinfo
           
           dom = None
           metaUrl = None
           if url.startswith("http://"):
               # real URL
               metaUrl = url
           else:
               # online path
               
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url
           
           txt=getHttpData(metaUrl)
           if txt is None:
               logging.error("Unable to read info.xml from %s"%(url))
               return docinfo
               
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
           return docinfo
       
   
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}

Removed from v.1.175.2.11  
changed lines
  Added in v.1.175.2.12


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>