documentViewer/documentViewer.py - diff

Return to documentViewer.py CVS log

Up to [Repository] / documentViewer

Diff for /documentViewer/documentViewer.py between versions 1.175.2.11 and 1.175.2.12

-version 1.175.2.11, 2011/07/29 16:27:24
+version 1.175.2.12, 2011/07/29 18:36:04
  Line 264  class documentViewer(Folder):
          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
      security.declareProtected('View','index_html')
      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
          """
- Line 293  class documentViewer(Folder):
+ Line 294  class documentViewer(Folder):
          # auto viewMode: text_dict if text else images
          if viewMode=="auto":
              if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
-                 #texturl gesetzt und textViewer konfiguriert
                  viewMode="text_dict"
              else:
                  viewMode="images"
  Line 389  class documentViewer(Folder):
      def getInfo_xml(self,url,mode):
          """returns info about the document as XML"""
          if not self.digilibBaseUrl:
              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
- Line 397  class documentViewer(Folder):
+ Line 396  class documentViewer(Folder):
          pt = getattr(self.template, 'info_xml')
          return pt(docinfo=docinfo)
-     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
-         """returns new option state"""
-         if not self.REQUEST.SESSION.has_key(optionName):
-             # not in session -- initial
-             opt = {'lastState': newState, 'state': initialState}
-         else:
-             opt = self.REQUEST.SESSION.get(optionName)
-             if opt['lastState'] != newState:
-                 # state in session has changed -- toggle
-                 opt['state'] = not opt['state']
-                 opt['lastState'] = newState
-         self.REQUEST.SESSION[optionName] = opt
-         return opt['state']
      def isAccessible(self, docinfo):
          """returns if access to the resource is granted"""
          access = docinfo.get('accessType', None)
          logging.debug("documentViewer (accessOK) access type %s"%access)
-         if access is not None and access == 'free':
+         if access == 'free':
              logging.debug("documentViewer (accessOK) access is free")
              return True
          elif access is None or access in self.authgroups:
              # only local access -- only logged in users
              user = getSecurityManager().getUser()
- Line 433  class documentViewer(Folder):
+ Line 418  class documentViewer(Folder):
          return False
-     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
-         """gibt param von dlInfo aus"""
-         if docinfo is None:
-             docinfo = {}
-         for x in range(cut):
-             path=getParentPath(path)
-         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
-         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
-         txt = getHttpData(infoUrl)
-         if txt is None:
-             raise IOError("Unable to get dir-info from %s"%(infoUrl))
-         dom = ET.fromstring(txt)
-         #dom = Parse(txt)
-         size=getText(dom.find("size"))
-         #sizes=dom.xpath("//dir/size")
-         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
-         if size:
-             docinfo['numPages'] = int(size)
-         else:
-             docinfo['numPages'] = 0
-         # TODO: produce and keep list of image names and numbers
-         return docinfo
-     def getIndexMetaPath(self,url):
-         """gib nur den Pfad zurueck"""
-         regexp = re.compile(r".*(experimental|permanent)/(.*)")
-         regpath = regexp.match(url)
-         if (regpath==None):
-             return ""
-         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
-         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
-     def getIndexMetaUrl(self,url):
-         """returns utr  of index.meta document at url"""
-         metaUrl = None
-         if url.startswith("http://"):
-             # real URL
-             metaUrl = url
-         else:
-             # online path
-             server=self.digilibBaseUrl+"/servlet/Texter?fn="
-             metaUrl=server+url.replace("/mpiwg/online","")
-             if not metaUrl.endswith("index.meta"):
-                 metaUrl += "/index.meta"
-         return metaUrl
-     def getDomFromIndexMeta(self, url):
-         """get dom from index meta"""
-         dom = None
-         metaUrl = self.getIndexMetaUrl(url)
-         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
-         txt=getHttpData(metaUrl)
-         if txt is None:
-             raise IOError("Unable to read index meta from %s"%(url))
-         dom = ET.fromstring(txt)
-         #dom = Parse(txt)
-         return dom
-     def getPresentationInfoXML(self, url):
-         """returns dom of info.xml document at url"""
-         dom = None
-         metaUrl = None
-         if url.startswith("http://"):
-             # real URL
-             metaUrl = url
-         else:
-             # online path
-             server=self.digilibBaseUrl+"/servlet/Texter?fn="
-             metaUrl=server+url.replace("/mpiwg/online","")
-         txt=getHttpData(metaUrl)
-         if txt is None:
-             raise IOError("Unable to read infoXMLfrom %s"%(url))
-         dom = ET.fromstring(txt)
-         #dom = Parse(txt)
-         return dom
-     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-         """gets authorization info from the index.meta file at path or given by dom"""
-         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
-         access = None
-         if docinfo is None:
-             docinfo = {}
-         if dom is None:
-             for x in range(cut):
-                 path=getParentPath(path)
-             dom = self.getDomFromIndexMeta(path)
-         acc = dom.find(".//access-conditions/access")
-         if acc is not None:
-             acctype = acc.get('type')
-             #acctype = dom.xpath("//access-conditions/access/@type")
-             if acctype:
-                 access=acctype
-                 if access in ['group', 'institution']:
-                     access = dom.find(".//access-conditions/access/name").text.lower()
-         docinfo['accessType'] = access
-         return docinfo
-     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-         """gets bibliographical info from the index.meta file at path or given by dom"""
-         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
-         if docinfo is None:
-             docinfo = {}
-         if dom is None:
-             for x in range(cut):
-                 path=getParentDir(path)
-             dom = self.getDomFromIndexMeta(path)
-         docinfo['indexMetaPath']=self.getIndexMetaPath(path);
-         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
-         if self.metadataService is not None:
-             # put all raw bib fields in dict "bib"
-             bib = self.metadataService.getBibData(dom=dom)
-             docinfo['bib'] = bib
-             bibtype = bib.get('@type', None)
-             docinfo['bib_type'] = bibtype
-             # also store DC metadata for convenience
-             dc = self.metadataService.getDCMappedData(bib)
-             docinfo['creator'] = dc.get('creator',None)
-             docinfo['title'] = dc.get('title',None)
-             docinfo['date'] = dc.get('date',None)
-         else:
-             logging.error("MetadataService not found!")
-         return docinfo
-     # TODO: is this needed?
-     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-         """gets name info from the index.meta file at path or given by dom"""
-         if docinfo is None:
-             docinfo = {}
-         if dom is None:
-             for x in range(cut):
-                 path=getParentPath(path)
-             dom = self.getDomFromIndexMeta(path)
-         docinfo['name']=getText(dom.find("name"))
-         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
-         return docinfo
-     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
-         """parse texttool tag in index meta"""
-         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
-         if docinfo is None:
-            docinfo = {}
-         if docinfo.get('lang', None) is None:
-             docinfo['lang'] = '' # default keine Sprache gesetzt
-         if dom is None:
-             dom = self.getDomFromIndexMeta(url)
-         texttool = self.metadata.getTexttoolData(dom=dom)
-         archivePath = None
-         archiveName = None
-         archiveName = getText(dom.find("name"))
-         if not archiveName:
-             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
-         archivePath = getText(dom.find("archive-path"))
-         if archivePath:
-             # clean up archive path
-             if archivePath[0] != '/':
-                 archivePath = '/' + archivePath
-             if archiveName and (not archivePath.endswith(archiveName)):
-                 archivePath += "/" + archiveName
-         else:
-             # try to get archive-path from url
-             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
-             if (not url.startswith('http')):
-                 archivePath = url.replace('index.meta', '')
-         if archivePath is None:
-             # we balk without archive-path
-             raise IOError("Missing archive-path (for text-tool) in %s" % (url))
-         imageDir = texttool.get('image', None)
-         if not imageDir:
-             # we balk with no image tag / not necessary anymore because textmode is now standard
-             #raise IOError("No text-tool info in %s"%(url))
-             imageDir = ""
-             #xquery="//pb"
-             docinfo['imagePath'] = "" # keine Bilder
-             docinfo['imageURL'] = ""
-         if imageDir and archivePath:
-             #print "image: ", imageDir, " archivepath: ", archivePath
-             imageDir = os.path.join(archivePath, imageDir)
-             imageDir = imageDir.replace("/mpiwg/online", '')
-             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
-             docinfo['imagePath'] = imageDir
-             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
-         viewerUrl = texttool.get('digiliburlprefix', None)
-         if viewerUrl:
-             docinfo['viewerURL'] = viewerUrl
-         # old style text URL
-         textUrl = texttool.get('text', None)
-         if textUrl:
-             if urlparse.urlparse(textUrl)[0] == "": #keine url
-                 textUrl = os.path.join(archivePath, textUrl)
-             # fix URLs starting with /mpiwg/online
-             if textUrl.startswith("/mpiwg/online"):
-                 textUrl = textUrl.replace("/mpiwg/online", '', 1)
-             docinfo['textURL'] = textUrl
-         # new style text-url-path
-         textUrl = texttool.get('text-url-path', None)
-         if textUrl:
-             docinfo['textURLPath'] = textUrl
-             textUrlkurz = string.split(textUrl, ".")[0]
-             docinfo['textURLPathkurz'] = textUrlkurz
-             #if not docinfo['imagePath']:
-                 # text-only, no page images
-                 #docinfo = self.getNumTextPages(docinfo)
-         # get bib info
-         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
-         # TODO: is this needed here?
-         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
-         # TODO: what to do with presentation?
-         presentationUrl = texttool.get('presentation', None)
-         if presentationUrl: # ueberschreibe diese durch presentation informationen
-              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
-              # durch den relativen Pfad auf die presentation infos
-             presentationPath = presentationUrl
-             if url.endswith("index.meta"):
-                 presentationUrl = url.replace('index.meta', presentationPath)
-             else:
-                 presentationUrl = url + "/" + presentationPath
-             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
-         # get authorization
-         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
-         return docinfo
-     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
-         """gets the bibliographical information from the preseantion entry in texttools
-         """
-         dom=self.getPresentationInfoXML(url)
-         docinfo['author']=getText(dom.find(".//author"))
-         docinfo['title']=getText(dom.find(".//title"))
-         docinfo['year']=getText(dom.find(".//date"))
-         return docinfo
-     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
-         """path ist the path to the images it assumes that the index.meta file is one level higher."""
-         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
-         if docinfo is None:
-             docinfo = {}
-         path=path.replace("/mpiwg/online","")
-         docinfo['imagePath'] = path
-         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
-         pathorig=path
-         for x in range(cut):
-                 path=getParentPath(path)
-         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
-         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
-         docinfo['imageURL'] = imageUrl
-         #TODO: use getDocinfoFromIndexMeta
-         #path ist the path to the images it assumes that the index.meta file is one level higher.
-         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
-         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
-         return docinfo
-     def OLDgetDocinfo(self, mode, url):
-         """returns docinfo depending on mode"""
-         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
-         # look for cached docinfo in session
-         if self.REQUEST.SESSION.has_key('docinfo'):
-             docinfo = self.REQUEST.SESSION['docinfo']
-             # check if its still current
-             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
-                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
-                 return docinfo
-         # new docinfo
-         docinfo = {'mode': mode, 'url': url}
-         # add self url
-         docinfo['viewerUrl'] = self.getDocumentViewerURL()
-         if mode=="texttool":
-             # index.meta with texttool information
-             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
-         elif mode=="imagepath":
-             # folder with images, index.meta optional
-             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
-         elif mode=="filepath":
-             # filename
-             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
-         else:
-             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
-             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
-         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
-         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
-         # store in session
-         self.REQUEST.SESSION['docinfo'] = docinfo
-         return docinfo
      def getDocinfo(self, mode, url):
          """returns docinfo depending on mode"""
- Line 829  class documentViewer(Folder):
+ Line 477  class documentViewer(Folder):
              bib = self.metadataService.getBibData(dom=metaDom)
              if bib:
                  docinfo = self.getDocinfoFromBib(docinfo, bib)
+             else:
+                 # no bib - try info.xml
+                 docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
              # auth info
              access = self.metadataService.getAccessData(dom=metaDom)
- Line 838  class documentViewer(Folder):
+ Line 489  class documentViewer(Folder):
          # image path
          if mode != 'texttool':
              # override image path from texttool
-             docinfo['imagePath'] = url
+             docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
          # number of images from digilib
          if docinfo.get('imagePath', None):
- Line 869  class documentViewer(Folder):
+ Line 520  class documentViewer(Folder):
              docUrl = docinfo['documentURL']
              if not docUrl.startswith('http:'):
                  docPath = docUrl
+         if docPath:
+             # fix URLs starting with /mpiwg/online
+             docPath = docPath.replace('/mpiwg/online', '', 1)
          docinfo['documentPath'] = docPath
          return docinfo
- Line 889  class documentViewer(Folder):
+ Line 543  class documentViewer(Folder):
          if textUrl and docPath:
              if urlparse.urlparse(textUrl)[0] == "": #keine url
                  textUrl = os.path.join(docPath, textUrl)
-                 # fix URLs starting with /mpiwg/online
-                 textUrl = textUrl.replace('/mpiwg/online', '', 1)
              docinfo['textURL'] = textUrl
- Line 905  class documentViewer(Folder):
+ Line 557  class documentViewer(Folder):
          # old presentation stuff
          presentation = texttool.get('presentation', None)
          if presentation and docPath:
-             docinfo['presentationPath'] = os.path.join(docPath, presentation)
+             if presentation.startswith('http:'):
+                 docinfo['presentationUrl'] = presentation
+             else:
+                 docinfo['presentationUrl'] = os.path.join(docPath, presentation)
          return docinfo
      def getDocinfoFromBib(self, docinfo, bib):
          """reads contents of bib element into docinfo"""
+         logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
          # put all raw bib fields in dict "bib"
          docinfo['bib'] = bib
          bibtype = bib.get('@type', None)
- Line 925  class documentViewer(Folder):
+ Line 581  class documentViewer(Folder):
      def getDocinfoFromAccess(self, docinfo, acc):
          """reads contents of access element into docinfo"""
          #TODO: also read resource type
+         logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
          try:
-             acctype = accc['@attr']['type']
+             acctype = acc['@attr']['type']
              if acctype:
                  access=acctype
                  if access in ['group', 'institution']:
- Line 959  class documentViewer(Folder):
+ Line 616  class documentViewer(Folder):
          return docinfo
+     def getDocinfoFromPresentationInfoXml(self,docinfo):
+         """gets DC-like bibliographical information from the presentation entry in texttools"""
+         url = docinfo.get('presentationUrl', None)
+         if not url:
+             logging.error("getDocinfoFromPresentation: no URL!")
+             return docinfo
+         dom = None
+         metaUrl = None
+         if url.startswith("http://"):
+             # real URL
+             metaUrl = url
+         else:
+             # online path
+             server=self.digilibBaseUrl+"/servlet/Texter?fn="
+             metaUrl=server+url
+         txt=getHttpData(metaUrl)
+         if txt is None:
+             logging.error("Unable to read info.xml from %s"%(url))
+             return docinfo
+         dom = ET.fromstring(txt)
+         docinfo['creator']=getText(dom.find(".//author"))
+         docinfo['title']=getText(dom.find(".//title"))
+         docinfo['date']=getText(dom.find(".//date"))
+         return docinfo
      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
          """returns pageinfo with the given parameters"""
          pageinfo = {}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.175.2.11
changed lines
	Added in v.1.175.2.12