Diff for /documentViewer/documentViewer.py between versions 1.175.2.9 and 1.175.2.15

version 1.175.2.9, 2011/07/27 19:09:44 version 1.175.2.15, 2011/08/02 16:29:15
Line 98  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
 def getParentDir(path):  def getParentPath(path, cnt=1):
     """returns pathname shortened by one"""      """returns pathname shortened by cnt"""
     return '/'.join(path.split('/')[0:-1])      # make sure path doesn't end with /
               path = path.rstrip('/')
 def normalizeBibField(bt, underscore=True):      # split by /, shorten, and reassemble
     """returns normalised bib type for looking up mappings"""      return '/'.join(path.split('/')[0:-cnt])
     bt = bt.strip().replace(' ', '-').lower()  
     if underscore:  
         bt = bt.replace('_', '-')  
           
     return bt  
   
 def getBibdataFromDom(dom):  
     """returns dict with all elements from bib-tag"""  
     bibinfo = {}  
     bib = dom.find(".//meta/bib")  
     if bib is not None:  
         # put type in @type  
         type = bib.get('type')  
         bibinfo['@type'] = normalizeBibField(type)  
         # put all subelements in dict  
         for e in bib:  
             bibinfo[normalizeBibField(e.tag)] = getText(e)  
               
     return bibinfo  
   
   
 ##  ##
Line 137  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 155  class documentViewer(Folder): Line 139  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 179  class documentViewer(Folder): Line 161  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 187  class documentViewer(Folder): Line 170  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 275  class documentViewer(Folder): Line 264  class documentViewer(Folder):
                                 
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         '''          """
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         @param characterNormalization type of text display (reg, norm, none)          """
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)  
         '''  
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
Line 306  class documentViewer(Folder): Line 294  class documentViewer(Folder):
         # auto viewMode: text_dict if text else images          # auto viewMode: text_dict if text else images
         if viewMode=="auto":           if viewMode=="auto": 
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                 #texturl gesetzt und textViewer konfiguriert  
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 384  class documentViewer(Folder): Line 371  class documentViewer(Folder):
         # FIXME: does this belong here?          # FIXME: does this belong here?
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath          if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                 urlParams["mode"] = "imagepath"                  urlParams["mode"] = "imagepath"
                 urlParams["url"] = getParentDir(urlParams["url"])                  urlParams["url"] = getParentPath(urlParams["url"])
                                   
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
Line 402  class documentViewer(Folder): Line 389  class documentViewer(Folder):
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                   
Line 410  class documentViewer(Folder): Line 396  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):  
         """returns new option state"""  
         if not self.REQUEST.SESSION.has_key(optionName):  
             # not in session -- initial  
             opt = {'lastState': newState, 'state': initialState}  
         else:  
             opt = self.REQUEST.SESSION.get(optionName)  
             if opt['lastState'] != newState:  
                 # state in session has changed -- toggle  
                 opt['state'] = not opt['state']  
                 opt['lastState'] = newState  
           
         self.REQUEST.SESSION[optionName] = opt  
         return opt['state']  
       
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logging.debug("documentViewer (accessOK) access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access == 'free':
             logging.debug("documentViewer (accessOK) access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
           
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
Line 446  class documentViewer(Folder): Line 418  class documentViewer(Folder):
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
                   
         for x in range(cut):      def getDocinfo(self, mode, url):
             path=getParentDir(path)          """returns docinfo depending on mode"""
                  logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path          # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if it's still current
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
                   return docinfo
           
         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))          # new docinfo
           docinfo = {'mode': mode, 'url': url}
           # add self url
           docinfo['viewerUrl'] = self.getDocumentViewerURL()
           # get index.meta DOM
           docUrl = None
           metaDom = None
           if mode=="texttool": 
               # url points to document dir or index.meta
               metaDom = self.metadataService.getDomFromPathOrUrl(url)
               docUrl = url.replace('/index.meta', '')
               if metaDom is None:
                   raise IOError("Unable to find index.meta for mode=texttool!")
                   
         txt = getHttpData(infoUrl)          elif mode=="imagepath":
         if txt is None:              # url points to folder with images, index.meta optional
             raise IOError("Unable to get dir-info from %s"%(infoUrl))              # assume index.meta in parent dir
               docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
   
         dom = ET.fromstring(txt)          elif mode=="filepath":
         #dom = Parse(txt)              # url points to image file, index.meta optional
         size=getText(dom.find("size"))              # asssume index.meta is two path segments up
         #sizes=dom.xpath("//dir/size")              docUrl = getParentPath(url, 2)
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)              metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
                   
         if size:  
             docinfo['numPages'] = int(size)  
         else:          else:
             docinfo['numPages'] = 0              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
               raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                           
         # TODO: produce and keep list of image names and numbers          docinfo['documentUrl'] = docUrl
           # process index.meta contents
           if metaDom is not None:
               # document directory name and path
               logging.debug("RESOURCE: %s"%repr(self.metadataService.resource.meta.getData(dom=metaDom, all=True, recursive=2)))
               resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
               else:
                   # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool
               docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
           # number of images from digilib
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
                                                   
           logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
           
     def getIndexMetaPath(self,url):      def getDocinfoFromResource(self, docinfo, resource):
         """gib nur den Pfad zurueck"""          """reads contents of resource element into docinfo"""
         regexp = re.compile(r".*(experimental|permanent)/(.*)")          docName = resource.get('name', None)
         regpath = regexp.match(url)          docinfo['documentName'] = docName
         if (regpath==None):          docPath = resource.get('archive-path', None)
             return ""          if docPath:
         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))                          # clean up document path
         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))              if docPath[0] != '/':
                   docPath = '/' + docPath
                   
               if docName and (not docPath.endswith(docName)):
                   docPath += "/" + docName
               
           else:
               # use docUrl as docPath
               docUrl = docinfo['documentURL']
               if not docUrl.startswith('http:'):
                   docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
             
           docinfo['documentPath'] = docPath
           return docinfo
           
       def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
               #print "image: ", imageDir, " archivepath: ", archivePath
               imageDir = os.path.join(docPath, imageDir)
               imageDir = imageDir.replace('/mpiwg/online', '', 1)
               docinfo['imagePath'] = imageDir
           
     def getIndexMetaUrl(self,url):          # old style text URL
         """returns utr  of index.meta document at url"""          textUrl = texttool.get('text', None)
           if textUrl and docPath:
               if urlparse.urlparse(textUrl)[0] == "": #keine url
                   textUrl = os.path.join(docPath, textUrl) 
               
         metaUrl = None              docinfo['textURL'] = textUrl
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
             if not metaUrl.endswith("index.meta"):  
                 metaUrl += "/index.meta"  
                   
         return metaUrl          # new style text-url-path
           textUrl = texttool.get('text-url-path', None)
           if textUrl:
               docinfo['textURLPath'] = textUrl
           
     def getDomFromIndexMeta(self, url):          # page flow
         """get dom from index meta"""          docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
         dom = None  
         metaUrl = self.getIndexMetaUrl(url)  
                                   
         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)          # odd pages are left
         txt=getHttpData(metaUrl)          docinfo['oddPage'] = texttool.get('odd-scan-orientation', 'left')
         if txt is None:  
             raise IOError("Unable to read index meta from %s"%(url))  
                   
         dom = ET.fromstring(txt)          # number of title page
         #dom = Parse(txt)          docinfo['titlePage'] = texttool.get('title-scan-no', 0)
         return dom  
           
     def getPresentationInfoXML(self, url):          # old presentation stuff
         """returns dom of info.xml document at url"""          presentation = texttool.get('presentation', None)
         dom = None          if presentation and docPath:
         metaUrl = None              if presentation.startswith('http:'):
         if url.startswith("http://"):                  docinfo['presentationUrl'] = presentation
             # real URL  
             metaUrl = url  
         else:          else:
             # online path                  docinfo['presentationUrl'] = os.path.join(docPath, presentation)
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
           
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))  
               
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
                           
                   
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets authorization info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))  
                   
         access = None  
           
         if docinfo is None:  
             docinfo = {}  
               
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
          
         acc = dom.find(".//access-conditions/access")  
         if acc is not None:  
             acctype = acc.get('type')  
             #acctype = dom.xpath("//access-conditions/access/@type")  
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:  
                     access = dom.find(".//access-conditions/access/name").text.lower()  
               
         docinfo['accessType'] = access  
         return docinfo          return docinfo
           
               def getDocinfoFromBib(self, docinfo, bib):
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):          """reads contents of bib element into docinfo"""
         """gets bibliographical info from the index.meta file at path or given by dom"""          logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))  
           
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
           
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);  
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         # try to get MetaDataFolder  
         metadata = getattr(self, 'metadata')  
         if metadata is not None:  
             # put all raw bib fields in dict "bib"              # put all raw bib fields in dict "bib"
             bib = metadata.getBibdataFromDom(dom)  
             docinfo['bib'] = bib              docinfo['bib'] = bib
             bibtype = bib.get('@type', None)              bibtype = bib.get('@type', None)
             docinfo['bib_type'] = bibtype          docinfo['bibType'] = bibtype
             # also store DC metadata for convenience              # also store DC metadata for convenience
             dc = metadata.getDCMappedData(bib)          dc = self.metadataService.getDCMappedData(bib)
             docinfo['creator'] = dc.get('creator',None)              docinfo['creator'] = dc.get('creator',None)
             docinfo['title'] = dc.get('title',None)              docinfo['title'] = dc.get('title',None)
             docinfo['date'] = dc.get('date',None)              docinfo['date'] = dc.get('date',None)
         else:          return docinfo
             logging.error("MetaDataFolder 'metadata' not found!")  
             #TODO: remove  
             bib = getBibdataFromDom(dom)  
         return docinfo  
       
       
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets name info from the index.meta file at path or given by dom"""  
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
   
         docinfo['name']=getText(dom.find("name"))  
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])  
         return docinfo  
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):  
         """parse texttool tag in index meta"""  
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))  
         if docinfo is None:  
            docinfo = {}  
         if docinfo.get('lang', None) is None:  
             docinfo['lang'] = '' # default keine Sprache gesetzt  
         if dom is None:  
             dom = self.getDomFromIndexMeta(url)  
           
         archivePath = None  
         archiveName = None  
       
         archiveName = getText(dom.find("name"))  
         if not archiveName:  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))  
           
         archivePath = getText(dom.find("archive-path"))  
         if archivePath:  
             # clean up archive path  
             if archivePath[0] != '/':  
                 archivePath = '/' + archivePath  
             if archiveName and (not archivePath.endswith(archiveName)):  
                 archivePath += "/" + archiveName  
         else:  
             # try to get archive-path from url  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))  
             if (not url.startswith('http')):  
                 archivePath = url.replace('index.meta', '')  
                   
         if archivePath is None:  
             # we balk without archive-path  
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))  
           
         imageDir = getText(dom.find(".//texttool/image"))  
               
         if not imageDir:  
             # we balk with no image tag / not necessary anymore because textmode is now standard  
             #raise IOError("No text-tool info in %s"%(url))  
             imageDir = ""  
             #xquery="//pb"    
             docinfo['imagePath'] = "" # keine Bilder  
             docinfo['imageURL'] = ""  
               
         if imageDir and archivePath:  
             #print "image: ", imageDir, " archivepath: ", archivePath  
             imageDir = os.path.join(archivePath, imageDir)  
             imageDir = imageDir.replace("/mpiwg/online", '')  
             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)  
             docinfo['imagePath'] = imageDir  
               
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir  
               
         viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))  
         if viewerUrl:  
             docinfo['viewerURL'] = viewerUrl  
           
         # old style text URL  
         textUrl = getText(dom.find(".//texttool/text"))  
         if textUrl:  
             if urlparse.urlparse(textUrl)[0] == "": #keine url  
                 textUrl = os.path.join(archivePath, textUrl)   
             # fix URLs starting with /mpiwg/online  
             if textUrl.startswith("/mpiwg/online"):  
                 textUrl = textUrl.replace("/mpiwg/online", '', 1)  
                           
             docinfo['textURL'] = textUrl      def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
           try:
               acctype = acc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
           
         # new style text-url-path                  docinfo['accessType'] = access
         textUrl = getText(dom.find(".//texttool/text-url-path"))  
         if textUrl:  
             docinfo['textURLPath'] = textUrl  
             textUrlkurz = string.split(textUrl, ".")[0]  
             docinfo['textURLPathkurz'] = textUrlkurz  
             #if not docinfo['imagePath']:  
                 # text-only, no page images  
                 #docinfo = self.getNumTextPages(docinfo)  
                                       
           except:
               pass
                     
         presentationUrl = getText(dom.find(".//texttool/presentation"))          return docinfo
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag  
         # TODO: is this needed here?  
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)  
                   
       def getDocinfoFromDigilib(self, docinfo, path):
           infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
           # fetch data
           txt = getHttpData(infoUrl)
           if not txt:
               logging.error("Unable to get dir-info from %s"%(infoUrl))
               return docinfo
                   
         if presentationUrl: # ueberschreibe diese durch presentation informationen           dom = ET.fromstring(txt)
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten          size = getText(dom.find("size"))
              # durch den relativen Pfad auf die presentation infos          logging.debug("getDocinfoFromDigilib: size=%s"%size)
             presentationPath = presentationUrl          if size:
             if url.endswith("index.meta"):               docinfo['numPages'] = int(size)
                 presentationUrl = url.replace('index.meta', presentationPath)  
             else:              else:
                 presentationUrl = url + "/" + presentationPath              docinfo['numPages'] = 0
                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)  
       
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info  
                   
           # TODO: produce and keep list of image names and numbers
         return docinfo          return docinfo
         
         
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):      def getDocinfoFromPresentationInfoXml(self,docinfo):
         """gets the bibliographical information from the preseantion entry in texttools          """gets DC-like bibliographical information from the presentation entry in texttools"""
         """          url = docinfo.get('presentationUrl', None)
         dom=self.getPresentationInfoXML(url)          if not url:
         docinfo['author']=getText(dom.find(".//author"))              logging.error("getDocinfoFromPresentation: no URL!")
         docinfo['title']=getText(dom.find(".//title"))  
         docinfo['year']=getText(dom.find(".//date"))  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):          dom = None
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          metaUrl = None
         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))          if url.startswith("http://"):
         if docinfo is None:              # real URL
             docinfo = {}              metaUrl = url
         path=path.replace("/mpiwg/online","")          else:
         docinfo['imagePath'] = path              # online path
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)  
           
         pathorig=path  
         for x in range(cut):         
                 path=getParentDir(path)  
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
           
         #TODO: use getDocinfoFromIndexMeta  
         #path ist the path to the images it assumes that the index.meta file is one level higher.  
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         return docinfo  
           
               server=self.digilibBaseUrl+"/servlet/Texter?fn="
               metaUrl=server+url
           
     def getDocinfo(self, mode, url):          txt=getHttpData(metaUrl)
         """returns docinfo depending on mode"""          if txt is None:
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))              logging.error("Unable to read info.xml from %s"%(url))
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo                  return docinfo
                           
         # new docinfo          dom = ET.fromstring(txt)
         docinfo = {'mode': mode, 'url': url}          docinfo['creator']=getText(dom.find(".//author"))
         # add self url          docinfo['title']=getText(dom.find(".//title"))
         docinfo['viewerUrl'] = self.getDocumentViewerURL()          docinfo['date']=getText(dom.find(".//date"))
         if mode=="texttool":   
             # index.meta with texttool information  
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)  
         elif mode=="imagepath":  
             # folder with images, index.meta optional  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)  
         elif mode=="filepath":  
             # filename  
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)  
         else:  
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)  
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())  
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)  
         # store in session  
         self.REQUEST.SESSION['docinfo'] = docinfo  
         return docinfo          return docinfo
                                 
   
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
Line 840  class documentViewer(Folder): Line 725  class documentViewer(Folder):
         return pageinfo          return pageinfo
   
           
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):      def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 847  class documentViewer(Folder): Line 735  class documentViewer(Folder):
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175.2.9  
changed lines
  Added in v.1.175.2.15


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>