Diff for /documentViewer/documentViewer.py between versions 1.175.2.6 and 1.175.2.15

version 1.175.2.6, 2011/07/20 08:22:36 version 1.175.2.15, 2011/08/02 16:29:15
Line 98  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
 def getParentDir(path):  def getParentPath(path, cnt=1):
     """returns pathname shortened by one"""      """returns pathname shortened by cnt"""
     return '/'.join(path.split('/')[0:-1])      # make sure path doesn't end with /
               path = path.rstrip('/')
 def getBibdataFromDom(dom):      # split by /, shorten, and reassemble
     """returns dict with all elements from bib-tag"""      return '/'.join(path.split('/')[0:-cnt])
     bibinfo = {}  
     bib = dom.find(".//meta/bib")  
     if bib is not None:  
         # put type in @type  
         type = bib.get('type')  
         bibinfo['@type'] = type  
         # put all subelements in dict  
         for e in bib:  
             bibinfo[e.tag] = getText(e)  
                           
     return bibinfo  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 128  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 146  class documentViewer(Folder): Line 139  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 170  class documentViewer(Folder): Line 161  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 178  class documentViewer(Folder): Line 170  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 266  class documentViewer(Folder): Line 264  class documentViewer(Folder):
                                 
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
         '''          """
         view it          view it
         @param mode: defines how to access the document behind url           @param mode: defines how to access the document behind url 
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         @param characterNormalization type of text display (reg, norm, none)          """
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)  
         '''  
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                   
Line 297  class documentViewer(Folder): Line 294  class documentViewer(Folder):
         # auto viewMode: text_dict if text else images          # auto viewMode: text_dict if text else images
         if viewMode=="auto":           if viewMode=="auto": 
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                 #texturl gesetzt und textViewer konfiguriert  
                 viewMode="text_dict"                  viewMode="text_dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
Line 375  class documentViewer(Folder): Line 371  class documentViewer(Folder):
         # FIXME: does this belong here?          # FIXME: does this belong here?
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath          if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
                 urlParams["mode"] = "imagepath"                  urlParams["mode"] = "imagepath"
                 urlParams["url"] = getParentDir(urlParams["url"])                  urlParams["url"] = getParentPath(urlParams["url"])
                                   
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
Line 393  class documentViewer(Folder): Line 389  class documentViewer(Folder):
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                   
Line 401  class documentViewer(Folder): Line 396  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):  
         """returns new option state"""  
         if not self.REQUEST.SESSION.has_key(optionName):  
             # not in session -- initial  
             opt = {'lastState': newState, 'state': initialState}  
         else:  
             opt = self.REQUEST.SESSION.get(optionName)  
             if opt['lastState'] != newState:  
                 # state in session has changed -- toggle  
                 opt['state'] = not opt['state']  
                 opt['lastState'] = newState  
           
         self.REQUEST.SESSION[optionName] = opt  
         return opt['state']  
       
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logging.debug("documentViewer (accessOK) access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access == 'free':
             logging.debug("documentViewer (accessOK) access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
           
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
Line 437  class documentViewer(Folder): Line 418  class documentViewer(Folder):
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
           
         for x in range(cut):  
             path=getParentDir(path)  
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path  
       
         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))  
           
         txt = getHttpData(infoUrl)  
         if txt is None:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))  
   
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         size=getText(dom.find("size"))  
         #sizes=dom.xpath("//dir/size")  
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)  
           
         if size:  
             docinfo['numPages'] = int(size)  
         else:  
             docinfo['numPages'] = 0  
               
         # TODO: produce and keep list of image names and numbers  
                                                   
       def getDocinfo(self, mode, url):
           """returns docinfo depending on mode"""
           logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if its still current
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
         return docinfo          return docinfo
           
     def getIndexMetaPath(self,url):          # new docinfo
         """gib nur den Pfad zurueck"""          docinfo = {'mode': mode, 'url': url}
         regexp = re.compile(r".*(experimental|permanent)/(.*)")          # add self url
         regpath = regexp.match(url)          docinfo['viewerUrl'] = self.getDocumentViewerURL()
         if (regpath==None):          # get index.meta DOM
             return ""          docUrl = None
         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))                      metaDom = None
         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))          if mode=="texttool": 
                    # url points to document dir or index.meta
                   metaDom = self.metadataService.getDomFromPathOrUrl(url)
                   docUrl = url.replace('/index.meta', '')
     def getIndexMetaUrl(self,url):              if metaDom is None:
         """returns utr  of index.meta document at url"""                  raise IOError("Unable to find index.meta for mode=texttool!")
         
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
             if not metaUrl.endswith("index.meta"):  
                 metaUrl += "/index.meta"  
           
         return metaUrl  
       
     def getDomFromIndexMeta(self, url):  
         """get dom from index meta"""  
         dom = None  
         metaUrl = self.getIndexMetaUrl(url)  
                                   
         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)          elif mode=="imagepath":
         txt=getHttpData(metaUrl)              # url points to folder with images, index.meta optional
         if txt is None:              # asssume index.meta in parent dir
             raise IOError("Unable to read index meta from %s"%(url))              docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
                   
         dom = ET.fromstring(txt)          elif mode=="filepath":
         #dom = Parse(txt)              # url points to image file, index.meta optional
         return dom              # asssume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
           
     def getPresentationInfoXML(self, url):  
         """returns dom of info.xml document at url"""  
         dom = None  
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:          else:
             # online path              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             server=self.digilibBaseUrl+"/servlet/Texter?fn="              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
             metaUrl=server+url.replace("/mpiwg/online","")  
           
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))  
               
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
                           
           
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets authorization info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))  
           
         access = None  
           
         if docinfo is None:  
             docinfo = {}  
               
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
                 
         acc = dom.find(".//access-conditions/access")          docinfo['documentUrl'] = docUrl
         if acc is not None:          # process index.meta contents
             acctype = acc.get('type')          if metaDom is not None:
             #acctype = dom.xpath("//access-conditions/access/@type")              # document directory name and path
             if acctype:              logging.debug("RESOURCE: %s"%repr(self.metadataService.resource.meta.getData(dom=metaDom, all=True, recursive=2)))
                 access=acctype              resource = self.metadataService.getResourceData(dom=metaDom)
                 if access in ['group', 'institution']:              if resource:
                     access = dom.find(".//access-conditions/access/name").text.lower()                  docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
               else:
                   # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool
               docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
   
           # number of images from digilib
           if docinfo.get('imagePath', None):
               docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
               docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
                           
         docinfo['accessType'] = access          logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
           
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                   
               if docName and (not docPath.endswith(docName)):
                   docPath += "/" + docName
               
           else:
               # use docUrl as docPath
               docUrl = docinfo['documentURL']
               if not docUrl.startswith('http:'):
                   docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):          docinfo['documentPath'] = docPath
         """gets bibliographical info from the index.meta file at path or given by dom"""          return docinfo
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))  
           
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
           
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);  
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         # put all raw bib fields in dict "bib"  
         bib = getBibdataFromDom(dom)  
         docinfo['bib'] = bib  
           
         # extract some fields (author, title, year) according to their mapping  
         metaData=self.metadata.main.meta.bib  
         bibtype=bib.get("@type")  
         #bibtype=dom.xpath("//bib/@type")  
         if not bibtype:  
             bibtype="generic"  
               
         bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)  
         docinfo['bib_type'] = bibtype  
         bibmap=metaData.generateMappingForType(bibtype)  
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))  
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))  
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)  
         if len(bibmap) > 0 and bibmap.get('author',None) or bibmap.get('title',None):  
             try:  
                 docinfo['author']=bib.get(bibmap['author'][0])  
             except: pass  
             try:  
                 docinfo['title']=bib.get(bibmap['title'][0])  
             except: pass  
             try:  
                 docinfo['year']=bib.get(bibmap['year'][0])  
             except: pass  
               
             # ROC: why is this here?  
             #            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             #            try:  
             #                docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])  
             #            except:  
             #                docinfo['lang']=''  
             #            try:  
             #                docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])  
             #            except:  
             #                docinfo['city']=''  
             #            try:  
             #                docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])  
             #            except:  
             #                docinfo['number_of_pages']=''  
             #            try:  
             #                docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])  
             #            except:  
             #                docinfo['series_volume']=''  
             #            try:  
             #                docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])  
             #            except:  
             #                docinfo['number_of_volumes']=''  
             #            try:  
             #                docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])  
             #            except:  
             #                docinfo['translator']=''  
             #            try:  
             #                docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])  
             #            except:  
             #                docinfo['edition']=''  
             #            try:  
             #                docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])  
             #            except:  
             #                docinfo['series_author']=''  
             #            try:  
             #                docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])  
             #            except:  
             #                docinfo['publisher']=''  
             #            try:  
             #                docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])  
             #            except:  
             #                docinfo['series_title']=''  
             #            try:  
             #                docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])  
             #            except:  
             #                docinfo['isbn_issn']=''             
         return docinfo  
       
       
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets name info from the index.meta file at path or given by dom"""  
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
   
         docinfo['name']=getText(dom.find("name"))  
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])  
         return docinfo  
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):  
         """parse texttool tag in index meta"""  
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))  
         if docinfo is None:  
            docinfo = {}  
         if docinfo.get('lang', None) is None:  
             docinfo['lang'] = '' # default keine Sprache gesetzt  
         if dom is None:  
             dom = self.getDomFromIndexMeta(url)  
           
         archivePath = None  
         archiveName = None  
       
         archiveName = getText(dom.find("name"))  
         if not archiveName:  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))  
           
         archivePath = getText(dom.find("archive-path"))  
         if archivePath:  
             # clean up archive path  
             if archivePath[0] != '/':  
                 archivePath = '/' + archivePath  
             if archiveName and (not archivePath.endswith(archiveName)):  
                 archivePath += "/" + archiveName  
         else:  
             # try to get archive-path from url  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))  
             if (not url.startswith('http')):  
                 archivePath = url.replace('index.meta', '')  
                   
         if archivePath is None:  
             # we balk without archive-path  
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))  
           
         imageDir = getText(dom.find(".//texttool/image"))  
               
         if not imageDir:  
             # we balk with no image tag / not necessary anymore because textmode is now standard  
             #raise IOError("No text-tool info in %s"%(url))  
             imageDir = ""  
             #xquery="//pb"    
             docinfo['imagePath'] = "" # keine Bilder  
             docinfo['imageURL'] = ""  
                           
         if imageDir and archivePath:      def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
             #print "image: ", imageDir, " archivepath: ", archivePath              #print "image: ", imageDir, " archivepath: ", archivePath
             imageDir = os.path.join(archivePath, imageDir)              imageDir = os.path.join(docPath, imageDir)
             imageDir = imageDir.replace("/mpiwg/online", '')              imageDir = imageDir.replace('/mpiwg/online', '', 1)
             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)  
             docinfo['imagePath'] = imageDir              docinfo['imagePath'] = imageDir
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir  
               
         viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))  
         if viewerUrl:  
             docinfo['viewerURL'] = viewerUrl  
           
         # old style text URL          # old style text URL
         textUrl = getText(dom.find(".//texttool/text"))          textUrl = texttool.get('text', None)
         if textUrl:          if textUrl and docPath:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(docPath, textUrl) 
             # fix URLs starting with /mpiwg/online  
             if textUrl.startswith("/mpiwg/online"):  
                 textUrl = textUrl.replace("/mpiwg/online", '', 1)  
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
         # new style text-url-path          # new style text-url-path
         textUrl = getText(dom.find(".//texttool/text-url-path"))          textUrl = texttool.get('text-url-path', None)
         if textUrl:          if textUrl:
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             textUrlkurz = string.split(textUrl, ".")[0]  
             docinfo['textURLPathkurz'] = textUrlkurz  
             #if not docinfo['imagePath']:  
                 # text-only, no page images  
                 #docinfo = self.getNumTextPages(docinfo)  
                                       
           # page flow
           docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
                     
         presentationUrl = getText(dom.find(".//texttool/presentation"))          # odd pages are left
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo['oddPage'] = texttool.get('odd-scan-orientation', 'left')
         # TODO: is this needed here?  
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)  
                   
           # number of title page
           docinfo['titlePage'] = texttool.get('title-scan-no', 0)
                   
         if presentationUrl: # ueberschreibe diese durch presentation informationen           # old presentation stuff
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten          presentation = texttool.get('presentation', None)
              # durch den relativen Pfad auf die presentation infos          if presentation and docPath:
             presentationPath = presentationUrl              if presentation.startswith('http:'):
             if url.endswith("index.meta"):                   docinfo['presentationUrl'] = presentation
                 presentationUrl = url.replace('index.meta', presentationPath)  
             else:              else:
                 presentationUrl = url + "/" + presentationPath                  docinfo['presentationUrl'] = os.path.join(docPath, presentation)
                                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)  
           
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          return docinfo
                   
       def getDocinfoFromBib(self, docinfo, bib):
           """reads contents of bib element into docinfo"""
           logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
           # put all raw bib fields in dict "bib"
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
           docinfo['bibType'] = bibtype
           # also store DC metadata for convenience
           dc = self.metadataService.getDCMappedData(bib)
           docinfo['creator'] = dc.get('creator',None)
           docinfo['title'] = dc.get('title',None)
           docinfo['date'] = dc.get('date',None)
         return docinfo          return docinfo
         
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
           try:
               acctype = acc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                   
                   docinfo['accessType'] = access
   
           except:
               pass
         
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):  
         """gets the bibliographical information from the preseantion entry in texttools  
         """  
         dom=self.getPresentationInfoXML(url)  
         docinfo['author']=getText(dom.find(".//author"))  
         docinfo['title']=getText(dom.find(".//title"))  
         docinfo['year']=getText(dom.find(".//date"))  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromDigilib(self, docinfo, path):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))          # fetch data
         if docinfo is None:          txt = getHttpData(infoUrl)
             docinfo = {}          if not txt:
         path=path.replace("/mpiwg/online","")              logging.error("Unable to get dir-info from %s"%(infoUrl))
         docinfo['imagePath'] = path  
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)  
           
         pathorig=path  
         for x in range(cut):         
                 path=getParentDir(path)  
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
           
         #path ist the path to the images it assumes that the index.meta file is one level higher.  
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         return docinfo          return docinfo
           
           dom = ET.fromstring(txt)
           size = getText(dom.find("size"))
           logging.debug("getDocinfoFromDigilib: size=%s"%size)
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
           
     def getDocinfo(self, mode, url):          # TODO: produce and keep list of image names and numbers
         """returns docinfo depending on mode"""  
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))  
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo                  return docinfo
                           
         # new docinfo              
         docinfo = {'mode': mode, 'url': url}      def getDocinfoFromPresentationInfoXml(self,docinfo):
         # add self url          """gets DC-like bibliographical information from the presentation entry in texttools"""
         docinfo['viewerUrl'] = self.getDocumentViewerURL()          url = docinfo.get('presentationUrl', None)
         if mode=="texttool":           if not url:
             # index.meta with texttool information              logging.error("getDocinfoFromPresentation: no URL!")
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              return docinfo
         elif mode=="imagepath":          
             # folder with images, index.meta optional          dom = None
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)          metaUrl = None
         elif mode=="filepath":          if url.startswith("http://"):
             # filename              # real URL
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              metaUrl = url
         else:          else:
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              # online path
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
                                   
         # FIXME: fake texturlpath               server=self.digilibBaseUrl+"/servlet/Texter?fn="
         if not docinfo.has_key('textURLPath'):              metaUrl=server+url
             docinfo['textURLPath'] = None  
                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())          txt=getHttpData(metaUrl)
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          if txt is None:
         self.REQUEST.SESSION['docinfo'] = docinfo              logging.error("Unable to read info.xml from %s"%(url))
         return docinfo          return docinfo
                                 
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
           return docinfo
       
   
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):      def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
         pageinfo = {}          pageinfo = {}
Line 845  class documentViewer(Folder): Line 680  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # what does this do?
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
Line 855  class documentViewer(Folder): Line 691  class documentViewer(Folder):
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
                   
         pageinfo['viewMode'] = viewMode          pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode          pageinfo['tocMode'] = tocMode
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
Line 867  class documentViewer(Folder): Line 704  class documentViewer(Folder):
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')               pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')     
           # WTF?:
         toc = int (pageinfo['tocPN'])          toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)          pageinfo['textPages'] =int (toc)
                   
           # What does this do?
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = int(pageinfo['tocPageSize'])
Line 878  class documentViewer(Folder): Line 717  class documentViewer(Folder):
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
                   
             pageinfo['tocPN'] = min (tocPages,toc)                                  pageinfo['tocPN'] = min (tocPages,toc)                    
               
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')          pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
         pageinfo['sn'] =self.REQUEST.get('sn','')          pageinfo['sn'] =self.REQUEST.get('sn','')
         return pageinfo          return pageinfo
           
   
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 890  def changeDocumentViewer(self,title="",d Line 735  def changeDocumentViewer(self,title="",d
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175.2.6  
changed lines
  Added in v.1.175.2.15


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>