Diff for /documentViewer/documentViewer.py between versions 1.175.2.5 and 1.175.2.24

version 1.175.2.5, 2011/07/19 18:46:35 version 1.175.2.24, 2011/08/12 14:41:39
Line 98  def browserCheck(self): Line 98  def browserCheck(self):
   
     return bt      return bt
   
 def getParentDir(path):  def getParentPath(path, cnt=1):
     """returns pathname shortened by one"""      """returns pathname shortened by cnt"""
     return '/'.join(path.split('/')[0:-1])      # make sure path doesn't end with /
               path = path.rstrip('/')
 def getBibdataFromDom(dom):      # split by /, shorten, and reassemble
     """returns dict with all elements from bib-tag"""      return '/'.join(path.split('/')[0:-cnt])
     bibinfo = {}  
     bib = dom.find(".//meta/bib")  
     if bib is not None:  
         # put type in @type  
         type = bib.get('type')  
         bibinfo['@type'] = type  
         # put all subelements in dict  
         for e in bib:  
             bibinfo[e.tag] = getText(e)  
                           
     return bibinfo  
   
 ##  ##
 ## documentViewer class  ## documentViewer class
Line 128  class documentViewer(Folder): Line 118  class documentViewer(Folder):
         {'label':'main config','action':'changeDocumentViewerForm'},          {'label':'main config','action':'changeDocumentViewerForm'},
         )          )
   
       metadataService = None
       """MetaDataFolder instance"""
   
     # templates and forms      # templates and forms
     viewer_main = PageTemplateFile('zpt/viewer_main', globals())      viewer_main = PageTemplateFile('zpt/viewer_main', globals())
     toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())      toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
Line 146  class documentViewer(Folder): Line 139  class documentViewer(Folder):
           
           
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())      thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View management screens','changeDocumentViewerForm')      
     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())  
   
           
     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):      def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
Line 170  class documentViewer(Folder): Line 161  class documentViewer(Folder):
             templateFolder._setObject('fulltextclient',textServer)              templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:          except Exception, e:
             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))              logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
               
         try:          try:
             from Products.zogiLib.zogiLib import zogiLib              from Products.zogiLib.zogiLib import zogiLib
             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")              zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
Line 178  class documentViewer(Folder): Line 170  class documentViewer(Folder):
         except Exception, e:          except Exception, e:
             logging.error("Unable to create zogiLib for zogilib: "+str(e))              logging.error("Unable to create zogiLib for zogilib: "+str(e))
                   
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
               
           if digilibBaseUrl is not None:
               self.digilibBaseUrl = digilibBaseUrl
               
                   
     # proxy text server methods to fulltextclient      # proxy text server methods to fulltextclient
     def getTextPage(self, **args):      def getTextPage(self, **args):
Line 208  class documentViewer(Folder): Line 209  class documentViewer(Folder):
         """get all gis places """          """get all gis places """
         return self.template.fulltextclient.getAllGisPlaces(**args)          return self.template.fulltextclient.getAllGisPlaces(**args)
                 
     def getTranslate(self, **args):      def getWordInfo(self, **args):
         """get translate"""          """get translate"""
         return self.template.fulltextclient.getTranslate(**args)          return self.template.fulltextclient.getWordInfo(**args)
   
     def getLemma(self, **args):      def getLemma(self, **args):
         """get lemma"""          """get lemma"""
Line 266  class documentViewer(Folder): Line 267  class documentViewer(Folder):
                                 
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)          return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
       
     
     security.declareProtected('View','index_html')      security.declareProtected('View','index_html')
     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):      def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
         '''          """
         view it          view page
         @param mode: defines how to access the document behind url   
         @param url: url which contains display information          @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)          @param mode: defines how to access the document behind url 
           @param viewMode: 'images': display images, 'text': display text, default is 'auto'
           @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)          @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
         @param characterNormalization type of text display (reg, norm, none)          """
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)  
         '''  
                   
         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))          logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))
                   
         if not hasattr(self, 'template'):          if not hasattr(self, 'template'):
             # this won't work              # this won't work
Line 294  class documentViewer(Folder): Line 295  class documentViewer(Folder):
             # get table of contents              # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)              docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
   
         # auto viewMode: text_dict if text else images          # auto viewMode: text if there is a text else images
         if viewMode=="auto":           if viewMode=="auto": 
             if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):               if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 
                 #texturl gesetzt und textViewer konfiguriert                  viewMode = "text"
                 viewMode="text_dict"                  viewType = "dict"
             else:              else:
                 viewMode="images"                  viewMode="images"
                                   
         pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)          elif viewMode == "text_dict":
               # legacy fix
               viewMode = "text"
               viewType = "dict"
                   
         if viewMode != 'images' and docinfo.get('textURLPath', None):          # stringify viewType
             # get full text page          if isinstance(viewType, list):
             page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)              logging.debug("index_html: viewType is list:%s"%viewType)
             pageinfo['textPage'] = page              viewType = ','.join([t for t in viewType if t])
                           
           pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)
                       
           # get template /template/viewer_$viewMode
           pt = getattr(self.template, 'viewer_%s'%viewMode, None)
           if pt is None:
               logging.error("No template for viewMode=%s!"%viewMode)
               # TODO: error page?
               return "No template for viewMode=%s!"%viewMode
                           
         # get template /template/viewer_main  
         pt = getattr(self.template, 'viewer_main')  
         # and execute with parameters          # and execute with parameters
         return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))          return pt(docinfo=docinfo, pageinfo=pageinfo)
       
     def generateMarks(self,mk):      def generateMarks(self,mk):
         ret=""          ret=""
Line 348  class documentViewer(Folder): Line 359  class documentViewer(Folder):
         else:          else:
             return style              return style
           
     def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):      def getParams(self, param=None, val=None, params=None, duplicates=None):
         """returns URL to documentviewer with parameter param set to val or from dict params"""          """returns dict with URL parameters.
           
           Takes URL parameters and additionally param=val or dict params.
           Deletes key if value is None."""
         # copy existing request params          # copy existing request params
         urlParams=self.REQUEST.form.copy()          newParams=self.REQUEST.form.copy()
         # change single param          # change single param
         if param is not None:          if param is not None:
             if val is None:              if val is None:
                 if urlParams.has_key(param):                  if newParams.has_key(param):
                     del urlParams[param]                      del newParams[param]
             else:              else:
                 urlParams[param] = str(val)                  newParams[param] = str(val)
                                   
         # change more params          # change more params
         if params is not None:          if params is not None:
             for k in params.keys():              for (k, v) in params.items():
                 v = params[k]  
                 if v is None:                  if v is None:
                     # val=None removes param                      # val=None removes param
                     if urlParams.has_key(k):                      if newParams.has_key(k):
                         del urlParams[k]                          del newParams[k]
                                                   
                 else:                  else:
                     urlParams[k] = v                      newParams[k] = v
   
         # FIXME: does this belong here?          if duplicates:
         if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath              # eliminate lists (coming from duplicate keys)
                 urlParams["mode"] = "imagepath"              for (k,v) in newParams.items():
                 urlParams["url"] = getParentDir(urlParams["url"])                  if isinstance(v, list):
                       if duplicates == 'comma':
                           # make comma-separated list of non-empty entries
                           newParams[k] = ','.join([t for t in v if t])
                       elif duplicates == 'first':
                           # take first non-empty entry
                           newParams[k] = [t for t in v if t][0]
                                   
           return newParams
       
       def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
           """returns URL to documentviewer with parameter param set to val or from dict params"""
           urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
         # quote values and assemble into query string (not escaping '/')          # quote values and assemble into query string (not escaping '/')
         ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])          ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
         #ps = urllib.urlencode(urlParams)  
         if baseUrl is None:          if baseUrl is None:
             baseUrl = self.REQUEST['URL1']              baseUrl = self.getDocumentViewerURL()
                           
         url = "%s?%s"%(baseUrl, ps)          url = "%s?%s"%(baseUrl, ps)
         return url          return url
   
       def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
     def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):  
         """link to documentviewer with parameter param set to val"""          """link to documentviewer with parameter param set to val"""
         return self.getLink(param, val, params, baseUrl, '&')          return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates)
       
           
     def getInfo_xml(self,url,mode):      def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""          """returns info about the document as XML"""
   
         if not self.digilibBaseUrl:          if not self.digilibBaseUrl:
             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"              self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                   
Line 401  class documentViewer(Folder): Line 423  class documentViewer(Folder):
         pt = getattr(self.template, 'info_xml')          pt = getattr(self.template, 'info_xml')
         return pt(docinfo=docinfo)          return pt(docinfo=docinfo)
   
     def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):  
         """returns new option state"""  
         if not self.REQUEST.SESSION.has_key(optionName):  
             # not in session -- initial  
             opt = {'lastState': newState, 'state': initialState}  
         else:  
             opt = self.REQUEST.SESSION.get(optionName)  
             if opt['lastState'] != newState:  
                 # state in session has changed -- toggle  
                 opt['state'] = not opt['state']  
                 opt['lastState'] = newState  
           
         self.REQUEST.SESSION[optionName] = opt  
         return opt['state']  
       
     def isAccessible(self, docinfo):      def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""          """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)          access = docinfo.get('accessType', None)
         logging.debug("documentViewer (accessOK) access type %s"%access)          logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':          if access == 'free':
             logging.debug("documentViewer (accessOK) access is free")              logging.debug("documentViewer (accessOK) access is free")
             return True              return True
           
         elif access is None or access in self.authgroups:          elif access is None or access in self.authgroups:
             # only local access -- only logged in users              # only local access -- only logged in users
             user = getSecurityManager().getUser()              user = getSecurityManager().getUser()
Line 437  class documentViewer(Folder): Line 445  class documentViewer(Folder):
         return False          return False
           
                                   
     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):  
         """gibt param von dlInfo aus"""  
         if docinfo is None:  
             docinfo = {}  
           
         for x in range(cut):  
             path=getParentDir(path)  
          
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path  
       
         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))  
           
         txt = getHttpData(infoUrl)  
         if txt is None:  
             raise IOError("Unable to get dir-info from %s"%(infoUrl))  
   
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         size=getText(dom.find("size"))  
         #sizes=dom.xpath("//dir/size")  
         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)  
           
         if size:  
             docinfo['numPages'] = int(size)  
         else:  
             docinfo['numPages'] = 0  
               
         # TODO: produce and keep list of image names and numbers  
                                                   
       def getDocinfo(self, mode, url):
           """returns docinfo depending on mode"""
           logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
           # look for cached docinfo in session
           if self.REQUEST.SESSION.has_key('docinfo'):
               docinfo = self.REQUEST.SESSION['docinfo']
               # check if its still current
               if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url:
                   logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys())
         return docinfo          return docinfo
           
     def getIndexMetaPath(self,url):          # new docinfo
         """gib nur den Pfad zurueck"""          docinfo = {'mode': mode, 'url': url}
         regexp = re.compile(r".*(experimental|permanent)/(.*)")          # add self url
         regpath = regexp.match(url)          docinfo['viewerUrl'] = self.getDocumentViewerURL()
         if (regpath==None):          docinfo['digilibBaseUrl'] = self.digilibBaseUrl
             return ""          # get index.meta DOM
         logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))                      docUrl = None
         return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))          metaDom = None
                if mode=="texttool": 
                   # url points to document dir or index.meta
                   metaDom = self.metadataService.getDomFromPathOrUrl(url)
     def getIndexMetaUrl(self,url):              docUrl = url.replace('/index.meta', '')
         """returns utr  of index.meta document at url"""              if metaDom is None:
                         raise IOError("Unable to find index.meta for mode=texttool!")
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:  
             # online path  
             server=self.digilibBaseUrl+"/servlet/Texter?fn="  
             metaUrl=server+url.replace("/mpiwg/online","")  
             if not metaUrl.endswith("index.meta"):  
                 metaUrl += "/index.meta"  
           
         return metaUrl  
       
     def getDomFromIndexMeta(self, url):  
         """get dom from index meta"""  
         dom = None  
         metaUrl = self.getIndexMetaUrl(url)  
                                   
         logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)          elif mode=="imagepath":
         txt=getHttpData(metaUrl)              # url points to folder with images, index.meta optional
         if txt is None:              # asssume index.meta in parent dir
             raise IOError("Unable to read index meta from %s"%(url))              docUrl = getParentPath(url)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
                   
         dom = ET.fromstring(txt)          elif mode=="filepath":
         #dom = Parse(txt)              # url points to image file, index.meta optional
         return dom              # asssume index.meta is two path segments up
               docUrl = getParentPath(url, 2)
               metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
           
     def getPresentationInfoXML(self, url):  
         """returns dom of info.xml document at url"""  
         dom = None  
         metaUrl = None  
         if url.startswith("http://"):  
             # real URL  
             metaUrl = url  
         else:          else:
             # online path              logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             server=self.digilibBaseUrl+"/servlet/Texter?fn="              raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
             metaUrl=server+url.replace("/mpiwg/online","")  
           
         txt=getHttpData(metaUrl)  
         if txt is None:  
             raise IOError("Unable to read infoXMLfrom %s"%(url))  
               
         dom = ET.fromstring(txt)  
         #dom = Parse(txt)  
         return dom  
                           
           
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets authorization info from the index.meta file at path or given by dom"""  
         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))  
                   
         access = None          docinfo['documentUrl'] = docUrl
           # process index.meta contents
           if metaDom is not None and metaDom.tag == 'resource':
               # document directory name and path
               resource = self.metadataService.getResourceData(dom=metaDom)
               if resource:
                   docinfo = self.getDocinfoFromResource(docinfo, resource)
   
               # texttool info
               texttool = self.metadataService.getTexttoolData(dom=metaDom)
               if texttool:
                   docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
               
               # bib info
               bib = self.metadataService.getBibData(dom=metaDom)
               if bib:
                   docinfo = self.getDocinfoFromBib(docinfo, bib)
               else:
                   # no bib - try info.xml
                   docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
                   
               # auth info
               access = self.metadataService.getAccessData(dom=metaDom)
               if access:
                   docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # attribution info
               attribution = self.metadataService.getAttributionData(dom=metaDom)
               if attribution:
                   logging.debug("getDocinfo: attribution=%s"%repr(attribution))
                   docinfo['attribution'] = attribution
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
               # copyright info
               copyright = self.metadataService.getCopyrightData(dom=metaDom)
               if copyright:
                   logging.debug("getDocinfo: copyright=%s"%repr(copyright))
                   docinfo['copyright'] = copyright
                   #docinfo = self.getDocinfoFromAccess(docinfo, access)
   
           # image path
           if mode != 'texttool':
               # override image path from texttool with url
               docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
                   
         if docinfo is None:  
             docinfo = {}  
                           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
                 
         acc = dom.find(".//access-conditions/access")          # number of images from digilib
         if acc is not None:          if docinfo.get('imagePath', None):
             acctype = acc.get('type')              docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
             #acctype = dom.xpath("//access-conditions/access/@type")              docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
             if acctype:  
                 access=acctype  
                 if access in ['group', 'institution']:  
                     access = dom.find(".//access-conditions/access/name").text.lower()  
                           
         docinfo['accessType'] = access          logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
           #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
           # store in session
           self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo          return docinfo
           
       def getDocinfoFromResource(self, docinfo, resource):
           """reads contents of resource element into docinfo"""
           docName = resource.get('name', None)
           docinfo['documentName'] = docName
           docPath = resource.get('archive-path', None)
           if docPath:
               # clean up document path
               if docPath[0] != '/':
                   docPath = '/' + docPath
                   
               if docName and (not docPath.endswith(docName)):
                   docPath += "/" + docName
               
           else:
               # use docUrl as docPath
               docUrl = docinfo['documentURL']
               if not docUrl.startswith('http:'):
                   docPath = docUrl
           if docPath:
               # fix URLs starting with /mpiwg/online
               docPath = docPath.replace('/mpiwg/online', '', 1)
                   
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):          docinfo['documentPath'] = docPath
         """gets bibliographical info from the index.meta file at path or given by dom"""          return docinfo
         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))  
           
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
           
         docinfo['indexMetaPath']=self.getIndexMetaPath(path);  
           
         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))  
         # put in all raw bib fields as dict "bib"  
         bib = getBibdataFromDom(dom)  
         docinfo['bib'] = bib  
           
         # extract some fields (author, title, year) according to their mapping  
         metaData=self.metadata.main.meta.bib  
         bibtype=bib.get("@type")  
         #bibtype=dom.xpath("//bib/@type")  
         if not bibtype:  
             bibtype="generic"  
               
         bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)  
         docinfo['bib_type'] = bibtype  
         bibmap=metaData.generateMappingForType(bibtype)  
         logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))  
         logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))  
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)  
         if len(bibmap) > 0 and bibmap.get('author',None) or bibmap.get('title',None):  
             try:  
                 docinfo['author']=bib.get(bibmap['author'][0])  
             except: pass  
             try:  
                 docinfo['title']=bib.get(bibmap['title'][0])  
             except: pass  
             try:  
                 docinfo['year']=bib.get(bibmap['year'][0])  
             except: pass  
               
             # ROC: why is this here?  
             #            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)  
             #            try:  
             #                docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])  
             #            except:  
             #                docinfo['lang']=''  
             #            try:  
             #                docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])  
             #            except:  
             #                docinfo['city']=''  
             #            try:  
             #                docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])  
             #            except:  
             #                docinfo['number_of_pages']=''  
             #            try:  
             #                docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])  
             #            except:  
             #                docinfo['series_volume']=''  
             #            try:  
             #                docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])  
             #            except:  
             #                docinfo['number_of_volumes']=''  
             #            try:  
             #                docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])  
             #            except:  
             #                docinfo['translator']=''  
             #            try:  
             #                docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])  
             #            except:  
             #                docinfo['edition']=''  
             #            try:  
             #                docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])  
             #            except:  
             #                docinfo['series_author']=''  
             #            try:  
             #                docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])  
             #            except:  
             #                docinfo['publisher']=''  
             #            try:  
             #                docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])  
             #            except:  
             #                docinfo['series_title']=''  
             #            try:  
             #                docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])  
             #            except:  
             #                docinfo['isbn_issn']=''             
         return docinfo  
       
       
     # TODO: is this needed?  
     def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):  
         """gets name info from the index.meta file at path or given by dom"""  
         if docinfo is None:  
             docinfo = {}  
           
         if dom is None:  
             for x in range(cut):  
                 path=getParentDir(path)  
             dom = self.getDomFromIndexMeta(path)  
   
         docinfo['name']=getText(dom.find("name"))  
         logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])  
         return docinfo  
       
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):  
         """parse texttool tag in index meta"""  
         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))  
         if docinfo is None:  
            docinfo = {}  
         if docinfo.get('lang', None) is None:  
             docinfo['lang'] = '' # default keine Sprache gesetzt  
         if dom is None:  
             dom = self.getDomFromIndexMeta(url)  
           
         archivePath = None  
         archiveName = None  
       
         archiveName = getText(dom.find("name"))  
         if not archiveName:  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))  
           
         archivePath = getText(dom.find("archive-path"))  
         if archivePath:  
             # clean up archive path  
             if archivePath[0] != '/':  
                 archivePath = '/' + archivePath  
             if archiveName and (not archivePath.endswith(archiveName)):  
                 archivePath += "/" + archiveName  
         else:  
             # try to get archive-path from url  
             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))  
             if (not url.startswith('http')):  
                 archivePath = url.replace('index.meta', '')  
                   
         if archivePath is None:  
             # we balk without archive-path  
             raise IOError("Missing archive-path (for text-tool) in %s" % (url))  
           
         imageDir = getText(dom.find(".//texttool/image"))  
               
         if not imageDir:  
             # we balk with no image tag / not necessary anymore because textmode is now standard  
             #raise IOError("No text-tool info in %s"%(url))  
             imageDir = ""  
             #xquery="//pb"    
             docinfo['imagePath'] = "" # keine Bilder  
             docinfo['imageURL'] = ""  
                           
         if imageDir and archivePath:      def getDocinfoFromTexttool(self, docinfo, texttool):
           """reads contents of texttool element into docinfo"""
           # image dir
           imageDir = texttool.get('image', None)
           docPath = docinfo.get('documentPath', None)
           if imageDir and docPath:
             #print "image: ", imageDir, " archivepath: ", archivePath              #print "image: ", imageDir, " archivepath: ", archivePath
             imageDir = os.path.join(archivePath, imageDir)              imageDir = os.path.join(docPath, imageDir)
             imageDir = imageDir.replace("/mpiwg/online", '')              imageDir = imageDir.replace('/mpiwg/online', '', 1)
             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)  
             docinfo['imagePath'] = imageDir              docinfo['imagePath'] = imageDir
                           
             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir  
               
         viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))  
         if viewerUrl:  
             docinfo['viewerURL'] = viewerUrl  
           
         # old style text URL          # old style text URL
         textUrl = getText(dom.find(".//texttool/text"))          textUrl = texttool.get('text', None)
         if textUrl:          if textUrl and docPath:
             if urlparse.urlparse(textUrl)[0] == "": #keine url              if urlparse.urlparse(textUrl)[0] == "": #keine url
                 textUrl = os.path.join(archivePath, textUrl)                   textUrl = os.path.join(docPath, textUrl) 
             # fix URLs starting with /mpiwg/online  
             if textUrl.startswith("/mpiwg/online"):  
                 textUrl = textUrl.replace("/mpiwg/online", '', 1)  
                           
             docinfo['textURL'] = textUrl              docinfo['textURL'] = textUrl
           
         # new style text-url-path          # new style text-url-path
         textUrl = getText(dom.find(".//texttool/text-url-path"))          textUrl = texttool.get('text-url-path', None)
         if textUrl:          if textUrl:
             docinfo['textURLPath'] = textUrl              docinfo['textURLPath'] = textUrl
             textUrlkurz = string.split(textUrl, ".")[0]  
             docinfo['textURLPathkurz'] = textUrlkurz  
             #if not docinfo['imagePath']:  
                 # text-only, no page images  
                 #docinfo = self.getNumTextPages(docinfo)  
                                       
           # page flow
           docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')
                     
         presentationUrl = getText(dom.find(".//texttool/presentation"))          # odd pages are left
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag          docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')
         # TODO: is this needed here?  
         docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)  
                   
           # number of title page (0: not defined)
           docinfo['titlePage'] = texttool.get('title-scan-no', 0)
                   
         if presentationUrl: # ueberschreibe diese durch presentation informationen           # old presentation stuff
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten          presentation = texttool.get('presentation', None)
              # durch den relativen Pfad auf die presentation infos          if presentation and docPath:
             presentationPath = presentationUrl              if presentation.startswith('http:'):
             if url.endswith("index.meta"):                   docinfo['presentationUrl'] = presentation
                 presentationUrl = url.replace('index.meta', presentationPath)  
             else:              else:
                 presentationUrl = url + "/" + presentationPath                  docinfo['presentationUrl'] = os.path.join(docPath, presentation)
                                   
             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)  
           
         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info          return docinfo
                   
       def getDocinfoFromBib(self, docinfo, bib):
           """reads contents of bib element into docinfo"""
           logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
           # put all raw bib fields in dict "bib"
           docinfo['bib'] = bib
           bibtype = bib.get('@type', None)
           docinfo['bibType'] = bibtype
           # also store DC metadata for convenience
           dc = self.metadataService.getDCMappedData(bib)
           docinfo['creator'] = dc.get('creator',None)
           docinfo['title'] = dc.get('title',None)
           docinfo['date'] = dc.get('date',None)
         return docinfo          return docinfo
         
       def getDocinfoFromAccess(self, docinfo, acc):
           """reads contents of access element into docinfo"""
           #TODO: also read resource type
           logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
           try:
               acctype = acc['@attr']['type']
               if acctype:
                   access=acctype
                   if access in ['group', 'institution']:
                       access = acc['name'].lower()
                   
                   docinfo['accessType'] = access
   
           except:
               pass
         
     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):  
         """gets the bibliographical information from the preseantion entry in texttools  
         """  
         dom=self.getPresentationInfoXML(url)  
         docinfo['author']=getText(dom.find(".//author"))  
         docinfo['title']=getText(dom.find(".//title"))  
         docinfo['year']=getText(dom.find(".//date"))  
         return docinfo          return docinfo
           
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):      def getDocinfoFromDigilib(self, docinfo, path):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""          infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))          # fetch data
         if docinfo is None:          txt = getHttpData(infoUrl)
             docinfo = {}          if not txt:
         path=path.replace("/mpiwg/online","")              logging.error("Unable to get dir-info from %s"%(infoUrl))
         docinfo['imagePath'] = path  
         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)  
           
         pathorig=path  
         for x in range(cut):         
                 path=getParentDir(path)  
         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)  
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path  
         docinfo['imageURL'] = imageUrl  
           
         #path ist the path to the images it assumes that the index.meta file is one level higher.  
         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)  
         return docinfo          return docinfo
           
           dom = ET.fromstring(txt)
           size = getText(dom.find("size"))
           logging.debug("getDocinfoFromDigilib: size=%s"%size)
           if size:
               docinfo['numPages'] = int(size)
           else:
               docinfo['numPages'] = 0
           
     def getDocinfo(self, mode, url):          # TODO: produce and keep list of image names and numbers
         """returns docinfo depending on mode"""  
         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))  
         # look for cached docinfo in session  
         if self.REQUEST.SESSION.has_key('docinfo'):  
             docinfo = self.REQUEST.SESSION['docinfo']  
             # check if its still current  
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:  
                 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())  
                 return docinfo                  return docinfo
                           
         # new docinfo              
         docinfo = {'mode': mode, 'url': url}      def getDocinfoFromPresentationInfoXml(self,docinfo):
         # add self url          """gets DC-like bibliographical information from the presentation entry in texttools"""
         docinfo['viewerUrl'] = self.getDocumentViewerURL()          url = docinfo.get('presentationUrl', None)
         if mode=="texttool":           if not url:
             # index.meta with texttool information              logging.error("getDocinfoFromPresentation: no URL!")
             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)              return docinfo
         elif mode=="imagepath":          
             # folder with images, index.meta optional          dom = None
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)          metaUrl = None
         elif mode=="filepath":          if url.startswith("http://"):
             # filename              # real URL
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)              metaUrl = url
         else:          else:
             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)              # online path
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))  
                                   
         # FIXME: fake texturlpath               server=self.digilibBaseUrl+"/servlet/Texter?fn="
         if not docinfo.has_key('textURLPath'):              metaUrl=server+url
             docinfo['textURLPath'] = None  
                   
         logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())          txt=getHttpData(metaUrl)
         #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)          if txt is None:
         self.REQUEST.SESSION['docinfo'] = docinfo              logging.error("Unable to read info.xml from %s"%(url))
               return docinfo
               
           dom = ET.fromstring(txt)
           docinfo['creator']=getText(dom.find(".//author"))
           docinfo['title']=getText(dom.find(".//title"))
           docinfo['date']=getText(dom.find(".//date"))
         return docinfo          return docinfo
                                 
     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):  
       def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
         """returns pageinfo with the given parameters"""          """returns pageinfo with the given parameters"""
           logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
         pageinfo = {}          pageinfo = {}
         current = getInt(current)          pageinfo['viewMode'] = viewMode
           pageinfo['viewType'] = viewType
           pageinfo['tocMode'] = tocMode
           
           current = getInt(current)
         pageinfo['current'] = current          pageinfo['current'] = current
         rows = int(rows or self.thumbrows)          rows = int(rows or self.thumbrows)
         pageinfo['rows'] = rows          pageinfo['rows'] = rows
Line 845  class documentViewer(Folder): Line 713  class documentViewer(Folder):
         pageinfo['cols'] = cols          pageinfo['cols'] = cols
         grpsize = cols * rows          grpsize = cols * rows
         pageinfo['groupsize'] = grpsize          pageinfo['groupsize'] = grpsize
           # if start is empty, use the first index of the thumbnail group that contains current
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))          start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))          # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start          pageinfo['start'] = start
         pageinfo['end'] = start + grpsize          pn = self.REQUEST.get('pn','1')
         if (docinfo is not None) and ('numPages' in docinfo):          pageinfo['pn'] = pn
             np = int(docinfo['numPages'])          np = int(docinfo.get('numPages', 0))
             pageinfo['end'] = min(pageinfo['end'], np)          if np == 0:
               # numPages unknown - maybe we can get it from text page
               if docinfo.get('textURLPath', None):
                   # cache text page as well
                   pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
                   np = int(docinfo.get('numPages', 0))
                   
             pageinfo['numgroups'] = int(np / grpsize)              pageinfo['numgroups'] = int(np / grpsize)
             if np % grpsize > 0:              if np % grpsize > 0:
                 pageinfo['numgroups'] += 1                          pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode  
         pageinfo['tocMode'] = tocMode          pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
           oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
           # add zeroth page for two columns
           pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
           pageinfo['pageZero'] = pageZero
           pageinfo['pageList'] = self.getPageList(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
                   
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')          pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
         #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')  
         pageinfo['query'] = self.REQUEST.get('query','')           pageinfo['query'] = self.REQUEST.get('query','') 
         pageinfo['queryType'] = self.REQUEST.get('queryType','')          pageinfo['queryType'] = self.REQUEST.get('queryType','')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')          pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
         pageinfo['textPN'] = self.REQUEST.get('textPN','1')  
         pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')          pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')          pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')          pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10))
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')               pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
         toc = int (pageinfo['tocPN'])          pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1'))
         pageinfo['textPages'] =int (toc)  
                   
           # limit tocPN
         if 'tocSize_%s'%tocMode in docinfo:          if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])              tocSize = docinfo['tocSize_%s'%tocMode]
             tocPageSize = int(pageinfo['tocPageSize'])              tocPageSize = pageinfo['tocPageSize']
             # cached toc                         # cached toc           
             if tocSize%tocPageSize>0:              if tocSize%tocPageSize>0:
                 tocPages=tocSize/tocPageSize+1                  tocPages=tocSize/tocPageSize+1
             else:              else:
                 tocPages=tocSize/tocPageSize                  tocPages=tocSize/tocPageSize
             pageinfo['tocPN'] = min (tocPages,toc)                                      
         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')              pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
         pageinfo['sn'] =self.REQUEST.get('sn','')              
         return pageinfo          return pageinfo
           
   
       def getPageList(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
           """returns array of page informations for one screenfull of thumbnails"""
           if maxIdx == 0:
               maxIdx = start + rows * cols
   
           pages = []
           if pageZero and start == 1:
               # correct beginning
               idx = 0
           else:
               idx = start
               
           for r in range(rows):
               row = []
               for c in range(cols):
                   if idx < minIdx or idx > maxIdx:
                       page = {'idx':None}
                   else:
                       page = {'idx':idx}
                       
                   idx += 1
                   if pageFlowLtr:
                       row.append(page)
                   else:
                       row.insert(0, page) 
                   
               pages.append(row)
               
           logging.debug("getPageList returns=%s"%(pages))
           return pages
           
   
       # Management form rendered from the page template zpt/changeDocumentViewer;
       # it is the action of the 'main config' entry in manage_options.
       # The declaration ties it to the 'View management screens' permission
       # (presumably `security` is the class's Zope ClassSecurityInfo object,
       # which is defined outside this view -- TODO confirm).
       security.declareProtected('View management screens','changeDocumentViewerForm')    
       changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
       
 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):  def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
         """init document viewer"""          """init document viewer"""
         self.title=title          self.title=title
Line 890  def changeDocumentViewer(self,title="",d Line 805  def changeDocumentViewer(self,title="",d
         self.thumbrows = thumbrows          self.thumbrows = thumbrows
         self.thumbcols = thumbcols          self.thumbcols = thumbcols
         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]          self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
           try:
               # assume MetaDataFolder instance is called metadata 
               self.metadataService = getattr(self, 'metadata')
           except Exception, e:
               logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))
   
         if RESPONSE is not None:          if RESPONSE is not None:
             RESPONSE.redirect('manage_main')              RESPONSE.redirect('manage_main')
                   

Removed from v.1.175.2.5  
changed lines
  Added in v.1.175.2.24


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>