documentViewer: documentViewer.py comparison

comparison documentViewer.py @ 465:224aad394350 elementtree

really works with new getDocinfo

author	casties
date	Fri, 29 Jul 2011 20:36:04 +0200
parents	19bd41d95f62
children	1641be8dc6b5

comparison

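Legend: equal, deleted, inserted, replaced. In the listing below, a leading space marks an equal (unchanged) line, "-" marks a deleted line, and "+" marks an inserted line; a replaced line appears as a "-" line immediately followed by its "+" replacement.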

-:19bd41d95f62
+:224aad394350
 viewMode="text"
 else:
 viewMode="images"
 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
 security.declareProtected('View','index_html')
 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
 """
 view it
 docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
 # auto viewMode: text_dict if text else images
 if viewMode=="auto":
 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
-#texturl gesetzt und textViewer konfiguriert
 viewMode="text_dict"
 else:
 viewMode="images"
 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
 """link to documentviewer with parameter param set to val"""
 return self.getLink(param, val, params, baseUrl, '&amp;')
 def getInfo_xml(self,url,mode):
 """returns info about the document as XML"""
 if not self.digilibBaseUrl:
 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
 docinfo = self.getDocinfo(mode=mode,url=url)
 pt = getattr(self.template, 'info_xml')
 return pt(docinfo=docinfo)
-def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
-"""returns new option state"""
-if not self.REQUEST.SESSION.has_key(optionName):
-# not in session -- initial
-opt = {'lastState': newState, 'state': initialState}
-else:
-opt = self.REQUEST.SESSION.get(optionName)
-if opt['lastState'] != newState:
-# state in session has changed -- toggle
-opt['state'] = not opt['state']
-opt['lastState'] = newState
-self.REQUEST.SESSION[optionName] = opt
-return opt['state']
 def isAccessible(self, docinfo):
 """returns if access to the resource is granted"""
 access = docinfo.get('accessType', None)
 logging.debug("documentViewer (accessOK) access type %s"%access)
-if access is not None and access == 'free':
+if access == 'free':
 logging.debug("documentViewer (accessOK) access is free")
 return True
 elif access is None or access in self.authgroups:
 # only local access -- only logged in users
 user = getSecurityManager().getUser()
 logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
 if user is not None:
 return False
 logging.error("documentViewer (accessOK) unknown access type %s"%access)
 return False
-def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
-"""gibt param von dlInfo aus"""
-if docinfo is None:
-docinfo = {}
-for x in range(cut):
-path=getParentPath(path)
-infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
-logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
-txt = getHttpData(infoUrl)
-if txt is None:
-raise IOError("Unable to get dir-info from %s"%(infoUrl))
-dom = ET.fromstring(txt)
-#dom = Parse(txt)
-size=getText(dom.find("size"))
-#sizes=dom.xpath("//dir/size")
-logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)
-if size:
-docinfo['numPages'] = int(size)
-else:
-docinfo['numPages'] = 0
-# TODO: produce and keep list of image names and numbers
-return docinfo
-def getIndexMetaPath(self,url):
-"""gib nur den Pfad zurueck"""
-regexp = re.compile(r".*(experimental|permanent)/(.*)")
-regpath = regexp.match(url)
-if (regpath==None):
-return ""
-logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
-return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))
-def getIndexMetaUrl(self,url):
-"""returns utr  of index.meta document at url"""
-metaUrl = None
-if url.startswith("http://"):
-# real URL
-metaUrl = url
-else:
-# online path
-server=self.digilibBaseUrl+"/servlet/Texter?fn="
-metaUrl=server+url.replace("/mpiwg/online","")
-if not metaUrl.endswith("index.meta"):
-metaUrl += "/index.meta"
-return metaUrl
-def getDomFromIndexMeta(self, url):
-"""get dom from index meta"""
-dom = None
-metaUrl = self.getIndexMetaUrl(url)
-logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
-txt=getHttpData(metaUrl)
-if txt is None:
-raise IOError("Unable to read index meta from %s"%(url))
-dom = ET.fromstring(txt)
-#dom = Parse(txt)
-return dom
-def getPresentationInfoXML(self, url):
-"""returns dom of info.xml document at url"""
-dom = None
-metaUrl = None
-if url.startswith("http://"):
-# real URL
-metaUrl = url
-else:
-# online path
-server=self.digilibBaseUrl+"/servlet/Texter?fn="
-metaUrl=server+url.replace("/mpiwg/online","")
-txt=getHttpData(metaUrl)
-if txt is None:
-raise IOError("Unable to read infoXMLfrom %s"%(url))
-dom = ET.fromstring(txt)
-#dom = Parse(txt)
-return dom
-def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-"""gets authorization info from the index.meta file at path or given by dom"""
-logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
-access = None
-if docinfo is None:
-docinfo = {}
-if dom is None:
-for x in range(cut):
-path=getParentPath(path)
-dom = self.getDomFromIndexMeta(path)
-acc = dom.find(".//access-conditions/access")
-if acc is not None:
-acctype = acc.get('type')
-#acctype = dom.xpath("//access-conditions/access/@type")
-if acctype:
-access=acctype
-if access in ['group', 'institution']:
-access = dom.find(".//access-conditions/access/name").text.lower()
-docinfo['accessType'] = access
-return docinfo
-def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-"""gets bibliographical info from the index.meta file at path or given by dom"""
-logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
-if docinfo is None:
-docinfo = {}
-if dom is None:
-for x in range(cut):
-path=getParentDir(path)
-dom = self.getDomFromIndexMeta(path)
-docinfo['indexMetaPath']=self.getIndexMetaPath(path);
-logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
-if self.metadataService is not None:
-# put all raw bib fields in dict "bib"
-bib = self.metadataService.getBibData(dom=dom)
-docinfo['bib'] = bib
-bibtype = bib.get('@type', None)
-docinfo['bib_type'] = bibtype
-# also store DC metadata for convenience
-dc = self.metadataService.getDCMappedData(bib)
-docinfo['creator'] = dc.get('creator',None)
-docinfo['title'] = dc.get('title',None)
-docinfo['date'] = dc.get('date',None)
-else:
-logging.error("MetadataService not found!")
-return docinfo
-# TODO: is this needed?
-def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
-"""gets name info from the index.meta file at path or given by dom"""
-if docinfo is None:
-docinfo = {}
-if dom is None:
-for x in range(cut):
-path=getParentPath(path)
-dom = self.getDomFromIndexMeta(path)
-docinfo['name']=getText(dom.find("name"))
-logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
-return docinfo
-def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
-"""parse texttool tag in index meta"""
-logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
-if docinfo is None:
-docinfo = {}
-if docinfo.get('lang', None) is None:
-docinfo['lang'] = '' # default keine Sprache gesetzt
-if dom is None:
-dom = self.getDomFromIndexMeta(url)
-texttool = self.metadata.getTexttoolData(dom=dom)
-archivePath = None
-archiveName = None
-archiveName = getText(dom.find("name"))
-if not archiveName:
-logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
-archivePath = getText(dom.find("archive-path"))
-if archivePath:
-# clean up archive path
-if archivePath[0] != '/':
-archivePath = '/' + archivePath
-if archiveName and (not archivePath.endswith(archiveName)):
-archivePath += "/" + archiveName
-else:
-# try to get archive-path from url
-logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
-if (not url.startswith('http')):
-archivePath = url.replace('index.meta', '')
-if archivePath is None:
-# we balk without archive-path
-raise IOError("Missing archive-path (for text-tool) in %s" % (url))
-imageDir = texttool.get('image', None)
-if not imageDir:
-# we balk with no image tag / not necessary anymore because textmode is now standard
-#raise IOError("No text-tool info in %s"%(url))
-imageDir = ""
-#xquery="//pb"
-docinfo['imagePath'] = "" # keine Bilder
-docinfo['imageURL'] = ""
-if imageDir and archivePath:
-#print "image: ", imageDir, " archivepath: ", archivePath
-imageDir = os.path.join(archivePath, imageDir)
-imageDir = imageDir.replace("/mpiwg/online", '')
-docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
-docinfo['imagePath'] = imageDir
-docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
-viewerUrl = texttool.get('digiliburlprefix', None)
-if viewerUrl:
-docinfo['viewerURL'] = viewerUrl
-# old style text URL
-textUrl = texttool.get('text', None)
-if textUrl:
-if urlparse.urlparse(textUrl)[0] == "": #keine url
-textUrl = os.path.join(archivePath, textUrl)
-# fix URLs starting with /mpiwg/online
-if textUrl.startswith("/mpiwg/online"):
-textUrl = textUrl.replace("/mpiwg/online", '', 1)
-docinfo['textURL'] = textUrl
-# new style text-url-path
-textUrl = texttool.get('text-url-path', None)
-if textUrl:
-docinfo['textURLPath'] = textUrl
-textUrlkurz = string.split(textUrl, ".")[0]
-docinfo['textURLPathkurz'] = textUrlkurz
-#if not docinfo['imagePath']:
-# text-only, no page images
-#docinfo = self.getNumTextPages(docinfo)
-# get bib info
-docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
-# TODO: is this needed here?
-docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
-# TODO: what to do with presentation?
-presentationUrl = texttool.get('presentation', None)
-if presentationUrl: # ueberschreibe diese durch presentation informationen
-# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
-# durch den relativen Pfad auf die presentation infos
-presentationPath = presentationUrl
-if url.endswith("index.meta"):
-presentationUrl = url.replace('index.meta', presentationPath)
-else:
-presentationUrl = url + "/" + presentationPath
-docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
-# get authorization
-docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
-return docinfo
-def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
-"""gets the bibliographical information from the preseantion entry in texttools
-"""
-dom=self.getPresentationInfoXML(url)
-docinfo['author']=getText(dom.find(".//author"))
-docinfo['title']=getText(dom.find(".//title"))
-docinfo['year']=getText(dom.find(".//date"))
-return docinfo
-def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
-"""path ist the path to the images it assumes that the index.meta file is one level higher."""
-logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
-if docinfo is None:
-docinfo = {}
-path=path.replace("/mpiwg/online","")
-docinfo['imagePath'] = path
-docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
-pathorig=path
-for x in range(cut):
-path=getParentPath(path)
-logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
-imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
-docinfo['imageURL'] = imageUrl
-#TODO: use getDocinfoFromIndexMeta
-#path ist the path to the images it assumes that the index.meta file is one level higher.
-docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
-docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
-return docinfo
-def OLDgetDocinfo(self, mode, url):
-"""returns docinfo depending on mode"""
-logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
-# look for cached docinfo in session
-if self.REQUEST.SESSION.has_key('docinfo'):
-docinfo = self.REQUEST.SESSION['docinfo']
-# check if its still current
-if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
-logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
-return docinfo
-# new docinfo
-docinfo = {'mode': mode, 'url': url}
-# add self url
-docinfo['viewerUrl'] = self.getDocumentViewerURL()
-if mode=="texttool":
-# index.meta with texttool information
-docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
-elif mode=="imagepath":
-# folder with images, index.meta optional
-docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
-elif mode=="filepath":
-# filename
-docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
-else:
-logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
-raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
-logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
-#logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
-# store in session
-self.REQUEST.SESSION['docinfo'] = docinfo
-return docinfo
 def getDocinfo(self, mode, url):
 """returns docinfo depending on mode"""
 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
 # bib info
 bib = self.metadataService.getBibData(dom=metaDom)
 if bib:
 docinfo = self.getDocinfoFromBib(docinfo, bib)
+else:
+# no bib - try info.xml
+docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
 # auth info
 access = self.metadataService.getAccessData(dom=metaDom)
 if access:
 docinfo = self.getDocinfoFromAccess(docinfo, access)
 # image path
 if mode != 'texttool':
 # override image path from texttool
-docinfo['imagePath'] = url
+docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)
 # number of images from digilib
 if docinfo.get('imagePath', None):
 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath']
 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath'])
 else:
 # use docUrl as docPath
 docUrl = docinfo['documentURL']
 if not docUrl.startswith('http:'):
 docPath = docUrl
+if docPath:
+# fix URLs starting with /mpiwg/online
+docPath = docPath.replace('/mpiwg/online', '', 1)
 docinfo['documentPath'] = docPath
 return docinfo
 def getDocinfoFromTexttool(self, docinfo, texttool):
 """reads contents of texttool element into docinfo"""
 # old style text URL
 textUrl = texttool.get('text', None)
 if textUrl and docPath:
 if urlparse.urlparse(textUrl)[0] == "": #keine url
 textUrl = os.path.join(docPath, textUrl)
-# fix URLs starting with /mpiwg/online
-textUrl = textUrl.replace('/mpiwg/online', '', 1)
 docinfo['textURL'] = textUrl
 # new style text-url-path
 textUrl = texttool.get('text-url-path', None)
 #docinfo['textURLPathkurz'] = textUrlkurz
 # old presentation stuff
 presentation = texttool.get('presentation', None)
 if presentation and docPath:
-docinfo['presentationPath'] = os.path.join(docPath, presentation)
+if presentation.startswith('http:'):
+docinfo['presentationUrl'] = presentation
+else:
+docinfo['presentationUrl'] = os.path.join(docPath, presentation)
 return docinfo
 def getDocinfoFromBib(self, docinfo, bib):
 """reads contents of bib element into docinfo"""
+logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
 # put all raw bib fields in dict "bib"
 docinfo['bib'] = bib
 bibtype = bib.get('@type', None)
 docinfo['bibType'] = bibtype
 # also store DC metadata for convenience
 return docinfo
 def getDocinfoFromAccess(self, docinfo, acc):
 """reads contents of access element into docinfo"""
 #TODO: also read resource type
+logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
 try:
-acctype = accc['@attr']['type']
+acctype = acc['@attr']['type']
 if acctype:
 access=acctype
 if access in ['group', 'institution']:
 access = acc['name'].lower()
 # TODO: produce and keep list of image names and numbers
 return docinfo
+def getDocinfoFromPresentationInfoXml(self,docinfo):
+"""gets DC-like bibliographical information from the presentation entry in texttools"""
+url = docinfo.get('presentationUrl', None)
+if not url:
+logging.error("getDocinfoFromPresentation: no URL!")
+return docinfo
+dom = None
+metaUrl = None
+if url.startswith("http://"):
+# real URL
+metaUrl = url
+else:
+# online path
+server=self.digilibBaseUrl+"/servlet/Texter?fn="
+metaUrl=server+url
+txt=getHttpData(metaUrl)
+if txt is None:
+logging.error("Unable to read info.xml from %s"%(url))
+return docinfo
+dom = ET.fromstring(txt)
+docinfo['creator']=getText(dom.find(".//author"))
+docinfo['title']=getText(dom.find(".//title"))
+docinfo['date']=getText(dom.find(".//date"))
+return docinfo
 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
 """returns pageinfo with the given parameters"""
 pageinfo = {}
 current = getInt(current)

Mercurial > hg > documentViewer

comparison documentViewer.py @ 465:224aad394350 elementtree