|
|
| version 1.175.2.11, 2011/07/29 16:27:24 | version 1.175.2.12, 2011/07/29 18:36:04 |
|---|---|
| Line 264 class documentViewer(Folder): | Line 264 class documentViewer(Folder): |
| return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
| security.declareProtected('View','index_html') | security.declareProtected('View','index_html') |
| def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): | def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
| """ | """ |
| Line 293 class documentViewer(Folder): | Line 294 class documentViewer(Folder): |
| # auto viewMode: text_dict if text else images | # auto viewMode: text_dict if text else images |
| if viewMode=="auto": | if viewMode=="auto": |
| if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): | if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): |
| #texturl gesetzt und textViewer konfiguriert | |
| viewMode="text_dict" | viewMode="text_dict" |
| else: | else: |
| viewMode="images" | viewMode="images" |
| Line 389 class documentViewer(Folder): | Line 389 class documentViewer(Folder): |
| def getInfo_xml(self,url,mode): | def getInfo_xml(self,url,mode): |
| """returns info about the document as XML""" | """returns info about the document as XML""" |
| if not self.digilibBaseUrl: | if not self.digilibBaseUrl: |
| self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
| Line 397 class documentViewer(Folder): | Line 396 class documentViewer(Folder): |
| pt = getattr(self.template, 'info_xml') | pt = getattr(self.template, 'info_xml') |
| return pt(docinfo=docinfo) | return pt(docinfo=docinfo) |
| def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True): | |
| """returns new option state""" | |
| if not self.REQUEST.SESSION.has_key(optionName): | |
| # not in session -- initial | |
| opt = {'lastState': newState, 'state': initialState} | |
| else: | |
| opt = self.REQUEST.SESSION.get(optionName) | |
| if opt['lastState'] != newState: | |
| # state in session has changed -- toggle | |
| opt['state'] = not opt['state'] | |
| opt['lastState'] = newState | |
| self.REQUEST.SESSION[optionName] = opt | |
| return opt['state'] | |
| def isAccessible(self, docinfo): | def isAccessible(self, docinfo): |
| """returns if access to the resource is granted""" | """returns if access to the resource is granted""" |
| access = docinfo.get('accessType', None) | access = docinfo.get('accessType', None) |
| logging.debug("documentViewer (accessOK) access type %s"%access) | logging.debug("documentViewer (accessOK) access type %s"%access) |
| if access is not None and access == 'free': | if access == 'free': |
| logging.debug("documentViewer (accessOK) access is free") | logging.debug("documentViewer (accessOK) access is free") |
| return True | return True |
| elif access is None or access in self.authgroups: | elif access is None or access in self.authgroups: |
| # only local access -- only logged in users | # only local access -- only logged in users |
| user = getSecurityManager().getUser() | user = getSecurityManager().getUser() |
| Line 433 class documentViewer(Folder): | Line 418 class documentViewer(Folder): |
| return False | return False |
| def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): | |
| """gibt param von dlInfo aus""" | |
| if docinfo is None: | |
| docinfo = {} | |
| for x in range(cut): | |
| path=getParentPath(path) | |
| infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | |
| logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) | |
| txt = getHttpData(infoUrl) | |
| if txt is None: | |
| raise IOError("Unable to get dir-info from %s"%(infoUrl)) | |
| dom = ET.fromstring(txt) | |
| #dom = Parse(txt) | |
| size=getText(dom.find("size")) | |
| #sizes=dom.xpath("//dir/size") | |
| logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size) | |
| if size: | |
| docinfo['numPages'] = int(size) | |
| else: | |
| docinfo['numPages'] = 0 | |
| # TODO: produce and keep list of image names and numbers | |
| return docinfo | |
| def getIndexMetaPath(self,url): | |
| """gib nur den Pfad zurueck""" | |
| regexp = re.compile(r".*(experimental|permanent)/(.*)") | |
| regpath = regexp.match(url) | |
| if (regpath==None): | |
| return "" | |
| logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) | |
| return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) | |
| def getIndexMetaUrl(self,url): | |
| """returns utr of index.meta document at url""" | |
| metaUrl = None | |
| if url.startswith("http://"): | |
| # real URL | |
| metaUrl = url | |
| else: | |
| # online path | |
| server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
| metaUrl=server+url.replace("/mpiwg/online","") | |
| if not metaUrl.endswith("index.meta"): | |
| metaUrl += "/index.meta" | |
| return metaUrl | |
| def getDomFromIndexMeta(self, url): | |
| """get dom from index meta""" | |
| dom = None | |
| metaUrl = self.getIndexMetaUrl(url) | |
| logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) | |
| txt=getHttpData(metaUrl) | |
| if txt is None: | |
| raise IOError("Unable to read index meta from %s"%(url)) | |
| dom = ET.fromstring(txt) | |
| #dom = Parse(txt) | |
| return dom | |
| def getPresentationInfoXML(self, url): | |
| """returns dom of info.xml document at url""" | |
| dom = None | |
| metaUrl = None | |
| if url.startswith("http://"): | |
| # real URL | |
| metaUrl = url | |
| else: | |
| # online path | |
| server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
| metaUrl=server+url.replace("/mpiwg/online","") | |
| txt=getHttpData(metaUrl) | |
| if txt is None: | |
| raise IOError("Unable to read infoXMLfrom %s"%(url)) | |
| dom = ET.fromstring(txt) | |
| #dom = Parse(txt) | |
| return dom | |
| def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
| """gets authorization info from the index.meta file at path or given by dom""" | |
| logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) | |
| access = None | |
| if docinfo is None: | |
| docinfo = {} | |
| if dom is None: | |
| for x in range(cut): | |
| path=getParentPath(path) | |
| dom = self.getDomFromIndexMeta(path) | |
| acc = dom.find(".//access-conditions/access") | |
| if acc is not None: | |
| acctype = acc.get('type') | |
| #acctype = dom.xpath("//access-conditions/access/@type") | |
| if acctype: | |
| access=acctype | |
| if access in ['group', 'institution']: | |
| access = dom.find(".//access-conditions/access/name").text.lower() | |
| docinfo['accessType'] = access | |
| return docinfo | |
| def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
| """gets bibliographical info from the index.meta file at path or given by dom""" | |
| logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) | |
| if docinfo is None: | |
| docinfo = {} | |
| if dom is None: | |
| for x in range(cut): | |
| path=getParentDir(path) | |
| dom = self.getDomFromIndexMeta(path) | |
| docinfo['indexMetaPath']=self.getIndexMetaPath(path); | |
| logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) | |
| if self.metadataService is not None: | |
| # put all raw bib fields in dict "bib" | |
| bib = self.metadataService.getBibData(dom=dom) | |
| docinfo['bib'] = bib | |
| bibtype = bib.get('@type', None) | |
| docinfo['bib_type'] = bibtype | |
| # also store DC metadata for convenience | |
| dc = self.metadataService.getDCMappedData(bib) | |
| docinfo['creator'] = dc.get('creator',None) | |
| docinfo['title'] = dc.get('title',None) | |
| docinfo['date'] = dc.get('date',None) | |
| else: | |
| logging.error("MetadataService not found!") | |
| return docinfo | |
| # TODO: is this needed? | |
| def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
| """gets name info from the index.meta file at path or given by dom""" | |
| if docinfo is None: | |
| docinfo = {} | |
| if dom is None: | |
| for x in range(cut): | |
| path=getParentPath(path) | |
| dom = self.getDomFromIndexMeta(path) | |
| docinfo['name']=getText(dom.find("name")) | |
| logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) | |
| return docinfo | |
| def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): | |
| """parse texttool tag in index meta""" | |
| logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) | |
| if docinfo is None: | |
| docinfo = {} | |
| if docinfo.get('lang', None) is None: | |
| docinfo['lang'] = '' # default keine Sprache gesetzt | |
| if dom is None: | |
| dom = self.getDomFromIndexMeta(url) | |
| texttool = self.metadata.getTexttoolData(dom=dom) | |
| archivePath = None | |
| archiveName = None | |
| archiveName = getText(dom.find("name")) | |
| if not archiveName: | |
| logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) | |
| archivePath = getText(dom.find("archive-path")) | |
| if archivePath: | |
| # clean up archive path | |
| if archivePath[0] != '/': | |
| archivePath = '/' + archivePath | |
| if archiveName and (not archivePath.endswith(archiveName)): | |
| archivePath += "/" + archiveName | |
| else: | |
| # try to get archive-path from url | |
| logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) | |
| if (not url.startswith('http')): | |
| archivePath = url.replace('index.meta', '') | |
| if archivePath is None: | |
| # we balk without archive-path | |
| raise IOError("Missing archive-path (for text-tool) in %s" % (url)) | |
| imageDir = texttool.get('image', None) | |
| if not imageDir: | |
| # we balk with no image tag / not necessary anymore because textmode is now standard | |
| #raise IOError("No text-tool info in %s"%(url)) | |
| imageDir = "" | |
| #xquery="//pb" | |
| docinfo['imagePath'] = "" # keine Bilder | |
| docinfo['imageURL'] = "" | |
| if imageDir and archivePath: | |
| #print "image: ", imageDir, " archivepath: ", archivePath | |
| imageDir = os.path.join(archivePath, imageDir) | |
| imageDir = imageDir.replace("/mpiwg/online", '') | |
| docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) | |
| docinfo['imagePath'] = imageDir | |
| docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir | |
| viewerUrl = texttool.get('digiliburlprefix', None) | |
| if viewerUrl: | |
| docinfo['viewerURL'] = viewerUrl | |
| # old style text URL | |
| textUrl = texttool.get('text', None) | |
| if textUrl: | |
| if urlparse.urlparse(textUrl)[0] == "": #keine url | |
| textUrl = os.path.join(archivePath, textUrl) | |
| # fix URLs starting with /mpiwg/online | |
| if textUrl.startswith("/mpiwg/online"): | |
| textUrl = textUrl.replace("/mpiwg/online", '', 1) | |
| docinfo['textURL'] = textUrl | |
| # new style text-url-path | |
| textUrl = texttool.get('text-url-path', None) | |
| if textUrl: | |
| docinfo['textURLPath'] = textUrl | |
| textUrlkurz = string.split(textUrl, ".")[0] | |
| docinfo['textURLPathkurz'] = textUrlkurz | |
| #if not docinfo['imagePath']: | |
| # text-only, no page images | |
| #docinfo = self.getNumTextPages(docinfo) | |
| # get bib info | |
| docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag | |
| # TODO: is this needed here? | |
| docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) | |
| # TODO: what to do with presentation? | |
| presentationUrl = texttool.get('presentation', None) | |
| if presentationUrl: # ueberschreibe diese durch presentation informationen | |
| # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten | |
| # durch den relativen Pfad auf die presentation infos | |
| presentationPath = presentationUrl | |
| if url.endswith("index.meta"): | |
| presentationUrl = url.replace('index.meta', presentationPath) | |
| else: | |
| presentationUrl = url + "/" + presentationPath | |
| docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) | |
| # get authorization | |
| docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info | |
| return docinfo | |
| def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | |
| """gets the bibliographical information from the preseantion entry in texttools | |
| """ | |
| dom=self.getPresentationInfoXML(url) | |
| docinfo['author']=getText(dom.find(".//author")) | |
| docinfo['title']=getText(dom.find(".//title")) | |
| docinfo['year']=getText(dom.find(".//date")) | |
| return docinfo | |
| def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): | |
| """path ist the path to the images it assumes that the index.meta file is one level higher.""" | |
| logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) | |
| if docinfo is None: | |
| docinfo = {} | |
| path=path.replace("/mpiwg/online","") | |
| docinfo['imagePath'] = path | |
| docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) | |
| pathorig=path | |
| for x in range(cut): | |
| path=getParentPath(path) | |
| logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) | |
| imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path | |
| docinfo['imageURL'] = imageUrl | |
| #TODO: use getDocinfoFromIndexMeta | |
| #path ist the path to the images it assumes that the index.meta file is one level higher. | |
| docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | |
| docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | |
| return docinfo | |
| def OLDgetDocinfo(self, mode, url): | |
| """returns docinfo depending on mode""" | |
| logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) | |
| # look for cached docinfo in session | |
| if self.REQUEST.SESSION.has_key('docinfo'): | |
| docinfo = self.REQUEST.SESSION['docinfo'] | |
| # check if its still current | |
| if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: | |
| logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys()) | |
| return docinfo | |
| # new docinfo | |
| docinfo = {'mode': mode, 'url': url} | |
| # add self url | |
| docinfo['viewerUrl'] = self.getDocumentViewerURL() | |
| if mode=="texttool": | |
| # index.meta with texttool information | |
| docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) | |
| elif mode=="imagepath": | |
| # folder with images, index.meta optional | |
| docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) | |
| elif mode=="filepath": | |
| # filename | |
| docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) | |
| else: | |
| logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) | |
| raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) | |
| logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) | |
| #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) | |
| # store in session | |
| self.REQUEST.SESSION['docinfo'] = docinfo | |
| return docinfo | |
| def getDocinfo(self, mode, url): | def getDocinfo(self, mode, url): |
| """returns docinfo depending on mode""" | """returns docinfo depending on mode""" |
| Line 829 class documentViewer(Folder): | Line 477 class documentViewer(Folder): |
| bib = self.metadataService.getBibData(dom=metaDom) | bib = self.metadataService.getBibData(dom=metaDom) |
| if bib: | if bib: |
| docinfo = self.getDocinfoFromBib(docinfo, bib) | docinfo = self.getDocinfoFromBib(docinfo, bib) |
| else: | |
| # no bib - try info.xml | |
| docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) | |
| # auth info | # auth info |
| access = self.metadataService.getAccessData(dom=metaDom) | access = self.metadataService.getAccessData(dom=metaDom) |
| Line 838 class documentViewer(Folder): | Line 489 class documentViewer(Folder): |
| # image path | # image path |
| if mode != 'texttool': | if mode != 'texttool': |
| # override image path from texttool | # override image path from texttool |
| docinfo['imagePath'] = url | docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) |
| # number of images from digilib | # number of images from digilib |
| if docinfo.get('imagePath', None): | if docinfo.get('imagePath', None): |
| Line 869 class documentViewer(Folder): | Line 520 class documentViewer(Folder): |
| docUrl = docinfo['documentURL'] | docUrl = docinfo['documentURL'] |
| if not docUrl.startswith('http:'): | if not docUrl.startswith('http:'): |
| docPath = docUrl | docPath = docUrl |
| if docPath: | |
| # fix URLs starting with /mpiwg/online | |
| docPath = docPath.replace('/mpiwg/online', '', 1) | |
| docinfo['documentPath'] = docPath | docinfo['documentPath'] = docPath |
| return docinfo | return docinfo |
| Line 889 class documentViewer(Folder): | Line 543 class documentViewer(Folder): |
| if textUrl and docPath: | if textUrl and docPath: |
| if urlparse.urlparse(textUrl)[0] == "": #keine url | if urlparse.urlparse(textUrl)[0] == "": #keine url |
| textUrl = os.path.join(docPath, textUrl) | textUrl = os.path.join(docPath, textUrl) |
| # fix URLs starting with /mpiwg/online | |
| textUrl = textUrl.replace('/mpiwg/online', '', 1) | |
| docinfo['textURL'] = textUrl | docinfo['textURL'] = textUrl |
| Line 905 class documentViewer(Folder): | Line 557 class documentViewer(Folder): |
| # old presentation stuff | # old presentation stuff |
| presentation = texttool.get('presentation', None) | presentation = texttool.get('presentation', None) |
| if presentation and docPath: | if presentation and docPath: |
| docinfo['presentationPath'] = os.path.join(docPath, presentation) | if presentation.startswith('http:'): |
| docinfo['presentationUrl'] = presentation | |
| else: | |
| docinfo['presentationUrl'] = os.path.join(docPath, presentation) | |
| return docinfo | return docinfo |
| def getDocinfoFromBib(self, docinfo, bib): | def getDocinfoFromBib(self, docinfo, bib): |
| """reads contents of bib element into docinfo""" | """reads contents of bib element into docinfo""" |
| logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) | |
| # put all raw bib fields in dict "bib" | # put all raw bib fields in dict "bib" |
| docinfo['bib'] = bib | docinfo['bib'] = bib |
| bibtype = bib.get('@type', None) | bibtype = bib.get('@type', None) |
| Line 925 class documentViewer(Folder): | Line 581 class documentViewer(Folder): |
| def getDocinfoFromAccess(self, docinfo, acc): | def getDocinfoFromAccess(self, docinfo, acc): |
| """reads contents of access element into docinfo""" | """reads contents of access element into docinfo""" |
| #TODO: also read resource type | #TODO: also read resource type |
| logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) | |
| try: | try: |
| acctype = accc['@attr']['type'] | acctype = acc['@attr']['type'] |
| if acctype: | if acctype: |
| access=acctype | access=acctype |
| if access in ['group', 'institution']: | if access in ['group', 'institution']: |
| Line 959 class documentViewer(Folder): | Line 616 class documentViewer(Folder): |
| return docinfo | return docinfo |
| def getDocinfoFromPresentationInfoXml(self,docinfo): | |
| """gets DC-like bibliographical information from the presentation entry in texttools""" | |
| url = docinfo.get('presentationUrl', None) | |
| if not url: | |
| logging.error("getDocinfoFromPresentation: no URL!") | |
| return docinfo | |
| dom = None | |
| metaUrl = None | |
| if url.startswith("http://"): | |
| # real URL | |
| metaUrl = url | |
| else: | |
| # online path | |
| server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
| metaUrl=server+url | |
| txt=getHttpData(metaUrl) | |
| if txt is None: | |
| logging.error("Unable to read info.xml from %s"%(url)) | |
| return docinfo | |
| dom = ET.fromstring(txt) | |
| docinfo['creator']=getText(dom.find(".//author")) | |
| docinfo['title']=getText(dom.find(".//title")) | |
| docinfo['date']=getText(dom.find(".//date")) | |
| return docinfo | |
| def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): | def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
| """returns pageinfo with the given parameters""" | """returns pageinfo with the given parameters""" |
| pageinfo = {} | pageinfo = {} |