Mercurial > hg > MetaDataProvider
view MetaDataFolder.py @ 31:ab58edfc0707
more parameters and more comments.
author | casties |
---|---|
date | Mon, 01 Oct 2012 18:17:29 +0200 |
parents | b3428e281ee2 |
children | 1f845c76dad3 |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urlparse
import logging
import xml.etree.ElementTree as ET

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText


def normalizeBibField(bt, underscore=True):
    """returns normalised bib type for looking up mappings.

    The name is stripped of surrounding whitespace, spaces are replaced
    by '-' and the result is lower-cased.

    @param bt: bib type or field name
    @param underscore: if true, '_' is replaced by '-' as well
    @returns: normalised name
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')

    return bt


def toString(list):
    """returns the unicode representations of all items concatenated.

    @param list: iterable of objects (the parameter name shadows the builtin
                 but is kept so keyword-argument callers keep working)
    @returns: unicode string, u"" for an empty iterable
    """
    # single join instead of repeated += on a growing string
    return u"".join([unicode(l) for l in list])


class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures"""

    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataFolderForm'},
    )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self, id, title='', metaDataServerUrl=None):
        """initialize a new instance.

        @param id: id of the new object
        @param title: title of the new object
        @param metaDataServerUrl: URL pattern of the metadata server
            (%s replaced by file path). The class default is kept when empty.
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self, path):
        """returns contents of metadata file from server as text.

        @param path: either a full URL (anything with a schema, used as is)
            or a file path that is resolved through metaDataServerUrl
        @returns: result of getHttpData for the resolved URL, or None if
            path is empty or the request failed
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl % path
            if not path.endswith("index.meta"):
                # append the file name with an explicit '/': os.path.join
                # would use the platform separator, which is wrong for URLs
                if url and not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        try:
            return getHttpData(url)
        except Exception:
            # best effort: failure is reported to the caller as None, but the
            # traceback is kept in the log. KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            logging.error("getMDFromPathOrUrl: unable to get data! (url=%s)", url, exc_info=True)
            return None

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path.

        @param path: file or url path to metadata file
        @returns: root element parsed by ElementTree, or None if no data
            could be read
        """
        data = self.getMDFromPathOrUrl(path)
        if not data:
            return None

        return ET.fromstring(data)

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath.

        @param xmlpath: xml path to selected elements
        @returns: object found by restrictedTraverse, or None if xmlpath is
            empty or nothing is found
        """
        if not xmlpath:
            # nothing to look up; also avoids an IndexError on xmlpath[0]
            return None

        # make xmlpath relative for Zope
        if xmlpath[0] == '/':
            xmlpath = xmlpath[1:]

        return self.restrictedTraverse(xmlpath, None)

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict.

        @param xmlpath: xml path to selected elements
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            None if no MetaData object exists at xmlpath
        """
        # call trace only -- debug level, like the getBibFormatted* methods
        logging.debug("getXmlPathData(%s)", xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!", xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath formatted with template.

        @param xmlpath: xml path to selected elements
        @param template: name of template for data
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param data: data to format; read via getData when None
        @param allFields: passed on to the MetaData object's getFormatted
        @param all: put contents of tags with the same name in list value;
            each list element is formatted separately
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: formatted string (with all=True the formatted elements,
            each followed by a newline); '' if the MetaData object or its
            data is missing
        """
        # call trace only -- debug level, like the getBibFormatted* methods
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)", xmlpath, template)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!", xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate formatted strings
            return ''.join([
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data
            ])

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom, recursive=recursive, all=all)

    def getAccessData(self, path=None, dom=None):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getAttributionData(self, path=None, dom=None, all=True):
        """returns contents of attribution tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag as dict"""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """returns formatted contents of copyright tag"""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, all=True):
        """returns contents of context tag as dict"""
        # NOTE: unlike the copyright methods no recursive argument is passed,
        # so getXmlPathData's default (recursive=0) applies here
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, all=all)

    def getDRI(self, path=None, dom=None, type="escidoc"):
        """returns the DRI of the document.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param type: value of the type attribute of the wanted dri element.
            None takes the first dri element regardless of type.
        @returns: text of the selected dri element, or None if there is no
            dri data or no element matches
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        if dris is None:
            return None

        for dri in dris:
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None

    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCDataFromPath(self, path):
        """returns DC mapped data from path to index.meta"""
        return self.resource.meta.bib.getDCDataFromPath(path)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabel(self, path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)", path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder', globals())

    # NOTE(review): this declares a configuration-changing method public.
    # Consider declareProtected with a management permission -- left as is
    # because existing callers may depend on the current access.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self, title, metaDataServerUrl, RESPONSE=None):
        """Change MetaDataFolder.

        @param title: new title
        @param metaDataServerUrl: new URL pattern of the metadata server
        @param RESPONSE: if given, redirected to 'manage_main' afterwards
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form"""
    pt = PageTemplateFile('zpt/addMetadataFolderForm', globals()).__of__(self)
    return pt()


def manage_addMetaDataFolder(self, id, title, RESPONSE=None):
    """add a MetaDataFolder object.

    @param id: id of the new MetaDataFolder
    @param title: title of the new MetaDataFolder
    @param RESPONSE: if given, redirected to 'manage_main' afterwards
    """
    newObj = MetaDataFolder(id, title)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')