Mercurial > hg > MetaDataProvider
view MetaDataFolder.py @ 26:a19575be96e8
getDRI reshuffled; now works on Python 2.6.
author | casties |
---|---|
date | Mon, 30 Jul 2012 19:43:23 +0200 |
parents | 64b703d1b8a4 |
children | a0d273542509 |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import posixpath
import sys
import urlparse
import logging
import xml.etree.ElementTree as ET
# imported explicitly: "import xml.etree.ElementTree as ET" binds only ET,
# so xml.sax.saxutils.escape() would otherwise fail with a NameError
import xml.sax.saxutils

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText


def normalizeBibField(bt, underscore=True):
    """returns normalised bib type for looking up mappings

    Surrounding whitespace is stripped, spaces are replaced by '-' and the
    result is lower-cased. If underscore is true, '_' is replaced by '-' too.
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')

    return bt


def OLDgetBibdataFromDom(dom):
    """returns dict with all elements from bib-tag

    The normalised type attribute is stored under the key '@type'; the key is
    left out when the bib element has no type attribute. Every child element
    is stored under its normalised tag name with its text as value. Returns an
    empty dict when the DOM has no meta/bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is not None:
        # put type in @type
        bibtype = bib.get('type')
        if bibtype is not None:
            # a missing attribute used to crash in normalizeBibField(None)
            bibinfo['@type'] = normalizeBibField(bibtype)

        # put all subelements in dict
        for e in bib:
            bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo


def toString(list):
    """returns the unicode concatenation of all items in list"""
    return u"".join(unicode(l) for l in list)


def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: String containing DC metadata information, currently only
                  in the format produced by getDCFormatted of this module.
    @return: dict with the keys "title", "creator" and "date", or a dict with
             the single key "error" holding mdSet when parsing failed
             (including the case that the amara library is not installed).
    """
    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    import StringIO
    mdBuffer = StringIO.StringIO(mdSet)
    try:
        # amara was used without ever being imported; import it here so the
        # module itself still loads on systems without amara
        import amara
        md = amara.parse(mdBuffer, prefixes=NSS)
    except Exception:
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures"""

    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s replaced by file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self,id,title='',metaDataServerUrl=None):
        """initialize a new instance"""
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text

        path is either a complete URL (anything with a scheme) that is fetched
        as it is, or a file path that is inserted into metaDataServerUrl,
        with 'index.meta' appended unless the path already ends with it.
        Returns None if path is empty or the data could not be fetched.
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl%path
            else:
                # posixpath: URLs always use '/', whatever the platform is
                url = posixpath.join(self.metaDataServerUrl%path,'index.meta')

        try:
            return getHttpData(url)
        except Exception:
            logging.error("getMDFromPathOrUrl: unable to get data!", exc_info=True)
            return None

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path

        Returns None if no data could be read.
        """
        dom = None
        data = self.getMDFromPathOrUrl(path)
        if data:
            dom = ET.fromstring(data)

        return dom

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath

        Returns None if xmlpath is empty or nothing is found at that path.
        """
        if not xmlpath:
            # xmlpath[0] used to raise IndexError for an empty path
            return None

        # make xmlpath relative for Zope
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        obj = self.restrictedTraverse(xmlpath, None)
        return obj

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict

        The work is done by getData() of the object found at xmlpath.
        Returns None if there is no object at xmlpath.
        """
        # call trace only - this is not an error condition
        logging.debug("getXmlPathData(%s)", xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is not None:
            return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)
        else:
            logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
            return None

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath formatted with template

        If data is None it is read with getData() of the object at xmlpath.
        If all is true, data is treated as a list of elements and the
        formatted elements are concatenated, each followed by a newline.
        Returns '' if there is no object at xmlpath or no data.
        """
        # call trace only - this is not an error condition
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)", xmlpath, template)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData element at '%s' not found!"%xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)
            if data is None:
                return ''

        if all:
            # data is list of elements: concatenate formatted strings
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data)

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getAccessData(self, path=None, dom=None):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getAttributionData(self, path=None, dom=None, all=True):
        """returns contents of attribution tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag as dict"""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """returns formatted contents of copyright tag"""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, all=True):
        """returns contents of context tag as dict"""
        # NOTE(review): the original comment here said "recursive=1", but no
        # recursive argument is passed, so the default (0) is used - confirm
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, all=all)

    def getDRI(self, path=None, dom=None, type="escidoc"):
        """returns the DRI of the document

        Returns the '@text' of the first dri element whose '@attr' has the
        given type, or None if there is no such element.
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        logging.debug("getDRI: dris=%s"%repr(dris))
        if dris is None:
            return None

        for dri in dris:
            logging.debug("getDRI: dri=%s"%dri)
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with metadata_template"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with metadata_extended_template (all fields)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with label_template"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    def OLDgetDCFormatted(self,path):
        """get the metadata as dc set

        Reads the metadata file for path and returns the bib fields that have
        a DC mapping as an XML string (a bib element with dc: children).
        Returns '' if the metadata cannot be read, no bib type is found or a
        mapping is missing. Needs the amara library.
        """
        # amara was used without ever being imported; import it here so the
        # module itself still loads on systems without amara
        import amara

        logging.debug("getDCFormatted(path=%s)"%path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        md = self.getMDFromPathOrUrl(path)
        if md is None:
            # nothing to parse (the debug line below used to crash on None)
            return ""

        logging.debug("MD in XML%s", md)
        im = amara.parse(md, prefixes=namespace)
        typePaths = im.xml_xpath('//bib/@type')
        archimedes = False
        if len(typePaths) < 1:
            # special case for outdated index.meta files of type archimedes
            typePaths = im.xml_xpath('//meta/archimedes')
            if len(typePaths) > 0:
                bibtype = "archimedes"
                archimedes = True
            else:
                typePaths = im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths) < 1:
                    return ""

                namespaceUsed = True
                bibtype = unicode(typePaths[0])
        else:
            bibtype = unicode(typePaths[0])

        logging.info("got type:%s", bibtype)
        # getattr() with a default never raises for a missing mapping, so the
        # result has to be checked for None as well
        try:
            mapping = getattr(self.main.meta.bib,bibtype.lower(),None)
        except Exception:
            mapping = None

        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s"%bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib,"dc",None)
        except Exception:
            dcMapping = None

        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping: generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()
        mdHash = []
        logging.debug("Value: %s"%repr(mds))
        for key,valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # skip this field; v would be unbound or left over from
                    # the previous field otherwise
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc = dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc,unicode(v[0])])

        escape = xml.sax.saxutils.escape
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        # the type comes from the document, so it is escaped like the values
        parts.append("<dc:type>%s</dc:type>"%escape(bibtype))
        for dcName, dcValue in mdHash:
            parts.append("""<dc:%s>%s</dc:%s>"""%(dcName,escape(dcValue),dcName))

        parts.append("</bib>")
        return "".join(parts)

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): declarePublic makes this configuration-changing method
    # callable without any permission - confirm this is intended; a
    # management permission via declareProtected looks more appropriate
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder"""
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form"""
    pt = PageTemplateFile('zpt/addMetadataFolderForm',globals()).__of__(self)
    return pt()


def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """add a MetaDataFolder object"""
    newObj = MetaDataFolder(id,title)
    self.Destination()._setObject(id,newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')