Mercurial > hg > MetaDataProvider
view MetaDataFolder.py @ 10:68bc459c9f59
getmd handles empty url
author | casties |
---|---|
date | Fri, 29 Jul 2011 12:09:33 +0200 |
parents | eeaad777d3d7 |
children | a29665fa9c62 |
line wrap: on
line source
import logging
import os.path
import posixpath
import StringIO
import sys
import urlparse
import xml.sax.saxutils

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData


def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type/field name for mapping lookups.

    The name is stripped, spaces are replaced by '-' and the result is
    lower-cased. If underscore is true, '_' is replaced by '-' as well.
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')

    return bt


def OLDgetBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag found at .//meta/bib.

    The type attribute of the bib tag is stored under the key '@type' (only
    when the attribute is present); every sub-element is stored under its
    normalised tag name. Returns an empty dict when there is no bib tag.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type; a bib tag without type attribute would otherwise
    # make normalizeBibField fail on None
    bibType = bib.get('type')
    if bibType is not None:
        bibinfo['@type'] = normalizeBibField(bibType)

    # put all subelements in dict
    # NOTE(review): getText is neither defined nor imported in this module --
    # confirm where it is supposed to come from.
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo


def toString(list):
    """Return the unicode concatenation of all items of list."""
    # the parameter name shadows the builtin but is kept for keyword callers
    return u"".join(unicode(item) for item in list)


def dcMetaDataToHash(mdSet):
    """Return a dict with title, creator and date taken from a DC metadata set.

    @param mdSet: string containing the DC metadata XML (the original
        documentation names the getDCMetadata format of this module)
    @return: dict with the keys 'title', 'creator' and 'date'; if the XML
        cannot be parsed the dict only contains 'error' holding mdSet.
    """
    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    try:
        # imported here because the module never imported amara: the name
        # was unbound, so every call used to end in the error branch below
        import amara
        md = amara.parse(StringIO.StringIO(mdSet), prefixes=NSS)
    except Exception:
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures"""
    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    # adds a 'Main Config' tab to the management tabs inherited from Folder
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    def __init__(self,id,title='',metaDataServerUrl=''):
        """initialize a new instance

        metaDataServerUrl is used as a %-format template: the resource path
        is substituted into it by getMDFromPathOrUrl.
        """
        self.id = id
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self,path):
        """Return the contents of the metadata file from the server as text.

        path is either a complete URL (used as is) or a resource path that
        is substituted into metaDataServerUrl; 'index.meta' is appended when
        the path does not already end with it. Returns "" (and logs an
        error) when path is empty or when a plain path is given but no
        metaDataServerUrl is configured.
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return ""

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            if not self.metaDataServerUrl:
                # '' % path would raise TypeError; '' is the constructor default
                logging.error("getMDFromPathOrUrl: no metaDataServerUrl configured!")
                return ""

            path = path.replace('/mpiwg/online/', '')
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl%path
            else:
                # posixpath: URLs always use '/', whatever the host OS is
                url = posixpath.join(self.metaDataServerUrl%path, 'index.meta')

        # NOTE(review): getHttpData is neither defined nor imported in this
        # module -- confirm where it is supposed to come from.
        md = getHttpData(url)
        return md

    def getXmlPathData(self, xmlpath, path=None, dom=None):
        """Return the contents of the element at xmlpath as dict.

        xmlpath is traversed relative to this folder; the object found there
        is asked for its data via getData(path=path, dom=dom). Returns None
        (and logs an error) when nothing is found at xmlpath.
        """
        logging.debug("getXmlPathData(%s)"%xmlpath)
        # make xmlpath relative for Zope
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        mdObj = self.restrictedTraverse(xmlpath, None)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag as dict (delegates to resource.meta.bib.getData)"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility: older method names kept as aliases
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    def getDCFormatted(self,path):
        """Return the metadata of the resource at path as a DC set.

        The index.meta is fetched with getMDFromPathOrUrl, its bib type is
        determined, and every field of the type mapping that has a value in
        the document and a non-empty DC equivalent is written as a dc:*
        element inside a <bib> element. Returns "" when no metadata could be
        fetched, no bib type is found, or the type/DC mapping is missing.
        """
        logging.debug("getDCFormatted(path=%s)"%path)
        # imported here because the module never imported amara
        import amara

        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        md = self.getMDFromPathOrUrl(path)
        if not md:
            # getMDFromPathOrUrl returns "" on failure; don't feed that to the parser
            logging.error("getDCFormatted: no metadata for path=%s"%path)
            return ""

        logging.debug("MD in XML"+md)
        im = amara.parse(md, prefixes=namespace)

        # find the bib type: plain bib tag, legacy archimedes tag or
        # namespaced mpiwg:bib tag -- in this order
        archimedes = False
        namespaceUsed = False
        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) > 0:
            bibType = unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes')) > 0:
            # special case for outdated index.meta files of type archimedes
            bibType = "archimedes"
            archimedes = True
        else:
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""
            namespaceUsed = True
            bibType = unicode(typePaths[0])

        logging.info("got type:"+bibType)

        # NOTE(review): all other methods of this class read the mappings from
        # self.resource.meta.bib, this one reads self.main.meta.bib -- confirm
        # that this is intended.
        try:
            mapping = getattr(self.main.meta.bib, bibType.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s"%bibType)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the corresponding type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        if archimedes:
            xpathTemplate = '//archimedes/%s/text()'
        elif namespaceUsed:
            xpathTemplate = '//mpiwg:bib/mpiwg:%s/text()'
        else:
            xpathTemplate = '//bib/%s/text()'

        mdHash = []
        logging.debug("Value: %s"%repr(mds))
        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value == "":
                continue

            fieldXpath = xpathTemplate%value
            try:
                v = im.xml_xpath(fieldXpath)
            except Exception:
                # skip this field; falling through would use an unbound or
                # stale result from a previous iteration
                logging.error('cannot do: %s'%fieldXpath)
                continue

            if len(v) < 1:
                continue

            try:
                dc = dcMds[key][0]
            except (KeyError, IndexError):
                # generic field without DC equivalent
                continue

            if dc != "":
                logging.debug("%s--> : %s"%(repr(value),dc))
                mdHash.append((dc, unicode(v[0])))

        escape = xml.sax.saxutils.escape
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        # the type comes from the document, so it is escaped like the values
        parts.append("<dc:type>%s</dc:type>"%escape(bibType))
        for dc, text in mdHash:
            parts.append("""<dc:%s>%s</dc:%s>"""%(dc, escape(text), dc))
        parts.append("</bib>")
        return "".join(parts)

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): this declares a method that rewrites the folder
    # configuration as public -- confirm that no management permission is
    # wanted here.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change title and metaDataServerUrl of this MetaDataFolder.

        Redirects to 'manage_main' when RESPONSE is given.
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form"""
    pt = PageTemplateFile('zpt/addMetadataFolderForm',globals()).__of__(self)
    return pt()


def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """Add a new MetaDataFolder object with the given id and title.

    Redirects to 'manage_main' when RESPONSE is given.
    """
    newObj = MetaDataFolder(id,title)
    self.Destination()._setObject(id,newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')