Mercurial > hg > MetaDataProvider
view MetaDataFolder.py @ 12:7f0e2b656e5c
more work for non-bib metadata
author | casties |
---|---|
date | Fri, 29 Jul 2011 18:28:06 +0200 |
parents | a29665fa9c62 |
children | 281d223aa361 |
line wrap: on
line source
"""Zope folder giving access to metadata files (index.meta) and their mappings."""

import logging
import os.path
import posixpath
import urlparse
import xml.etree.ElementTree as ET
import xml.sax.saxutils

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText


def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type or field name.

    The name is stripped, spaces are replaced by '-', and the result is
    lower-cased. If `underscore` is true, '_' is replaced by '-' as well.

    @param bt: field or type name (string)
    @param underscore: also convert underscores to dashes
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')

    return bt


def OLDgetBibdataFromDom(dom):
    """Return a dict with all elements from the bib tag of `dom`.

    The normalised value of the bib tag's type attribute is stored under the
    key '@type' (empty string if the attribute is missing). Every child
    element is stored under its normalised tag name with its text as value.
    Returns an empty dict if there is no meta/bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is not None:
        # put type in @type; default to '' so that a missing attribute
        # does not crash normalizeBibField (which needs a string)
        bibtype = bib.get('type', '')
        bibinfo['@type'] = normalizeBibField(bibtype)
        # put all subelements in dict
        for e in bib:
            bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo


def toString(list):
    """Return the unicode concatenation of all items in `list`."""
    # parameter name kept for backward compatibility although it shadows
    # the builtin
    return u"".join(unicode(item) for item in list)


def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: String containing DC metadata information, currently only
        in the format of getDCMetadata of this module.
    @return: dict with the keys "title", "creator" and "date", or a dict
        with the single key "error" holding `mdSet` if parsing failed.

    NOTE(review): `amara` is not imported anywhere in this module as shown.
    Unless the name is provided elsewhere, the parse call raises NameError,
    which is caught below and reported through the "error" key.
    """
    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    import StringIO
    import sys
    mdBuffer = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(mdBuffer, prefixes=NSS)
    except Exception:
        # best effort: report the problem and hand the raw input back
        logging.error("Error: %s (%s)" % (sys.exc_info()[0], sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures"""
    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self, id, title='', metaDataServerUrl=None):
        """Initialize a new instance.

        The class-level default metaDataServerUrl is kept unless a non-empty
        `metaDataServerUrl` is given.
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self, path):
        """Return the contents of the metadata file from the server as text.

        `path` is either a full URL (anything with a scheme, e.g. http),
        which is fetched as is, or a file path that is inserted into
        metaDataServerUrl. For file paths the prefix '/mpiwg/online/' is
        removed and 'index.meta' is appended unless the path already ends
        with it. Returns None (and logs an error) if `path` is empty.
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl % path
            else:
                # URLs always use '/', so join with posixpath rather than
                # the platform-dependent os.path
                url = posixpath.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDomFromPathOrUrl(self, path):
        """Return the DOM (ElementTree element) of the metadata file at `path`.

        Returns None if no data could be read.
        """
        dom = None
        data = self.getMDFromPathOrUrl(path)
        if data:
            dom = ET.fromstring(data)

        return dom

    def getXmlPathData(self, xmlpath, path=None, dom=None):
        """Return the contents of the element at `xmlpath` as dict.

        `xmlpath` is traversed relative to this folder to find the object
        whose getData(path=..., dom=...) supplies the result. Returns None
        (and logs an error) if no object is found at `xmlpath`.
        """
        # trace output only -- not an error condition
        logging.debug("getXmlPathData(%s)", xmlpath)
        # make xmlpath relative for Zope (startswith is safe for '')
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        mdObj = self.restrictedTraverse(xmlpath, None)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData element at '%s' not found!", xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom)

    def getResourceData(self, path=None, dom=None):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getAccessData(self, path=None, dom=None):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """Return the bib metadata formatted with 'metadata_template'."""
        logging.debug("getBibFormattedMetaData(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """Return the bib metadata formatted with 'metadata_extended_template' (all fields)."""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self, path=None, dom=None, bibdata=None):
        """Return the bib metadata formatted with 'label_template'."""
        logging.debug("getBibFormattedLabel(%s)", path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    def OLDgetDCFormatted(self, path):
        """Get the metadata as DC set.

        Returns an XML string with a <bib> root containing <dc:type> and one
        <dc:...> element per mapped field that has a value, or "" if the
        metadata could not be read, no bib type was found or a mapping is
        missing.

        NOTE(review): relies on `amara`, which is not imported in this module
        as shown, and on `self.main.meta.bib` -- confirm both before reviving
        this method.
        """
        logging.debug("getDCFormatted(path=%s)", path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False

        md = self.getMDFromPathOrUrl(path)
        # %s formatting instead of '+' so that md=None does not raise
        logging.debug("MD in XML%s", md)
        if not md:
            return ""

        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        archimedes = False

        if len(typePaths) < 1:
            # special case for outdated index.meta files of type archimedes
            typePaths = im.xml_xpath('//meta/archimedes')
            if len(typePaths) > 0:
                bibtype = "archimedes"
                archimedes = True
            else:
                typePaths = im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths) < 1:
                    return ""
                else:
                    namespaceUsed = True
                    bibtype = unicode(typePaths[0])
        else:
            bibtype = unicode(typePaths[0])

        logging.info("got type:" + bibtype)

        # getattr with a default never raises for a missing mapping, so the
        # result has to be checked for None as well
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s", bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s", repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s", repr(value))
            logging.debug("Key: %s", repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()' % value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()' % value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()' % value)
                    # no result for this field; skip it instead of using an
                    # unbound (or stale) v
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()' % value)

            if len(v) > 0:
                dc = dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s" % (repr(value), dc))
                    mdHash.append([dc, unicode(v[0])])

        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        parts.append("<dc:type>%s</dc:type>" % bibtype)
        for tag, text in mdHash:
            parts.append("""<dc:%s>%s</dc:%s>""" % (tag, xml.sax.saxutils.escape(text), tag))

        parts.append("</bib>")
        return "".join(parts)

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder', globals())

    # NOTE(review): declared public although it changes the configuration --
    # confirm that no management permission is wanted here.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self, title, metaDataServerUrl, RESPONSE=None):
        """Change title and metadata server URL of this MetaDataFolder.

        Redirects to 'manage_main' if a RESPONSE is given.
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form"""
    pt = PageTemplateFile('zpt/addMetadataFolderForm', globals()).__of__(self)
    return pt()


def manage_addMetaDataFolder(self, id, title, RESPONSE=None):
    """Add a MetaDataFolder object with the given id and title.

    Redirects to 'manage_main' if a RESPONSE is given.
    """
    newObj = MetaDataFolder(id, title)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')