Mercurial > hg > MetaDataProvider
diff MetaData.py @ 0:9f9d9be26e53
first checkin in Mercurial (see history in SVN)
author | casties |
---|---|
date | Mon, 25 Jul 2011 16:50:48 +0200 |
parents | |
children | e4bae49e657b |
line wrap: on
line diff
"""MetaData.py -- Zope product classes for managing bibliographic metadata.

Provides the ``MetaData`` folder class (field lists, import of mapping
definitions from XML, formatting of ``index.meta`` data through templates)
and the ``MetaDataMapping`` class together with their ZMI add-forms.

Origin: first check-in in Mercurial, Mon Jul 25 16:50:48 2011 +0200
(b/MetaData.py, 419 lines added).
"""

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urllib
import logging
import urlparse
import StringIO
import sys

# TODO: which xml toolkit?
import amara
import xml.sax.saxutils
import xml.dom.minidom
import xml.etree.ElementTree as ET


# TODO: do we need this?
#from Products.OSA_system2 import OSAS_helpers
#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping

from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping


from SrvTxtUtils import getHttpData, getText


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Nodes of any other type are ignored; an empty nodelist yields "".
    """
    return "".join(node.data for node in nodelist
                   if node.nodeType == node.TEXT_NODE)


def normalizeBibtype(bt):
    """Return the normalised bib type used for looking up mappings.

    Surrounding whitespace is stripped, inner spaces become '-' and the
    result is lower-cased.
    """
    return bt.strip().replace(' ', '-').lower()


def toString(list):
    # Concatenates the unicode() form of every item into one unicode string.
    # The parameter name shadows the builtin but is kept for compatibility
    # with existing callers.
    return u"".join(unicode(item) for item in list)


def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata, currently only in the
        format produced by getDCFormatted of this module.
    @return: dict with the keys "title", "creator" and "date"; if mdSet
        cannot be parsed, a dict with the single key "error" holding the
        unparsed input.
    """
    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    buffer = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(buffer, prefixes=NSS)
    except Exception:
        # best effort: report the problem and hand the raw input back
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


def getBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag.

    @param dom: ElementTree element that is searched for ".//meta/bib".
    @return: dict mapping each sub-element tag to its text plus the key
        '@type' with the normalised type attribute ('' when the attribute
        is missing); an empty dict when there is no bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is not None:
        # put type in @type (a missing attribute is treated as empty type)
        bibinfo['@type'] = normalizeBibtype(bib.get('type') or '')
        # put all subelements in dict
        for e in bib:
            bibinfo[e.tag] = getText(e)

    return bibinfo


class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        {'label': 'Select Fields for Display', 'action': 'indicateDisplayFieldsForm'},
    )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """Initialize a new instance.

        @param fields: comma separated list of field names.
        """
        self.id = id
        # label for the link on the add page
        self.shortDescription = shortDescription
        # description of the method for the link page
        self.description = description
        self.fieldList = fields.split(",")
        # must be set via changeMetadata; used as a %-format pattern that
        # receives the path (see getMDFromPathOrUrl)
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """Convenience method for modifying a path.

        @param remove: substring that is removed from path (all occurrences).
        @param prefix: path that is joined in front of path.
        @param cut: number of trailing "/"-separated parts to drop.
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            splitted = path.split("/")
            path = "/".join(splitted[0:len(splitted) - cut])
        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """Import metadata from the metadataexportxml file.

        Without importFile the upload form is rendered; otherwise the first
        "metadataExport" element of the file is turned into mappings.
        """
        if importFile is None:
            pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom = xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """Create mappings from the metadata node of the XML format.

        @param metadatanode: minidom element whose "set" and "metadata"
            children are processed.
        @param metadata: metadata object that receives the field list and
            the new sub-objects; defaults to self.
        """
        if metadata is None:
            metadata = self

        # first step: create the field list
        for node in metadatanode.childNodes:
            if node.nodeType != node.ELEMENT_NODE:
                # whitespace text and comment nodes have no tagName
                continue
            logging.debug("node: %s"%node.tagName)
            if node.tagName == "set":
                setName = node.getAttribute('name')
                entries = node.getElementsByTagName('entry')
                if setName == 'generic':
                    metadata.fieldList = [entry.getAttribute('genericName')
                                          for entry in entries]
                else:
                    mappingId = setName.encode('utf-8')
                    argList = {}
                    for entry in entries:
                        genericName = entry.getAttribute('genericName')
                        tag = entry.getAttribute('tag')
                        label = entry.getAttribute('label')
                        description = getTextFromNode(entry.childNodes) #TODO: clean
                        argList[genericName] = (tag, label, description)
                    metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tagName == "metadata":
                # nested metadata object: create it and recurse into it
                name = node.getAttribute('name').encode('utf-8')
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    def getMDFromPathOrUrl(self, path):
        # Fetches the metadata document for path with getHttpData and
        # returns whatever getHttpData returns.
        # A path with a URL scheme is used as is; otherwise the path is
        # inserted into self.metaDataServerUrl and 'index.meta' is appended
        # unless the path already ends with it.
        # NOTE(review): deliberately documented with comments only -- Zope 2
        # presumably publishes only objects with a docstring, so adding one
        # could change what is reachable by URL; confirm before changing.
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        elif path.endswith("index.meta"):
            # path only, already pointing at the index.meta file
            url = self.metaDataServerUrl%path
        else:
            # path only
            url = os.path.join(self.metaDataServerUrl%path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        return getHttpData(url)

    def _getBibMapping(self, name):
        # Returns the object called name below self.main.meta.bib, or None
        # when it (or any part of that path) cannot be found.
        try:
            return getattr(self.main.meta.bib, name, None)
        except Exception:
            return None

    def getDCFormatted(self, path):
        """Get the metadata as dc set.

        Loads the metadata for path, determines the bib type and maps the
        fields through the type mapping and the "dc" mapping found below
        self.main.meta.bib.

        @return: string with a <bib> element containing one <dc:...>
            element per mapped field, or "" when no bib type or no mapping
            is found.
        """
        logging.debug("getDCFormatted(path=%s)"%path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML%s", md)
        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) > 0:
            bibtype = unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes')) > 0:
            # special case for outdated index.meta files of type archimedes
            bibtype = "archimedes"
            archimedes = True
        else:
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""
            namespaceUsed = True
            bibtype = unicode(typePaths[0])

        logging.info("got type:%s", bibtype)

        # a missing mapping comes back as None, so test for that explicitly
        mapping = self._getBibMapping(bibtype.lower())
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s"%bibtype)
            return ""

        dcMapping = self._getBibMapping("dc")
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s"%repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # never fall through with an unbound or stale result
                    v = []
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc = dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc, unicode(v[0])])

        escape = xml.sax.saxutils.escape
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        # the type comes from the document, so it is escaped like the values
        parts.append("<dc:type>%s</dc:type>"%escape(bibtype))
        for dc, text in mdHash:
            parts.append("""<dc:%s>%s</dc:%s>"""%(dc, escape(text), dc))
        parts.append("</bib>")
        return "".join(parts)

    def getStdMappedHash(self, bibdata):
        """Return dict with metadata from bibdata mapped according to standard mapping.

        @param bibdata: dict as produced by getBibdataFromDom.
        @return: dict generic field name -> value from bibdata ('' when the
            mapped field is absent); empty when no mapping exists for the
            type of bibdata.
        """
        mdHash = {}
        bibtype = bibdata.get('@type', '')
        # get mapping from main/meta/bib
        mapping = self._getBibMapping(bibtype.lower())
        if mapping is None:
            logging.error("getStdMappedHash: no mapping for type: %s"%bibtype)
            return mdHash

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()

        for field, entry in mds.items():
            # get mapped field name
            mf = entry[0]
            if not mf:
                continue
            logging.debug("mapping: %s = %s"%(field,mf))
            mdHash[field] = bibdata.get(mf, '')

        return mdHash

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """Return string with document data formatted according to template.

        Gets data from server or dom or pre-parsed bibdata. The template is
        looked up on self as "<template>_<bibtype>" with a fallback to
        "<template>_generic".

        @return: result of calling the template with stdmd (mapped data)
            and md (bibdata), or "" when no template or no data source is
            available.
        """
        logging.debug("getFormatted(template=%s)"%(template))

        if dom is None and bibdata is None:
            if path is None:
                logging.error("getFormatted: neither path nor dom nor bibdata given")
                return ""
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
            #logging.debug("md:"+md)
            #dom = amara.parse(md)
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        # bibdata is empty when the document has no bib element
        bibtype = bibdata.get('@type', '')

        # get template
        tp = getattr(self, "%s_%s"%(template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
            # try generic
            tp = getattr(self, "%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        # put mapped data in mdHash
        mdHash = self.getStdMappedHash(bibdata)

        return tp(stdmd=mdHash, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """Get the metadata formatted with 'metadata_template'."""
        logging.debug("getFormattedMetaData(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """Get the metadata formatted with 'metadata_template'."""
        # NOTE(review): uses the same template as getFormattedMetaData --
        # confirm that no separate short template is intended.
        logging.debug("getFormattedMetaDataShort(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """Get the metadata formatted with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtended(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """Get the metadata formatted with 'label_template'."""
        logging.debug("getFormattedLabel(%s)"%path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """Get the metadata from the server, formatted with 'metadata_template'."""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """Get the metadata from the server, formatted with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self, path):
        """Get the metadata from the server, formatted with 'label_template'."""
        logging.debug("getFormattedLabelFromServer(%s)"%path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata

        @param fields: comma separated list of field names.
        """
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """interface for adding the OSAS_add_Metadata"""
    pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')).__of__(self)
    return pt()


def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """Add a metadata object to the destination container."""
    newObj = MetaData(id, shortDescription, description, fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


# Mapping object; all behaviour comes from OSAS_MetadataMapping.
# (documented with a comment only, so the class still has no docstring)
class MetaDataMapping(OSAS_MetadataMapping):
    meta_type = "MetadataMapping"


def manage_addMetaDataMappingForm(self):
    """interface for adding the OSAS_root"""
    pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'addMetadataMappingForm.zpt')).__of__(self)

    return pt()


def manage_addMetaDataMapping(self, idOfObject, titleOfObject, RESPONSE=None):
    """Add a MetaDataMapping built from the submitted form.

    For every field in self.fieldList the form values <field>,
    label_<field>, explanation_<field>, status_<field> and values_<field>
    are collected into the argument list of the new mapping.
    """
    form = self.REQUEST.form
    argList = {}
    for arg in self.fieldList:
        if arg not in ['idOfObject', 'titleOfObject']:
            argList[arg] = (form[arg],
                            form['label_'+arg],
                            form['explanation_'+arg],
                            form['status_'+arg],
                            form['values_'+arg])

    newObj = MetaDataMapping(idOfObject, titleOfObject, argList)
    self._setObject(idOfObject, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')