view MetaDataFolder.py @ 11:a29665fa9c62

more work for non-bib metadata
author casties
date Fri, 29 Jul 2011 14:45:13 +0200
parents 68bc459c9f59
children 7f0e2b656e5c
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urlparse
import logging

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData

def normalizeBibField(bt, underscore=True):
    """returns normalised bib type/field name for looking up mappings

    Surrounding whitespace is stripped, inner spaces become '-' and the
    result is lower-cased.

    @param bt: raw bib type or field name
    @param underscore: if true (default) '_' is replaced by '-' as well
    """
    normalized = bt.strip().replace(' ', '-').lower()
    return normalized.replace('_', '-') if underscore else normalized

def OLDgetBibdataFromDom(dom):
    """returns dict with all elements from bib-tag

    The bib element is looked up with dom.find(".//meta/bib"). Its 'type'
    attribute is stored (normalised) under the key '@type'; every child
    element is stored under its normalised tag name with the value of
    getText(child). Returns an empty dict when there is no bib element.
    """
    bibElem = dom.find(".//meta/bib")
    if bibElem is None:
        # no bib tag in this document
        return {}

    # the bib type goes into the special key '@type'
    fields = {'@type': normalizeBibField(bibElem.get('type'))}
    # NOTE(review): getText is not imported in the visible part of this module -- confirm
    for child in bibElem:
        fields[normalizeBibField(child.tag)] = getText(child)

    return fields

def toString(list):
    """returns the unicode representations of all items of list joined into one string"""
    return u"".join(unicode(item) for item in list)

def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information
    @return: dict with the keys "title", "creator" and "date" (text of the
    matching dc: elements); if parsing fails the error is logged and a dict
    with the single key "error" holding the unparsed mdSet is returned
    """
    import StringIO
    import sys

    prefixes = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl':"http://www.w3.org/2002/07/owl#",
        'rdfs':"http://www.w3.org/2000/01/rdf-schema#"
    }
    source = StringIO.StringIO(mdSet)
    try:
        # NOTE(review): amara is not imported in the visible part of this module -- confirm
        doc = amara.parse(source, prefixes=prefixes)
    except:
        # catches everything on purpose: any failure is reported to the caller in the result
        excType, excValue = sys.exc_info()[:2]
        logging.error("Error: %s (%s)"%(excType, excValue))
        return {"error": mdSet}

    return {
        "title": toString(doc.xml_xpath("//dc:title/text()")),
        "creator": toString(doc.xml_xpath("//dc:creator/text()")),
        "date": toString(doc.xml_xpath("//dc:date/text()")),
    }
        

class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures

    The bib helper methods delegate to the object reachable as
    self.resource.meta.bib. metaDataServerUrl is used as a %-format template
    that is filled with the resource path (see getMDFromPathOrUrl).
    """
    meta_type='MetaDataFolder'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    def __init__(self,id,title='',metaDataServerUrl=''):
        """initialize a new instance

        @param id: id of the new folder
        @param title: title of the new folder
        @param metaDataServerUrl: %-format template for metadata URLs
        """
        self.id = id
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl


    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text

        @param path: complete URL (with scheme) or a path that is inserted
        into the metaDataServerUrl template; 'index.meta' is appended unless
        the path already ends with it
        @return: result of getHttpData for the URL, "" if path is empty
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return ""

        if urlparse.urlparse(path)[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            import posixpath
            path = path.replace('/mpiwg/online/', '')
            # metaDataServerUrl has to contain a %s placeholder for the path
            url = self.metaDataServerUrl%path
            if not path.endswith("index.meta"):
                # URLs always use '/', so join with posixpath instead of the
                # platform-dependent os.path
                url = posixpath.join(url, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        # NOTE(review): getHttpData is not imported in the visible part of this module -- confirm
        md = getHttpData(url)
        return md

    def getXmlPathData(self, xmlpath, path=None, dom=None):
        """returns the data of the MetaData object at xmlpath

        @param xmlpath: path of the MetaData object below this folder
        (a leading '/' is removed)
        @param path: passed on to getData of the MetaData object
        @param dom: passed on to getData of the MetaData object
        @return: result of getData, None if xmlpath is empty or nothing is
        found at xmlpath
        """
        logging.debug("getXmlPathData(%s)"%xmlpath)
        # make xmlpath relative for Zope
        if xmlpath and xmlpath[0] == '/':
            xmlpath = xmlpath[1:]

        if not xmlpath:
            # an empty path can not address a MetaData element
            logging.error("getXmlPathData: empty xmlpath!")
            return None

        mdObj = self.restrictedTraverse(xmlpath, None)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag (resource/meta/texttool) via getXmlPathData"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag (getData of resource/meta/bib)"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns metadata description for bibdata (getMapFields of resource/meta/bib)"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns metadata descriptions and data for bibdata (getMappedData of resource/meta/bib)"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns DC keys and data from bibdata (getDCMappedData of resource/meta/bib)"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel


    def OLDgetDCFormatted(self,path):
        """get the metadata as dc set (old implementation)

        @param path: path or URL of the metadata file (see getMDFromPathOrUrl)
        @return: string with a <bib> element containing dc:type and one dc:
        element per mapped field that has a value, "" if no bib type or no
        mapping is found
        """
        # only needed here and not imported at module level
        import xml.sax.saxutils

        logging.debug("getDCFormatted(path=%s)"%path)
        namespace={ 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed=False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML"+md)
        # NOTE(review): amara is not imported in the visible part of this module -- confirm
        im = amara.parse(md, prefixes=namespace)

        typePaths=im.xml_xpath('//bib/@type')
        archimedes=False

        if len(typePaths)<1:
            # special case for outdated index.meta files of type archimedes
            typePaths=im.xml_xpath('//meta/archimedes')
            if len(typePaths)>0:
                bibType = "archimedes"
                archimedes=True
            else:
                typePaths=im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths)<1:
                    return ""

                namespaceUsed=True
                bibType=unicode(typePaths[0])
        else:
            bibType=unicode(typePaths[0])
        logging.info("got type:"+bibType)

        # NOTE(review): uses self.main.meta.bib while the other methods use
        # self.resource.meta.bib -- confirm which one is current
        # getattr with a default does not raise for a missing mapping, so the
        # None result has to be checked explicitly
        try:
            mapping=getattr(self.main.meta.bib,bibType.lower(),None)
        except Exception:
            mapping=None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping  for type: %s"%bibType)
            return ""

        try:
            dcMapping=getattr(self.main.meta.bib,"dc",None)
        except Exception:
            dcMapping=None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds=mapping.generateMappingHash()
        dcMds=dcMapping.generateMappingHash()

        mdHash=[]
        logging.debug("Value: %s"%repr(mds))

        for key,valueTriple in mds.items():
            value=valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value=="":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # skip this field, otherwise v would be unbound or still
                    # hold the result of the previous field
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc=dcMds[key][0]
                if dc !="":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc,unicode(v[0])])

        ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # escape the type like the field values so the result stays well-formed
        ret+="<dc:type>%s</dc:type>"%xml.sax.saxutils.escape(bibType)
        for dcName,dcValue in mdHash:
            ret+="""<dc:%s>%s</dc:%s>"""%(dcName,xml.sax.saxutils.escape(dcValue),dcName)
        ret+="</bib>"
        return ret


    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): declarePublic makes this configuration-changing method
    # callable without any permission -- consider declareProtected with a
    # management permission
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder

        @param title: new title
        @param metaDataServerUrl: new %-format template for metadata URLs
        @param RESPONSE: if given, redirected to 'manage_main'
        """
        self.title = title
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form: renders the page template zpt/addMetadataFolderForm in the context of self"""
    template = PageTemplateFile('zpt/addMetadataFolderForm',globals())
    return template.__of__(self)()

def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """add a MetaDataFolder object

    @param id: id of the new folder
    @param title: title of the new folder
    @param RESPONSE: if given, redirected to 'manage_main' afterwards
    """
    folder = MetaDataFolder(id, title)
    container = self.Destination()
    container._setObject(id, folder)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')