view MetaDataFolder.py @ 12:7f0e2b656e5c

more work for non-bib metadata
author casties
date Fri, 29 Jul 2011 18:28:06 +0200
parents a29665fa9c62
children 281d223aa361
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urlparse
import logging

import xml.etree.ElementTree as ET

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText

def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type or field name for mapping lookups.

    Surrounding whitespace is stripped, remaining spaces become hyphens and
    the result is lower-cased. Underscores are replaced by hyphens as well
    unless ``underscore`` is false.
    """
    key = bt.strip()
    key = key.replace(' ', '-')
    key = key.lower()
    if not underscore:
        # caller wants underscores to be left alone
        return key

    return key.replace('_', '-')

def OLDgetBibdataFromDom(dom):
    """Return a dict with all elements from the bib tag below ``dom``.

    The first element matching ``.//meta/bib`` is evaluated. Its ``type``
    attribute is stored (normalised) under the key ``'@type'``; the key is
    left out when the attribute is missing. Every child element is stored
    under its normalised tag name with the value returned by getText().

    Returns an empty dict when there is no bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type -- a bib tag without type attribute used to raise an
    # AttributeError in normalizeBibField(None)
    bibtype = bib.get('type')
    if bibtype is not None:
        bibinfo['@type'] = normalizeBibField(bibtype)

    # put all subelements in dict
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo

def toString(list):
    """Return the unicode representations of all items of ``list`` concatenated."""
    return u"".join(unicode(item) for item in list)

def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information, currently only
        in the format produced by getDCMetadata of this module
    @return: dict with the keys "title", "creator" and "date". Each value is
        the concatenated text of all matching dc: elements (empty string if
        there is none). If mdSet cannot be parsed the dict contains only the
        key "error" with the unparsed input as value.
    """
    # namespace the dc: prefix is bound to
    DC_NS = 'http://dublincore.org/documents/dcmi-namespace/'

    ret = {}
    data = mdSet
    if str is bytes and isinstance(mdSet, unicode):
        # Python 2 only: the XML parser rejects non-ASCII unicode objects
        data = mdSet.encode('utf-8')

    try:
        # ElementTree is imported at file level; the amara parser used here
        # before is not imported in the visible part of this file
        root = ET.fromstring(data)
    except Exception:
        # best effort: report the problem and hand the input back
        import sys
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    for field in ("title", "creator", "date"):
        tag = "{%s}%s" % (DC_NS, field)
        elems = list(root.findall(".//" + tag))
        if root.tag == tag:
            # findall() only looks below the root element
            elems.insert(0, root)

        # collect the direct text nodes of every match, like the XPath
        # expression //dc:<field>/text()
        chunks = []
        for elem in elems:
            chunks.append(elem.text or u"")
            chunks.extend(child.tail or u"" for child in elem)
        ret[field] = u"".join(chunks)

    return ret
        

class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures

    The folder turns metadata paths into URLs of the metadata server,
    fetches and parses the metadata files and hands the evaluation of the
    resource, texttool, access and bib tags over to the objects found at the
    corresponding paths below it (e.g. self.resource.meta.bib).
    """
    meta_type='MetaDataFolder'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s replaced by file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self,id,title='',metaDataServerUrl=None):
        """initialize a new instance

        @param id: Zope id of the folder
        @param title: title of the folder
        @param metaDataServerUrl: optional replacement for the class-level
            default server URL (must contain %s for the file path)
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl


    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text

        @param path: complete URL (used as is) or a file path. From a file
            path the prefix '/mpiwg/online/' is removed and 'index.meta' is
            appended unless the path already ends with it.
        @return: result of getHttpData() for the URL or None if path is empty
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl%path
            if not path.endswith("index.meta"):
                # URLs are always joined with '/', independent of the
                # separator os.path would use on the server platform
                if url and not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path

        @return: ElementTree element parsed from the file contents or None
            if no data could be read
        """
        dom = None
        data = self.getMDFromPathOrUrl(path)
        if data:
            dom = ET.fromstring(data)

        return dom

    def getXmlPathData(self, xmlpath, path=None, dom=None):
        """returns contents of element at xmlpath as dict

        @param xmlpath: path of the metadata element (e.g. 'resource/meta/bib'),
            a leading '/' is ignored
        @param path: passed on to getData() of the object found at xmlpath
        @param dom: passed on to getData() of the object found at xmlpath
        @return: result of getData() or None if nothing is found at xmlpath
        """
        # plain call trace, not an error condition
        logging.debug("getXmlPathData(%s)"%xmlpath)
        # make xmlpath relative for Zope
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        mdObj = self.restrictedTraverse(xmlpath, None)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom)

    def getResourceData(self, path=None, dom=None):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getAccessData(self, path=None, dom=None):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns the bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility: old method names (the short variant uses the normal template)
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel


    def OLDgetDCFormatted(self,path):
        """get the metadata as dc set

        Returns an XML string (bib element with dc: children) or "" when no
        bib type or no mapping can be determined.

        NOTE(review): legacy code. It uses amara and xml.sax.saxutils, which
        are not imported in the visible part of this file, and looks up the
        mappings at self.main.meta.bib instead of self.resource.meta.bib --
        confirm before relying on this method.
        """
        logging.debug("getDCFormatted(path=%s)"%path)
        namespace={ 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed=False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML"+md)
        im = amara.parse(md, prefixes=namespace)

        typePaths=im.xml_xpath('//bib/@type')
        archimedes=False

        if len(typePaths)<1:
            # special case for outdated index.meta files of type archimedes
            typePaths=im.xml_xpath('//meta/archimedes')
            if len(typePaths)>0:
                bibtype = "archimedes"
                archimedes=True
            else:
                typePaths=im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths)<1:
                    return ""
                else:
                    namespaceUsed=True

                    bibtype=unicode(typePaths[0])
        else:
            bibtype=unicode(typePaths[0])
        logging.info("got type:"+bibtype)
        try:
            mapping=getattr(self.main.meta.bib,bibtype.lower(),None)
        except Exception:
            logging.error("getMetaDataFromServer no mapping  for type: %s"%bibtype)
            return ""

        try:
            dcMapping=getattr(self.main.meta.bib,"dc",None)
        except Exception:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the corresponding type
        mds=mapping.generateMappingHash()
        dcMds=dcMapping.generateMappingHash()

        mdHash=[]
        logging.debug("Value: %s"%repr(mds))

        for key,valueTriple in mds.items():
            value=valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value!="":
                if not archimedes:
                    if namespaceUsed:
                        try:
                            v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                        except Exception:
                            logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                            # no result for this field; do not reuse the
                            # value of a previous field
                            v = []
                    else:
                        v = im.xml_xpath('//bib/%s/text()'%value)
                else:
                    v = im.xml_xpath('//archimedes/%s/text()'%value)
                if len(v) > 0:
                    dc=dcMds[key][0]

                    if (dc !="") and (value !=""):
                        logging.debug("%s--> : %s"%(repr(value),dc))
                        mdHash.append([dc,unicode(v[0])])

        ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        ret+="<dc:type>%s</dc:type>"%bibtype
        for md in mdHash:

            ret+="""<dc:%s>%s</dc:%s>"""%(md[0],xml.sax.saxutils.escape(md[1]),md[0])
        ret+="</bib>"
        return ret


    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): declarePublic makes this configuration-changing method
    # accessible without a permission check -- confirm that this is intended
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder

        Stores the new title and metadata server URL and redirects to
        'manage_main' when a RESPONSE is given.
        """
        self.title = title
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """Render the form for adding a MetaDataFolder in the context of self."""
    template = PageTemplateFile('zpt/addMetadataFolderForm', globals())
    # wrap the template in the context of self before calling it
    return template.__of__(self)()

def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """Create a MetaDataFolder object and add it to the destination of self.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    folder = MetaDataFolder(id, title)
    container = self.Destination()
    container._setObject(id, folder)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')