view MetaDataFolder.py @ 17:ba617e755c56

mostly finished attributions and copyright
author casties
date Tue, 02 Aug 2011 18:28:39 +0200
parents 41b90f09a1f2
children 611b6df70840
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urlparse
import logging

import xml.etree.ElementTree as ET

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText

def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type or field name.

    The name is stripped of surrounding whitespace, blanks become '-', and
    everything is lower-cased.  With underscore=True (the default) every '_'
    is turned into '-' as well.  The result is used as key for mapping
    lookups.
    """
    trimmed = bt.strip()
    dashed = trimmed.replace(' ', '-')
    normalized = dashed.lower()
    # underscores are only folded into dashes on request
    return normalized.replace('_', '-') if underscore else normalized

def OLDgetBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag.

    Looks up the first 'meta/bib' element below dom.  The normalised value of
    its 'type' attribute is stored under the key '@type'; the text of every
    direct child element is stored under the child's normalised tag name.

    Returns an empty dict when dom is None or contains no bib element.
    """
    bibinfo = {}
    if dom is None:
        # nothing to search in (e.g. the metadata file could not be loaded)
        return bibinfo

    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type; a bib tag without type attribute yields '' instead
    # of failing inside normalizeBibField on None
    bibinfo['@type'] = normalizeBibField(bib.get('type', ''))
    # put all subelements in dict
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo

def toString(list):
    """Return the unicode representations of all items of list, concatenated.

    An empty list gives an empty unicode string.
    """
    return u"".join(unicode(item) for item in list)

def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata as XML, with the elements in
        the namespace http://dublincore.org/documents/dcmi-namespace/
    @return: dict with the keys "title", "creator" and "date".  Each value is
        the concatenated text of all matching dc elements ("" if there are
        none).  If mdSet cannot be parsed the error is logged and the dict
        contains only the key "error" with the unmodified mdSet as value.

    The document is parsed with ElementTree, which this module already
    imports; the former implementation called the 'amara' library without
    importing it, so it always ended up in the error branch.
    """
    import sys

    DC_NS = 'http://dublincore.org/documents/dcmi-namespace/'

    def dcText(root, name):
        """Return the joined direct text nodes of all dc:<name> elements."""
        parts = []
        for elem in root.iter('{%s}%s' % (DC_NS, name)):
            if elem.text:
                parts.append(elem.text)
            # text following a child element is stored as the child's tail
            for child in elem:
                if child.tail:
                    parts.append(child.tail)
        return u"".join(parts)

    ret = {}
    source = mdSet
    # Python 2 only: the parser wants a byte string, so encode unicode input
    # (on Python 3 both isinstance tests are true for str and nothing happens)
    if isinstance(source, type(u"")) and not isinstance(source, str):
        source = source.encode("utf-8")

    try:
        root = ET.fromstring(source)
    except Exception:
        # best effort: report the problem and hand the input back to the caller
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = dcText(root, "title")
    ret["creator"] = dcText(root, "creator")
    ret["date"] = dcText(root, "date")

    return ret
        

class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures

    Metadata elements are addressed by an xmlpath such as
    'resource/meta/bib'.  The object for an xmlpath is looked up by traversal
    from this folder and is asked for its data (getData) or for a formatted
    rendering (getFormatted).
    """
    meta_type='MetaDataFolder'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self,id,title='',metaDataServerUrl=None):
        """initialize a new instance

        The class-level default metaDataServerUrl is kept unless a non-empty
        metaDataServerUrl is given.
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl


    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text

        path is either a complete URL (anything with a scheme), which is used
        as it is, or a file path.  A file path has '/mpiwg/online/' removed,
        is inserted into metaDataServerUrl and gets 'index.meta' appended
        unless it already ends with it.

        Returns None (and logs an error) if path is empty.
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl%path
            if not path.endswith("index.meta"):
                # URLs always use '/', so join by hand instead of relying on
                # the platform-dependent separator of os.path.join
                if not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path

        Returns None if no data could be loaded for path.
        """
        data = self.getMDFromPathOrUrl(path)
        if not data:
            return None

        return ET.fromstring(data)

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath

        Returns None if xmlpath is empty or nothing is found at xmlpath.
        """
        if not xmlpath:
            # nothing to look up (indexing an empty path would fail)
            return None

        # make xmlpath relative for Zope
        if xmlpath[0] == '/':
            xmlpath = xmlpath[1:]

        obj = self.restrictedTraverse(xmlpath, None)
        return obj

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict

        All keyword arguments are handed on to getData() of the object at
        xmlpath.  Returns None (and logs an error) if there is no object at
        xmlpath.
        """
        # call trace only, so debug level like in the other methods
        logging.debug("getXmlPathData(%s)"%xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath formatted with template

        If data is None it is fetched with getData() of the object at xmlpath.
        With all=True data is treated as a list of elements; every element is
        formatted on its own and the results are joined, each followed by a
        newline.

        Returns '' if there is no object at xmlpath or no data.
        """
        # call trace only, so debug level like in the other methods
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)"%(xmlpath,template))
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData element at '%s' not found!"%xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if not all:
            return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

        # data is list of elements
        fmt = ''
        for d in data:
            # concatenate formatted strings
            fmt += mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'

        return fmt

    def getResourceData(self, path=None, dom=None):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getAccessData(self, path=None, dom=None):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getAttributionData(self, path=None, dom=None, all=True):
        """returns contents of attribution tag (all=True is handed on to getData)"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag (recursive and all are handed on to getData)"""
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, all=True):
        """returns formatted contents of copyright tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, all=all)

    def getBibData(self, path=None, dom=None):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility: old method names (the short form uses the same
    # implementation as the normal one)
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel


    def OLDgetDCFormatted(self,path):
        """get the metadata as dc set

        Legacy implementation based on the 'amara' XML library.  Loads the
        metadata file for path, determines the bib type and writes every
        mapped field that has a value as a <dc:...> element into a <bib>
        element.

        Returns the XML as string.  Returns "" if the 'amara' library is not
        installed, if no metadata could be loaded, if no bib type is found or
        if the mapping for the type or for 'dc' is missing.
        """
        # local imports: the module-level "import xml.etree.ElementTree as ET"
        # does not bind the name 'xml', and 'amara' is not imported by this
        # module at all
        from xml.sax.saxutils import escape
        try:
            import amara
        except ImportError:
            logging.error("getDCFormatted: amara library not available")
            return ""

        logging.debug("getDCFormatted(path=%s)"%path)
        namespace={ 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed=False

        md = self.getMDFromPathOrUrl(path)
        if not md:
            # nothing loaded; the string concatenation below would fail on None
            logging.error("getDCFormatted: no metadata for path=%s"%path)
            return ""

        logging.debug("MD in XML"+md)
        im = amara.parse(md, prefixes=namespace)

        typePaths=im.xml_xpath('//bib/@type')
        archimedes=False

        if len(typePaths)<1:
            # special case for outdated index.meta files of type archimedes
            typePaths=im.xml_xpath('//meta/archimedes')
            if len(typePaths)>0:
                bibType = "archimedes"
                archimedes=True
            else:
                typePaths=im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths)<1:
                    return ""

                namespaceUsed=True
                bibType=unicode(typePaths[0])
        else:
            bibType=unicode(typePaths[0])

        logging.info("got type:"+bibType)
        # getattr with default None does not raise for an unknown type, so the
        # missing mapping has to be checked explicitly
        try:
            mapping=getattr(self.main.meta.bib,bibType.lower(),None)
        except Exception:
            mapping=None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping  for type: %s"%bibType)
            return ""

        try:
            dcMapping=getattr(self.main.meta.bib,"dc",None)
        except Exception:
            dcMapping=None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        mds=mapping.generateMappingHash() # get the mapping generic field --> field in the respective type
        dcMds=dcMapping.generateMappingHash()

        mdHash=[]
        logging.debug("Value: %s"%repr(mds))

        for key,valueTriple in mds.items():
            value=valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value=="":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # skip this field instead of going on with an unbound or
                    # stale result
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc=dcMds[key][0]
                if dc !="":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc,unicode(v[0])])

        ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # the type comes from the document, so it is escaped like the values
        ret+="<dc:type>%s</dc:type>"%escape(bibType)
        for dc,text in mdHash:
            ret+="""<dc:%s>%s</dc:%s>"""%(dc,escape(text),dc)
        ret+="</bib>"
        return ret


    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): this method changes the configuration but is declared
    # public - confirm that this is intended and not meant to be protected
    # by a management permission.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder

        Sets title and metaDataServerUrl and redirects to 'manage_main' if a
        RESPONSE is given.
        """
        self.title = title
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form

    Renders the page template 'zpt/addMetadataFolderForm' in the context of
    self and returns the result.
    """
    template = PageTemplateFile('zpt/addMetadataFolderForm',globals())
    # wrap the template in the context of self before calling it
    return template.__of__(self)()

def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """add a MetaDataFolder object

    Creates a MetaDataFolder with the given id and title, stores it in
    self.Destination() and redirects to 'manage_main' if a RESPONSE is given.
    """
    container = self.Destination()
    container._setObject(id, MetaDataFolder(id,title))
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')