# MetaData.py @ 29:b3428e281ee2
#
# dcpmapping added to metadata
# author: dwinter
# date: Mon, 17 Sep 2012 10:14:01 +0200
# parents: 40508e672841
# children: 1cb439acd1e1

import logging
import os
import xml.etree.ElementTree as ET
import xml.sax.saxutils

from AccessControl import ClassSecurityInfo
from Globals import package_home
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getHttpData, getText

def normalizeFieldName(bt, underscore=True):
    """Return the canonical form of a field or type name for mapping lookups.

    Surrounding whitespace is stripped, blanks become hyphens and the result
    is lower-cased.  With ``underscore`` true (the default) underscores are
    turned into hyphens as well.
    """
    normalized = bt.strip().replace(' ', '-').lower()
    if not underscore:
        return normalized

    return normalized.replace('_', '-')

def putAppend(hash, key, value):
    """Store value under key in dict hash, collecting repeated keys in a list.

    A new key simply gets the value.  If the key is already present the old
    value and the new one are combined: an existing list is extended in place,
    anything else is replaced by a two-element list.  The (modified) dict is
    returned.
    """
    if key not in hash:
        # first value for this key - store it as is
        hash[key] = value
        return hash

    existing = hash[key]
    if isinstance(existing, list):
        # already collecting values - just add the new one
        existing.append(value)
    else:
        # second value for this key - start a list
        hash[key] = [existing, value]

    return hash


class MetaData(Folder):
    """provides basic methods for managing metadata structures"""
    meta_type='MetaData'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataForm'},
        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )
    
    mappingSelectAttribute = None
    """the name of the attribute that can be used to select a mapping (if applicable)"""
    
    def __init__(self,id,title=None,shortDescription='',description='',fields=''):
        """Set up a new metadata element.

        @param id: id of the object
        @param title: tag name of the element; if empty the id is used as tag name
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma-separated field names; fieldList is only set if this is non-empty
        """
        self.id = id
        # the title doubles as the tag name - fall back to the id
        self.title = title or id
        self.shortDescription = shortDescription
        self.description = description
        if fields:
            self.fieldList = fields.split(",")
        # metaDataServerUrl is not initialised here, it has to be set with changeMetaData()
    
    
    def getFieldList(self):
        """Return the configured field names as one comma-separated string.

        fieldList is only assigned when a non-empty ``fields`` value was given
        to the constructor or to changeMetaData(), so a missing (or empty)
        attribute yields '' instead of raising AttributeError.
        """
        return ','.join(getattr(self, 'fieldList', None) or [])
    
    def getTagName(self):
        """Return the tag name of this element.

        The title is the tag name; if it is empty the shortDescription is
        returned instead.
        """
        return self.title or self.shortDescription
    
    def getXmlPath(self, omitRoot=False):
        """Return the path ('/tag/tag/...') leading to this element.

        The path is built from the tag names of this object and of all
        directly enclosing objects with the same meta_type.

        @param omitRoot: leave out the outermost element (it contributes '')
        """
        ownStep = '/%s'%self.getTagName()
        container = self.aq_parent
        if container.meta_type == self.meta_type:
            # nested element - the container's path comes first
            return container.getXmlPath(omitRoot=omitRoot) + ownStep

        if omitRoot:
            # this is the outermost element and it is not wanted
            return ''

        return ownStep
    
    def getSubDom(self, path=None, dom=None, all=False):
        """Return the part of the dom that corresponds to this element.

        @param path: handed to getDomFromPathOrUrl() when no dom is given
        @param dom: already parsed dom to search in
        @param all: return every match (result of findall) instead of the
            first one (result of find)
        """
        source = self.getDomFromPathOrUrl(path) if dom is None else dom

        # ElementTree doesn't like absolute paths, so the path is made
        # relative assuming the dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s"%(xpath, source))

        search = source.findall if all else source.find
        return search(xpath)
        

    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
        """Convert one element into a dict (helper for getData()).

        Attributes are stored under '@attr', the element's own text under
        '@text' and every child element under its tag name.

        @param elem: the element to convert
        @param recursive: while greater than 0 children are converted with
            _getData() again (counter decreased by one); otherwise the value
            of a child is what getText() returns for it
        @param normalizeNames: pass child tag names through normalizeFieldName()
        @param all: collect children with the same key via putAppend();
            without it a later child replaces an earlier one
        @param allText: passed to getText() as its recursive argument
        """
        result = {}

        attributes = dict((name, elem.get(name)) for name in elem.keys())
        if attributes:
            result['@attr'] = attributes

        if elem.text:
            result['@text'] = elem.text

        for child in elem:
            key = normalizeFieldName(child.tag) if normalizeNames else child.tag

            if recursive > 0:
                # one level deeper
                value = self._getData(child, recursive=recursive-1, normalizeNames=normalizeNames, all=all, allText=allText)
            else:
                value = getText(child, recursive=allText)

            if all:
                putAppend(result, key, value)
            else:
                result[key] = value

        return result
            

    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
        """Returns dict with attributes and child elements from corresponding tag.

        @param path: file or url path to metadata file (used if no dom is given)
        @param dom: dom of metadata
        @param normalizeNames: normalize tag names
        @param all: put contents of tags with the same name in list value;
            the result is then a list with one dict per matching element
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            a list of such dicts if all is set, or None if neither path nor
            dom is given or the element is not found
        """
        if path is None and dom is None:
            return None

        elem = self.getSubDom(path=path, dom=dom, all=all)
        if elem is None:
            return None

        if all:
            # subdom is list - return list
            data = [self._getData(e, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)
                    for e in elem]
            # only the first match carries the @type; an empty list has
            # nothing to annotate (indexing it would raise IndexError)
            typed = data[0] if data else None
        else:
            # subdom is element
            data = self._getData(elem, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)
            typed = data

        if self.mappingSelectAttribute and typed is not None:
            # put normalized value of the select attribute in @type
            attr = typed.get('@attr', None)
            if attr:
                mapType = attr.get(self.mappingSelectAttribute, None)
                if mapType is not None:
                    typed['@type'] = normalizeFieldName(mapType)

        return data
    

    def getDCDataFromPath(self,path):
        """Returns DC mapped data from path to index.meta as XML string.

        The result is a <bib> element with one <dc:KEY> child per entry of
        getDCMappedData(); the values are XML-escaped.  If getData() finds no
        data for path the <bib> element is returned without children.

        @param path: file or url path to the metadata file
        """
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        data = self.getData(path=path)
        if data is None:
            # getData() returns None if the element is not found
            logging.error("getDCDataFromPath: no data for path: %s"%path)
        else:
            for key, value in self.getDCMappedData(data).items():
                parts.append("""<dc:%s>%s</dc:%s>"""%(key,xml.sax.saxutils.escape(value),key))

        parts.append("</bib>")
        return ''.join(parts)
    
    def getMapping(self, type):
        """Return the mapping object for type or None.

        The type is tried as attribute name of this object first.  If that
        gives nothing the contained objects with meta_type "MetadataMapping"
        are searched for one whose title equals the type, either literally or
        after both have been passed through normalizeFieldName().
        """
        direct = getattr(self, type, None)
        if direct is not None:
            return direct

        wanted = normalizeFieldName(type, underscore=True)
        for candidate in self.objectValues():
            # NOTE(review): literal is "MetadataMapping" - confirm it matches
            # the meta_type declared by the MetaDataMapping class
            if candidate.meta_type != "MetadataMapping":
                continue

            # the real type is in the title
            declared = candidate.title
            if declared == type or normalizeFieldName(declared, underscore=True) == wanted:
                return candidate

        return None
    
    def getMapFields(self, data):
        """Return the field descriptions of the mapping that matches data.

        The mapping is chosen by data['@type'].  The result is a copy of the
        mapping's getFields() dict with the mapping's getFieldList() added
        under '@fieldList'.  An empty dict is returned (and an error logged)
        if data has no @type or no mapping exists for it.
        """
        mapType = data.get('@type', None)
        if not mapType:
            logging.error("getMapFields: no @type!")
            return {}

        mapping = self.getMapping(mapType)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s"%mapType)
            return {}

        # work on a copy so the mapping itself stays untouched
        described = mapping.getFields().copy()
        described['@fieldList'] = mapping.getFieldList()
        return described

    def getMappedData(self, data, allFields=False):
        """Returns dict with metadata descriptions and data for data.

        For every field of the mapping's field list that has a value in data
        the field description is copied and the value added under 'value'.

        @param data: dict as returned by getData()
        @param allFields: also add entries of data that are not in the
            mapping (keys starting with '@' and empty values are skipped);
            they get a generic description with tag and label set to the key
        @returns: dict of field descriptions by key plus '@fieldList' (keys in
            the order they were added) and '@type' (type of data or None)
        """
        fields = self.getMapFields(data)
        # getMapFields() returns {} without '@fieldList' if data has no type
        # or there is no mapping for it
        fieldList = fields.get('@fieldList', None) or []
        mappedData = {}
        mappedList = []
        for bk in fieldList:
            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = fields[bk].copy()
            # add value
            bf['value'] = data[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(data):
            # add fields that were not in fields
            for bk in data:
                if bk in mappedData or not data[bk] or bk.startswith('@'):
                    continue

                mappedData[bk] = {'tag':bk, 'label':bk, 'value':data[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        mappedData['@type'] = data.get('@type',None)
        return mappedData
    
    def getDCMappedData(self, data, allFields=False):
        """Returns dict with DC keys and data from data.

        Every field description with a 'dcmap' entry contributes the value of
        its field in data under that DC key.  If several fields map to the
        same DC key their values are joined with '/'.

        @param data: dict as returned by getData()
        @param allFields: currently unused
        @returns: dict with DC key -> value
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk, description in fields.items():
            if bk == '@fieldList':
                # entry added by getMapFields(), not a field description
                continue

            # ignore descriptions without data
            value = data.get(bk, None)
            if not value:
                continue

            dc = description.get('dcmap', None)
            if not dc:
                continue

            # look up the mapped key itself (not the literal 'dc') so that
            # further values are appended instead of overwriting
            if dcData.get(dc, None):
                dcData[dc] += '/' + value
            else:
                dcData[dc] = value

        return dcData
    
    def getFormatted(self, template, path=None, dom=None, data=None, xdata=None, allFields=False):
        """Returns string with document data formatted according to template.

        Gets data from server or dom or pre-parsed data.  The template is
        looked up as attribute "<template>_<normalized type>" and, if that does
        not exist, as "<template>_generic".  It is called with the mapped
        fields (mdmap), the data (md) and xdata (mdx).

        @param template: name prefix of the template
        @param path: file or url path to metadata file (if data is None)
        @param dom: dom of metadata (if data is None)
        @param data: pre-parsed data as returned by getData()
        @param xdata: passed through to the template as mdx
        @param allFields: passed to getMappedData()
        @returns: result of the template or "" if there is no data or template
        """
        logging.debug("getFormatted(template=%s)"%(template))

        # get contents of tag
        if data is None:
            data = self.getData(path=path, dom=dom)
            if data is None:
                # no data
                logging.error("getFormatted: no data for template: %s"%(template))
                return ""

        # '@type' can be missing or None - both mean there is no type
        mapType = data.get('@type', None) or ''

        # get template
        templateId = "%s_%s"%(template, normalizeFieldName(mapType))
        tp = getattr(self, templateId, None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s!"%templateId)
            # try generic
            tp = getattr(self, "%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        if mapType:
            # put field descriptions in mdmap
            fields = self.getMappedData(data, allFields=allFields)
        else:
            fields = {}

        return tp(mdmap=fields, md=data, mdx=xdata)


    def correctPath(self,path,remove=None,prefix=None,cut=0):
        """Convenience method for modifying a path.

        The steps are applied in this order: remove, prefix, cut.

        @param path: the path to change
        @param remove: string that is removed from path (every occurrence)
        @param prefix: path that is put in front of path using os.path.join
        @param cut: number of '/'-separated components to drop from the end
        @returns: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')

        if prefix is not None:
            # note: os.path.join discards prefix if path is absolute
            path = os.path.join(prefix, path)

        if cut > 0:
            parts = path.split("/")
            # clamp at 0: a negative end index would count from the end and
            # leave components behind when cut exceeds the number of parts
            keep = max(len(parts) - cut, 0)
            path = "/".join(parts[:keep])

        return path

    
    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
        """Imports metadata from the metadataexportxml file.

        Without importFile the import form is rendered and returned.
        Otherwise the file is parsed and the mappings are created from its
        metadataExport element (the root element or a direct child of it).

        @param importFile: file (or file name) with the XML to import
        @param RESPONSE: if given, redirected to manage_main after the import
        @raises ValueError: if the file contains no metadataExport element
        """
        if importFile is None:
            pt=PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")

        # compare with None explicitly: an element without children is false
        if node is None:
            logging.error("importMetaDataExportXML: no metadataExport element found")
            raise ValueError("import file contains no metadataExport element")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

   
    def createMappingFromDom(self,metadatanode,metadata=None):
        """Create mappings from a metadata node of the export XML format.

        Every <set> child of the node becomes a MetaDataMapping and every
        <metadata> child becomes a nested MetaData object which is then filled
        from its own children in the same way.

        @param metadatanode: element whose children are processed
        @param metadata: metadata object that receives the new objects
            (default: this object)
        """
        target = self if metadata is None else metadata

        for node in metadatanode:
            logging.debug("node: %s"%repr(node))
            if node.tag == "set":
                mappingId = node.get('name')
                # the generic mapping doesn't have labels
                isGeneric = (mappingId == 'generic')
                argList = {}
                for entry in node:
                    genericName = entry.get('genericName')
                    if isGeneric:
                        tag = label = genericName
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    explanation = getText(entry)
                    argList[tag] = {'tag':tag,'label':label,'explanation':explanation,'status':'optional'}

                logging.debug("createMappingFromDom: new mapping=%s"%repr(argList))
                target._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
                target._setObject(name, MetaData(name, name))
                # fetch the stored object and let it import its own children
                getattr(target, name).createMappingFromDom(node)
    
    
    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())
    
    security.declarePublic('changeMetaData')
    def changeMetaData(self,title=None,shortDescription=None,description=None,mappingSelectAttribute=None,fields=None,metaDataServerUrl=None,RESPONSE=None):
        """Store new configuration values on this object.

        All values are stored as given (including None); only fieldList is
        left unchanged if fields is empty.

        @param title: tag name of the element
        @param shortDescription: short description / label
        @param description: description text
        @param mappingSelectAttribute: name of the attribute that selects the mapping
        @param fields: comma-separated field names
        @param metaDataServerUrl: stored as metaDataServerUrl
        @param RESPONSE: if given, redirected to manage_main
        """
        self.title = title
        self.shortDescription = shortDescription
        self.description = description
        self.mappingSelectAttribute = mappingSelectAttribute
        if fields:
            self.fieldList = fields.split(",")

        self.metaDataServerUrl = metaDataServerUrl

        if RESPONSE is None:
            return

        RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Render the form for adding a MetaData object."""
    form = PageTemplateFile('zpt/addMetadataForm', globals())
    return form.__of__(self)()

def manage_addMetaData(self,id,title=None,shortDescription=None,description=None,fields=None,RESPONSE=None):
    """Create a MetaData object and add it to self.Destination().

    The arguments are handed to the MetaData constructor unchanged.  If a
    RESPONSE is given it is redirected to manage_main afterwards.
    """
    metadata = MetaData(
        id,
        title=title,
        shortDescription=shortDescription,
        description=description,
        fields=fields,
    )
    self.Destination()._setObject(id, metadata)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')