# MetaData.py @ 13:5f48f956ffa3
#
# more resistant to empty data
# author: casties
# date: Fri, 29 Jul 2011 20:35:10 +0200
# parents: 7f0e2b656e5c
# children: 281d223aa361

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import logging

import xml.etree.ElementTree as ET

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getHttpData, getText

def normalizeFieldName(bt, underscore=True):
    """Return the normalised form of a field or type name for mapping lookups.

    Surrounding whitespace is stripped, every remaining space becomes a dash
    and the result is lower-cased. With underscore=True (the default)
    underscores are turned into dashes as well.
    """
    name = bt.strip()
    name = name.replace(' ', '-')
    name = name.lower()
    if not underscore:
        return name

    return name.replace('_', '-')


class MetaData(Folder):
    """Folder that represents one element of a metadata structure.

    The title holds the XML tag name of the element. Nested MetaData objects
    describe nested elements, and the mapping objects stored inside are
    looked up by type (see getMapping).
    """
    # getXmlPath compares a parent's meta_type against this value to decide
    # whether the parent is another MetaData element
    meta_type='MetaData'
    security=ClassSecurityInfo()
    # management tabs: the standard Folder tabs plus the config form and the XML import
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataForm'},
        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )
    
    # getData reads the XML attribute with this name and stores its normalised
    # value as '@type'; None disables that step. Set through changeMetaData.
    mappingSelectAttribute = None
    """the name of the attribute that can be used to select a mapping (if applicable)"""
    
    def __init__(self,id,title=None,shortDescription='',description='',fields=''):
        """Set up a new metadata element.

        id -- object id; also used as the tag name when no title is given
        title -- XML tag name of this element
        shortDescription -- label for the link on the add page
        description -- description of the method for the link page
        fields -- comma-separated field names; fieldList is only assigned
                  when this is non-empty
        """
        self.id = id
        # the title is the tag name; without a title the id is assumed to be it
        self.title = title or id
        self.shortDescription = shortDescription
        self.description = description
        if fields:
            self.fieldList = fields.split(",")
        # metaDataServerUrl is not initialised here; it has to be set with changeMetaData
    
    
    def getFieldList(self):
        """Return the field names of this element as one comma-separated string.

        fieldList is only assigned when a non-empty field string was passed
        to the constructor or to changeMetaData, so a missing (or empty) list
        yields an empty string instead of an AttributeError.
        """
        fieldList = getattr(self, 'fieldList', None)
        if not fieldList:
            return ''

        return ','.join(fieldList)
    
    def getTagName(self):
        """Return the tag name of this element.

        This is the title, or the short description when the title is empty.
        """
        return self.title or self.shortDescription
    
    def getXmlPath(self, omitRoot=False):
        """Return the path to this element as '/tag/tag/...'.

        Parents are included as long as they have the same meta_type as this
        object. With omitRoot=True the outermost such element contributes an
        empty string, so the result is relative to the root element.
        """
        ownStep = '/%s'%self.getTagName()
        container = self.aq_parent
        if container.meta_type == self.meta_type:
            # the parent is a metadata element too: prepend its path
            return container.getXmlPath(omitRoot=omitRoot) + ownStep

        if omitRoot:
            return ''

        return ownStep
    
    def getSubDom(self, path=None, dom=None):
        """Return the subtree of the dom rooted in this element.

        path -- location handed to getDomFromPathOrUrl when no dom is given
        dom -- parsed XML offering an ElementTree-style find(); assumed to be
               rooted in the outermost metadata element

        Returns the matching element, or None when no dom could be obtained
        or the element is not present.
        """
        if dom is None:
            # get from server
            # NOTE(review): getDomFromPathOrUrl is not defined in this class;
            # presumably it is inherited or acquired -- verify
            dom = self.getDomFromPathOrUrl(path)
            if dom is None:
                logging.error("getSubDom: no dom for path: %s"%path)
                return None

        # ElementTree doesn't like absolute paths
        # lets assume dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s"%(xpath, dom))
        return dom.find(xpath)
        
    def getData(self, path=None, dom=None, normalizeNames=True, recursive=0):
        """Return a dict with the attributes and child elements of this element's tag.

        path, dom -- passed on to getSubDom; if both are None the result is None
        normalizeNames -- store children under normalizeFieldName(tag) instead of the raw tag
        recursive -- passed on to getText for every child element

        The result is empty when the element is not found. Otherwise it holds
        '@attr' (dict of the element's XML attributes), '@type' (normalised
        value of the mappingSelectAttribute attribute, only if that is
        configured and present) and one entry per child element.
        """
        if path is None and dom is None:
            return None

        data = {}
        elem = self.getSubDom(path=path, dom=dom)
        if elem is None:
            return data

        # put attributes in @attr
        attr = dict((attname, elem.get(attname)) for attname in elem.keys())
        data['@attr'] = attr

        selector = self.mappingSelectAttribute
        if selector:
            # put type in @type
            typeName = attr.get(selector, None)
            if typeName is not None:
                data['@type'] = normalizeFieldName(typeName)

        # put all subelements in dict
        for child in elem:
            key = child.tag
            if normalizeNames:
                key = normalizeFieldName(key)
            data[key] = getText(child, recursive=recursive)

        return data

    def getMapping(self, type):
        """Return the mapping object for type, or None if there is none.

        The type is first tried as an attribute name of this object. If that
        fails, the contained objects with meta_type "MetadataMapping" are
        searched for one whose title equals the type, either literally or
        after normalisation with normalizeFieldName.
        """
        # try type as id
        found = getattr(self, type, None)
        if found is not None:
            return found

        # try manually
        for candidate in self.objectValues():
            if candidate.meta_type != "MetadataMapping":
                continue

            # real type is in title
            mapType = candidate.title
            # try type as is, then the normalized type without underscore
            if mapType == type or normalizeFieldName(mapType, underscore=True) == normalizeFieldName(type, underscore=True):
                return candidate

        return None
    
    def getMapFields(self, data):
        """Return the field descriptions of the mapping that matches data['@type'].

        The result is a copy of mapping.getFields() with the mapping's field
        list added under '@fieldList'. If data has no '@type', or no mapping
        exists for it, an error is logged and an empty dict is returned.
        """
        typeName = data.get('@type', None)
        if not typeName:
            logging.error("getMapFields: no @type!")
            return {}

        # get mapping from main/meta/bib
        mapping = self.getMapping(typeName)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s"%typeName)
            return {}

        # work on a copy so the mapping's own field dict stays untouched
        described = mapping.getFields().copy()
        described['@fieldList'] = mapping.getFieldList()
        return described

    def getMappedData(self, data, allFields=False):
        """Return a dict with field descriptions and values for data.

        data -- dict as returned by getData
        allFields -- also include non-empty entries of data that have no
                     field description, using the key as tag and label

        Every entry is a copy of the field description with the value added
        under 'value'; entries without data are left out. '@fieldList' holds
        the keys of the result in order. When no mapping is available for
        data (getMapFields returns an empty dict) no mapped fields are
        produced instead of raising a KeyError.
        """
        fields = self.getMapFields(data)
        # getMapFields has no '@fieldList' when the type or mapping is missing
        fieldList = fields.get('@fieldList', [])
        mappedData = {}
        mappedList = []
        for bk in fieldList:
            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = fields[bk].copy()
            # add value
            bf['value'] = data[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(data):
            # add fields that were not in fields
            for bk in data.keys():
                if bk in mappedData or not data[bk]:
                    continue

                mappedData[bk] = {'tag':bk, 'label':bk, 'value':data[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        return mappedData
    
    def getDCMappedData(self, data, allFields=False):
        """Return a dict with DC keys and the values from data.

        data -- dict as returned by getData
        allFields -- currently unused

        For every described field that has data and a 'dcmap' entry, the
        value is stored under that DC key. Values of several fields that map
        to the same DC key are joined with '/'.
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk in fields.keys():
            if bk == '@fieldList':
                # the field list added by getMapFields is not a field description
                continue

            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description
            dc = fields[bk].get('dcmap', None)
            if not dc:
                continue

            # look up the mapped key itself (not the literal 'dc') to decide
            # whether to append to an existing value
            if dcData.get(dc, None):
                # key exists - append
                dcData[dc] += '/' + data[bk]
            else:
                dcData[dc] = data[bk]

        return dcData
    
    def getFormatted(self, template, path=None, dom=None, data=None, allFields=False):
        """Return the document data formatted according to template.

        template -- name prefix of the template; "<template>_<type>" is tried
                    first, then "<template>_generic"
        path, dom -- passed to getData when no data is given
        data -- pre-parsed data as returned by getData
        allFields -- passed on to getMappedData

        Returns an empty string if there is no data or no template. The
        template is called with mdmap (result of getMappedData) and md (data).
        """
        logging.debug("getFormatted(template=%s)"%(template))

        # get contents of tag
        if data is None:
            data = self.getData(path=path, dom=dom)

        if data is None:
            # no data
            logging.error("getFormatted: no data for template: %s"%(template))
            return ""

        typeName = data.get('@type', '')

        # get the type-specific template
        tp = getattr(self, "%s_%s"%(template, normalizeFieldName(typeName)), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s"%(template, typeName))
            # try generic
            tp = getattr(self, "%s_generic"%(template), None)

        if tp is None:
            logging.error("getFormatted: no generic template either: %s"%(template))
            return ""

        # field descriptions together with their values
        described = self.getMappedData(data, allFields=allFields)
        return tp(mdmap=described, md=data)


                
    def correctPath(self,path,remove=None,prefix=None,cut=0):
        """Convenience method for modifying a path.

        path -- the path to change
        remove -- substring that is removed from path (every occurrence)
        prefix -- path that is joined in front of path with os.path.join
        cut -- number of trailing "/"-separated components to drop; a value
               larger than the number of components leaves an empty path

        The steps are applied in the order remove, prefix, cut.
        """
        # the module does not import os at file level, so it is imported here
        import os

        if remove is not None:
            path = path.replace(remove,'')
        if prefix is not None:
            path = os.path.join(prefix,path)

        if cut > 0:
            parts = path.split("/")
            # clamp at 0 so an oversized cut cannot turn into a negative slice end
            keep = max(len(parts) - cut, 0)
            path = "/".join(parts[:keep])
        return path

    
    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
        """Import metadata definitions from a metadata export XML file.

        importFile -- file (or file name) parsed with ElementTree; when it is
                      None the upload form zpt/importMetaDataExportXML is
                      rendered and returned instead
        RESPONSE -- if given, redirected to manage_main after the import

        Raises ValueError when the document contains no metadataExport
        element (as root or as a child of the root).
        """
        if importFile is None:
            pt=PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom=ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")
            if node is None:
                # fail with a clear message instead of a TypeError further down
                raise ValueError("importMetaDataExportXML: no metadataExport element found")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

   
    def createMappingFromDom(self,metadatanode,metadata=None):
        """Create mappings from the metadata node of the XML format.

        metadatanode -- element whose children are processed
        metadata -- metadata object that receives the new objects (default: self)

        Every "set" child becomes a MetaDataMapping named after its name
        attribute; every "metadata" child becomes a nested MetaData object
        that is then filled recursively from that child.
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s"%repr(node))
            if node.tag == "set":
                setName = node.get('name')
                # the generic mapping doesn't have labels: its generic name
                # serves as both tag and label
                isGeneric = (setName == 'generic')
                argList = {}
                for entry in node:
                    if isGeneric:
                        tag = label = entry.get('genericName')
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    description = getText(entry)
                    argList[tag] = {'tag':tag,'label':label,'explanation':description,'status':'optional'}

                logging.debug("createMappingFromDom: new mapping=%s"%repr(argList))
                metadata._setObject(setName, MetaDataMapping(setName, setName, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
                metadata._setObject(name, MetaData(name, name))
                # fetch the stored object again and let it import its own children
                child = getattr(metadata, name)
                child.createMappingFromDom(node)
    
    
    # form behind the 'Main Config' management tab, rendered from zpt/changeMetadata
    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())
    
    # NOTE(review): changeMetaData modifies this object but is declared public;
    # presumably it should require a management permission -- verify
    security.declarePublic('changeMetaData')
    def changeMetaData(self,title=None,shortDescription=None,description=None,mappingSelectAttribute=None,fields=None,metaDataServerUrl=None,RESPONSE=None):
        """Store new configuration values on this metadata element.

        title, shortDescription, description, mappingSelectAttribute and
        metaDataServerUrl are stored as given (including None). fieldList is
        only replaced when fields is a non-empty comma-separated string.
        If a RESPONSE is given it is redirected to manage_main.
        """
        settings = (
            ('title', title),
            ('shortDescription', shortDescription),
            ('description', description),
            ('mappingSelectAttribute', mappingSelectAttribute),
        )
        for name, value in settings:
            setattr(self, name, value)

        if fields:
            self.fieldList = fields.split(",")

        self.metaDataServerUrl = metaDataServerUrl

        if RESPONSE is None:
            return

        RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Render the form for adding a MetaData object (zpt/addMetadataForm)."""
    addForm = PageTemplateFile('zpt/addMetadataForm', globals())
    return addForm.__of__(self)()

def manage_addMetaData(self,id,title=None,shortDescription=None,description=None,fields=None,RESPONSE=None):
    """Create a MetaData object and store it under id in self.Destination().

    title, shortDescription, description and fields are handed to the
    MetaData constructor. If a RESPONSE is given it is redirected to
    manage_main.
    """
    metaData = MetaData(id, title, shortDescription, description, fields)
    container = self.Destination()
    container._setObject(id, metaData)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')