# view MetaData.py @ 38:67115536b7ec default tip
#
# DC as JSON export added
# author: dwinter
# date: Thu, 22 May 2014 12:09:20 +0200
# parents: f6a8055f6798
# children:
# line wrap: on
# line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
import logging

import xml.etree.ElementTree as ET
import xml.sax.saxutils

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getText

def normalizeFieldName(bt, underscore=True):
    """Return the normalised form of a field/type name used for mapping lookups.

    Surrounding whitespace is stripped, inner spaces become '-' and the
    result is lower-cased.

    @param bt: the raw field or type name (a string)
    @param underscore: if true, '_' is replaced by '-' as well
    @returns: the normalised name
    """
    normalized = bt.strip()
    normalized = normalized.replace(' ', '-')
    normalized = normalized.lower()
    return normalized.replace('_', '-') if underscore else normalized

def putAppend(hash, key, value):
    """Store value under key in dict hash, collecting repeated keys in a list.

    - key not present: hash[key] = value
    - key present and already holding a list: value is appended to that list
    - key present with any other value: hash[key] becomes [old value, value]

    @param hash: the dict to modify (changed in place)
    @param key: the key to set
    @param value: the value to store or append
    @returns: the same dict object that was passed in
    """
    if key not in hash:
        # first value for this key - store it as is
        hash[key] = value
        return hash

    existing = hash[key]
    if isinstance(existing, list):
        # already collecting values - just extend the list in place
        existing.append(value)
    else:
        # second value for this key - promote to a list
        hash[key] = [existing, value]

    return hash


class MetaData(Folder):
    """Folder that represents one element (tag) of a metadata XML structure.

    The object's title is used as the tag name. Instances can be nested;
    the nesting is used to build the element path (see getXmlPath).
    The methods read data from a DOM, look up mapping objects contained
    in this folder and format the data with templates.
    """
    # type name of this class; also used to recognise parents of the same kind
    meta_type='MetaData'
    # collects the security declarations made further down in the class body
    security=ClassSecurityInfo()
    # management tabs: the inherited Folder tabs plus two tabs of this class
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataForm'},
        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )
    
    # class-level default; instances may override it via changeMetaData()
    mappingSelectAttribute = None
    """the name of the attribute that can be used to select a mapping (if applicable)"""
    
    def __init__(self,id,title=None,shortDescription='',description='',fields=''):
        """Set up a new metadata element.

        @param id: object id
        @param title: tag name of the element; the id is used if empty
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated field names; fieldList is only set
            when this is non-empty
        """
        self.id = id
        # the title doubles as the tag name; fall back to the id
        self.title = title if title else id
        self.shortDescription = shortDescription
        self.description = description
        if fields:
            self.fieldList = fields.split(",")
    
    
    def getFieldList(self):
        """Return the field names as one comma separated string.

        fieldList is only set when the object was created or changed with a
        non-empty field string, so a missing (or empty) list yields ''
        instead of raising AttributeError.
        """
        fieldList = getattr(self, 'fieldList', None)
        if not fieldList:
            return ''

        return ','.join(fieldList)
    
    def getTagName(self):
        """Return the tag name of this element.

        This is the title, or the shortDescription if the title is empty.
        """
        return self.title or self.shortDescription
    
    def getXmlPath(self, omitRoot=False):
        """Return the element path ('/a/b/c') leading to this element.

        The path is built from the tag names of this object and of all
        directly enclosing parents with the same meta_type.

        @param omitRoot: if true, the topmost element (the one whose parent
            has a different meta_type) is left out of the path
        @returns: the path string; '' for the topmost element if omitRoot is set
        """
        ownStep = '/%s'%self.getTagName()
        parent = self.aq_parent
        if parent.meta_type != self.meta_type:
            # this is the topmost metadata element
            return '' if omitRoot else ownStep

        # nested element - prepend the path of the parent
        return parent.getXmlPath(omitRoot=omitRoot) + ownStep
    
    def getSubDom(self, path=None, dom=None, all=False):
        """Return the part of the dom that corresponds to this element.

        @param path: passed to getDomFromPathOrUrl() when no dom is given
        @param dom: object with find()/findall() methods - presumably an
            ElementTree element or tree, TODO confirm
        @param all: if true return the result of findall() (all matches),
            otherwise the result of find() (first match)
        @returns: whatever find()/findall() returns for the element path
        """
        if dom is None:
            # no dom given - load it
            dom = self.getDomFromPathOrUrl(path)

        # ElementTree doesn't like absolute paths, so the path is made
        # relative, assuming the dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s"%(xpath, dom))
        lookup = dom.findall if all else dom.find
        return lookup(xpath)
        

    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
        """Convert one element into a dict (helper for getData()).

        The dict holds the attributes under '@attr' (only if there are any),
        the element's text under '@text' (only if non-empty) and one entry
        per child element keyed by the child's tag.

        @param elem: the element to convert
        @param recursive: remaining recursion depth; while > 0 children are
            converted with _getData(), otherwise with getText()
        @param normalizeNames: run child tags through normalizeFieldName()
        @param all: collect children with the same key via putAppend();
            otherwise the first child with a given key wins
        @param allText: passed to getText() as its recursive argument
        @returns: the dict described above
        """
        result = {}
        attributes = dict((name, elem.get(name)) for name in elem.keys())
        if attributes:
            result['@attr'] = attributes

        if elem.text:
            result['@text'] = elem.text

        descend = recursive > 0
        for child in elem:
            key = normalizeFieldName(child.tag) if normalizeNames else child.tag

            if descend:
                # one level deeper - children become dicts themselves
                value = self._getData(child, recursive=recursive-1, normalizeNames=normalizeNames, all=all, allText=allText)
            else:
                value = getText(child, recursive=allText)

            if all:
                # repeated tags end up in a list
                putAppend(result, key, value)
            elif key not in result:
                # keep the first value, never overwrite existing data
                result[key] = value

        return result
            

    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
        """Returns dict with attributes and child elements from corresponding tag.
        
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param normalizeNames: normalize tag names
        @param all: put contents of tags with the same name in list value;
            the result is then a list with one dict per matching element
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            a list of such dicts if all is set (possibly empty), or None if
            neither path nor dom is given or the element is not found
        """
        if path is None and dom is None:
            return None
        
        elem = self.getSubDom(path=path, dom=dom, all=all)
        if elem is None:
            return None
        
        if all:
            # subdom is list - return list
            data = [self._getData(e, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)
                    for e in elem]
            # only the first match carries the @type; there may be no match at all
            typed = data[0] if data else None
        else:
            # subdom is element
            data = self._getData(elem, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)
            typed = data

        if self.mappingSelectAttribute and typed is not None:
            # put normalized value of the select attribute in @type
            attr = typed.get('@attr', None)
            if attr:
                selected = attr.get(self.mappingSelectAttribute, None)
                if selected is not None:
                    typed['@type'] = normalizeFieldName(selected)
                
        return data
    

    def getDCDataFromPath(self,path):
        """Return the DC mapped data for the metadata at path as an XML string.

        The result is a <bib> element with one <dc:KEY> child per entry of
        getDCMappedData(); the values are XML-escaped. If no data can be read
        an error is logged and an empty <bib> element is returned.
        If a REQUEST is available, CORS headers are set on its response.

        @param path: path to the metadata (index.meta), passed to getData()
        @returns: the XML string
        """
        data = self.getData(path=path)
        if data is None:
            # getData() found nothing - don't crash in getDCMappedData()
            logging.error("getDCDataFromPath: no data for path: %s"%path)
            dcData = {}
        else:
            dcData = self.getDCMappedData(data)

        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        for key, value in dcData.items():
            parts.append("""<dc:%s>%s</dc:%s>"""%(key,xml.sax.saxutils.escape(value),key))
        parts.append("</bib>")
        ret = "".join(parts)

        REQUEST = getattr(self, 'REQUEST', None)

        if REQUEST is not None:
            # NOTE(review): echoing the Origin header together with
            # Allow-Credentials allows credentialed requests from any site -
            # confirm that this is intended.
            origin = REQUEST.getHeader('Origin')
            if origin:
                REQUEST.response.setHeader('Access-Control-Allow-Origin', origin)
                REQUEST.response.setHeader('Access-Control-Allow-Credentials', 'true')
            else:
                REQUEST.response.setHeader('Access-Control-Allow-Origin', '*')

            REQUEST.response.setHeader('Access-Control-Allow-Methods', 'GET, PUT, POST, DELETE, OPTIONS, HEAD')
            REQUEST.response.setHeader('Access-Control-Allow-Headers', 'Origin, Content-Type, Cache-Control, X-Requested-With')
        
        return ret
    
    def getMapping(self, type):
        """Return the mapping object for type, or None if there is none.

        First an attribute of this object named like type is tried. If that
        fails, the contained objects with meta_type "MetadataMapping" are
        searched for one whose title equals type, either literally or after
        normalizeFieldName() has been applied to both.

        @param type: the mapping type name
        @returns: the mapping object or None
        """
        # try type as id
        mapping = getattr(self, type, None)
        if mapping is not None:
            return mapping

        # no attribute of that name - search the contained mappings
        for obj in self.objectValues():
            if obj.meta_type != "MetadataMapping":
                continue

            # real type is in title
            mapType = obj.title
            if mapType == type:
                # literal match
                return obj

            if normalizeFieldName(mapType, underscore=True) == normalizeFieldName(type, underscore=True):
                # match after normalization (underscores are replaced too)
                return obj

        return None
    
    def getMapFields(self, data):
        """Return the field descriptions of the mapping that matches data.

        The mapping is selected by data['@type']. The result is a copy of the
        mapping's getFields() with the mapping's getFieldList() added under
        '@fieldList'. If data has no @type or no mapping exists for it, an
        error is logged and an empty dict is returned.

        @param data: dict as returned by getData()
        @returns: dict with field descriptions
        """
        mdType = data.get('@type', None)
        if not mdType:
            logging.error("getMapFields: no @type!")
            return {}

        # get mapping from main/meta/bib
        mapping = self.getMapping(mdType)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s"%mdType)
            return {}

        # work on a copy so the mapping itself is not modified
        described = mapping.getFields().copy()
        described['@fieldList'] = mapping.getFieldList()
        return described

    def getMappedData(self, data, allFields=False):
        """Return the field descriptions for data, each extended by its value.

        For every name in the mapping's field list that has a non-empty value
        in data, the result holds a copy of the field description with the
        value added under 'value'. The names are also listed, in order, under
        '@fieldList'; '@type' is copied from data.

        @param data: dict as returned by getData()
        @param allFields: if true, non-empty entries of data that are not in
            the mapping (and whose key doesn't start with '@') are added with
            a generated description
        @returns: dict as described above
        """
        fields = self.getMapFields(data)
        mappedData = {}
        mappedList = []
        for name in fields.get('@fieldList', []):
            value = data.get(name, None)
            if value:
                # copy the description so the mapping stays untouched
                entry = fields[name].copy()
                entry['value'] = value
                mappedData[name] = entry
                mappedList.append(name)

        if allFields and len(mappedData) < len(data):
            # add data that has no description in the mapping
            for name, value in data.items():
                if name not in mappedData and value and name[0] != '@':
                    mappedData[name] = {'tag':name, 'label':name, 'value':value}
                    mappedList.append(name)

        mappedData['@fieldList'] = mappedList
        mappedData['@type'] = data.get('@type', None)
        return mappedData
    
    def getDCMappedData(self, data, allFields=False):
        """Return a dict with DC keys and the corresponding values from data.

        Every field of the matching mapping that has a 'dcmap' entry and a
        non-empty value in data contributes its value under the DC key.
        Values of several fields mapped to the same DC key are joined
        with '/'.

        @param data: dict as returned by getData()
        @param allFields: not used by this method
        @returns: dict DC key -> value
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk in fields.keys():
            # ignore descriptions without data
            value = data.get(bk, None)
            if not value:
                continue
            
            # field description
            dc = fields[bk].get('dcmap', None)
            if not dc:
                continue

            # look up the actual DC key (not the literal string 'dc')
            if dcData.get(dc, None):
                # key exists - append
                dcData[dc] += '/' + value
            else:
                dcData[dc] = value
                    
        return dcData
    
    def getFormatted(self, template, path=None, dom=None, data=None, xdata=None, allFields=False):
        """Return the document data formatted by a template.

        The data is taken from the data argument or, if that is None, read
        with getData(path, dom). The template is the attribute named
        "<template>_<normalized @type>", with "<template>_generic" as
        fallback. It is called with mdmap (field descriptions from
        getMappedData(), {} if there is no @type), md (the data) and
        mdx (xdata).

        @returns: the result of the template call, or "" if there is no data
            or no template
        """
        logging.debug("getFormatted(template=%s, allFields=%s)"%(template,allFields))

        if data is None:
            # get contents of tag
            data = self.getData(path=path, dom=dom)
            if data is None:
                logging.error("getFormatted: no data for template: %s"%(template))
                return ""

        mdType = data.get('@type', '')
        normType = normalizeFieldName(mdType)

        # type specific template first
        tp = getattr(self, "%s_%s"%(template, normType), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s!"%(template, normType))
            # fall back to the generic template
            tp = getattr(self, "%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        # field descriptions are only available when the type is known
        fields = self.getMappedData(data, allFields=allFields) if mdType else {}

        return tp(mdmap=fields, md=data, mdx=xdata)


    def correctPath(self,path,remove=None,prefix=None,cut=0):
        """Convenience method to modify a path.

        The steps are applied in this order:

        @param path: the path string to modify
        @param remove: substring that is removed from path (all occurrences)
        @param prefix: string that is put in front of path, joined with '/'
        @param cut: number of trailing '/'-separated segments to drop; if it
            is at least the number of segments the result is ''
        @returns: the modified path
        """
        if remove is not None:
            path = path.replace(remove,'')
        if prefix is not None:
            path = "%s/%s"%(prefix,path)

        if cut > 0:
            segments = path.split("/")
            # clamp at 0: a negative slice end would count from the end and
            # return a wrong partial path instead of nothing
            keep = max(len(segments) - cut, 0)
            path = "/".join(segments[:keep])
        return path

    
    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
        """Import mappings from a metadataExport XML file.

        Without importFile the upload form is rendered and returned.
        Otherwise the file is parsed and the metadataExport element (the root
        element or a child of it) is passed to createMappingFromDom().

        @param importFile: file name or file object with the XML
        @param RESPONSE: if given, redirects to manage_main afterwards
        @raises ValueError: if the file contains no metadataExport element
        """
        if importFile is None:
            pt=PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()
        
        dom=ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")
            if node is None:
                # find() returns None when nothing matches - fail with a
                # clear message instead of a TypeError further down
                logging.error("importMetaDataExportXML: no metadataExport element found")
                raise ValueError("importMetaDataExportXML: no metadataExport element in import file")
            
        self.createMappingFromDom(node)
        
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

   
    def createMappingFromDom(self,metadatanode,metadata=None):
        """Create mapping objects from the children of metadatanode.

        For every "set" child a MetaDataMapping is created from its entries;
        for every "metadata" child a nested MetaData object is created and
        filled recursively.

        @param metadatanode: element of the XML format whose children are
            processed
        @param metadata: the object that receives the new objects
            (default: self)
        """
        target = self if metadata is None else metadata

        for node in metadatanode:
            logging.debug("node: %s"%repr(node))
            if node.tag == "set":
                setName = node.get('name')
                # the generic mapping has no tags/labels of its own
                isGeneric = setName == 'generic'
                argList = {}
                for entry in node:
                    genericName = entry.get('genericName')
                    if isGeneric:
                        tag = label = genericName
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if tag:
                        # entries without tag are ignored
                        argList[tag] = {'tag':tag,'label':label,'explanation':getText(entry),'status':'optional'}

                logging.debug("createMappingFromDom: new mapping=%s"%repr(argList))
                target._setObject(setName,MetaDataMapping(setName,setName,argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
                target._setObject(name,MetaData(name,name))
                # fetch the stored object and let it import its own children
                getattr(target,name).createMappingFromDom(node)
    
    
    # form behind the 'changeMetaDataForm' action; declared public
    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())
    
    # NOTE(review): changeMetaData modifies the configuration of this object
    # but is declared public - confirm that no permission check is intended.
    security.declarePublic('changeMetaData')
    def changeMetaData(self,title=None,shortDescription=None,description=None,mappingSelectAttribute=None,fields=None,metaDataServerUrl=None,RESPONSE=None):
        """Change the configuration of this metadata element.

        All given values are stored as they are (including None); fieldList
        is only replaced when fields is non-empty.

        @param title: tag name of the element
        @param shortDescription: short description
        @param description: description
        @param mappingSelectAttribute: name of the attribute that selects the mapping
        @param fields: comma separated field names
        @param metaDataServerUrl: stored as metaDataServerUrl
        @param RESPONSE: if given, redirects to manage_main afterwards
        """
        self.title = title
        self.shortDescription = shortDescription
        self.description = description
        self.mappingSelectAttribute = mappingSelectAttribute
        self.metaDataServerUrl = metaDataServerUrl
        if fields:
            self.fieldList = fields.split(",")

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Render and return the form for adding a MetaData object."""
    addForm = PageTemplateFile('zpt/addMetadataForm', globals())
    return addForm.__of__(self)()

def manage_addMetaData(self,id,title=None,shortDescription=None,description=None,fields=None,RESPONSE=None):
    """Create a MetaData object and add it to the destination container.

    The arguments are passed on to the MetaData constructor. If RESPONSE is
    given, it redirects to manage_main afterwards.
    """
    container = self.Destination()
    container._setObject(
        id,
        MetaData(id,title=title,shortDescription=shortDescription,description=description,fields=fields))
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')