view MetaDataFolder.py @ 38:67115536b7ec default tip

DC as JSON export added
author dwinter
date Thu, 22 May 2014 12:09:20 +0200
parents e231cff8688b
children
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urlparse
import logging

import xml.etree.ElementTree as ET

from MetaDataMapping import MetaDataMapping
from MetaData import MetaData
from SrvTxtUtils import getHttpData, getText

def normalizeBibField(bt, underscore=True):
    """returns normalised bib type for looking up mappings.

    Surrounding whitespace is removed, blanks become '-' and the result is
    lower-cased.

    @param bt: bib type name
    @param underscore: if true, '_' is replaced by '-' as well
    @returns: normalised name
    """
    trimmed = bt.strip()
    hyphenated = trimmed.replace(' ', '-')
    normalised = hyphenated.lower()
    return normalised.replace('_', '-') if underscore else normalised

def toString(list):
    """returns the unicode representations of all items in list, concatenated.

    @param list: iterable of objects
    @returns: unicode string (empty if list is empty)
    """
    return u"".join(unicode(item) for item in list)

class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures.

    Generic access goes through getXmlPathData/getXmlPathFormatted, which
    traverse to the object found at an xml path below this folder.
    The bib-specific methods access self.resource.meta.bib directly.
    """
    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self, id, title='', metaDataServerUrl=None):
        """initialize a new instance.

        @param id: id of the folder
        @param title: title of the folder
        @param metaDataServerUrl: overrides the class default if not empty
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self, path):
        """returns contents of metadata file from server as text.

        @param path: URL (anything with a scheme is fetched unchanged) or
            file path. In a file path '/mpiwg/online/' is removed and
            'index.meta' is appended unless the path already ends with it;
            the result is inserted into metaDataServerUrl.
        @returns: result of getHttpData or None if path is empty or the
            request failed
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        if urlparse.urlparse(path)[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl % path
            if not path.endswith("index.meta"):
                # join with '/' by hand: os.path.join would use the
                # separator of the local platform, which is wrong for URLs
                if not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        try:
            return getHttpData(url)
        except Exception as e:
            # best effort: report the failure and let the caller see None
            logging.error("getMDFromPathOrUrl: unable to get data! (url=%s error=%s)", url, e)

        return None

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path.

        @param path: file or url path, see getMDFromPathOrUrl
        @returns: ElementTree element or None if no data could be read.
            Errors raised by the XML parser are not caught here.
        """
        data = self.getMDFromPathOrUrl(path)
        if data:
            return ET.fromstring(data)

        return None

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath.

        @param xmlpath: xml path, one leading '/' is ignored
        @returns: object found by restrictedTraverse or None if the path is
            empty or nothing was found
        """
        if not xmlpath:
            # indexing an empty path would raise an IndexError
            return None

        # make xmlpath relative for Zope
        if xmlpath[0] == '/':
            xmlpath = xmlpath[1:]
            if not xmlpath:
                return None

        return self.restrictedTraverse(xmlpath, None)

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict.

        @param xmlpath: xml path to selected elements
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag)
            or None if no object was found at xmlpath
        """
        logging.debug("getXmlPathData(%s)" % xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!" % xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath formatted with template.

        @param xmlpath: xml path to selected elements
        @param template: name of template for data
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param data: data to format; read with getData if None
        @param allFields: passed on to getFormatted of the object at xmlpath
        @param all: data is a list of elements; each one is formatted and
            followed by a newline
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: formatted data or '' if no object or no data was found
        """
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)" % (xmlpath, template))
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!" % xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate formatted strings
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data)

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom, recursive=recursive, all=all)

    def getTexttoolData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom, recursive=recursive, all=all)

    def getAccessData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of attribution tag (list of dicts if all is true)"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, recursive=0, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag (list of dicts if all is true)"""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """returns formatted contents of copyright tag"""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of context tag (list of dicts if all is true)"""
        # only the direct children are read by default (recursive=0)
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, recursive=recursive, all=all)

    def getDRI(self, path=None, dom=None, type="escidoc"):
        """returns the DRI of the document.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param type: value of the type attribute of the dri tag to use.
            None takes the first dri tag.
        @returns: text of the selected dri tag or None if there is none
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        if dris is None:
            return None

        for dri in dris:
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None

    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def _loadJsonBibData(self, bibdata):
        """returns bibdata decoded from JSON.

        @param bibdata: JSON text or None
        @returns: decoded bibdata. If bibdata is None a dict copy of
            REQUEST.form is returned instead.
        """
        # imported locally like in the JSON methods of this class
        import json

        if bibdata is None:
            # REQUEST.form is a mapping, not JSON text; passing it to
            # json.loads would raise a TypeError.
            # NOTE(review): assumes the form fields themselves are the
            # bib data -- confirm against the callers
            return dict(self.REQUEST.form)

        return json.loads(bibdata)

    def getBibMappedDataJSON(self, bibdata=None, allFields=False):
        """returns metadata descriptions and data for bibdata as JSON.

        @param bibdata: bib data as JSON text; taken from the request form
            if None
        @param allFields: passed on to getMappedData
        """
        import json

        bibdata = self._loadJsonBibData(bibdata)
        return json.dumps(self.resource.meta.bib.getMappedData(bibdata, allFields=allFields))

    def getDCDataFromPath(self, path):
        """returns DC mapped data from path to index.meta"""
        return self.resource.meta.bib.getDCDataFromPath(path)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getDCMappedDataJSON(self, bibdata=None):
        """returns DC keys and data from bibdata as JSON.

        @param bibdata: bib data as JSON text; taken from the request form
            if None
        """
        import json

        bibdata = self._loadJsonBibData(bibdata)
        return json.dumps(self.resource.meta.bib.getDCMappedData(bibdata))

    def getBibFormattedMetaDataJSON(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib data formatted with metadata_template as JSON.

        @param bibdata: bib data as JSON text; taken from the request form
            if None
        """
        import json

        logging.debug("getBibFormattedMetaDataJSON(path=%s)" % path)
        bibdata = self._loadJsonBibData(bibdata)
        return json.dumps(self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata))

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib data formatted with metadata_template"""
        logging.debug("getBibFormattedMetaData(path=%s)" % path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib data formatted with metadata_extended_template (all fields)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)" % path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabelJSON(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib data formatted with label_template as JSON.

        @param bibdata: bib data as JSON text; taken from the request form
            if None
        """
        import json

        logging.debug("getBibFormattedLabelJSON(path=%s)" % path)
        bibdata = self._loadJsonBibData(bibdata)
        return json.dumps(self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata, xdata=bibxdata))

    def getBibFormattedLabel(self, path=None, dom=None, bibdata=None):
        """returns bib data formatted with label_template"""
        logging.debug("getBibFormattedLabel(%s)" % path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder', globals())

    # NOTE(review): this declares a method that changes the configuration
    # as public -- confirm that this is intended, a management permission
    # looks more appropriate
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self, title, metaDataServerUrl, RESPONSE=None):
        """Change MetaDataFolder.

        @param title: new title
        @param metaDataServerUrl: new URL of metadata server
        @param RESPONSE: if given, redirected to manage_main
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataFolderForm(self):
    """returns the rendered form for adding a MetaDataFolder"""
    template = PageTemplateFile('zpt/addMetadataFolderForm', globals())
    # wrap the template in the context of self before calling it
    return template.__of__(self)()

def manage_addMetaDataFolder(self, id, title, RESPONSE=None):
    """adds a new MetaDataFolder object.

    @param id: id of the new folder
    @param title: title of the new folder
    @param RESPONSE: if given, redirected to manage_main
    """
    folder = MetaDataFolder(id, title)
    container = self.Destination()
    container._setObject(id, folder)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')