view MetaData.py @ 3:3dadf0d89261

more renovation
author casties
date Tue, 26 Jul 2011 20:08:11 +0200
parents ac8e119b25ec
children 8291255b1868
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urllib
import logging
import urlparse

# TODO: which xml toolkit?
import amara
import xml.sax.saxutils
import xml.dom.minidom
import xml.etree.ElementTree as ET


# TODO: do we need this?
#from Products.OSA_system2 import OSAS_helpers
#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping

from MetaDataMapping import MetaDataMapping
from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping


from SrvTxtUtils import getHttpData, getText


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Nodes whose nodeType is not TEXT_NODE are ignored; an empty nodelist
    yields the empty string.
    """
    textParts = [child.data for child in nodelist
                 if child.nodeType == child.TEXT_NODE]
    return "".join(textParts)


def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type/field name for mapping lookups.

    Surrounding whitespace is stripped, inner spaces become dashes and the
    result is lower-cased. When underscore is true, underscores are turned
    into dashes as well.
    """
    normalized = bt.strip().replace(' ', '-').lower()
    if not underscore:
        return normalized

    return normalized.replace('_', '-')

def getBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag.

    dom is searched with find(".//meta/bib"). When no bib tag is found an
    empty dict is returned. Otherwise the dict contains:

    '@type': the normalized value of the bib tag's type attribute, or ''
             when the attribute is missing
    <tag>:   for every child element, getText(child) stored under the
             normalized tag name
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type
    bibType = bib.get('type')
    if bibType is None:
        # a bib tag without type attribute used to end in an AttributeError
        # because None was handed to normalizeBibField
        bibinfo['@type'] = ''
    else:
        bibinfo['@type'] = normalizeBibField(bibType)

    # put all subelements in dict
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo

def toString(list):
    """Return the unicode representations of all items of list, concatenated."""
    return u"".join([unicode(item) for item in list])

def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information; the original
        note says "currently only in the format getDCMetadata of this
        module" (presumably the output of MetaData.getDCFormatted - confirm)
    @return: dict with the keys "title", "creator" and "date", each holding
        the concatenated text of the matching dc elements. When mdSet cannot
        be parsed the dict contains only the key "error" with the unparsed
        mdSet as value.
    """
    import StringIO
    import sys

    NSS = {
           'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
           'dc': 'http://dublincore.org/documents/dcmi-namespace/',
           'owl':"http://www.w3.org/2002/07/owl#",
           'rdfs':"http://www.w3.org/2000/01/rdf-schema#"
    }
    ret = {}
    buffer = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(buffer, prefixes=NSS)
    except Exception:
        # best effort: report the problem and hand the raw input back.
        # "except Exception" instead of a bare except so that SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        excType, excValue = sys.exc_info()[:2]
        logging.error("Error: %s (%s)", excType, excValue)
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))

    return ret
        

               


class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type='MetaData'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetadataForm'},
        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )

    def __init__(self,id,shortDescription='',description='',fields=''):
        """Initialize a new instance.

        id: id of the object
        shortDescription: label for the link on the add page
        description: description of the method for the link page
        fields: comma separated field names, stored as list in fieldList
        """
        self.id = id
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # has to be set later with changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self,path,remove=None,prefix=None,cut=0):
        """Convenience method for modifying a path.

        remove: substring that is deleted from path (every occurrence)
        prefix: path that is put in front of path with os.path.join
        cut: number of trailing "/"-separated components to drop
        Returns the modified path.
        """
        if remove is not None:
            path = path.replace(remove,'')
        if prefix is not None:
            path = os.path.join(prefix,path)

        if cut > 0:
            splitted = path.split("/")
            path = "/".join(splitted[0:len(splitted)-cut])
        return path

    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
        """Import metadata definitions from a metadataExport XML file.

        Without importFile the upload form (zpt/importMetaDataExportXML) is
        rendered and returned. Otherwise the file is parsed and the mappings
        it describes are created below this object; afterwards the request
        is redirected to manage_main when RESPONSE is given.
        """
        if importFile is None:
            pt = PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            # NOTE(review): find() returns None when there is no such child,
            # which makes createMappingFromDom fail with a TypeError - confirm
            # whether a clearer error is wanted here
            node = dom.find("metadataExport")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self,metadatanode,metadata=None):
        """Create mappings from the metadata node of the XML format.

        metadatanode: element whose children are walked; "set" children
            become MetaDataMapping objects, "metadata" children become nested
            MetaData objects that are filled recursively
        metadata: metadata object that receives the new objects (defaults to
            self)
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s"%repr(node))
            if node.tag == "set":
                mappingId = node.get('name')
                isGeneric = (mappingId == 'generic')
                argList = {}
                for entry in node:
                    genericName = entry.get('genericName')
                    if isGeneric:
                        # generic mapping doesn't have labels
                        tag = genericName
                        label = genericName
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    description = getText(entry)
                    argList[tag] = {'tag':tag,'label':label,'explanation':description,'status':'optional'}

                logging.debug("createMappingFromDom: new mapping=%s"%repr(argList))
                metadata._setObject(mappingId,MetaDataMapping(mappingId,mappingId,argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
                metadata._setObject(name,MetaData(name,name))
                # fetch the stored object again and let it import its own children
                mdObj = getattr(metadata,name)
                mdObj.createMappingFromDom(node)

    # NOTE: this method is documented with comments instead of a docstring on
    # purpose. It had no docstring before and ZPublisher does not publish
    # objects without one; adding a docstring would make this URL fetching
    # helper callable through the web.
    #
    # Returns the data that getHttpData delivers for path. A path with a URL
    # scheme (e.g. http) is fetched as is. Any other path is inserted into
    # metaDataServerUrl with the % operator; "index.meta" is appended unless
    # the path already ends with it.
    def getMDFromPathOrUrl(self,path):
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl%path
            else:
                url = os.path.join(self.metaDataServerUrl%path,'index.meta')

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDCFormatted(self,path):
        """Get the metadata as dc set.

        path: path or URL of the index.meta document (see getMDFromPathOrUrl)

        Returns a string with a <bib> element containing <dc:type> and one
        <dc:...> element per mapped field that has a value in the document.
        Returns "" when the document has no bib type or when the mapping for
        the type or the "dc" mapping cannot be found in main/meta/bib.
        """
        logging.debug("getDCFormatted(path=%s)"%path)
        namespace = { 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML"+md)
        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) > 0:
            bibType = unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes')) > 0:
            # special case for outdated index.meta files of type archimedes
            bibType = "archimedes"
            archimedes = True
        else:
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""
            namespaceUsed = True
            bibType = unicode(typePaths[0])
        logging.info("got type:"+bibType)

        try:
            mapping = getattr(self.main.meta.bib,bibType.lower(),None)
        except Exception:
            mapping = None
        if mapping is None:
            # getattr with a default does not raise for a missing mapping, so
            # the None has to be checked explicitly
            logging.error("getDCFormatted: no mapping for type: %s"%bibType)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib,"dc",None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getDCFormatted: no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s"%repr(mds))

        for key,valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # skip this field; falling through would use a v that is
                    # unbound or left over from the previous field
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc = dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc,unicode(v[0])])

        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        # the type comes from the document, so escape it like the field values
        parts.append("<dc:type>%s</dc:type>"%xml.sax.saxutils.escape(bibType))
        for dcName,dcValue in mdHash:
            parts.append("""<dc:%s>%s</dc:%s>"""%(dcName,xml.sax.saxutils.escape(dcValue),dcName))
        parts.append("</bib>")
        return "".join(parts)

    def getBibMapping(self, bibtype):
        """Return the MetaDataMapping for bibtype (None if there is none).

        The mapping is looked up in main/meta/bib, first with bibtype as
        object id, then by comparing bibtype with the titles of the objects
        in that folder.
        """
        # try type as id
        mapping = getattr(self.main.meta.bib, bibtype, None)
        if mapping is not None:
            return mapping

        # try manually
        wanted = normalizeBibField(bibtype, underscore=True)
        for obj in self.main.meta.bib.objectValues():
            # NOTE(review): the imported class is spelled MetaDataMapping -
            # confirm that its meta_type really is "MetadataMapping"
            if obj.meta_type != "MetadataMapping":
                continue

            # real type is in title
            mapType = obj.title
            if mapType == bibtype:
                # type matches as is
                return obj

            if normalizeBibField(mapType, underscore=True) == wanted:
                # type matches after normalization (underscores become dashes)
                return obj

        return None

    def getBibFields(self, bibdata):
        """Return a dict with the metadata descriptions for bibdata.

        The mapping is selected by bibdata['@type']. The result is a copy of
        the mapping's getFields() with the mapping's getFieldList() added
        under the key '@fieldList'. An empty dict is returned when there is
        no mapping for the type.
        """
        bibtype = bibdata['@type']
        # get mapping from main/meta/bib
        mapping = self.getBibMapping(bibtype)
        if mapping is None:
            logging.error("getBibFields: no mapping for type: %s"%bibtype)
            return {}

        # get field descriptions (copy so we can change it)
        bibfields = mapping.getFields().copy()
        # add field list
        bibfields['@fieldList'] = mapping.getFieldList()

        return bibfields

    def getBibMappedData(self, bibdata, allFields=False):
        """Return a dict with metadata descriptions and data for bibdata.

        Every described field that has a value in bibdata is returned as a
        copy of its description with the value added under 'value'. With
        allFields the remaining non-empty entries of bibdata are added with
        their key as tag and label. The keys of all returned fields are
        listed under '@fieldList'.
        """
        bibfields = self.getBibFields(bibdata)
        mappedData = {}
        mappedList = []
        for bk in bibfields.keys():
            # ignore descriptions without data
            if not bibdata.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = bibfields[bk].copy()
            # add value
            bf['value'] = bibdata[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(bibdata):
            # add fields that were not in bibfields
            for bk in bibdata.keys():
                if bk in mappedData or not bibdata[bk]:
                    continue

                mappedData[bk] = {'tag':bk, 'label':bk, 'value':bibdata[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        return mappedData

    def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False):
        """Return a string with document data formatted according to template.

        The data is taken from bibdata if given, else from dom, else it is
        fetched from the server for path (path is required in that case).
        The template is looked up as attribute "<template>_<bibtype>" with
        "<template>_generic" as fallback. Returns "" when the data has no
        bib type or when no template is found.
        """
        logging.debug("getFormatted(template=%s)"%(template))

        if dom is None and bibdata is None:
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        bibtype = bibdata.get('@type')
        if bibtype is None:
            # e.g. a document without bib tag; return "" like getDCFormatted
            # does for documents without type instead of failing with KeyError
            logging.error("getFormatted: no bib type in metadata (path=%s)"%path)
            return ""

        # get template
        tp = getattr(self,"%s_%s"%(template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
            # try generic
            tp = getattr(self,"%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        # put bib field descriptions in mdHash
        bibfields = self.getBibMappedData(bibdata, allFields=allFields)

        return tp(mdmap=bibfields, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template' (see getFormatted)."""
        logging.debug("getFormattedMetaData(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template' (see getFormatted)."""
        # NOTE(review): uses the same template as getFormattedMetaData -
        # confirm that no separate short template is intended
        logging.debug("getFormattedMetaDataShort(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """Return all fields formatted with 'metadata_extended_template' (see getFormatted)."""
        logging.debug("getFormattedMetaDataExtended(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata, allFields=True)

    def getFormattedLabel(self,path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'label_template' (see getFormatted)."""
        logging.debug("getFormattedLabel(%s)"%path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self,path):
        """Fetch the metadata for path and format it with 'metadata_template'."""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self,path):
        """Fetch the metadata for path and format all fields with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path=path, allFields=True)

    def getFormattedLabelFromServer(self,path):
        """Fetch the metadata for path and format it with 'label_template'."""
        logging.debug("getFormattedLabelFromServer(%s)"%path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetadata.zpt')).__of__(self)
        return pt()

    # NOTE(review): declarePublic on a method that changes the configuration
    # looks too permissive - confirm the intended permission
    security.declarePublic('changeMetadata')
    def changeMetadata(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None):
        """Change Metadata

        Stores the given values on the object (fields is a comma separated
        string that is stored as list in fieldList) and redirects to
        manage_main when RESPONSE is given.
        """
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Render the form for adding a MetaData object (zpt/addMetadataForm.zpt)."""
    templatePath = os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')
    addForm = PageTemplateFile(templatePath).__of__(self)
    return addForm()

def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None):
    """Create a MetaData object and store it under id in self.Destination().

    Redirects to manage_main when RESPONSE is given.
    """
    metadataObj = MetaData(id, shortDescription, description, fields)
    container = self.Destination()
    container._setObject(id, metadataObj)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')