view MetaData.py @ 2:ac8e119b25ec

trying to make import from xml work
author casties
date Tue, 26 Jul 2011 11:55:19 +0200
parents e4bae49e657b
children 3dadf0d89261
line wrap: on
line source

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urllib
import logging
import urlparse

# TODO: which xml toolkit?
import amara
import xml.sax.saxutils
import xml.dom.minidom
import xml.etree.ElementTree as ET


# TODO: do we need this?
#from Products.OSA_system2 import OSAS_helpers
#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping

from MetaDataMapping import MetaDataMapping
from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping


from SrvTxtUtils import getHttpData, getText


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Only direct members of nodelist whose nodeType is TEXT_NODE contribute;
    other node kinds (elements, comments, ...) are skipped and not descended
    into.
    """
    textParts = [child.data for child in nodelist
                 if child.nodeType == child.TEXT_NODE]
    return "".join(textParts)


def normalizeBibtype(bt):
    """Return the normalised form of a bib type used for mapping lookups.

    Surrounding whitespace is removed, every remaining space becomes a
    hyphen and the result is lower-cased.
    """
    trimmed = bt.strip()
    hyphenated = trimmed.replace(' ', '-')
    return hyphenated.lower()

def getBibdataFromDom(dom):
    """Return a dict with all elements found below the bib tag.

    @param dom: ElementTree-style element that is searched with
        find(".//meta/bib")
    @return: dict mapping each child tag name of the bib element to the
        text returned by getText(); the normalised value of the bib
        element's type attribute is stored under the key '@type'.
        The dict is empty when no bib element is found.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put the normalised type in '@type'; a missing type attribute is
    # treated as an empty type instead of passing None to the normaliser
    bibinfo['@type'] = normalizeBibtype(bib.get('type', ''))
    # put all subelements in the dict
    for e in bib:
        bibinfo[e.tag] = getText(e)

    return bibinfo

def toString(list):
    """Return the unicode representations of all items joined together.

    An empty sequence yields an empty unicode string.
    """
    return u"".join([unicode(item) for item in list])

def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information; the original
        note says only the format of "getDCMetadata of this module" is
        supported (presumably the output of getDCFormatted -- TODO confirm)
    @return: dict with the keys "title", "creator" and "date", each holding
        the concatenated text of the matching dc: elements. If the string
        cannot be parsed the dict instead holds the unparsed input under
        the single key "error".
    """
    import StringIO
    import sys

    NSS = {
           'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
           'dc': 'http://dublincore.org/documents/dcmi-namespace/',
           'owl':"http://www.w3.org/2002/07/owl#",
           'rdfs':"http://www.w3.org/2000/01/rdf-schema#"
    }

    try:
        md = amara.parse(StringIO.StringIO(mdSet),prefixes=NSS)
    except Exception:
        # parse errors are reported to the caller through the "error" key;
        # KeyboardInterrupt/SystemExit are deliberately not swallowed
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        return {"error": mdSet}

    return {
        "title": toString(md.xml_xpath("//dc:title/text()")),
        "creator": toString(md.xml_xpath("//dc:creator/text()")),
        "date": toString(md.xml_xpath("//dc:date/text()")),
    }
        

               


class MetaData(OSAS_Metadata):
    """Provides basic methods for managing metadata structures.

    An instance keeps a generic field list and can hold mapping objects
    (MetaDataMapping) and nested MetaData objects created from an exported
    XML schema. It also fetches index.meta documents and formats their
    bib data through templates looked up on the instance.
    """
    meta_type='MetaData'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetadataForm'},
        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
        {'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )

    def __init__(self,id,shortDescription='',description='',fields=''):
        """Initialize a new instance.

        @param id: object id
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of generic field names
        """
        self.id = id
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # must be set afterwards through changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self,path,remove=None,prefix=None,cut=0):
        """Convenience method for modifying a path.

        @param path: path to modify
        @param remove: substring that is removed from path (all occurrences)
        @param prefix: path that is joined in front of path
        @param cut: number of trailing "/"-separated components to drop
        @return: the modified path
        """
        if remove is not None:
            path=path.replace(remove,'')
        if prefix is not None:
            path=os.path.join(prefix,path)

        if cut>0:
            splitted=path.split("/")
            path="/".join(splitted[0:len(splitted)-cut])
        return path

    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
        """Import metadata from a metadataExport XML file.

        Without importFile the upload form is rendered and returned.
        Otherwise the file is parsed and the first metadataExport element
        is turned into mappings; RESPONSE (if given) is redirected to
        manage_main afterwards.
        """
        if importFile is None:
            pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom=xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self,metadatanode,metadata=None):
        """Create mappings from a metadata node of the XML export format.

        @param metadatanode: minidom element whose child elements are
            "set" and "metadata" elements
        @param metadata: metadata object that receives the field list and
            the created objects; defaults to self
        """
        if metadata is None:
            metadata=self

        for node in metadatanode.childNodes:
            logging.debug("node: %s"%repr(node))
            # text (whitespace) and comment nodes have no tagName and carry
            # no schema information, so only element nodes are processed
            if node.nodeType != node.ELEMENT_NODE:
                continue

            if node.tagName=="set":
                setName=node.getAttribute('name')
                entries=node.getElementsByTagName('entry')
                if setName=='generic':
                    # the generic set defines the field list
                    metadata.fieldList=[entry.getAttribute('genericName') for entry in entries]
                else:
                    # every other set becomes a mapping object:
                    # generic name -> (tag, label, description)
                    mappingId=setName.encode('utf-8')
                    argList={}
                    for entry in entries:
                        genericName=entry.getAttribute('genericName')
                        tag=entry.getAttribute('tag')
                        label=entry.getAttribute('label')
                        description=getTextFromNode(entry.childNodes) #TODO: clean
                        argList[genericName]=(tag,label,description)
                    metadata._setObject(mappingId,MetaDataMapping(mappingId,mappingId,argList))

            elif node.tagName=="metadata":
                # nested metadata element: create a child object and recurse
                name=node.getAttribute('name').encode('utf-8')
                metadata._setObject(name,MetaData(name,name))
                mdObj=getattr(metadata,name)
                mdObj.createMappingFromDom(node)

    # NOTE: intentionally documented with comments only; the original has no
    # docstring here and adding one can change whether Zope publishes the
    # method through the web.
    # Fetches the metadata document for path. A value with a URL scheme is
    # used as is; otherwise path is inserted into metaDataServerUrl (used as
    # a %-format string) and "index.meta" is appended unless already present.
    def getMDFromPathOrUrl(self,path):
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url=path
        else:
            # path only
            if path.endswith("index.meta"):
                url =self.metaDataServerUrl%path
            else:
                url=os.path.join(self.metaDataServerUrl%path,'index.meta')

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDCFormatted(self,path):
        """Get the metadata for path as a dc set.

        @param path: path or URL handed to getMDFromPathOrUrl
        @return: string with a bib element containing dc:type and one dc:
            element per mapped field that has a value, or "" when the bib
            type or the required mappings cannot be determined
        """
        logging.debug("getDCFormatted(path=%s)"%path)
        namespace={ 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed=False
        archimedes=False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML"+md)
        im = amara.parse(md, prefixes=namespace)

        typePaths=im.xml_xpath('//bib/@type')
        if len(typePaths)>0:
            bibType=unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes'))>0:
            # special case for outdated index.meta files of type archimedes
            bibType="archimedes"
            archimedes=True
        else:
            typePaths=im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths)<1:
                return ""
            namespaceUsed=True
            bibType=unicode(typePaths[0])
        logging.info("got type:"+bibType)

        # getattr with a default returns None for an unknown type, so the
        # result has to be checked in addition to catching lookup errors
        try:
            mapping=getattr(self.main.meta.bib,bibType.lower(),None)
        except Exception:
            mapping=None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping  for type: %s"%bibType)
            return ""

        try:
            dcMapping=getattr(self.main.meta.bib,"dc",None)
        except Exception:
            dcMapping=None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # mapping: generic field --> field in the respective type
        mds=mapping.generateMappingHash()
        dcMds=dcMapping.generateMappingHash()

        mdHash=[]
        logging.debug("Value: %s"%repr(mds))

        for key,valueTriple in mds.items():
            value=valueTriple[0]
            logging.debug("Value: %s"%repr(value))
            logging.debug("Key: %s"%repr(key))
            if value=="":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()'%value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                    # treat a failed query as "no value" instead of reusing
                    # the result of a previous iteration
                    v = []
            else:
                v = im.xml_xpath('//bib/%s/text()'%value)

            if len(v) > 0:
                dc=dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s"%(repr(value),dc))
                    mdHash.append([dc,unicode(v[0])])

        ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # the type comes from the fetched document, so it is escaped like
        # the field values
        ret+="<dc:type>%s</dc:type>"%xml.sax.saxutils.escape(bibType)
        for dcName,dcValue in mdHash:
            ret+="""<dc:%s>%s</dc:%s>"""%(dcName,xml.sax.saxutils.escape(dcValue),dcName)
        ret+="</bib>"
        return ret

    def getBibFields(self, bibdata):
        """Return a dict with the metadata field descriptions for bibdata.

        @param bibdata: dict with the bib type under the key '@type'
        @return: copy of the field descriptions of the mapping for that
            type with the mapping's field list added under '@fieldList';
            an empty dict when no mapping exists for the type
        """
        bibtype = bibdata['@type']
        # get mapping from main/meta/bib
        try:
            mapping=getattr(self.main.meta.bib, bibtype.lower())
        except Exception:
            logging.error("getStdMappedHash: no mapping for type: %s"%bibtype)
            return {}

        # get field descriptions
        # NOTE(review): getFields is accessed without being called while
        # getFieldList below is called -- confirm against MetaDataMapping
        # whether this should be mapping.getFields().copy()
        bibFields = mapping.getFields.copy()
        # add field list
        bibFields['@fieldList'] = mapping.getFieldList()

        return bibFields

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """Return a string with document data formatted according to template.

        The data comes from pre-parsed bibdata, else from dom, else it is
        fetched from the server using path.

        @param template: template name prefix; the template is looked up
            on self as "<template>_<bibtype>" and then "<template>_generic"
        @param path: path of the document (used when neither dom nor
            bibdata is given)
        @param dom: parsed document searched by getBibdataFromDom
        @param bibdata: dict as returned by getBibdataFromDom
        @return: result of calling the template, or "" when no bib type or
            no template is available
        """
        logging.debug("getFormatted(template=%s)"%(template))

        if dom is None and bibdata is None:
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
            #logging.debug("md:"+md)
            #dom = amara.parse(md)
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        # bibdata is empty when the document has no bib tag
        bibtype = bibdata.get('@type')
        if bibtype is None:
            logging.error("getFormatted: no bib type found in metadata")
            return ""

        # get template
        tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
            # try generic
            tp=getattr(self,"%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        # put bib field descriptions in mdHash
        bibFields = self.getBibFields(bibdata)

        return tp(bibFields=bibFields, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template'."""
        logging.debug("getFormattedMetaData(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template'."""
        # NOTE(review): uses the same template as getFormattedMetaData --
        # confirm whether a dedicated short template was intended
        logging.debug("getFormattedMetaDataShort(path=%s)"%path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtended(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self,path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'label_template'."""
        logging.debug("getFormattedLabel(%s)"%path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self,path):
        """Fetch the metadata for path and format it with 'metadata_template'."""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self,path):
        """Fetch the metadata for path and format it with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self,path):
        """Fetch the metadata for path and format it with 'label_template'."""
        logging.debug("getFormattedLabelFromServer(%s)"%path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetadata.zpt')).__of__(self)
        return pt()

    security.declarePublic('changeMetadata')
    def changeMetadata(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None):
        """Change the configuration of this metadata object.

        @param fields: comma separated list of generic field names
        @param metaDataServerUrl: %-format string into which a document
            path is inserted by getMDFromPathOrUrl
        @param RESPONSE: if given, redirected to manage_main
        """
        self.shortDescription=shortDescription
        self.description=description
        self.fieldList=fields.split(",")
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Render the form used for adding a MetaData object."""
    zptPath = os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')
    template = PageTemplateFile(zptPath)
    boundTemplate = template.__of__(self)
    return boundTemplate()

def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None):
    """Create a MetaData object and add it to the destination container.

    RESPONSE (if given) is redirected to manage_main afterwards.
    """
    metadataObj = MetaData(id, shortDescription, description, fields)
    container = self.Destination()
    container._setObject(id, metadataObj)
    if RESPONSE is None:
        return None
    RESPONSE.redirect('manage_main')