diff MetaData.py @ 0:9f9d9be26e53

first checkin in Mercurial (see history in SVN)
author casties
date Mon, 25 Jul 2011 16:50:48 +0200
parents
children e4bae49e657b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MetaData.py	Mon Jul 25 16:50:48 2011 +0200
@@ -0,0 +1,419 @@
+from OFS.Folder import Folder
+from Products.PageTemplates.PageTemplateFile import PageTemplateFile
+from Globals import package_home
+from AccessControl import ClassSecurityInfo
+import os.path
+import urllib
+import logging
+import urlparse
+
+# TODO: which xml toolkit?
+import amara
+import xml.sax.saxutils
+import xml.dom.minidom
+import xml.etree.ElementTree as ET
+
+
+# TODO: do we need this?
+#from Products.OSA_system2 import OSAS_helpers
+#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping
+
+from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping
+
+
+from SrvTxtUtils import getHttpData, getText
+
+
+# TODO: get rid of this
+def getTextFromNode(nodelist):
+    """Return the concatenated data of all text nodes in nodelist.
+
+    Only members whose nodeType is TEXT_NODE contribute; all other
+    nodes (e.g. elements) are skipped, their children are not visited.
+    """
+    return "".join(node.data for node in nodelist
+                   if node.nodeType == node.TEXT_NODE)
+
+
+def normalizeBibtype(bt):
+    """Return the normalised form of bib type bt for looking up mappings.
+
+    Surrounding whitespace is stripped, every blank becomes a hyphen and
+    the result is lower-cased (" Journal Article" -> "journal-article").
+    """
+    stripped = bt.strip()
+    hyphenated = stripped.replace(' ', '-')
+    return hyphenated.lower()
+
+def toString(list):
+    """Return the unicode representations of all items in list, concatenated.
+
+    An empty list yields the empty unicode string.
+    """
+    # the parameter name shadows the builtin; it is kept for keyword callers
+    return u"".join([unicode(item) for item in list])
+
+def dcMetaDataToHash(mdSet):
+    """Convenience function that creates a hash from a DC metadata set.
+
+    @param mdSet: string containing DC metadata information; the elements
+    are expected in the dc namespace declared in NSS below (presumably the
+    output of MetaData.getDCFormatted of this module -- TODO confirm)
+    @return: dict with the keys "title", "creator" and "date", each holding
+    the concatenated text of all matching dc elements; if mdSet can not be
+    parsed the dict only contains the key "error" holding mdSet itself
+    """
+    import StringIO
+    import sys
+
+    NSS = {
+           'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+           'dc': 'http://dublincore.org/documents/dcmi-namespace/',
+           'owl':"http://www.w3.org/2002/07/owl#",
+           'rdfs':"http://www.w3.org/2000/01/rdf-schema#"
+    }
+    ret={}
+    # named mdBuffer so that the builtin "buffer" is not shadowed
+    mdBuffer = StringIO.StringIO(mdSet)
+    try:
+        md = amara.parse(mdBuffer,prefixes=NSS)
+    except Exception:
+        # best effort: report the problem and hand the unparsed input back
+        # (was a bare except that also swallowed non-Exception raises)
+        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
+
+        ret["error"]=mdSet
+        return ret
+
+    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
+    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
+    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
+
+    return ret
+        
+def getBibdataFromDom(dom):
+    """Return a dict with all elements from the bib tag in dom.
+
+    dom is searched with the path ".//meta/bib". The normalised value of
+    the bib tag's type attribute is stored under the key '@type' (empty
+    string if the attribute is missing); every child element of bib is
+    stored as tag -> getText(element). If there is no bib tag an empty
+    dict is returned.
+    """
+    bibinfo = {}
+    bib = dom.find(".//meta/bib")
+    if bib is None:
+        return bibinfo
+
+    # put type in @type
+    # (fixed: the original called the undefined name "normalizedBibtype",
+    # which raised a NameError for every document with a bib tag)
+    bibinfo['@type'] = normalizeBibtype(bib.get('type') or '')
+    # put all subelements in dict
+    for e in bib:
+        bibinfo[e.tag] = getText(e)
+
+    return bibinfo
+
+               
+
+
+class MetaData(OSAS_Metadata):
+    """provides basic methods for managing metadata structures"""
+    meta_type='MetaData'
+    security=ClassSecurityInfo()
+    manage_options = Folder.manage_options+(
+        {'label':'Main Config','action':'changeMetadataForm'},
+        {'label':'Import XML Schema','action':'importMetaDataExportXML'},
+        {'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
+        )
+    
+    def __init__(self,id,shortDescription='',description='',fields=''):
+        """initialize a new instance
+
+        fields is a comma separated string of field names; it is split
+        into the list self.fieldList.
+        """
+        self.id = id
+        self.shortDescription = shortDescription # label for the link on the add page
+        self.description = description # description of the method for the link page
+        self.fieldList = fields.split(",")
+        self.metaDataServerUrl = "" # has to be set with changeMetadata
+    
+        
+    def correctPath(self,path,remove=None,prefix=None,cut=0):
+        """convenience method for modifying a path
+
+        remove: substring that is deleted from path (all occurrences)
+        prefix: path that is joined in front of path with os.path.join
+        cut: number of trailing "/"-separated components that are dropped
+
+        The steps are applied in this order; the modified path is returned.
+        """
+        if remove is not None:
+            path=path.replace(remove,'')
+        if prefix is not None:
+            path=os.path.join(prefix,path)
+        
+        if cut>0:
+            parts=path.split("/")
+            # clamp at 0: with cut > len(parts) a negative end index would
+            # keep leading components instead of dropping everything
+            path="/".join(parts[:max(len(parts)-cut,0)])
+        return path
+    
+    def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
+        """imports metadata from the metadataexportxml file
+
+        Without importFile the upload form (zpt/importMetaDataExportXML.zpt)
+        is rendered and returned. Otherwise importFile is parsed with
+        minidom and the mappings are created from its first
+        metadataExport element; if RESPONSE is given the request is
+        redirected to manage_main afterwards.
+        """
+        
+        if importFile is None:
+            pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','importMetaDataExportXML.zpt')).__of__(self)
+            return pt()
+        
+        dom=xml.dom.minidom.parse(importFile)
+        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])
+        
+        if RESPONSE is not None:
+            RESPONSE.redirect('manage_main')
+
+   
+    def createMappingFromDom(self,metadatanode,metadata=None):
+        """create the mappings from the metadata node of the xml format
+
+        metadatanode: DOM element whose child elements are processed
+        metadata: metadata object that receives the result (default: self)
+
+        A "set" element named "generic" defines metadata.fieldList (the
+        genericName of each entry). Every other "set" element becomes a
+        MetaDataMapping object holding, per genericName, the tuple
+        (tag, label, description). A "metadata" element becomes a nested
+        MetaData object that is filled recursively.
+        """
+        
+        if metadata is None:
+            metadata=self
+        
+        for node in metadatanode.childNodes:
+            # only element nodes have a tagName; whitespace text nodes and
+            # comments between the elements raised an AttributeError here
+            if node.nodeType != node.ELEMENT_NODE:
+                continue
+            
+            logging.debug("node: %s"%node.tagName)
+            if node.tagName=="set":
+                setName=node.getAttribute('name')
+                entries=node.getElementsByTagName('entry')
+                if setName=='generic':
+                    # the generic set provides the field list
+                    metadata.fieldList=[entry.getAttribute('genericName') for entry in entries]
+                else:
+                    mappingId=setName.encode('utf-8')
+                    argList={}
+                    for entry in entries:
+                        genericName=entry.getAttribute('genericName')
+                        tag=entry.getAttribute('tag')
+                        label=entry.getAttribute('label')
+                        description=getTextFromNode(entry.childNodes) #TODO: clean
+                        argList[genericName]=(tag,label,description)
+                    metadata._setObject(mappingId,MetaDataMapping(mappingId,mappingId,argList))
+            
+            elif node.tagName=="metadata":
+                name=node.getAttribute('name').encode('utf-8')
+                metadata._setObject(name,MetaData(name,name))
+                mdObj=getattr(metadata,name)
+                mdObj.createMappingFromDom(node)
+    
+    
+    def getMDFromPathOrUrl(self,path):
+        # Fetches the metadata document for path with getHttpData and
+        # returns its result.
+        # - path with a URL scheme (e.g. http): used as the URL unchanged
+        # - plain path: inserted into self.metaDataServerUrl with the %
+        #   operator (so that attribute has to contain a format
+        #   placeholder); 'index.meta' is appended unless the path already
+        #   ends with it
+        # NOTE(review): the original has no docstring here and none is
+        # added on purpose -- Zope's publisher uses docstrings as the marker
+        # for what may be published through the web; confirm before adding.
+        parsedurl = urlparse.urlparse(path)
+        if parsedurl[0] != "":
+            # has schema (e.g. http)
+            url=path
+        elif path.endswith("index.meta"):
+            # path only, already pointing to the index.meta file
+            url=self.metaDataServerUrl%path
+        else:
+            # path only, pointing to the directory of the index.meta file
+            url=os.path.join(self.metaDataServerUrl%path,'index.meta')
+            
+        #logging.debug("get Metadata: %s"%url)
+        md = getHttpData(url)
+        return md
+    
+    def getDCFormatted(self,path):
+        """get the metadata as dc set
+
+        The index.meta for path is fetched and its bib type determined
+        (un-namespaced bib tag, legacy archimedes tag or bib tag in the
+        mpiwg namespace). The fields of the type's mapping in
+        main/meta/bib are translated to the field names of the "dc"
+        mapping.
+
+        Returns a string with a bib element containing dc:type and one
+        dc element per mapped field that has a value, or "" if no type
+        or no mapping could be found.
+        """
+        logging.debug("getDCFormatted(path=%s)"%path)
+        namespace={ 'mpiwg':  "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
+        namespaceUsed=False
+        
+        md = self.getMDFromPathOrUrl(path)
+        logging.debug("MD in XML"+md)
+        im = amara.parse(md, prefixes=namespace)
+        
+        typePaths=im.xml_xpath('//bib/@type')
+        archimedes=False
+        
+        if len(typePaths)<1:
+            # special case for outdated index.meta files of type archimedes
+            typePaths=im.xml_xpath('//meta/archimedes')
+            if len(typePaths)>0:
+                bibtype = "archimedes"
+                archimedes=True
+            else:
+                typePaths=im.xml_xpath('//mpiwg:bib/@type')
+                if len(typePaths)<1:
+                    return ""
+                namespaceUsed=True
+                bibtype=unicode(typePaths[0])
+        else:
+            bibtype=unicode(typePaths[0])
+        logging.info("got type:"+bibtype)
+        
+        # getattr with a default never raises for a missing mapping, so the
+        # result has to be checked for None as well (before, a missing
+        # mapping crashed below in generateMappingHash)
+        try:
+            mapping=getattr(self.main.meta.bib,bibtype.lower(),None)
+        except Exception:
+            mapping=None
+        if mapping is None:
+            logging.error("getDCFormatted: no mapping for type: %s"%bibtype)
+            return ""
+        
+        try:
+            dcMapping=getattr(self.main.meta.bib,"dc",None)
+        except Exception:
+            dcMapping=None
+        if dcMapping is None:
+            logging.error("getDCFormatted: no dc in meta/bib")
+            return ""
+        
+        mds=mapping.generateMappingHash() # mapping generic field --> field in the given type
+        dcMds=dcMapping.generateMappingHash()
+        
+        mdHash=[]
+        logging.debug("Value: %s"%repr(mds))
+        
+        # the location of the fields depends on the kind of document found above
+        if archimedes:
+            xpathTemplate='//archimedes/%s/text()'
+        elif namespaceUsed:
+            xpathTemplate='//mpiwg:bib/mpiwg:%s/text()'
+        else:
+            xpathTemplate='//bib/%s/text()'
+        
+        for key,valueTriple in mds.items():
+            value=valueTriple[0]
+            logging.debug("Value: %s"%repr(value))
+            logging.debug("Key: %s"%repr(key))
+            if value=="":
+                continue
+            
+            xpath=xpathTemplate%value
+            if namespaceUsed:
+                # as before only the namespaced query is guarded
+                try:
+                    v = im.xml_xpath(xpath)
+                except Exception:
+                    logging.error('cannot do: %s'%xpath)
+                    # skip this field; before, v stayed undefined or kept
+                    # the result of the previous field
+                    continue
+            else:
+                v = im.xml_xpath(xpath)
+            
+            if len(v) > 0:
+                dc=dcMds[key][0]
+                if dc !="":
+                    logging.debug("%s--> : %s"%(repr(value),dc))
+                    mdHash.append([dc,unicode(v[0])])
+        
+        escape=xml.sax.saxutils.escape
+        ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
+        # the type comes from the fetched document, so it is escaped like the values
+        ret+="<dc:type>%s</dc:type>"%escape(bibtype)
+        for dcName,text in mdHash:
+            ret+="""<dc:%s>%s</dc:%s>"""%(dcName,escape(text),dcName)
+        ret+="</bib>"
+        return ret
+
+    
+    def getStdMappedHash(self, bibdata):
+        """returns dict with metadata from bibdata mapped according to standard mapping
+
+        bibdata: dict as returned by getBibdataFromDom; its '@type' entry
+        selects the mapping in main/meta/bib.
+
+        The result maps each generic field name that has a mapped field
+        to the value of that field in bibdata ('' if bibdata lacks it).
+        An empty dict is returned if there is no type or no mapping.
+        """
+        mdHash={}
+        bibtype = bibdata.get('@type')
+        if bibtype is None:
+            logging.error("getStdMappedHash: no bib type in metadata")
+            return mdHash
+        
+        # get mapping from main/meta/bib
+        try:
+            mapping=getattr(self.main.meta.bib, bibtype.lower())
+        except Exception:
+            logging.error("getStdMappedHash: no mapping for type: %s"%bibtype)
+            return mdHash
+            
+        mds = mapping.generateMappingHash() # mapping generic field --> field in the given type
+        
+        for field,entry in mds.items():
+            # get mapped field name
+            mf = entry[0]
+            if not mf:
+                continue
+            logging.debug("mapping: %s = %s"%(field,mf))
+            mdHash[field] = bibdata.get(mf, '')
+            
+        return mdHash
+
+    
+    def getFormatted(self, template, path=None, dom=None, bibdata=None):
+        """returns string with document data formatted according to template.
+
+        gets data from server (path) or dom or pre-parsed bibdata; the
+        first one available in the order bibdata, dom, path is used.
+
+        The template object is looked up on self as "<template>_<bibtype>"
+        with "<template>_generic" as fallback and is called with
+        stdmd (mapped data) and md (bibdata).
+
+        Returns "" if no data source is given, if the metadata has no
+        bib type or if no template is found.
+        """
+        logging.debug("getFormatted(template=%s)"%(template))
+        
+        if bibdata is None:
+            if dom is None:
+                if path is None:
+                    logging.error("getFormatted: neither path nor dom nor bibdata given")
+                    return ""
+                # get from server
+                md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
+                #logging.debug("md:"+md)
+                dom = ET.fromstring(md)
+            
+            # get contents of bib tag
+            bibdata = getBibdataFromDom(dom)
+        
+        # bibdata is empty if the document has no bib tag
+        bibtype = bibdata.get('@type')
+        if bibtype is None:
+            logging.error("getFormatted: no bib type in metadata")
+            return ""
+        
+        # get template
+        tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None)
+        if tp is None:
+            logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
+            # try generic
+            tp=getattr(self,"%s_generic"%(template), None)
+            if tp is None:
+                logging.error("getFormatted: no generic template either: %s"%(template))
+                return ""
+        
+        # put mapped data in mdHash
+        mdHash = self.getStdMappedHash(bibdata)
+        
+        return tp(stdmd=mdHash, md=bibdata)
+
+                
+    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
+        """get the metadata formatted with metadata_template (see getFormatted)"""
+        logging.debug("getFormattedMetaData(path=%s)"%path)
+        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)
+                
+    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
+        """get the metadata formatted with metadata_template (see getFormatted)"""
+        logging.debug("getFormattedMetaDataShort(path=%s)"%path)
+        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)
+                
+    def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
+        """get the metadata formatted with metadata_extended_template (see getFormatted)"""
+        logging.debug("getFormattedMetaDataExtended(path=%s)"%path)
+        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)
+            
+    def getFormattedLabel(self,path=None, dom=None, bibdata=None):
+        """get the metadata formatted with label_template (see getFormatted)"""
+        logging.debug("getFormattedLabel(%s)"%path)
+        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)
+                        
+    def getFormattedMetaDataShortFromServer(self,path):
+        """get the metadata from the server, formatted with metadata_template"""
+        logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path)
+        return self.getFormatted('metadata_template', path)
+                
+    def getFormattedMetaDataExtendedFromServer(self,path):
+        """get the metadata from the server, formatted with metadata_extended_template"""
+        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path)
+        return self.getFormatted('metadata_extended_template', path)
+            
+    def getFormattedLabelFromServer(self,path):
+        """get the metadata from the server, formatted with label_template"""
+        logging.debug("getFormattedLabelFromServer(%s)"%path)
+        return self.getFormatted('label_template', path)
+                        
+    
+    security.declarePublic('changeMetadataForm')
+    def changeMetadataForm(self):
+        """Main configuration"""
+        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetadata.zpt')).__of__(self)
+        return pt()
+    
+    # NOTE(review): declared public although it changes the configuration
+    # (including the server URL that metadata is fetched from) -- confirm
+    # that no permission is meant to be required here.
+    security.declarePublic('changeMetadata')
+    def changeMetadata(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None):
+        """Change Metadata
+
+        Stores the given values on the object; fields is a comma separated
+        string that is split into fieldList. If RESPONSE is given the
+        request is redirected to manage_main.
+        """
+        self.shortDescription=shortDescription
+        self.description=description
+        self.fieldList=fields.split(",")
+        self.metaDataServerUrl=metaDataServerUrl
+        if RESPONSE is not None:
+            RESPONSE.redirect('manage_main')
+
+
+def manage_addMetaDataForm(self):
+    """Render the form (zpt/addMetadataForm.zpt) for adding a MetaData object."""
+    zptPath = os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')
+    form = PageTemplateFile(zptPath).__of__(self)
+    return form()
+
+def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None):
+    """Create a MetaData object and store it under id in self.Destination().
+
+    If RESPONSE is given the request is redirected to manage_main.
+    """
+    container = self.Destination()
+    container._setObject(id, MetaData(id, shortDescription, description, fields))
+    if RESPONSE is None:
+        return
+    RESPONSE.redirect('manage_main')
+        
+# Mapping object as created by MetaData.createMappingFromDom and
+# manage_addMetaDataMapping. The class body only sets meta_type; everything
+# else comes from OSAS_MetadataMapping.
+# NOTE(review): documented with comments instead of a docstring on purpose --
+# Zope's publisher uses docstrings as the marker for publishable objects,
+# so adding one might change what is reachable through the web; confirm.
+class MetaDataMapping(OSAS_MetadataMapping):
+    meta_type="MetadataMapping"
+    
+def manage_addMetaDataMappingForm(self):
+    """Render the form (zpt/addMetadataMappingForm.zpt) for adding a MetaDataMapping."""
+    zptPath = os.path.join(package_home(globals()), 'zpt', 'addMetadataMappingForm.zpt')
+    form = PageTemplateFile(zptPath).__of__(self)
+    return form()
+
+def manage_addMetaDataMapping(self,idOfObject,titleOfObject,RESPONSE=None):
+    """Create a MetaDataMapping from the request form and store it in self.
+
+    For every name in self.fieldList (except 'idOfObject' and
+    'titleOfObject') the form values <name>, label_<name>,
+    explanation_<name>, status_<name> and values_<name> are collected
+    into a tuple. If RESPONSE is given the request is redirected to
+    manage_main.
+    """
+    # form key prefixes in the order of the tuple elements
+    prefixes = ('', 'label_', 'explanation_', 'status_', 'values_')
+
+    argList = {}
+    for arg in self.fieldList:
+        if arg in ('idOfObject', 'titleOfObject'):
+            continue
+        argList[arg] = tuple([self.REQUEST.form[prefix + arg] for prefix in prefixes])
+
+    self._setObject(idOfObject, MetaDataMapping(idOfObject, titleOfObject, argList))
+    if RESPONSE is None:
+        return
+    RESPONSE.redirect('manage_main')
+