Mercurial > hg > MetaDataProvider
view MetaData.py @ 2:ac8e119b25ec
trying to make import from xml work
author | casties |
---|---|
date | Tue, 26 Jul 2011 11:55:19 +0200 |
parents | e4bae49e657b |
children | 3dadf0d89261 |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import urllib
import logging
import urlparse

# TODO: which xml toolkit?
import amara
import xml.sax.saxutils
import xml.dom.minidom
import xml.etree.ElementTree as ET

# TODO: do we need this?
#from Products.OSA_system2 import OSAS_helpers
#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping

from MetaDataMapping import MetaDataMapping
from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping

from SrvTxtUtils import getHttpData, getText


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Only direct members of nodelist are inspected; nodes of any other type
    (elements, comments, ...) are ignored.
    """
    return "".join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])


def normalizeBibtype(bt):
    """Return the normalised bib type used for looking up mappings.

    Surrounding whitespace is stripped, inner spaces become dashes and the
    result is lower-cased.
    """
    return bt.strip().replace(' ', '-').lower()


def getBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag found in dom.

    The normalised value of the bib element's type attribute is stored under
    the key '@type'; every child element is stored under its tag name with
    the text returned by getText(). The dict is empty if there is no
    .//meta/bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type (only if the attribute is present; normalising None
    # would raise an AttributeError)
    bibtype = bib.get('type')
    if bibtype is not None:
        bibinfo['@type'] = normalizeBibtype(bibtype)

    # put all subelements in dict
    for e in bib:
        bibinfo[e.tag] = getText(e)

    return bibinfo


def toString(list):
    """Return the unicode concatenation of all items in list."""
    # NOTE: the parameter name shadows the builtin but is kept so that
    # keyword callers keep working.
    return u"".join([unicode(item) for item in list])


def dcMetaDataToHash(mdSet):
    """Create a dict from a DC metadata set.

    @param mdSet: string containing DC metadata information, currently only
                  in the format produced by getDCFormatted of this module
    @return: dict with the keys "title", "creator" and "date"; if mdSet
             cannot be parsed the dict only contains the key "error" holding
             the unparsed input.
    """
    import StringIO
    import sys

    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}

    stream = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(stream, prefixes=NSS)
    except Exception:
        # best effort: report the problem and hand the raw input back
        logging.error("Error: %s (%s)" % (sys.exc_info()[0], sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        {'label': 'Select Fields for Display', 'action': 'indicateDisplayFieldsForm'},
        )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """initialize a new instance"""
        self.id = id
        # label for the link on the add page
        self.shortDescription = shortDescription
        # description of the method for the link page
        self.description = description
        self.fieldList = fields.split(",")
        # has to be set with changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """Convenience method for modifying a path.

        @param remove: substring that is removed from path (all occurrences)
        @param prefix: path that is prepended using os.path.join
        @param cut: number of trailing "/"-separated segments to drop
        @return: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            segments = path.split("/")
            # clamp at 0: a negative end index would count from the end of
            # the list and keep segments instead of dropping all of them
            keep = max(len(segments) - cut, 0)
            path = "/".join(segments[:keep])

        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """imports metadata from the metadataexportxml file"""
        if importFile is None:
            # no file given: show the upload form
            pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom = xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """Create mappings from the metadata node of the XML format.

        A child element "set" named 'generic' defines the field list, every
        other "set" becomes a MetaDataMapping object and every "metadata"
        element becomes a nested MetaData object that is filled recursively.

        @param metadatanode: minidom element whose children are processed
        @param metadata: metadata object that receives the mappings
                         (defaults to self)
        """
        if metadata is None:
            metadata = self

        for node in metadatanode.childNodes:
            logging.debug("node: %s", repr(node))
            # whitespace text nodes and comments between the elements have
            # no tagName attribute, so skip everything but elements
            if node.nodeType != node.ELEMENT_NODE:
                continue

            if node.tagName == "set":
                setName = node.getAttribute('name')
                entries = node.getElementsByTagName('entry')
                if setName == 'generic':
                    # first step: create the field list
                    metadata.fieldList = [entry.getAttribute('genericName')
                                          for entry in entries]
                else:
                    mappingId = setName.encode('utf-8')
                    argList = {}
                    for entry in entries:
                        genericName = entry.getAttribute('genericName')
                        tag = entry.getAttribute('tag')
                        label = entry.getAttribute('label')
                        description = getTextFromNode(entry.childNodes)  # TODO: clean
                        argList[genericName] = (tag, label, description)

                    metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tagName == "metadata":
                name = node.getAttribute('name').encode('utf-8')
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    def getMDFromPathOrUrl(self, path):
        # Fetch the metadata document for path and return what getHttpData
        # returns. A path with a URL scheme is fetched as is; otherwise path
        # is inserted into self.metaDataServerUrl with the % operator and
        # "index.meta" is appended unless path already ends with it.
        #
        # NOTE: documented with comments on purpose. The original method has
        # no docstring and Zope's publisher only exposes methods that have
        # one; adding a docstring would make this fetcher callable by URL.
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl % path
            else:
                url = os.path.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDCFormatted(self, path):
        """Get the metadata as dc set.

        @param path: path or URL of the index.meta document
        @return: string with a <bib> element containing dc:* elements, or ""
                 if no bib type or no matching mapping could be found
        """
        logging.debug("getDCFormatted(path=%s)", path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML%s", md)
        im = amara.parse(md, prefixes=namespace)

        # find out the bib type and the flavour of the document
        typePaths = im.xml_xpath('//bib/@type')
        archimedes = False

        if len(typePaths) < 1:
            # special case for outdated index.meta files of type archimedes
            typePaths = im.xml_xpath('//meta/archimedes')
            if len(typePaths) > 0:
                bibtype = "archimedes"
                archimedes = True
            else:
                typePaths = im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths) < 1:
                    return ""
                namespaceUsed = True
                bibtype = unicode(typePaths[0])
        else:
            bibtype = unicode(typePaths[0])

        logging.info("got type:%s", bibtype)

        # getattr with a default never raises for a missing mapping, so the
        # result has to be checked for None as well
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except AttributeError:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s", bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except AttributeError:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s", repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s", repr(value))
            logging.debug("Key: %s", repr(key))

            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()' % value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()' % value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()', value)
                    # skip the field instead of reusing a stale or unbound result
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()' % value)

            if len(v) < 1:
                continue

            # fields without a dc counterpart cannot be emitted
            if key not in dcMds:
                continue
            dc = dcMds[key][0]
            if dc != "":
                logging.debug("%s--> : %s", repr(value), dc)
                mdHash.append([dc, unicode(v[0])])

        ret = """<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # the type comes from the document, so it is escaped like the values
        ret += "<dc:type>%s</dc:type>" % xml.sax.saxutils.escape(bibtype)
        for field in mdHash:
            ret += """<dc:%s>%s</dc:%s>""" % (field[0], xml.sax.saxutils.escape(field[1]), field[0])
        ret += "</bib>"
        return ret

    def getBibFields(self, bibdata):
        """Return a dict with the metadata description for bibdata.

        @param bibdata: mapping with the bib type under the key '@type'
        @return: copy of the field descriptions of the matching mapping with
                 the field list added under '@fieldList'; an empty dict if
                 there is no mapping for the bib type
        """
        bibtype = bibdata['@type']
        # get mapping from main/meta/bib
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower())
        except AttributeError:
            logging.error("getStdMappedHash: no mapping for type: %s", bibtype)
            return {}

        # get field descriptions
        # NOTE(review): getFields is used as an attribute here while
        # getFieldList is called -- confirm against MetaDataMapping
        bibFields = mapping.getFields.copy()
        # add field list
        bibFields['@fieldList'] = mapping.getFieldList()

        return bibFields

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """Return a string with document data formatted according to template.

        Gets the data from the server (path) or from dom or from pre-parsed
        bibdata. The template is looked up as attribute
        "<template>_<bibtype>" with "<template>_generic" as fallback.
        Returns "" if the data source, the bib type or the template is
        missing.
        """
        logging.debug("getFormatted(template=%s)", template)

        if dom is None and bibdata is None:
            if path is None:
                logging.error("getFormatted: no path, dom or bibdata given")
                return ""
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online", ""))
            #logging.debug("md:"+md)
            #dom = amara.parse(md)
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        # documents without a (typed) bib tag cannot be formatted
        try:
            bibtype = bibdata['@type']
        except KeyError:
            logging.error("getFormatted: no bib type in metadata")
            return ""

        # get template
        tp = getattr(self, "%s_%s" % (template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s", template, bibtype)
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s", template)
                return ""

        # put bib field descriptions in mdHash
        bibFields = self.getBibFields(bibdata)

        return tp(bibFields=bibFields, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaData(path=%s)", path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaDataShort(path=%s)", path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'metadata_extended_template'"""
        logging.debug("getFormattedMetaDataExtended(path=%s)", path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'label_template'"""
        logging.debug("getFormattedLabel(%s)", path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """get the metadata from the server formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)", path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """get the metadata from the server formatted with 'metadata_extended_template'"""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)", path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self, path):
        """get the metadata from the server formatted with 'label_template'"""
        logging.debug("getFormattedLabelFromServer(%s)", path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    # NOTE(review): this declares a method that changes the configuration
    # (including the metadata server URL) as public -- confirm this is
    # intended.
    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata"""
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # used with the % operator in getMDFromPathOrUrl, so it needs a
        # placeholder for the path
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """interface for adding the OSAS_add_Metadata"""
    pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')).__of__(self)
    return pt()


def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """add a new metadata object"""
    newObj = MetaData(id, shortDescription, description, fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')