Mercurial > hg > MetaDataProvider
view MetaData.py @ 3:3dadf0d89261
more renovation
author | casties |
---|---|
date | Tue, 26 Jul 2011 20:08:11 +0200 |
parents | ac8e119b25ec |
children | 8291255b1868 |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import os.path
import posixpath
import urllib
import logging
import urlparse

# TODO: which xml toolkit?
import amara
import xml.sax.saxutils
import xml.dom.minidom
import xml.etree.ElementTree as ET

# TODO: do we need this?
#from Products.OSA_system2 import OSAS_helpers
#from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping
from MetaDataMapping import MetaDataMapping
from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping

from SrvTxtUtils import getHttpData, getText


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Nodes whose nodeType is not TEXT_NODE are ignored.
    """
    return "".join(node.data for node in nodelist
                   if node.nodeType == node.TEXT_NODE)


def normalizeBibField(bt, underscore=True):
    """Return the normalised bib type/field name used for looking up mappings.

    The name is stripped, spaces are replaced by '-', and it is lower-cased.
    If underscore is true, '_' is replaced by '-' as well.
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')
    return bt


def getBibdataFromDom(dom):
    """Return a dict with all elements of the bib tag found in dom.

    The (normalised) type attribute is stored under the key '@type'; every
    child element is stored under its normalised tag name with its text as
    value. Returns an empty dict if there is no ".//meta/bib" element. The
    '@type' key is omitted if the bib element has no type attribute.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type (a missing attribute must not crash normalisation)
    bibtype = bib.get('type')
    if bibtype is not None:
        bibinfo['@type'] = normalizeBibField(bibtype)

    # put all subelements in dict
    for e in bib:
        bibinfo[normalizeBibField(e.tag)] = getText(e)

    return bibinfo


def toString(list):
    # Concatenates the unicode representation of every item of the sequence.
    # (The parameter name shadows the builtin but is kept for compatibility.)
    return u"".join(unicode(l) for l in list)


def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information, currently only
                  in the format produced by getDCFormatted of this module.
    @return: dict with the keys "title", "creator" and "date"; if mdSet
             cannot be parsed, a dict with the single key "error" holding
             the unparsed input.
    """
    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    import StringIO
    import sys
    buffer = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(buffer, prefixes=NSS)
    except Exception:
        # best effort: report the problem and hand the raw input back
        logging.error("Error: %s (%s)", sys.exc_info()[0], sys.exc_info()[1])
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
    return ret


class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
    )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """initialize a new instance"""
        self.id = id
        # label for the link on the add page
        self.shortDescription = shortDescription
        # description of the method for the link page
        self.description = description
        self.fieldList = fields.split(",")
        # must be set via changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """Convenience method to modify a path.

        remove: substring that is deleted from path (all occurrences).
        prefix: path that is joined in front of path.
        cut: number of trailing "/"-separated segments to drop.
        """
        if remove is not None:
            path = path.replace(remove, '')

        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            splitted = path.split("/")
            # clamp at 0 so that an oversized cut yields an empty path
            # instead of a negative slice end cutting from the wrong side
            path = "/".join(splitted[:max(len(splitted) - cut, 0)])

        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """Imports metadata from the metadataexportxml file.

        Without importFile the upload form is rendered and returned.
        Raises ValueError if the file contains no metadataExport element.
        """
        if importFile is None:
            pt = PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")
        if node is None:
            raise ValueError("importMetaDataExportXML: no metadataExport element found")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """Create mappings from the metadata node of the XML format.

        metadata is the metadata object the new objects are added to
        (defaults to self). Every "set" child becomes a MetaDataMapping,
        every "metadata" child becomes a nested MetaData object that is
        filled recursively.
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s", repr(node))
            if node.tag == "set":
                mappingId = node.get('name')
                isGeneric = (mappingId == 'generic')
                argList = {}
                for entry in node:
                    if isGeneric:
                        # generic mapping doesn't have labels
                        tag = entry.get('genericName')
                        label = tag
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    argList[tag] = {'tag': tag,
                                    'label': label,
                                    'explanation': getText(entry),
                                    'status': 'optional'}

                logging.debug("createMappingFromDom: new mapping=%s", repr(argList))
                metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s", repr(name))
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    def getMDFromPathOrUrl(self, path):
        # NOTE: intentionally no docstring. The original method has none;
        # NOTE(review): in Zope 2 a docstring would presumably make this
        # method publishable through the web - confirm before adding one.
        #
        # Returns whatever getHttpData returns for the index.meta URL that
        # belongs to path. path may be a full URL (used as is) or a server
        # path that is substituted into self.metaDataServerUrl.
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        elif path.endswith("index.meta"):
            # path only, already pointing to the index.meta file
            url = self.metaDataServerUrl % path
        else:
            # path only; URLs always use "/", independent of the platform
            url = posixpath.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        return getHttpData(url)

    def getDCFormatted(self, path):
        """Get the metadata as dc set.

        Returns an XML string with a bib element containing dc:* elements,
        or "" if the bib type or the required mappings cannot be found.
        """
        logging.debug("getDCFormatted(path=%s)", path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML%s", md)
        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) > 0:
            bibtype = unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes')) > 0:
            # special case for outdated index.meta files of type archimedes
            bibtype = "archimedes"
            archimedes = True
        else:
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""
            namespaceUsed = True
            bibtype = unicode(typePaths[0])

        logging.info("got type:%s", bibtype)

        # getattr() with a default returns None for a missing mapping, so
        # the result has to be checked explicitly as well.
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s", bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # fetch the mapping: generic field --> field in the given type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s", repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s", repr(value))
            logging.debug("Key: %s", repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()' % value)
            elif namespaceUsed:
                query = '//mpiwg:bib/mpiwg:%s/text()' % value
                try:
                    v = im.xml_xpath(query)
                except Exception:
                    logging.error('cannot do: %s', query)
                    # skip this field instead of using an unbound/stale result
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()' % value)

            if len(v) < 1:
                continue

            try:
                dc = dcMds[key][0]
            except (KeyError, IndexError):
                # no dc equivalent for this generic field
                continue

            if dc != "":
                logging.debug("%s--> : %s", repr(value), dc)
                mdHash.append([dc, unicode(v[0])])

        ret = """<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        ret += "<dc:type>%s</dc:type>" % xml.sax.saxutils.escape(bibtype)
        for dcName, dcValue in mdHash:
            ret += """<dc:%s>%s</dc:%s>""" % (dcName, xml.sax.saxutils.escape(dcValue), dcName)
        ret += "</bib>"
        return ret

    def getBibMapping(self, bibtype):
        """Returns the MetaDataMapping for bibtype or None if there is none."""
        # try type as id
        mapping = getattr(self.main.meta.bib, bibtype, None)
        if mapping is not None:
            return mapping

        # try manually
        normalizedType = normalizeBibField(bibtype, underscore=True)
        for obj in self.main.meta.bib.objectValues():
            # NOTE(review): the literal is "MetadataMapping" while the class
            # used in this module is MetaDataMapping - confirm the meta_type.
            if obj.meta_type != "MetadataMapping":
                continue

            # real type is in title
            mapType = obj.title
            if mapType == bibtype:
                # try type as is
                return obj

            # NOTE(review): the original comment said "without underscore",
            # but both sides are normalised with underscore=True.
            if normalizeBibField(mapType, underscore=True) == normalizedType:
                return obj

        return mapping

    def getBibFields(self, bibdata):
        """Returns a dict with the metadata descriptions for bibdata.

        The dict is a copy of the mapping's fields plus the key '@fieldList'.
        Returns an empty dict if bibdata has no '@type' or no mapping exists
        for the type.
        """
        bibtype = bibdata.get('@type')
        # get mapping from main/meta/bib
        mapping = None
        if bibtype is not None:
            mapping = self.getBibMapping(bibtype)

        if mapping is None:
            logging.error("getBibFields: no mapping for type: %s", bibtype)
            return {}

        # get field descriptions (copy so we can change it)
        bibfields = mapping.getFields().copy()
        # add field list
        bibfields['@fieldList'] = mapping.getFieldList()
        return bibfields

    def getBibMappedData(self, bibdata, allFields=False):
        """Returns a dict with metadata descriptions and data for bibdata.

        Every described field that has a value in bibdata is returned as a
        copy of its description with the additional key 'value'. With
        allFields, non-empty bibdata entries without a description are added
        using their key as tag and label. The key '@fieldList' holds the
        list of all returned field names.
        """
        bibfields = self.getBibFields(bibdata)
        mappedData = {}
        mappedList = []
        for bk in bibfields.keys():
            # ignore descriptions without data
            if not bibdata.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = bibfields[bk].copy()
            # add value
            bf['value'] = bibdata[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(bibdata):
            # add fields that were not in bibfields
            for bk in bibdata.keys():
                if bk in mappedData or not bibdata[bk]:
                    continue

                mappedData[bk] = {'tag': bk, 'label': bk, 'value': bibdata[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        return mappedData

    def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False):
        """Returns a string with document data formatted according to template.

        Gets data from server (path) or dom or pre-parsed bibdata. The
        template "<template>_<bibtype>" is used if it exists, otherwise
        "<template>_generic". Returns "" if no data source or no template
        is available.
        """
        logging.debug("getFormatted(template=%s)", template)

        if dom is None and bibdata is None:
            if path is None:
                logging.error("getFormatted: no path, dom or bibdata given")
                return ""

            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online", ""))
            #logging.debug("md:"+md)
            #dom = amara.parse(md)
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        # documents without a bib type fall through to the generic template
        bibtype = bibdata.get('@type')

        # get template
        tp = None
        if bibtype:
            tp = getattr(self, "%s_%s" % (template, bibtype.lower()), None)

        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s", template, bibtype)
            # try generic
            tp = getattr(self, "%s_generic" % template, None)

        if tp is None:
            logging.error("getFormatted: no generic template either: %s", template)
            return ""

        # put bib field descriptions in mdHash
        bibfields = self.getBibMappedData(bibdata, allFields=allFields)

        return tp(mdmap=bibfields, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaData(path=%s)", path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaDataShort(path=%s)", path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """get all metadata fields formatted with 'metadata_extended_template'"""
        logging.debug("getFormattedMetaDataExtended(path=%s)", path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom,
                                 bibdata=bibdata, allFields=True)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with 'label_template'"""
        logging.debug("getFormattedLabel(%s)", path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """get the metadata from the server, formatted with 'metadata_template'"""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)", path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """get all metadata fields from the server, formatted with 'metadata_extended_template'"""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)", path)
        return self.getFormatted('metadata_extended_template', path=path, allFields=True)

    def getFormattedLabelFromServer(self, path):
        """get the metadata from the server, formatted with 'label_template'"""
        logging.debug("getFormattedLabelFromServer(%s)", path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    # NOTE(review): this declares a configuration-changing method public -
    # confirm that this is intended and not a missing permission.
    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata"""
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """interface for adding the OSAS_add_Metadata"""
    pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')).__of__(self)
    return pt()


def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """Add a new MetaData object to the destination container."""
    newObj = MetaData(id, shortDescription, description, fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')