Mercurial > hg > MetaDataProvider
diff MetaDataFolder.py @ 6:00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
author | casties |
---|---|
date | Wed, 27 Jul 2011 21:08:01 +0200 |
parents | c1dbf78cc036 |
children | 4cd862bf37a3 |
line wrap: on
line diff
--- a/MetaDataFolder.py Wed Jul 27 14:48:56 2011 +0200 +++ b/MetaDataFolder.py Wed Jul 27 21:08:01 2011 +0200 @@ -16,7 +16,7 @@ return bt -def getBibdataFromDom(dom): +def OLDgetBibdataFromDom(dom): """returns dict with all elements from bib-tag""" bibinfo = {} bib = dom.find(".//meta/bib") @@ -30,6 +30,44 @@ return bibinfo +def toString(list): + ret=u"" + + for l in list: + ret+=unicode(l) + + return ret + +def dcMetaDataToHash(mdSet): + """Convenience Function for creates a hash from the DCMetadataset + @param mdSet: String containing DCMetadata informmation + currently only in the format getDCMetadata of this module""" + + NSS = { + 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', + 'dc': 'http://dublincore.org/documents/dcmi-namespace/', + 'owl':"http://www.w3.org/2002/07/owl#", + 'rdfs':"http://www.w3.org/2000/01/rdf-schema#" + } + ret={} + import StringIO + import sys + buffer= StringIO.StringIO(mdSet) + try: + md = amara.parse(buffer,prefixes=NSS) + except: + logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1])) + + ret["error"]=mdSet + return ret + + ret["title"] = toString(md.xml_xpath("//dc:title/text()")) + ret["creator"] =toString(md.xml_xpath("//dc:creator/text()")) + ret["date"] = toString(md.xml_xpath("//dc:date/text()")) + + return ret + + class MetaDataFolder(Folder): """provides methods for managing complete metadata structures""" meta_type='MetaDataFolder' @@ -61,141 +99,123 @@ md = getHttpData(url) return md + def getBibdataFromDom(self, dom): + """returns contents of bib tag as dict""" + return self.resource.meta.bib.getDataFromDom(dom) + def getBibMapping(self, bibtype): - """returns MetaDataMapping for bibtype""" - # try type as id - mapping = getattr(self.main.meta.bib, bibtype, None) - if mapping is None: - # try manually - mapFolder = self.main.meta.bib - for obj in mapFolder.objectValues(): - if obj.meta_type == "MetadataMapping": - # real type is in title - mapType = obj.title - if mapType == bibtype: - # try type as is - return obj - - if normalizeBibField(mapType, underscore=True) == normalizeBibField(bibtype, underscore=True): - # try normalized type without underscore - return obj - - return mapping + """returns MetaDataMapping for resource/meta/bib of bibtype""" + return self.resource.meta.bib.getMapping(bibtype) def getBibFields(self, bibdata): """returns dict with metadata description for bibdata""" - bibfields = {} - bibtype = bibdata['@type'] - # get mapping from main/meta/bib - mapping = self.getBibMapping(bibtype) - if mapping is None: - logging.error("getBibFields: no mapping for type: %s"%bibtype) - return bibfields - - # get field descriptions (copy so we can change it) - bibfields = mapping.getFields().copy() - # add field list - bibfields['@fieldList'] = mapping.getFieldList() - - return bibfields + return self.resource.meta.bib.getMapFields(bibdata) def getBibMappedData(self, bibdata, allFields=False): """returns dict with metadata descriptions and data for bibdata""" - bibfields = self.getBibFields(bibdata) - mappedData = {} - mappedList = [] - for bk in bibfields.keys(): - # ignore descriptions without data - if not bibdata.get(bk, None): - continue + return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields) + + def getDCMappedData(self, bibdata): + """returns dict with DC keys and data from bibdata""" + return self.resource.meta.bib.getDCMappedData(bibdata) + + + def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None): + """get the metadafrom server""" + logging.debug("getBibFormattedMetaData(path=%s)"%path) + return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata) + + def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): + """get the metadafrom server""" + logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path) + return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True) - # field description (copy so we can change it) - bf = bibfields[bk].copy() - # add value - bf['value'] = bibdata[bk] - mappedData[bk] = bf - mappedList.append(bk) - - if allFields and len(mappedData) < len(bibdata): - # add fields that were not in bibfields - for bk in bibdata.keys(): - if bk in mappedData or not bibdata[bk]: - continue + def getBibFormattedLabel(self,path=None, dom=None, bibdata=None): + """get the metadafrom server""" + logging.debug("getBibFormattedLabel(%s)"%path) + return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata) + + getFormattedMetaData = getBibFormattedMetaData + getFormattedMetaDataShort = getBibFormattedMetaData + getFormattedMetaDataExtended = getBibFormattedMetaDataExtended + getFormattedLabel = getBibFormattedLabel - mappedData[bk] = {'tag':bk, 'label':bk, 'value':bibdata[bk]} - mappedList.append(bk) - - mappedData['@fieldList'] = mappedList - return mappedData - - def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False): - """returns string with document data formatted according to template. - gets data from server or dom or pre-parsed bibdata.""" - logging.debug("getFormatted(template=%s)"%(template)) - - if dom is None and bibdata is None: - # get from server - md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online","")) - #logging.debug("md:"+md) - #dom = amara.parse(md) - dom = ET.fromstring(md) - - # get contents of bib tag - if bibdata is None: - bibdata = getBibdataFromDom(dom) + def getDCFormatted(self,path): + """get the metadata as dc set""" + logging.debug("getDCFormatted(path=%s)"%path) + namespace={ 'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"} + namespaceUsed=False + + md = self.getMDFromPathOrUrl(path) + logging.debug("MD in XML"+md) + im = amara.parse(md, prefixes=namespace) + + typePaths=im.xml_xpath('//bib/@type') + archimedes=False + + if len(typePaths)<1: + typePaths=im.xml_xpath('//meta/archimedes') # sinderfall fuer veraltete index.meta files vom typ archimedes + if len(typePaths)>0: + type = "archimedes" + archimedes=True + else: + typePaths=im.xml_xpath('//mpiwg:bib/@type') + if len(typePaths)<1: + return "" + else: + namespaceUsed=True + + type=unicode(typePaths[0]) + else: + type=unicode(typePaths[0]) + logging.info("got type:"+type) + try: + mapping=getattr(self.main.meta.bib,type.lower(),None) + except: + logging.error("getMetaDataFromServer no mapping for type: %s"%type) + return "" + + try: + dcMapping=getattr(self.main.meta.bib,"dc",None) + except: + logging.error("getMetaDataFromServer no dc in meta/bib") + return "" + + mds=mapping.generateMappingHash() # Hole das Mapping generisches Feld --> Feld im entsprechenden Typ + dcMds=dcMapping.generateMappingHash() + + mdHash=[] + logging.debug("Value: %s"%repr(mds)) + + for key,valueTriple in mds.items(): + value=valueTriple[0] + logging.debug("Value: %s"%repr(value)) + logging.debug("Key: %s"%repr(key)) + if value!="": + if not archimedes: + if namespaceUsed: + try: + v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value) + except: + logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value) + else: + v = im.xml_xpath('//bib/%s/text()'%value) + else: + v = im.xml_xpath('//archimedes/%s/text()'%value) + if len(v) > 0: + dc=dcMds[key][0] + + if (dc !="") and (value !=""): + logging.debug("%s--> : %s"%(repr(value),dc)) + mdHash.append([dc,unicode(v[0])]) + + ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """ + ret+="<dc:type>%s</dc:type>"%type + for md in mdHash: - bibtype = bibdata['@type'] - - # get template - tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None) - if tp is None: - logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype)) - # try generic - tp=getattr(self,"%s_generic"%(template), None) - if tp is None: - logging.error("getFormatted: no generic template either: %s"%(template)) - return "" - - # put bib field descriptions in mdHash - bibfields = self.getBibMappedData(bibdata, allFields=allFields) - - return tp(mdmap=bibfields, md=bibdata) - - - def getFormattedMetaData(self, path=None, dom=None, bibdata=None): - """get the metadafrom server""" - logging.debug("getFormattedMetaData(path=%s)"%path) - return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) - - def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None): - """get the metadafrom server""" - logging.debug("getFormattedMetaDataShort(path=%s)"%path) - return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) - - def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): - """get the metadafrom server""" - logging.debug("getFormattedMetaDataExtended(path=%s)"%path) - return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata, allFields=True) - - def getFormattedLabel(self,path=None, dom=None, bibdata=None): - """get the metadafrom server""" - logging.debug("getFormattedLabel(%s)"%path) - return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata) - - def getFormattedMetaDataShortFromServer(self,path): - """get the metadafrom server""" - logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path) - return self.getFormatted('metadata_template', path) - - def getFormattedMetaDataExtendedFromServer(self,path): - """get the metadafrom server""" - logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path) - return self.getFormatted('metadata_extended_template', path=path, allFields=True) - - def getFormattedLabelFromServer(self,path): - """get the metadafrom server""" - logging.debug("getFormattedLabelFromServer(%s)"%path) - return self.getFormatted('label_template', path) + ret+="""<dc:%s>%s</dc:%s>"""%(md[0],xml.sax.saxutils.escape(md[1]),md[0]) + ret+="</bib>" + return ret changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals()) @@ -209,7 +229,7 @@ RESPONSE.redirect('manage_main') -manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetaDataFolderForm',globals()) +manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetadataFolderForm',globals()) def manage_addMetaDataFolder(self,id,title,RESPONSE=None): """a MetaDataFolder objekt"""