Mercurial > hg > MetaDataProvider
changeset 15:41b90f09a1f2
new getdata
author | casties |
---|---|
date | Tue, 02 Aug 2011 12:34:11 +0200 |
parents | 281d223aa361 |
children | 5d41c350dd2b |
files | MetaData.py MetaDataFolder.py |
diffstat | 2 files changed, 92 insertions(+), 47 deletions(-) [+] |
line wrap: on
line diff
--- a/MetaData.py Mon Aug 01 19:30:12 2011 +0200 +++ b/MetaData.py Tue Aug 02 12:34:11 2011 +0200 @@ -17,6 +17,24 @@ return bt +def putAppend(hash, key, value): + """puts value in dict hash at key if it doesn't exist or adds value to a list""" + if key in hash: + # key exists + oldval = hash[key] + if isinstance(oldval, list): + # is list already - append + oldval.append(value) + else: + # needs list + val = [oldval, value] + hash[key] = val + + else: + # key doesn't exist + hash[key] = value + + return hash class MetaData(Folder): """provides basic methods for managing metadata structures""" @@ -71,7 +89,7 @@ return path - def getSubDom(self, path=None, dom=None): + def getSubDom(self, path=None, dom=None, all=False): """returns the subtree (list) of the dom rooted in this element""" if dom is None: # get from server @@ -81,48 +99,75 @@ # lets assume dom is rooted in the first element xpath = '.' + self.getXmlPath(omitRoot=True) logging.debug("getSubDom looking for %s in %s"%(xpath, dom)) - elem = dom.findall(xpath) + if all: + elem = dom.findall(xpath) + else: + elem = dom.find(xpath) + return elem - def getData(self, path=None, dom=None, normalizeNames=True, allOccurrences=False, allText=0): + + def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False): + logging.debug("_getDataFromDom(dom=%s, recursive=%s)"%(elem,recursive)) + data = {} + attr = {} + # put attributes in @attr + for attname in elem.keys(): + attr[attname] = elem.get(attname) + + if attr: + data['@attr'] = attr + + # TODO: should this be here? + if self.mappingSelectAttribute: + # put type in @type + type = attr.get(self.mappingSelectAttribute, None) + if type is not None: + data['@type'] = normalizeFieldName(type) + + for e in elem: + # put all child elements in data + if normalizeNames: + # normalize key names + key = normalizeFieldName(e.tag) + else: + key = e.tag + + if recursive > 0: + # more recursive - call _getData on elements + val = self._getData(e, recursive=recursive-1, normalizeNames=normalizeNames, all=all, allText=allText) + else: + val = getText(e, recursive=allText) + + if all: + # add multiple tags as list + putAppend(data, key, val) + else: + data[key] = val + + logging.debug("_getDataFromDom: returns %s"%repr(data)) + return data + + + def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0): """returns dict with attributes and child elements from corresponding tag""" logging.debug("getData(path=%s, dom=%s)"%(path,dom)) if path is None and dom is None: return None - dataList = [] - elems = self.getSubDom(path=path, dom=dom) - for elem in elems: - data = {} - attr = {} - # put attributes in @attr - for attname in elem.keys(): - attr[attname] = elem.get(attname) - - data['@attr'] = attr - if self.mappingSelectAttribute: - # put type in @type - type = attr.get(self.mappingSelectAttribute, None) - if type is not None: - data['@type'] = normalizeFieldName(type) - - # put all subelements in dict - if normalizeNames: - for e in elem: - data[normalizeFieldName(e.tag)] = getText(e, recursive=allText) - else: - for e in elem: - data[e.tag] = getText(e, recursive=allText) - - dataList.append(data) + elem = self.getSubDom(path=path, dom=dom, all=all) + if all: + # subdom is list - return list + data = [] + for e in elem: + data.append(self._getData(e, recursive=recursive, normalizeNames=normalizeNames, allText=allText)) - if allOccurrences: - return dataList - - if dataList: - return dataList[0] else: - return {} + # subdom is element + data = self._getData(elem, recursive=recursive, normalizeNames=normalizeNames, allText=allText) + + return data + def getMapping(self, type): """returns MetaDataMapping for type"""
--- a/MetaDataFolder.py Mon Aug 01 19:30:12 2011 +0200 +++ b/MetaDataFolder.py Tue Aug 02 12:34:11 2011 +0200 @@ -131,29 +131,29 @@ obj = self.restrictedTraverse(xmlpath, None) return obj - def getXmlPathData(self, xmlpath, path=None, dom=None, allText=False, allOccurrences=False): + def getXmlPathData(self, xmlpath, path=None, dom=None, allText=False, all=False): """returns contents of element at xmlpath as dict""" logging.error("getXmlPathData(%s)"%xmlpath) mdObj = self.getXmlPathObj(xmlpath) if mdObj is not None: - return mdObj.getData(path=path, dom=dom, allText=allText, allOccurrences=allOccurrences) + return mdObj.getData(path=path, dom=dom, allText=allText, all=all) else: logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath) return None - def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, allText=False, allOccurrences=False): + def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, allText=False, all=False): """returns contents of element at xmlpath as dict""" logging.error("getXmlPathFormatted(xmlpath=%s, template=%s)"%(xmlpath,template)) mdObj = self.getXmlPathObj(xmlpath) if mdObj is not None: if data is None: - data = mdObj.getData(path=path, dom=dom, allText=allText, allOccurrences=allOccurrences) + data = mdObj.getData(path=path, dom=dom, allText=allText, all=all) if data is None: return '' fmt = '' - if allOccurrences: + if all: # data is list of elements for d in data: # concatenate formatted strings @@ -180,21 +180,21 @@ """returns contents of access tag as dict""" return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom) - def getAttributionData(self, path=None, dom=None, allOccurrences=True): + def getAttributionData(self, path=None, dom=None, all=True): """returns contents of attribution tag as dict""" - return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, allOccurrences=allOccurrences) + return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all) - def getAttributionFormatted(self, template, path=None, dom=None, data=None, allOccurrences=True): + def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True): """returns formatted contents of access tag""" - return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, allOccurrences=allOccurrences) + return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all) - def getCopyrightData(self, path=None, dom=None, allOccurrences=True): + def getCopyrightData(self, path=None, dom=None, all=True): """returns contents of copyright tag as dict""" - return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, allText=True, allOccurrences=allOccurrences) + return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, allText=True, all=all) - def getCopyrightFormatted(self, template, path=None, dom=None, data=None, allOccurrences=True): + def getCopyrightFormatted(self, template, path=None, dom=None, data=None, all=True): """returns formatted contents of access tag""" - return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, allOccurrences=allOccurrences) + return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, all=all) def getBibData(self, path=None, dom=None): """returns contents of bib tag as dict"""