changeset 15:41b90f09a1f2

new getdata
author casties
date Tue, 02 Aug 2011 12:34:11 +0200
parents 281d223aa361
children 5d41c350dd2b
files MetaData.py MetaDataFolder.py
diffstat 2 files changed, 92 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/MetaData.py	Mon Aug 01 19:30:12 2011 +0200
+++ b/MetaData.py	Tue Aug 02 12:34:11 2011 +0200
@@ -17,6 +17,24 @@
         
     return bt               
 
+def putAppend(hash, key, value):
+    """stores value at hash[key]; if key is already set, old and new values are collected in a list. returns hash."""
+    if key in hash:
+        # key exists - keep the old value(s)
+        oldval = hash[key]
+        if isinstance(oldval, list):
+            # is list already - append in place (a list stored as plain value is treated the same way)
+            oldval.append(value)
+        else:
+            # second value for this key - replace the single value by a list of both
+            val = [oldval, value]
+            hash[key] = val
+    
+    else:
+        # key doesn't exist - store value as is (no list for single values)
+        hash[key] = value
+
+    return hash
 
 class MetaData(Folder):
     """provides basic methods for managing metadata structures"""
@@ -71,7 +89,7 @@
             
         return path
     
-    def getSubDom(self, path=None, dom=None):
+    def getSubDom(self, path=None, dom=None, all=False):
         """returns the subtree (list) of the dom rooted in this element"""
         if dom is None:
             # get from server
@@ -81,48 +99,75 @@
         # lets assume dom is rooted in the first element
         xpath = '.' + self.getXmlPath(omitRoot=True)
         logging.debug("getSubDom looking for %s in %s"%(xpath, dom))
-        elem = dom.findall(xpath)
+        if all:
+            elem = dom.findall(xpath)
+        else:
+            elem = dom.find(xpath)
+            
         return elem
         
-    def getData(self, path=None, dom=None, normalizeNames=True, allOccurrences=False, allText=0):
+
+    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
+        """returns dict with attributes ('@attr', if any), '@type' (if selected) and child elements of elem, expanding children recursive levels deep"""
+        logging.debug("_getDataFromDom(dom=%s, recursive=%s)"%(elem,recursive))
+        data = {}
+        attr = {}
+        # put attributes in @attr (NOTE(review): elem is used unchecked - None would raise here)
+        for attname in elem.keys():
+            attr[attname] = elem.get(attname)
+        if attr:
+            data['@attr'] = attr
+            
+        # TODO: should this be here?
+        if self.mappingSelectAttribute:
+            # put normalized value of the select attribute in @type
+            type = attr.get(self.mappingSelectAttribute, None)
+            if type is not None:
+                data['@type'] = normalizeFieldName(type)
+                
+        for e in elem:
+            # put all child elements in data
+            if normalizeNames:
+                # use normalized tag name as key
+                key = normalizeFieldName(e.tag)
+            else:
+                key = e.tag
+                
+            if recursive > 0:
+                # more levels to go - child becomes a nested dict
+                val = self._getData(e, recursive=recursive-1, normalizeNames=normalizeNames, all=all, allText=allText)
+            else:
+                val = getText(e, recursive=allText)
+                
+            if all:
+                # collect repeated tags in a list - otherwise the last one wins
+                putAppend(data, key, val)
+            else:
+                data[key] = val
+        # NOTE(review): debug messages say _getDataFromDom but this method is _getData
+        logging.debug("_getDataFromDom: returns %s"%repr(data))
+        return data
+            
+
+    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
         """returns dict with attributes and child elements from corresponding tag"""
         logging.debug("getData(path=%s, dom=%s)"%(path,dom))
         if path is None and dom is None:
             return None
         
-        dataList = []
-        elems = self.getSubDom(path=path, dom=dom)
-        for elem in elems:
-            data = {}
-            attr = {}
-            # put attributes in @attr
-            for attname in elem.keys():
-                attr[attname] = elem.get(attname)
-                
-            data['@attr'] = attr
-            if self.mappingSelectAttribute:
-                # put type in @type
-                type = attr.get(self.mappingSelectAttribute, None)
-                if type is not None:
-                    data['@type'] = normalizeFieldName(type)
-            
-            # put all subelements in dict
-            if normalizeNames:
-                for e in elem:
-                    data[normalizeFieldName(e.tag)] = getText(e, recursive=allText)
-            else:
-                for e in elem:
-                    data[e.tag] = getText(e, recursive=allText)
-                    
-            dataList.append(data)
+        elem = self.getSubDom(path=path, dom=dom, all=all)
+        if all:
+            # subdom is list - return list of dicts (NOTE(review): all is not passed on to _getData)
+            data = []
+            for e in elem:
+                data.append(self._getData(e, recursive=recursive, normalizeNames=normalizeNames, allText=allText))
 
-        if allOccurrences:
-            return dataList
-        
-        if dataList:
-            return dataList[0]
         else:
-            return {}
+            # subdom is single element (NOTE(review): find() result is used unchecked - confirm it can't be None)
+            data = self._getData(elem, recursive=recursive, normalizeNames=normalizeNames, allText=allText)
+
+        return data
+    
 
     def getMapping(self, type):
         """returns MetaDataMapping for type"""
--- a/MetaDataFolder.py	Mon Aug 01 19:30:12 2011 +0200
+++ b/MetaDataFolder.py	Tue Aug 02 12:34:11 2011 +0200
@@ -131,29 +131,29 @@
         obj = self.restrictedTraverse(xmlpath, None)
         return obj
 
-    def getXmlPathData(self, xmlpath, path=None, dom=None, allText=False, allOccurrences=False):
+    def getXmlPathData(self, xmlpath, path=None, dom=None, allText=False, all=False):
         """returns contents of element at xmlpath as dict"""
         logging.error("getXmlPathData(%s)"%xmlpath)
         mdObj = self.getXmlPathObj(xmlpath)
         if mdObj is not None:
-            return mdObj.getData(path=path, dom=dom, allText=allText, allOccurrences=allOccurrences)
+            return mdObj.getData(path=path, dom=dom, allText=allText, all=all)
         else:
             logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
             return None
 
-    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, allText=False, allOccurrences=False):
+    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, allText=False, all=False):
         """returns contents of element at xmlpath as dict"""
         logging.error("getXmlPathFormatted(xmlpath=%s, template=%s)"%(xmlpath,template))
         mdObj = self.getXmlPathObj(xmlpath)
         if mdObj is not None:
             if data is None:
-                data = mdObj.getData(path=path, dom=dom, allText=allText, allOccurrences=allOccurrences)
+                data = mdObj.getData(path=path, dom=dom, allText=allText, all=all)
                 
             if data is None:
                 return ''
             
             fmt = ''
-            if allOccurrences:
+            if all:
                 # data is list of elements
                 for d in data:
                     # concatenate formatted strings
@@ -180,21 +180,21 @@
         """returns contents of access tag as dict"""
         return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)
     
-    def getAttributionData(self, path=None, dom=None, allOccurrences=True):
+    def getAttributionData(self, path=None, dom=None, all=True):
         """returns contents of attribution tag as dict"""
-        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, allOccurrences=allOccurrences)
+        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all)
     
-    def getAttributionFormatted(self, template, path=None, dom=None, data=None, allOccurrences=True):
+    def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True):
         """returns formatted contents of access tag"""
-        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, allOccurrences=allOccurrences)
+        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all)
     
-    def getCopyrightData(self, path=None, dom=None, allOccurrences=True):
+    def getCopyrightData(self, path=None, dom=None, all=True):
         """returns contents of copyright tag as dict"""
-        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, allText=True, allOccurrences=allOccurrences)
+        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, allText=True, all=all)
     
-    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, allOccurrences=True):
+    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, all=True):
         """returns formatted contents of access tag"""
-        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, allOccurrences=allOccurrences)
+        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, all=all)
     
     def getBibData(self, path=None, dom=None):
         """returns contents of bib tag as dict"""