changeset 12:7f0e2b656e5c

more work for non-bib metadata
author casties
date Fri, 29 Jul 2011 18:28:06 +0200
parents a29665fa9c62
children 5f48f956ffa3
files MetaData.py MetaDataFolder.py SrvTxtUtils.py zpt/changeMetaDataFolder.zpt
diffstat 4 files changed, 41 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/MetaData.py	Fri Jul 29 14:45:13 2011 +0200
+++ b/MetaData.py	Fri Jul 29 18:28:06 2011 +0200
@@ -74,8 +74,7 @@
         """returns the subtree of the dom rooted in this element"""
         if dom is None:
             # get from server
-            md = self.getMDFromPathOrUrl(path)
-            dom = ET.fromstring(md)
+            dom = self.getDomFromPathOrUrl(path)
                 
         # ElementTree doesn't like absolute paths
         # lets assume dom is rooted in the first element
@@ -84,7 +83,7 @@
         elem = dom.find(xpath)
         return elem
         
-    def getData(self, path=None, dom=None, normalizeNames=True):
+    def getData(self, path=None, dom=None, normalizeNames=True, recursive=0):
         """returns dict with attributes and child elements from corresponding tag"""
         data = {}
         attr = {}
@@ -104,10 +103,10 @@
             # put all subelements in dict
             if normalizeNames:
                 for e in elem:
-                    data[normalizeFieldName(e.tag)] = getText(e)
+                    data[normalizeFieldName(e.tag)] = getText(e, recursive=recursive)
             else:
                 for e in elem:
-                    data[e.tag] = getText(e)
+                    data[e.tag] = getText(e, recursive=recursive)
             
         return data
 
--- a/MetaDataFolder.py	Fri Jul 29 14:45:13 2011 +0200
+++ b/MetaDataFolder.py	Fri Jul 29 18:28:06 2011 +0200
@@ -6,8 +6,11 @@
 import urlparse
 import logging
 
+import xml.etree.ElementTree as ET
+
 from MetaDataMapping import MetaDataMapping
 from MetaData import MetaData
+from SrvTxtUtils import getHttpData, getText
 
 def normalizeBibField(bt, underscore=True):
     """returns normalised bib type for looking up mappings"""
@@ -77,18 +80,22 @@
         {'label':'Main Config','action':'changeMetaDataFolderForm'},
         )
 
-    def __init__(self,id,title='',metaDataServerUrl=''):
+    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"
+    """URL of metadata server. %s replaced by file path."""
+
+    def __init__(self,id,title='',metaDataServerUrl=None):
         """initialize a new instance"""
         self.id = id
         self.title = title
-        self.metaDataServerUrl = metaDataServerUrl
+        if metaDataServerUrl:
+            self.metaDataServerUrl = metaDataServerUrl
 
         
     def getMDFromPathOrUrl(self,path):
         """returns contents of metadata file from server as text"""
         if not path:
             logging.error("getMDFromPathOrUrl: empty path!")
-            return ""
+            return None
         
         parsedurl = urlparse.urlparse(path)
         if parsedurl[0] != "":
@@ -106,6 +113,15 @@
         md = getHttpData(url)
         return md
 
+    def getDomFromPathOrUrl(self, path):
+        """Fetch the metadata file for path and parse it with ElementTree.
+
+        Returns the parsed root element, or None if no data came back.
+        """
+        text = self.getMDFromPathOrUrl(path)
+        # empty or missing response: there is nothing to parse
+        return ET.fromstring(text) if text else None
+
     def getXmlPathData(self, xmlpath, path=None, dom=None):
         """returns contents of element at xmlpath as dict"""
         logging.error("getXmlPathData(%s)"%xmlpath)
@@ -120,10 +136,18 @@
             logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
             return None
 
+    def getResourceData(self, path=None, dom=None):
+        """returns contents of the 'resource' element as dict (delegates to getXmlPathData)"""
+        return self.getXmlPathData('resource', path=path, dom=dom)
+
     def getTexttoolData(self, path=None, dom=None):
         """returns contents of texttool tag as dict"""
         return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)
 
+    def getAccessData(self, path=None, dom=None):
+        """returns contents of the access tag (resource/meta/access-conditions/access) as dict"""
+        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)
+    
     def getBibData(self, path=None, dom=None):
         """returns contents of bib tag as dict"""
         return self.resource.meta.bib.getData(path=path, dom=dom)
--- a/SrvTxtUtils.py	Fri Jul 29 14:45:13 2011 +0200
+++ b/SrvTxtUtils.py	Fri Jul 29 18:28:06 2011 +0200
@@ -6,7 +6,7 @@
 import logging
 
 
-srvTxtUtilsVersion = "1.1"
+srvTxtUtilsVersion = "1.2"
 
 def getInt(number, default=0):
     """returns always an int (0 in case of problems)"""
@@ -22,14 +22,18 @@
     except:
         return default
 
-def getText(node):
+def getText(node, recursive=0):
     """returns all text content of a node and its subnodes"""
     if node is None:
-        return ""
+        return ''
+    
     # ElementTree:
-    text = node.text or ""
+    text = node.text or ''
     for e in node:
-        text += gettext(e)
+        if recursive:
+            text += getText(e, recursive=recursive)  # keep descending below e
+        else:
+            text += e.text or ''
         if e.tail:
             text += e.tail
 
--- a/zpt/changeMetaDataFolder.zpt	Fri Jul 29 14:45:13 2011 +0200
+++ b/zpt/changeMetaDataFolder.zpt	Fri Jul 29 18:28:06 2011 +0200
@@ -6,7 +6,7 @@
 
     <form name="form" action="changeMetaDataFolder">
       <b> Title: </b><input type="text" name="title" size="20" tal:attributes="value python:here.title"><br><br>
-      <i>Server for XML files (e.g. url of digilib Texter-servlet)</i><br>
+      <i>Server for XML files (e.g. url of digilib Texter-servlet. '%s' replaced with file path)</i><br>
       <input type="text" size="100" name="metaDataServerUrl" tal:attributes="value python:getattr(here,'metaDataServerUrl','')"><br><br> 
       <input type="submit" value="Change"><br><br>
     </form>