Mercurial > hg > MetaDataProvider
changeset 12:7f0e2b656e5c
more work for non-bib metadata
author | casties |
---|---|
date | Fri, 29 Jul 2011 18:28:06 +0200 |
parents | a29665fa9c62 |
children | 5f48f956ffa3 |
files | MetaData.py MetaDataFolder.py SrvTxtUtils.py zpt/changeMetaDataFolder.zpt |
diffstat | 4 files changed, 41 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/MetaData.py Fri Jul 29 14:45:13 2011 +0200
+++ b/MetaData.py Fri Jul 29 18:28:06 2011 +0200
@@ -74,8 +74,7 @@
         """returns the subtree of the dom rooted in this element"""
         if dom is None:
             # get from server
-            md = self.getMDFromPathOrUrl(path)
-            dom = ET.fromstring(md)
+            md = self.getDomFromPathOrUrl(path)
 
         # ElementTree doesn't like absolute paths
         # lets assume dom is rooted in the first element
@@ -84,7 +83,7 @@
         elem = dom.find(xpath)
         return elem
 
-    def getData(self, path=None, dom=None, normalizeNames=True):
+    def getData(self, path=None, dom=None, normalizeNames=True, recursive=0):
         """returns dict with attributes and child elements from corresponding tag"""
         data = {}
         attr = {}
@@ -104,10 +103,10 @@
         # put all subelements in dict
         if normalizeNames:
             for e in elem:
-                data[normalizeFieldName(e.tag)] = getText(e)
+                data[normalizeFieldName(e.tag)] = getText(e, recursive=recursive)
         else:
             for e in elem:
-                data[e.tag] = getText(e)
+                data[e.tag] = getText(e, recursive=recursive)
 
         return data
 
```
```diff
--- a/MetaDataFolder.py Fri Jul 29 14:45:13 2011 +0200
+++ b/MetaDataFolder.py Fri Jul 29 18:28:06 2011 +0200
@@ -6,8 +6,11 @@
 import urlparse
 import logging
 
+import xml.etree.ElementTree as ET
+
 from MetaDataMapping import MetaDataMapping
 from MetaData import MetaData
+from SrvTxtUtils import getHttpData, getText
 
 def normalizeBibField(bt, underscore=True):
     """returns normalised bib type for looking up mappings"""
@@ -77,18 +80,22 @@
         {'label':'Main Config','action':'changeMetaDataFolderForm'},
         )
 
-    def __init__(self,id,title='',metaDataServerUrl=''):
+    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"
+    """URL of metadata server. %s replaced by file path."""
+    def __init__(self,id,title='',metaDataServerUrl=None):
         """initialize a new instance"""
         self.id = id
         self.title = title
-        self.metaDataServerUrl = metaDataServerUrl
+        if metaDataServerUrl:
+            self.metaDataServerUrl = metaDataServerUrl
 
     def getMDFromPathOrUrl(self,path):
         """returns contents of metadata file from server as text"""
         if not path:
             logging.error("getMDFromPathOrUrl: empty path!")
-            return ""
+            return None
 
         parsedurl = urlparse.urlparse(path)
         if parsedurl[0] != "":
@@ -106,6 +113,15 @@
         md = getHttpData(url)
         return md
 
+    def getDomFromPathOrUrl(self, path):
+        """returns DOM of metadata file at given path"""
+        dom = None
+        data = self.getMDFromPathOrUrl(path)
+        if data:
+            dom = ET.fromstring(data)
+
+        return dom
+
     def getXmlPathData(self, xmlpath, path=None, dom=None):
         """returns contents of element at xmlpath as dict"""
         logging.error("getXmlPathData(%s)"%xmlpath)
@@ -120,10 +136,18 @@
             logging.error("getXmlPathData: MetaData element at '%s' not found!"%xmlpath)
             return None
 
+    def getResourceData(self, path=None, dom=None):
+        """returns contents of resource tag as dict"""
+        return self.getXmlPathData('resource', path=path, dom=dom)
+
     def getTexttoolData(self, path=None, dom=None):
         """returns contents of texttool tag as dict"""
         return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)
 
+    def getAccessData(self, path=None, dom=None):
+        """returns contents of access tag as dict"""
+        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)
+
     def getBibData(self, path=None, dom=None):
         """returns contents of bib tag as dict"""
         return self.resource.meta.bib.getData(path=path, dom=dom)
```
```diff
--- a/SrvTxtUtils.py Fri Jul 29 14:45:13 2011 +0200
+++ b/SrvTxtUtils.py Fri Jul 29 18:28:06 2011 +0200
@@ -6,7 +6,7 @@
 
 import logging
 
-srvTxtUtilsVersion = "1.1"
+srvTxtUtilsVersion = "1.2"
 
 def getInt(number, default=0):
     """returns always an int (0 in case of problems)"""
@@ -22,14 +22,18 @@
     except:
         return default
 
-def getText(node):
+def getText(node, recursive=0):
     """returns all text content of a node and its subnodes"""
     if node is None:
-        return ""
+        return ''
+
     # ElementTree:
-    text = node.text or ""
+    text = node.text or ''
     for e in node:
-        text += gettext(e)
+        if recursive:
+            text += getText(e)
+        else:
+            text += e.text or ''
         if e.tail:
             text += e.tail
 
```
```diff
--- a/zpt/changeMetaDataFolder.zpt Fri Jul 29 14:45:13 2011 +0200
+++ b/zpt/changeMetaDataFolder.zpt Fri Jul 29 18:28:06 2011 +0200
@@ -6,7 +6,7 @@
 <form name="form" action="changeMetaDataFolder">
 <b> Title: </b><input type="text" name="title" size="20" tal:attributes="value python:here.title"><br><br>
 
-<i>Server for XML files (e.g. url of digilib Texter-servlet)</i><br>
+<i>Server for XML files (e.g. url of digilib Texter-servlet. '%s' replaced with file path)</i><br>
 <input type="text" size="100" name="metaDataServerUrl" tal:attributes="value python:getattr(here,'metaDataServerUrl','')"><br><br>
 <input type="submit" value="Change"><br><br>
 </form>
```