Mercurial > hg > MetaDataProvider
annotate MetaDataFolder.py @ 33:1f845c76dad3
all getXXX take recursive and all parameters.
| author | casties |
|---|---|
| date | Tue, 18 Dec 2012 19:25:02 +0100 |
| parents | ab58edfc0707 |
| children | 559907a4d538 |
| rev | line source |
|---|---|
| 4 | 1 from OFS.Folder import Folder |
| 2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
| 3 from Globals import package_home | |
| 4 from AccessControl import ClassSecurityInfo | |
| 5 import os.path | |
| 9 | 6 import urlparse |
| 4 | 7 import logging |
| 8 | |
| 12 | 9 import xml.etree.ElementTree as ET |
| 10 | |
| 4 | 11 from MetaDataMapping import MetaDataMapping |
| 12 from MetaData import MetaData | |
| 12 | 13 from SrvTxtUtils import getHttpData, getText |
| 4 | 14 |
def normalizeBibField(bt, underscore=True):
    """returns normalised bib type for looking up mappings.

    Surrounding whitespace is stripped, inner spaces become '-' and the
    result is lower-cased.

    @param bt: bib type (or field name) as string
    @param underscore: if true, '_' is replaced by '-' as well
    @returns: normalised string
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')

    return bt
| 22 | |
|
6
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
def toString(list):
    """returns the unicode representations of all items concatenated.

    @param list: iterable of objects (the name shadows the builtin but is
        kept because it is part of the existing signature)
    @returns: unicode string, empty for an empty iterable
    """
    # type(u"") is unicode on Python 2 (and str on Python 3)
    text_type = type(u"")
    # join once instead of repeated += concatenation
    return u"".join(text_type(item) for item in list)
|
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
30 |
class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures"""
    meta_type='MetaDataFolder'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self,id,title='',metaDataServerUrl=None):
        """initialize a new instance"""
        self.id = id
        self.title = title
        if metaDataServerUrl:
            # only override the class-level default when a URL is given
            self.metaDataServerUrl = metaDataServerUrl


    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text.

        @param path: URL with schema (used as is) or file path (inserted
            into metaDataServerUrl)
        @returns: result of getHttpData or None if path is empty or the
            request fails
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl % path
            if not path.endswith("index.meta"):
                # append the file name with an explicit '/': this is a URL,
                # so the OS path separator must not be used
                if not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        try:
            return getHttpData(url)
        except Exception:
            # best effort: log the failure (with traceback) and return None
            logging.exception("getMDFromPathOrUrl: unable to get data!")
            return None

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path.

        @param path: file or url path to metadata file
        @returns: ElementTree element or None if no data could be read
        """
        dom = None
        data = self.getMDFromPathOrUrl(path)
        if data:
            dom = ET.fromstring(data)

        return dom

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath.

        @param xmlpath: xml path to selected elements
        @returns: result of restrictedTraverse or None if nothing is found
        """
        # make xmlpath relative for Zope
        # (startswith also copes with an empty xmlpath)
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        return self.restrictedTraverse(xmlpath, None)

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict.

        @param xmlpath: xml path to selected elements
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag)
            or None if there is no MetaData object at xmlpath
        """
        # trace message only, so it is logged at debug level
        logging.debug("getXmlPathData(%s)", xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!", xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath formatted with template.

        @param xmlpath: xml path to selected elements
        @param template: name of template for data
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param data: data to format; read with getData if None
        @param allFields: passed on to getFormatted of the MetaData object
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: formatted string; '' if there is no MetaData object at
            xmlpath or no data
        """
        # trace message only, so it is logged at debug level
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)", xmlpath, template)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!", xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate formatted strings,
            # each followed by a newline
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data)

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom, recursive=recursive, all=all)

    def getTexttoolData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom, recursive=recursive, all=all)

    def getAccessData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of attribution tag (all=True: list of dicts)"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, recursive=0, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag (all=True: list of dicts)"""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """returns formatted contents of copyright tag"""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of context tag (all=True: list of dicts)"""
        # NOTE(review): an older comment here said "information is two tags
        # deep - recursive=1" but the default is recursive=0 -- confirm which
        # one is intended.
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, recursive=recursive, all=all)


    def getDRI(self, path=None, dom=None, type="escidoc"):
        """returns the DRI of the document.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param type: value of the type attribute of the dri element to use.
            None takes the first dri element.
        @returns: '@text' value of the selected dri element or None
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        if dris is None:
            return None

        for dri in dris:
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None


    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCDataFromPath(self,path):
        """returns DC mapped data from path to index.meta"""
        return self.resource.meta.bib.getDCDataFromPath(path)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with 'metadata_template'"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with 'metadata_extended_template' (allFields=True)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with 'label_template'"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility: old method names
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel


    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): this declares a method that changes the configuration as
    # public -- confirm that no stricter permission is wanted here.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder"""
        self.title = title
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
| 264 | |
| 265 | |
def manage_addMetaDataFolderForm(self):
    """add MetaDataFolder form"""
    # wrap the page template in the context of self before rendering it
    pt = PageTemplateFile('zpt/addMetadataFolderForm',globals()).__of__(self)
    return pt()
| 4 | 270 |
def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """add a MetaDataFolder object.

    @param id: id of the new object
    @param title: title of the new object
    @param RESPONSE: if given, redirects to 'manage_main' afterwards
    """
    newObj = MetaDataFolder(id, title)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
| 277 |
