Mercurial > hg > MetaDataProvider
annotate MetaDataFolder.py @ 38:67115536b7ec default tip
DC as JSON export added
author | dwinter |
---|---|
date | Thu, 22 May 2014 12:09:20 +0200 |
parents | e231cff8688b |
children |
rev | line source |
---|---|
4 | 1 from OFS.Folder import Folder |
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
3 from Globals import package_home | |
4 from AccessControl import ClassSecurityInfo | |
5 import os.path | |
9 | 6 import urlparse |
4 | 7 import logging |
8 | |
12 | 9 import xml.etree.ElementTree as ET |
10 | |
4 | 11 from MetaDataMapping import MetaDataMapping |
12 from MetaData import MetaData | |
12 | 13 from SrvTxtUtils import getHttpData, getText |
4 | 14 |
def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type used for looking up mappings.

    Surrounding whitespace is removed, the text is lower-cased and every
    space becomes a hyphen.

    @param bt: bib type (or field name) as string
    @param underscore: if true, underscores are turned into hyphens as well
    @returns: normalised string
    """
    normalised = bt.strip().lower().replace(' ', '-')
    if not underscore:
        return normalised

    return normalised.replace('_', '-')
22 | |
6
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
def toString(list):
    """Return the concatenated text representations of all items.

    @param list: iterable of arbitrary objects (the parameter name shadows
        the builtin but is kept for backward compatibility with callers)
    @returns: one unicode string; empty string for an empty iterable
    """
    # resolve the text type once; fall back to str where the Python 2
    # `unicode` builtin does not exist
    try:
        to_text = unicode
    except NameError:
        to_text = str

    # join instead of repeated += to avoid quadratic string building
    return u"".join(to_text(item) for item in list)
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
30 |
class MetaDataFolder(Folder):
    """Folder that provides methods for managing complete metadata structures.

    Most accessors delegate to MetaData objects that live below this folder
    and are addressed by their XML path (e.g. ``resource/meta/bib``).
    """
    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataFolderForm'},
    )

    # URL of metadata server. %s is replaced by the file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self, id, title='', metaDataServerUrl=None):
        """Initialize a new instance.

        @param id: id of the folder
        @param title: title of the folder
        @param metaDataServerUrl: URL template of the metadata server; the
            class default is kept when this is empty
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self, path):
        """Return the contents of a metadata file from the server as text.

        @param path: complete URL (with scheme) or file path of a document
            directory or of its index.meta file
        @returns: result of getHttpData for the resolved URL, or None when
            path is empty or the data could not be fetched
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        if urlparse.urlparse(path)[0] != "":
            # has scheme (e.g. http) -- use the URL unchanged
            url = path
        else:
            # path only -- strip the local prefix and ask the metadata server
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl % path
            if not path.endswith("index.meta"):
                # URLs always use '/' as separator, so the file name is
                # appended by hand instead of with os.path.join
                if not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        try:
            return getHttpData(url)
        except Exception:
            # best effort: log the failure (with traceback) and return None
            logging.exception("getMDFromPathOrUrl: unable to get data!")
            return None

    def getDomFromPathOrUrl(self, path):
        """Return the DOM (ElementTree element) of the metadata file at path.

        @param path: URL or file path, see getMDFromPathOrUrl
        @returns: parsed root element, or None when no data was found
        """
        data = self.getMDFromPathOrUrl(path)
        if not data:
            return None

        return ET.fromstring(data)

    def getXmlPathObj(self, xmlpath):
        """Return the object at xmlpath below this folder.

        @param xmlpath: xml path, with or without a leading slash
        @returns: result of restrictedTraverse, None when nothing is found
        """
        # make xmlpath relative for Zope (startswith also copes with '')
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        return self.restrictedTraverse(xmlpath, None)

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """Return the contents of the element at xmlpath as dict.

        @param xmlpath: xml path to selected elements
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            or None when no MetaData object exists at xmlpath
        """
        logging.debug("getXmlPathData(%s)" % xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!" % xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """Return the contents of the element at xmlpath formatted with template.

        @param xmlpath: xml path to selected elements
        @param template: name of template for data
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param data: already extracted data; fetched from path/dom when None
        @param allFields: passed on to the MetaData object's getFormatted
        @param all: data is a list of elements that are formatted one by one
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: formatted string; empty string when the MetaData object or
            the data is missing
        """
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)" % (xmlpath, template))
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!" % xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate the formatted strings,
            # each followed by a newline
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data
            )

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None, recursive=0, all=False):
        """Return the contents of the resource tag as dict."""
        return self.getXmlPathData('resource', path=path, dom=dom, recursive=recursive, all=all)

    def getTexttoolData(self, path=None, dom=None, recursive=0, all=False):
        """Return the contents of the texttool tag as dict."""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom, recursive=recursive, all=all)

    def getAccessData(self, path=None, dom=None, recursive=0, all=False):
        """Return the contents of the access tag as dict."""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionData(self, path=None, dom=None, recursive=0, all=True):
        """Return the contents of the attribution tag(s)."""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, recursive=0, all=True):
        """Return the formatted contents of the attribution tag(s)."""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """Return the contents of the copyright tag(s)."""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """Return the formatted contents of the copyright tag(s)."""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, recursive=0, all=True):
        """Return the contents of the context tag(s)."""
        # NOTE(review): an earlier comment here said the information is two
        # tags deep (recursive=1) but the default is recursive=0 -- confirm
        # which one is intended
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, recursive=recursive, all=all)

    def getDRI(self, path=None, dom=None, type="escidoc"):
        """Return the DRI of the document.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param type: value of the type attribute of the wanted dri element;
            None selects the first dri element
        @returns: text of the matching dri element or None
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        if dris is None:
            return None

        for dri in dris:
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None

    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """Return the contents of the bib tag as dict."""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """Return the MetaDataMapping for resource/meta/bib of bibtype."""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """Return a dict with the metadata description for bibdata."""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """Return a dict with metadata descriptions and data for bibdata."""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def _decodeBibDataJSON(self, bibdata):
        """Return bibdata as Python data for the *JSON methods.

        @param bibdata: JSON-formatted string, or None to use the form of
            the current REQUEST
        @returns: decoded JSON data, or the mapping itself when bibdata is
            already a mapping
        """
        import json

        if bibdata is None:
            # NOTE(review): presumably the form fields themselves are meant
            # to serve as bib data here -- confirm against callers
            bibdata = self.REQUEST.form

        if hasattr(bibdata, 'keys'):
            # already a mapping (e.g. REQUEST.form); json.loads would raise
            # a TypeError for it
            return bibdata

        return json.loads(bibdata)

    def getBibMappedDataJSON(self, bibdata=None, allFields=False):
        """Return metadata descriptions and data for bibdata as JSON.

        @param bibdata: bib data as JSON-formatted string; the REQUEST form
            is used when None
        @param allFields: passed on to getMappedData
        @returns: JSON-formatted string
        """
        import json

        bibdata = self._decodeBibDataJSON(bibdata)
        return json.dumps(self.resource.meta.bib.getMappedData(bibdata, allFields=allFields))

    def getDCDataFromPath(self, path):
        """Return DC mapped data from path to index.meta."""
        return self.resource.meta.bib.getDCDataFromPath(path)

    def getDCMappedData(self, bibdata):
        """Return a dict with DC keys and data from bibdata."""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getDCMappedDataJSON(self, bibdata=None):
        """Return DC keys and data from bibdata as JSON.

        @param bibdata: bib data as JSON-formatted string; the REQUEST form
            is used when None
        @returns: JSON-formatted string
        """
        import json

        bibdata = self._decodeBibDataJSON(bibdata)
        return json.dumps(self.resource.meta.bib.getDCMappedData(bibdata))

    def getBibFormattedMetaDataJSON(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'metadata_template' as JSON.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param bibdata: bib data as JSON-formatted string; the REQUEST form
            is used when None
        @param bibxdata: passed on as xdata to getFormatted
        @returns: JSON-formatted string
        """
        import json

        logging.debug("getBibFormattedMetaDataJSON(path=%s)" % path)
        bibdata = self._decodeBibDataJSON(bibdata)
        return json.dumps(self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata))

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'metadata_template'."""
        logging.debug("getBibFormattedMetaData(path=%s)" % path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'metadata_extended_template'."""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)" % path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabelJSON(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'label_template' as JSON.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param bibdata: bib data as JSON-formatted string; the REQUEST form
            is used when None
        @param bibxdata: passed on as xdata to getFormatted
        @returns: JSON-formatted string
        """
        import json

        logging.debug("getBibFormattedLabelJSON(path=%s)" % path)
        bibdata = self._decodeBibDataJSON(bibdata)
        return json.dumps(self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata, xdata=bibxdata))

    def getBibFormattedLabel(self, path=None, dom=None, bibdata=None):
        """Return the bib metadata formatted with 'label_template'."""
        logging.debug("getBibFormattedLabel(%s)" % path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder', globals())

    # NOTE(review): declarePublic on a method that changes the configuration
    # looks too permissive -- consider declareProtected with a management
    # permission; left unchanged here to keep existing behaviour
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self, title, metaDataServerUrl, RESPONSE=None):
        """Change title and metadata server URL of this MetaDataFolder.

        @param title: new title
        @param metaDataServerUrl: new URL template of the metadata server
        @param RESPONSE: if given, redirected to 'manage_main' afterwards
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
323 | |
324 | |
def manage_addMetaDataFolderForm(self):
    """Render the form for adding a MetaDataFolder."""
    # load the page template and wrap it in the context of self
    template = PageTemplateFile('zpt/addMetadataFolderForm', globals())
    wrapped = template.__of__(self)
    return wrapped()
4 | 329 |
def manage_addMetaDataFolder(self, id, title, RESPONSE=None):
    """Add a new MetaDataFolder object to the destination container.

    @param id: id of the new folder
    @param title: title of the new folder
    @param RESPONSE: if given, redirected to 'manage_main' afterwards
    """
    folder = MetaDataFolder(id, title)
    container = self.Destination()
    container._setObject(id, folder)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
336 |