Mercurial > hg > MetaDataProvider
annotate MetaDataFolder.py @ 33:1f845c76dad3
all getXXX take recursive and all parameters.
author | casties |
---|---|
date | Tue, 18 Dec 2012 19:25:02 +0100 |
parents | ab58edfc0707 |
children | 559907a4d538 |
rev | line source |
---|---|
4 | 1 from OFS.Folder import Folder |
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
3 from Globals import package_home | |
4 from AccessControl import ClassSecurityInfo | |
5 import os.path | |
9 | 6 import urlparse |
4 | 7 import logging |
8 | |
12 | 9 import xml.etree.ElementTree as ET |
10 | |
4 | 11 from MetaDataMapping import MetaDataMapping |
12 from MetaData import MetaData | |
12 | 13 from SrvTxtUtils import getHttpData, getText |
4 | 14 |
def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type or field name.

    The value is stripped of surrounding whitespace, inner spaces are
    turned into hyphens and the result is lower-cased. The normalised
    form is meant to be used as key when looking up mappings.

    @param bt: bib type or field name (string)
    @param underscore: if true, underscores are turned into hyphens as well
    @returns: normalised string
    """
    normalised = bt.strip().replace(' ', '-').lower()
    if not underscore:
        # caller wants underscores kept as they are
        return normalised

    return normalised.replace('_', '-')
22 | |
6
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
def toString(list):
    """Return the unicode string made by concatenating all items.

    Every item is converted to text individually and the pieces are
    joined without any separator. An empty iterable gives an empty string.

    @param list: iterable of objects to convert (the name shadows the
        builtin but is kept so that keyword callers keep working)
    @returns: unicode string
    """
    try:
        text_type = unicode
    except NameError:
        # interpreter without a separate unicode type: str is the text type
        text_type = str

    # join once instead of growing the string with += in a loop
    return u"".join(text_type(item) for item in list)
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
30 |
class MetaDataFolder(Folder):
    """provides methods for managing complete metadata structures

    The folder fetches metadata files from a metadata server
    (see metaDataServerUrl) and delegates reading and formatting of
    single parts of the metadata to the objects found below this folder
    at the matching xml path (e.g. resource/meta/bib).
    """
    meta_type='MetaDataFolder'
    security=ClassSecurityInfo()
    manage_options = Folder.manage_options+(
        {'label':'Main Config','action':'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s replaced by file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self,id,title='',metaDataServerUrl=None):
        """initialize a new instance

        @param id: id of the new object
        @param title: title of the new object
        @param metaDataServerUrl: URL pattern of the metadata server.
            The class default is used if empty.
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl


    def getMDFromPathOrUrl(self,path):
        """returns contents of metadata file from server as text

        @param path: complete URL or file path of the metadata file or
            of the directory containing index.meta
        @returns: whatever getHttpData returns for the URL, or None if the
            path is empty or the data could not be fetched
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl%path
            else:
                # append index.meta with exactly one '/' in between.
                # This is a URL, so the separator must not depend on the
                # platform (os.path.join would use '\\' on Windows).
                base = self.metaDataServerUrl%path
                if base and not base.endswith('/'):
                    base += '/'

                url = base + 'index.meta'

        try:
            return getHttpData(url)

        except Exception:
            # best effort: log the failure including traceback and return
            # None, but let KeyboardInterrupt and SystemExit pass
            logging.exception("getMDFromPathOrUrl: unable to get data!")

        return None

    def getDomFromPathOrUrl(self, path):
        """returns DOM of metadata file at given path

        @param path: file or url path to metadata file
        @returns: ElementTree element, or None if no data could be loaded.
            Errors raised by the XML parser are not caught here.
        """
        dom = None
        data = self.getMDFromPathOrUrl(path)
        if data:
            dom = ET.fromstring(data)

        return dom

    def getXmlPathObj(self, xmlpath):
        """returns object at xmlpath

        @param xmlpath: xml path to selected elements
        @returns: object found by traversing xmlpath from this folder, or
            None if xmlpath is empty or nothing is found
        """
        if not xmlpath:
            # nothing to traverse - callers treat None as "not found"
            return None

        # make xmlpath relative for Zope
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        obj = self.restrictedTraverse(xmlpath, None)
        return obj

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as dict.

        @param xmlpath: xml path to selected elements
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag)
            or None if there is no object for xmlpath
        """
        logging.debug("getXmlPathData(%s)", xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!"%xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """returns contents of element at xmlpath as formatted string

        @param xmlpath: xml path to selected elements
        @param template: name of template for data
        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param data: data to format. Read from path or dom if None.
        @param allFields: passed on to getFormatted of the object at xmlpath
        @param all: treat data as list of elements and format every element
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: formatted string. With all=True one line per element.
            Empty string if there is no object for xmlpath or no data.
        """
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)", xmlpath, template)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!"%xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate formatted strings
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data)

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of resource tag as dict"""
        return self.getXmlPathData('resource', path=path, dom=dom, recursive=recursive, all=all)

    def getTexttoolData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of texttool tag as dict"""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom, recursive=recursive, all=all)

    def getAccessData(self, path=None, dom=None, recursive=0, all=False):
        """returns contents of access tag as dict"""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of attribution tag (all=True by default)"""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, recursive=recursive, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, recursive=0, all=True):
        """returns formatted contents of attribution tag"""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """returns contents of copyright tag (all=True by default)"""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """returns formatted contents of copyright tag"""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, recursive=0, all=True):
        """returns contents of context tag (all=True by default)"""
        # NOTE(review): an older comment here said the information is two
        # tags deep (recursive=1) but the default is recursive=0 - confirm
        # which one is intended.
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, recursive=recursive, all=all)


    def getDRI(self, path=None, dom=None, type="escidoc"):
        """returns the DRI of the document

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param type: value of the type attribute of the dri element to use.
            The first dri element is used if type is None.
        @returns: text of the selected dri element or None
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        #logging.debug("getDRI: dris=%s"%repr(dris))
        if dris is None:
            return None

        for dri in dris:
            #logging.debug("getDRI: dri=%s"%dri)
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None


    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """returns contents of bib tag as dict"""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """returns MetaDataMapping for resource/meta/bib of bibtype"""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """returns dict with metadata description for bibdata"""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """returns dict with metadata descriptions and data for bibdata"""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCDataFromPath(self,path):
        """returns DC mapped data from path to index.meta"""
        return self.resource.meta.bib.getDCDataFromPath(path)

    def getDCMappedData(self, bibdata):
        """returns dict with DC keys and data from bibdata"""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with metadata_template"""
        logging.debug("getBibFormattedMetaData(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None, bibxdata=None):
        """returns bib metadata formatted with metadata_extended_template (all fields)"""
        logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
        """returns bib metadata formatted with label_template"""
        logging.debug("getBibFormattedLabel(%s)"%path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel


    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())

    # NOTE(review): this method changes the configuration and is declared
    # public - confirm that no permission is meant to be required.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self,title,metaDataServerUrl,RESPONSE=None):
        """Change MetaDataFolder

        @param title: new title
        @param metaDataServerUrl: new URL pattern of the metadata server
        @param RESPONSE: if given, redirects to manage_main afterwards
        """
        self.title = title
        self.metaDataServerUrl=metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
264 | |
265 | |
def manage_addMetaDataFolderForm(self):
    """Render the page template with the form for adding a MetaDataFolder."""
    template = PageTemplateFile('zpt/addMetadataFolderForm', globals())
    # wrap the template in the context of self before rendering it
    return template.__of__(self)()
4 | 270 |
def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
    """Create a new MetaDataFolder object and add it to the destination.

    @param id: id of the new folder
    @param title: title of the new folder
    @param RESPONSE: if given, redirects to manage_main afterwards
    """
    folder = MetaDataFolder(id, title)
    destination = self.Destination()
    destination._setObject(id, folder)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
277 |