Mercurial > hg > MetaDataProvider
comparison MetaData.py @ 6:00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
author | casties |
---|---|
date | Wed, 27 Jul 2011 21:08:01 +0200 |
parents | c1dbf78cc036 |
children | e959bc6bf2a7 |
comparison
legend: equal | deleted | inserted | replaced
5:c1dbf78cc036 | 6:00147a1ab4ac |
---|---|
22 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping | 22 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping |
23 | 23 |
24 | 24 |
25 from SrvTxtUtils import getHttpData, getText | 25 from SrvTxtUtils import getHttpData, getText |
26 | 26 |
27 | 27 def normalizeFieldName(bt, underscore=True): |
28 # TODO: get rid of this | 28 """returns normalised field type for looking up mappings""" |
29 def getTextFromNode(nodelist): | 29 bt = bt.strip().replace(' ', '-').lower() |
30 """gibt text aus nodelist""" | 30 if underscore: |
31 rc = "" | 31 bt = bt.replace('_', '-') |
32 for node in nodelist: | 32 |
33 if node.nodeType == node.TEXT_NODE: | 33 return bt |
34 rc = rc + node.data | 34 |
35 return rc | 35 |
36 | 36 class MetaData(Folder): |
37 | |
38 def toString(list): | |
39 ret=u"" | |
40 | |
41 for l in list: | |
42 ret+=unicode(l) | |
43 | |
44 return ret | |
45 | |
46 def dcMetaDataToHash(mdSet): | |
47 """Convenience Function for creates a hash from the DCMetadataset | |
48 @param mdSet: String containing DCMetadata informmation | |
49 currently only in the format getDCMetadata of this module""" | |
50 | |
51 NSS = { | |
52 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', | |
53 'dc': 'http://dublincore.org/documents/dcmi-namespace/', | |
54 'owl':"http://www.w3.org/2002/07/owl#", | |
55 'rdfs':"http://www.w3.org/2000/01/rdf-schema#" | |
56 } | |
57 ret={} | |
58 import StringIO | |
59 import sys | |
60 buffer= StringIO.StringIO(mdSet) | |
61 try: | |
62 md = amara.parse(buffer,prefixes=NSS) | |
63 except: | |
64 logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1])) | |
65 | |
66 ret["error"]=mdSet | |
67 return ret | |
68 | |
69 ret["title"] = toString(md.xml_xpath("//dc:title/text()")) | |
70 ret["creator"] =toString(md.xml_xpath("//dc:creator/text()")) | |
71 ret["date"] = toString(md.xml_xpath("//dc:date/text()")) | |
72 | |
73 return ret | |
74 | |
75 | |
76 | |
77 | |
78 | |
79 class MetaData(OSAS_Metadata): | |
80 """provides basic methods for managing metadata structures""" | 37 """provides basic methods for managing metadata structures""" |
81 meta_type='MetaData' | 38 meta_type='MetaData' |
82 security=ClassSecurityInfo() | 39 security=ClassSecurityInfo() |
83 manage_options = Folder.manage_options+( | 40 manage_options = Folder.manage_options+( |
84 {'label':'Main Config','action':'changeMetaDataForm'}, | 41 {'label':'Main Config','action':'changeMetaDataForm'}, |
85 {'label':'Import XML Schema','action':'importMetaDataExportXML'}, | 42 {'label':'Import XML Schema','action':'importMetaDataExportXML'}, |
86 #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'}, | 43 #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'}, |
87 ) | 44 ) |
88 | 45 |
46 mappingSelectAttribute = 'type' | |
47 """the name of the attribute that can be used to select a mapping (if applicable)""" | |
48 | |
89 def __init__(self,id,shortDescription='',description='',fields=''): | 49 def __init__(self,id,shortDescription='',description='',fields=''): |
90 """initialize a new instance""" | 50 """initialize a new instance""" |
91 self.id = id | 51 self.id = id |
92 self.shortDescription =shortDescription #label fuer link auf add page | 52 self.shortDescription =shortDescription #label fuer link auf add page |
93 self.description=description #description of the method for link page | 53 self.description=description #description of the method for link page |
94 self.fieldList=fields.split(",")[0:] | 54 self.fieldList=fields.split(",")[0:] |
95 self.metaDataServerUrl="" # muss mit change metadata gesetzt werden | 55 #self.metaDataServerUrl="" # muss mit change metadata gesetzt werden |
96 | 56 |
97 | 57 |
58 def getFieldList(self): | |
59 """returns fieldList""" | |
60 return ','.join(self.fieldList) | |
61 | |
62 def getTagName(self): | |
63 """returns the tag name of this element""" | |
64 return self.shortDescription | |
65 | |
66 def getXmlPath(self, omitRoot=False): | |
67 """returns the xpath to this element""" | |
68 path = '/%s'%self.getTagName() | |
69 parent = self.aq_parent | |
70 if parent.meta_type == self.meta_type: | |
71 # add parent | |
72 path = parent.getXmlPath(omitRoot=omitRoot) + path | |
73 elif omitRoot: | |
74 return '' | |
75 | |
76 return path | |
77 | |
78 def getMapping(self, type): | |
79 """returns MetaDataMapping for type""" | |
80 # try type as id | |
81 mapping = getattr(self, type, None) | |
82 if mapping is None: | |
83 # try manually | |
84 mapFolder = self | |
85 for obj in mapFolder.objectValues(): | |
86 if obj.meta_type == "MetadataMapping": | |
87 # real type is in title | |
88 mapType = obj.title | |
89 if mapType == type: | |
90 # try type as is | |
91 return obj | |
92 | |
93 if normalizeFieldName(mapType, underscore=True) == normalizeFieldName(type, underscore=True): | |
94 # try normalized type without underscore | |
95 return obj | |
96 | |
97 return mapping | |
98 | |
99 def getMapFields(self, data): | |
100 """returns dict with metadata description for data""" | |
101 fields = {} | |
102 type = data['@type'] | |
103 # get mapping from main/meta/bib | |
104 mapping = self.getMapping(type) | |
105 if mapping is None: | |
106 logging.error("getMapFields: no mapping for type: %s"%type) | |
107 return fields | |
108 | |
109 # get field descriptions (copy so we can change it) | |
110 fields = mapping.getFields().copy() | |
111 # add field list | |
112 fields['@fieldList'] = mapping.getFieldList() | |
113 | |
114 return fields | |
115 | |
116 def getMappedData(self, data, allFields=False): | |
117 """returns dict with metadata descriptions and data for data""" | |
118 fields = self.getMapFields(data) | |
119 fieldList = fields['@fieldList'] | |
120 mappedData = {} | |
121 mappedList = [] | |
122 for bk in fieldList: | |
123 # ignore descriptions without data | |
124 if not data.get(bk, None): | |
125 continue | |
126 | |
127 # field description (copy so we can change it) | |
128 bf = fields[bk].copy() | |
129 # add value | |
130 bf['value'] = data[bk] | |
131 mappedData[bk] = bf | |
132 mappedList.append(bk) | |
133 | |
134 if allFields and len(mappedData) < len(data): | |
135 # add fields that were not in fields | |
136 for bk in data.keys(): | |
137 if bk in mappedData or not data[bk]: | |
138 continue | |
139 | |
140 mappedData[bk] = {'tag':bk, 'label':bk, 'value':data[bk]} | |
141 mappedList.append(bk) | |
142 | |
143 mappedData['@fieldList'] = mappedList | |
144 return mappedData | |
145 | |
146 def getDCMappedData(self, data, allFields=False): | |
147 """returns dict with DC keys and data form data""" | |
148 fields = self.getMapFields(data) | |
149 dcData = {} | |
150 for bk in fields.keys(): | |
151 # ignore descriptions without data | |
152 if not data.get(bk, None): | |
153 continue | |
154 | |
155 # field description | |
156 dc = fields[bk].get('dcmap', None) | |
157 if dc: | |
158 # add value | |
159 if dcData.get('dc',None): | |
160 # key exists - append | |
161 dcData[dc] += '/' + data[bk] | |
162 else: | |
163 dcData[dc] = data[bk] | |
164 | |
165 return dcData | |
166 | |
167 def getFormatted(self, template, path=None, dom=None, data=None, allFields=False): | |
168 """returns string with document data formatted according to template. | |
169 gets data from server or dom or pre-parsed data.""" | |
170 logging.debug("getFormatted(template=%s)"%(template)) | |
171 | |
172 if dom is None and data is None: | |
173 # get from server | |
174 md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online","")) | |
175 dom = ET.fromstring(md) | |
176 | |
177 # get contents of bib tag | |
178 if data is None: | |
179 data = getDataFromDom(dom) | |
180 | |
181 type = data.get('@type', '') | |
182 | |
183 # get template | |
184 tp=getattr(self,"%s_%s"%(template, normalizeFieldName(type)), None) | |
185 if tp is None: | |
186 logging.warning("getFormatted: no template for: %s_%s"%(template, type)) | |
187 # try generic | |
188 tp=getattr(self,"%s_generic"%(template), None) | |
189 if tp is None: | |
190 logging.error("getFormatted: no generic template either: %s"%(template)) | |
191 return "" | |
192 | |
193 # put field descriptions in mdHash | |
194 fields = self.getMappedData(data, allFields=allFields) | |
195 | |
196 return tp(mdmap=fields, md=data) | |
197 | |
198 | |
199 def getDataFromDom(self, dom): | |
200 """returns dict with all elements from corresponding tag""" | |
201 info = {} | |
202 # ElementTree doesn't like absolute paths | |
203 # lets assume dom is rooted in the first element | |
204 xpath = '.' + self.getXmlPath(omitRoot=True) | |
205 logging.debug("getDataFromDom looking for %s in %s"%(xpath, dom)) | |
206 elem = dom.find(xpath) | |
207 if elem is not None: | |
208 # put type in @type | |
209 type = elem.get(self.mappingSelectAttribute, None) | |
210 if type is not None: | |
211 info['@type'] = normalizeFieldName(type) | |
212 | |
213 # put all subelements in dict | |
214 for e in elem: | |
215 info[normalizeFieldName(e.tag)] = getText(e) | |
216 | |
217 return info | |
218 | |
219 | |
220 | |
98 def correctPath(self,path,remove=None,prefix=None,cut=0): | 221 def correctPath(self,path,remove=None,prefix=None,cut=0): |
99 """convinience method um einen pfad zu veraendern""" | 222 """convinience method um einen pfad zu veraendern""" |
100 if remove is not None: | 223 if remove is not None: |
101 path=path.replace(remove,'') | 224 path=path.replace(remove,'') |
102 if prefix is not None: | 225 if prefix is not None: |
104 | 227 |
105 if cut>0: | 228 if cut>0: |
106 splitted=path.split("/") | 229 splitted=path.split("/") |
107 path="/".join(splitted[0:len(splitted)-cut]) | 230 path="/".join(splitted[0:len(splitted)-cut]) |
108 return path | 231 return path |
232 | |
109 | 233 |
110 def importMetaDataExportXML(self,importFile=None,RESPONSE=None): | 234 def importMetaDataExportXML(self,importFile=None,RESPONSE=None): |
111 """imports metadata from the metadataexportxml file""" | 235 """imports metadata from the metadataexportxml file""" |
112 | 236 |
113 if importFile is None: | 237 if importFile is None: |
166 logging.debug("createMappingFromDom: new metadata=%s"%repr(name)) | 290 logging.debug("createMappingFromDom: new metadata=%s"%repr(name)) |
167 metadata._setObject(name,MetaData(name,name)) | 291 metadata._setObject(name,MetaData(name,name)) |
168 mdObj=getattr(metadata,name) | 292 mdObj=getattr(metadata,name) |
169 mdObj.createMappingFromDom(mn) | 293 mdObj.createMappingFromDom(mn) |
170 | 294 |
171 def getMDFromPathOrUrl(self,path): | |
172 parsedurl = urlparse.urlparse(path) | |
173 if parsedurl[0] != "": | |
174 # has schema (e.g. http) | |
175 url=path | |
176 else: | |
177 # path only | |
178 if path.endswith("index.meta"): | |
179 url =self.metaDataServerUrl%path | |
180 else: | |
181 url=os.path.join(self.metaDataServerUrl%path,'index.meta') | |
182 | |
183 #logging.debug("get Metadata: %s"%url) | |
184 md = getHttpData(url) | |
185 return md | |
186 | |
187 def getDCFormatted(self,path): | |
188 """get the metadata as dc set""" | |
189 logging.debug("getDCFormatted(path=%s)"%path) | |
190 namespace={ 'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"} | |
191 namespaceUsed=False | |
192 | |
193 md = self.getMDFromPathOrUrl(path) | |
194 logging.debug("MD in XML"+md) | |
195 im = amara.parse(md, prefixes=namespace) | |
196 | |
197 typePaths=im.xml_xpath('//bib/@type') | |
198 archimedes=False | |
199 | |
200 if len(typePaths)<1: | |
201 typePaths=im.xml_xpath('//meta/archimedes') # sinderfall fuer veraltete index.meta files vom typ archimedes | |
202 if len(typePaths)>0: | |
203 type = "archimedes" | |
204 archimedes=True | |
205 else: | |
206 typePaths=im.xml_xpath('//mpiwg:bib/@type') | |
207 if len(typePaths)<1: | |
208 return "" | |
209 else: | |
210 namespaceUsed=True | |
211 | |
212 type=unicode(typePaths[0]) | |
213 else: | |
214 type=unicode(typePaths[0]) | |
215 logging.info("got type:"+type) | |
216 try: | |
217 mapping=getattr(self.main.meta.bib,type.lower(),None) | |
218 except: | |
219 logging.error("getMetaDataFromServer no mapping for type: %s"%type) | |
220 return "" | |
221 | |
222 try: | |
223 dcMapping=getattr(self.main.meta.bib,"dc",None) | |
224 except: | |
225 logging.error("getMetaDataFromServer no dc in meta/bib") | |
226 return "" | |
227 | |
228 mds=mapping.generateMappingHash() # Hole das Mapping generisches Feld --> Feld im entsprechenden Typ | |
229 dcMds=dcMapping.generateMappingHash() | |
230 | |
231 mdHash=[] | |
232 logging.debug("Value: %s"%repr(mds)) | |
233 | |
234 for key,valueTriple in mds.items(): | |
235 value=valueTriple[0] | |
236 logging.debug("Value: %s"%repr(value)) | |
237 logging.debug("Key: %s"%repr(key)) | |
238 if value!="": | |
239 if not archimedes: | |
240 if namespaceUsed: | |
241 try: | |
242 v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value) | |
243 except: | |
244 logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value) | |
245 else: | |
246 v = im.xml_xpath('//bib/%s/text()'%value) | |
247 else: | |
248 v = im.xml_xpath('//archimedes/%s/text()'%value) | |
249 if len(v) > 0: | |
250 dc=dcMds[key][0] | |
251 | |
252 if (dc !="") and (value !=""): | |
253 logging.debug("%s--> : %s"%(repr(value),dc)) | |
254 mdHash.append([dc,unicode(v[0])]) | |
255 | |
256 ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """ | |
257 ret+="<dc:type>%s</dc:type>"%type | |
258 for md in mdHash: | |
259 | |
260 ret+="""<dc:%s>%s</dc:%s>"""%(md[0],xml.sax.saxutils.escape(md[1]),md[0]) | |
261 ret+="</bib>" | |
262 return ret | |
263 | |
264 | |
265 def getBibMapping(self, bibtype): | |
266 """returns MetaDataMapping for bibtype""" | |
267 # try type as id | |
268 mapping = getattr(self.main.meta.bib, bibtype, None) | |
269 if mapping is None: | |
270 # try manually | |
271 mapFolder = self.main.meta.bib | |
272 for obj in mapFolder.objectValues(): | |
273 if obj.meta_type == "MetadataMapping": | |
274 # real type is in title | |
275 mapType = obj.title | |
276 if mapType == bibtype: | |
277 # try type as is | |
278 return obj | |
279 | |
280 if normalizeBibField(mapType, underscore=True) == normalizeBibField(bibtype, underscore=True): | |
281 # try normalized type without underscore | |
282 return obj | |
283 | |
284 return mapping | |
285 | |
286 def getBibFields(self, bibdata): | |
287 """returns dict with metadata description for bibdata""" | |
288 bibfields = {} | |
289 bibtype = bibdata['@type'] | |
290 # get mapping from main/meta/bib | |
291 mapping = self.getBibMapping(bibtype) | |
292 if mapping is None: | |
293 logging.error("getBibFields: no mapping for type: %s"%bibtype) | |
294 return bibfields | |
295 | |
296 # get field descriptions (copy so we can change it) | |
297 bibfields = mapping.getFields().copy() | |
298 # add field list | |
299 bibfields['@fieldList'] = mapping.getFieldList() | |
300 | |
301 return bibfields | |
302 | |
303 def getBibMappedData(self, bibdata, allFields=False): | |
304 """returns dict with metadata descriptions and data for bibdata""" | |
305 bibfields = self.getBibFields(bibdata) | |
306 mappedData = {} | |
307 mappedList = [] | |
308 for bk in bibfields.keys(): | |
309 # ignore descriptions without data | |
310 if not bibdata.get(bk, None): | |
311 continue | |
312 | |
313 # field description (copy so we can change it) | |
314 bf = bibfields[bk].copy() | |
315 # add value | |
316 bf['value'] = bibdata[bk] | |
317 mappedData[bk] = bf | |
318 mappedList.append(bk) | |
319 | |
320 if allFields and len(mappedData) < len(bibdata): | |
321 # add fields that were not in bibfields | |
322 for bk in bibdata.keys(): | |
323 if bk in mappedData or not bibdata[bk]: | |
324 continue | |
325 | |
326 mappedData[bk] = {'tag':bk, 'label':bk, 'value':bibdata[bk]} | |
327 mappedList.append(bk) | |
328 | |
329 mappedData['@fieldList'] = mappedList | |
330 return mappedData | |
331 | |
332 def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False): | |
333 """returns string with document data formatted according to template. | |
334 gets data from server or dom or pre-parsed bibdata.""" | |
335 logging.debug("getFormatted(template=%s)"%(template)) | |
336 | |
337 if dom is None and bibdata is None: | |
338 # get from server | |
339 md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online","")) | |
340 #logging.debug("md:"+md) | |
341 #dom = amara.parse(md) | |
342 dom = ET.fromstring(md) | |
343 | |
344 # get contents of bib tag | |
345 if bibdata is None: | |
346 bibdata = getBibdataFromDom(dom) | |
347 | |
348 bibtype = bibdata['@type'] | |
349 | |
350 # get template | |
351 tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None) | |
352 if tp is None: | |
353 logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype)) | |
354 # try generic | |
355 tp=getattr(self,"%s_generic"%(template), None) | |
356 if tp is None: | |
357 logging.error("getFormatted: no generic template either: %s"%(template)) | |
358 return "" | |
359 | |
360 # put bib field descriptions in mdHash | |
361 bibfields = self.getBibMappedData(bibdata, allFields=allFields) | |
362 | |
363 return tp(mdmap=bibfields, md=bibdata) | |
364 | |
365 | |
366 def getFormattedMetaData(self, path=None, dom=None, bibdata=None): | |
367 """get the metadafrom server""" | |
368 logging.debug("getFormattedMetaData(path=%s)"%path) | |
369 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) | |
370 | |
371 def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None): | |
372 """get the metadafrom server""" | |
373 logging.debug("getFormattedMetaDataShort(path=%s)"%path) | |
374 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) | |
375 | |
376 def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): | |
377 """get the metadafrom server""" | |
378 logging.debug("getFormattedMetaDataExtended(path=%s)"%path) | |
379 return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata, allFields=True) | |
380 | |
381 def getFormattedLabel(self,path=None, dom=None, bibdata=None): | |
382 """get the metadafrom server""" | |
383 logging.debug("getFormattedLabel(%s)"%path) | |
384 return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata) | |
385 | |
386 def getFormattedMetaDataShortFromServer(self,path): | |
387 """get the metadafrom server""" | |
388 logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path) | |
389 return self.getFormatted('metadata_template', path) | |
390 | |
391 def getFormattedMetaDataExtendedFromServer(self,path): | |
392 """get the metadafrom server""" | |
393 logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path) | |
394 return self.getFormatted('metadata_extended_template', path=path, allFields=True) | |
395 | |
396 def getFormattedLabelFromServer(self,path): | |
397 """get the metadafrom server""" | |
398 logging.debug("getFormattedLabelFromServer(%s)"%path) | |
399 return self.getFormatted('label_template', path) | |
400 | |
401 | 295 |
402 security.declarePublic('changeMetaDataForm') | 296 security.declarePublic('changeMetaDataForm') |
403 def changeMetaDataForm(self): | 297 def changeMetaDataForm(self): |
404 """Main configuration""" | 298 """Main configuration""" |
405 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetaData.zpt')).__of__(self) | 299 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetadata.zpt')).__of__(self) |
406 return pt() | 300 return pt() |
407 | 301 |
408 security.declarePublic('changeMetaData') | 302 security.declarePublic('changeMetaData') |
409 def changeMetaData(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None): | 303 def changeMetaData(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None): |
410 """Change Metadata""" | 304 """Change Metadata""" |
416 RESPONSE.redirect('manage_main') | 310 RESPONSE.redirect('manage_main') |
417 | 311 |
418 | 312 |
419 def manage_addMetaDataForm(self): | 313 def manage_addMetaDataForm(self): |
420 """interface for adding the OSAS_add_Metadata""" | 314 """interface for adding the OSAS_add_Metadata""" |
421 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMetaDataForm.zpt')).__of__(self) | 315 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMetadataForm.zpt')).__of__(self) |
422 return pt() | 316 return pt() |
423 | 317 |
424 def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None): | 318 def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None): |
425 """a metadata objekt""" | 319 """a metadata objekt""" |
426 newObj=MetaData(id,shortDescription,description,fields) | 320 newObj=MetaData(id,shortDescription,description,fields) |