comparison MetaData.py @ 6:00147a1ab4ac

division between MetaDataFolder and Metadata looks good now (to me :-)
author casties
date Wed, 27 Jul 2011 21:08:01 +0200
parents c1dbf78cc036
children e959bc6bf2a7
comparison
equal deleted inserted replaced
5:c1dbf78cc036 6:00147a1ab4ac
22 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping 22 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping
23 23
24 24
25 from SrvTxtUtils import getHttpData, getText 25 from SrvTxtUtils import getHttpData, getText
26 26
def normalizeFieldName(bt, underscore=True):
    """Return the normalised form of a field type for looking up mappings.

    Surrounding whitespace is stripped, blanks become '-', the result is
    lower-cased and, when underscore is true, '_' becomes '-' as well.
    """
    normalized = bt.strip().replace(' ', '-').lower()
    if not underscore:
        return normalized

    return normalized.replace('_', '-')


# TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist."""
    pieces = [node.data for node in nodelist if node.nodeType == node.TEXT_NODE]
    return "".join(pieces)
36 36 class MetaData(Folder):
37
def toString(list):
    """Return the unicode representations of all items in list, concatenated."""
    return u"".join([unicode(item) for item in list])
45
def dcMetaDataToHash(mdSet):
    """Convenience function that creates a dict from a DC metadata set.

    @param mdSet: string containing DC metadata information; currently only
                  in the format produced by getDCMetadata of this module.
    @return: dict with the keys "title", "creator" and "date"; if mdSet
             cannot be parsed, a dict whose only key "error" holds mdSet.
    """
    import StringIO
    import sys

    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    buffer = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(buffer, prefixes=NSS)
    except Exception:
        # Exception instead of a bare except: parse errors are still reported
        # through the "error" key, but SystemExit and KeyboardInterrupt are
        # no longer swallowed.
        logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))

    return ret
74
75
76
77
78
79 class MetaData(OSAS_Metadata):
80 """provides basic methods for managing metadata structures""" 37 """provides basic methods for managing metadata structures"""
81 meta_type='MetaData' 38 meta_type='MetaData'
82 security=ClassSecurityInfo() 39 security=ClassSecurityInfo()
83 manage_options = Folder.manage_options+( 40 manage_options = Folder.manage_options+(
84 {'label':'Main Config','action':'changeMetaDataForm'}, 41 {'label':'Main Config','action':'changeMetaDataForm'},
85 {'label':'Import XML Schema','action':'importMetaDataExportXML'}, 42 {'label':'Import XML Schema','action':'importMetaDataExportXML'},
86 #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'}, 43 #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
87 ) 44 )
88 45
46 mappingSelectAttribute = 'type'
47 """the name of the attribute that can be used to select a mapping (if applicable)"""
48
89 def __init__(self,id,shortDescription='',description='',fields=''): 49 def __init__(self,id,shortDescription='',description='',fields=''):
90 """initialize a new instance""" 50 """initialize a new instance"""
91 self.id = id 51 self.id = id
92 self.shortDescription =shortDescription #label fuer link auf add page 52 self.shortDescription =shortDescription #label fuer link auf add page
93 self.description=description #description of the method for link page 53 self.description=description #description of the method for link page
94 self.fieldList=fields.split(",")[0:] 54 self.fieldList=fields.split(",")[0:]
95 self.metaDataServerUrl="" # muss mit change metadata gesetzt werden 55 #self.metaDataServerUrl="" # muss mit change metadata gesetzt werden
96 56
97 57
def getFieldList(self):
    """Return the entries of self.fieldList joined into one comma-separated string."""
    separator = ','
    return separator.join(self.fieldList)
61
def getTagName(self):
    """Return the tag name of this element (stored in shortDescription)."""
    tagName = self.shortDescription
    return tagName
65
def getXmlPath(self, omitRoot=False):
    """Return the xpath to this element.

    The path is built from the tag names of this object and of all directly
    enclosing parents that have the same meta_type. With omitRoot the
    outermost of these elements is left out of the path.
    """
    ownStep = '/%s'%self.getTagName()
    container = self.aq_parent
    if container.meta_type != self.meta_type:
        # this is the outermost element of the chain
        if omitRoot:
            return ''
        return ownStep

    # prepend the path of the enclosing element
    return container.getXmlPath(omitRoot=omitRoot) + ownStep
77
def getMapping(self, type):
    """Return the MetadataMapping for type, or None if there is none.

    The type is first tried as an attribute name of this object. Failing
    that, the MetadataMapping children are searched for one whose title
    equals type, either literally or after normalizeFieldName.
    """
    # try type as id
    direct = getattr(self, type, None)
    if direct is not None:
        return direct

    # try manually
    for candidate in self.objectValues():
        if candidate.meta_type != "MetadataMapping":
            continue

        # real type is in title
        title = candidate.title
        if title == type:
            # type matches as is
            return candidate

        if normalizeFieldName(title, underscore=True) == normalizeFieldName(type, underscore=True):
            # type matches after normalisation (underscores are replaced too)
            return candidate

    return None
98
def getMapFields(self, data):
    """Return a dict with the metadata field descriptions for data.

    The mapping is selected by data['@type'] through getMapping. The result
    is a copy of the mapping's field descriptions with the mapping's field
    list added under the key '@fieldList'.

    An empty dict is returned (and an error is logged) when data has no
    '@type' entry or when no mapping exists for the type. Data without a
    type used to raise KeyError here, although getFormatted accepts such data.
    """
    mdType = data.get('@type', None)
    if mdType is None:
        logging.error("getMapFields: data has no @type")
        return {}

    # get mapping from main/meta/bib
    mapping = self.getMapping(mdType)
    if mapping is None:
        logging.error("getMapFields: no mapping for type: %s"%mdType)
        return {}

    # get field descriptions (copy so we can change it)
    fields = mapping.getFields().copy()
    # add field list
    fields['@fieldList'] = mapping.getFieldList()

    return fields
115
def getMappedData(self, data, allFields=False):
    """Return a dict with metadata descriptions and values for data.

    For every field named in the mapping's field list that has a non-empty
    value in data, the result holds a copy of the field description with the
    value added under 'value'. With allFields, non-empty entries of data
    that are not covered by the mapping are added with a generic description
    (tag and label are the key). The keys that were added are listed, in
    order, under '@fieldList'.

    When getMapFields finds no mapping (empty dict) the field list is treated
    as empty instead of raising KeyError, so the generic path still works.
    """
    fields = self.getMapFields(data)
    fieldList = fields.get('@fieldList', [])
    mappedData = {}
    mappedList = []
    for bk in fieldList:
        # ignore descriptions without data
        if not data.get(bk, None):
            continue

        # field description (copy so we can change it)
        bf = fields[bk].copy()
        # add value
        bf['value'] = data[bk]
        mappedData[bk] = bf
        mappedList.append(bk)

    if allFields and len(mappedData) < len(data):
        # add fields that were not in fields
        for bk in data.keys():
            if bk in mappedData or not data[bk]:
                continue

            mappedData[bk] = {'tag':bk, 'label':bk, 'value':data[bk]}
            mappedList.append(bk)

    mappedData['@fieldList'] = mappedList
    return mappedData
145
def getDCMappedData(self, data, allFields=False):
    """Return a dict with DC keys and the corresponding values from data.

    Every field description from getMapFields that carries a 'dcmap' entry
    and has a non-empty value in data contributes that value under the DC
    key. Several fields that map to the same DC key are joined with '/'.
    allFields is accepted but not used.
    """
    fields = self.getMapFields(data)
    dcData = {}
    for bk in fields.keys():
        # ignore descriptions without data
        if not data.get(bk, None):
            continue

        # field description
        dc = fields[bk].get('dcmap', None)
        if dc:
            # add value
            # look up the DC key itself (was the literal string 'dc', so
            # existing values were overwritten instead of appended)
            if dcData.get(dc, None):
                # key exists - append
                dcData[dc] += '/' + data[bk]
            else:
                dcData[dc] = data[bk]

    return dcData
166
def getFormatted(self, template, path=None, dom=None, data=None, allFields=False):
    """Return a string with document data formatted according to template.

    The data comes from pre-parsed data if given, else from dom, else the
    metadata document for path is fetched through getMDFromPathOrUrl and
    parsed. The template is the attribute "<template>_<normalised type>" of
    this object or, failing that, "<template>_generic". It is called with
    mdmap (result of getMappedData) and md (the data dict). Returns "" when
    neither template exists.
    """
    logging.debug("getFormatted(template=%s)"%(template))

    if dom is None and data is None:
        # get from server
        md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
        dom = ET.fromstring(md)

    # get contents of bib tag
    if data is None:
        # getDataFromDom is a method of this class; the bare function call
        # used before could not resolve it
        data = self.getDataFromDom(dom)

    mdType = data.get('@type', '')

    # get template
    tp = getattr(self, "%s_%s"%(template, normalizeFieldName(mdType)), None)
    if tp is None:
        logging.warning("getFormatted: no template for: %s_%s"%(template, mdType))
        # try generic
        tp = getattr(self, "%s_generic"%(template), None)
        if tp is None:
            logging.error("getFormatted: no generic template either: %s"%(template))
            return ""

    # put field descriptions in mdHash
    fields = self.getMappedData(data, allFields=allFields)

    return tp(mdmap=fields, md=data)
197
198
def getDataFromDom(self, dom):
    """Return a dict with all sub-elements of this object's tag in dom.

    Keys are the normalised tag names of the sub-elements, values their
    text. If the element carries the attribute named by
    mappingSelectAttribute, its normalised value is stored under '@type'.
    Returns an empty dict when the element is not found in dom.
    """
    # ElementTree doesn't like absolute paths,
    # so assume dom is rooted in the first element
    relPath = '.' + self.getXmlPath(omitRoot=True)
    logging.debug("getDataFromDom looking for %s in %s"%(relPath, dom))
    node = dom.find(relPath)
    if node is None:
        return {}

    result = {}
    # put type in @type
    selector = node.get(self.mappingSelectAttribute, None)
    if selector is not None:
        result['@type'] = normalizeFieldName(selector)

    # put all subelements in dict
    for child in node:
        result[normalizeFieldName(child.tag)] = getText(child)

    return result
218
219
220
98 def correctPath(self,path,remove=None,prefix=None,cut=0): 221 def correctPath(self,path,remove=None,prefix=None,cut=0):
99 """convinience method um einen pfad zu veraendern""" 222 """convinience method um einen pfad zu veraendern"""
100 if remove is not None: 223 if remove is not None:
101 path=path.replace(remove,'') 224 path=path.replace(remove,'')
102 if prefix is not None: 225 if prefix is not None:
104 227
105 if cut>0: 228 if cut>0:
106 splitted=path.split("/") 229 splitted=path.split("/")
107 path="/".join(splitted[0:len(splitted)-cut]) 230 path="/".join(splitted[0:len(splitted)-cut])
108 return path 231 return path
232
109 233
110 def importMetaDataExportXML(self,importFile=None,RESPONSE=None): 234 def importMetaDataExportXML(self,importFile=None,RESPONSE=None):
111 """imports metadata from the metadataexportxml file""" 235 """imports metadata from the metadataexportxml file"""
112 236
113 if importFile is None: 237 if importFile is None:
166 logging.debug("createMappingFromDom: new metadata=%s"%repr(name)) 290 logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
167 metadata._setObject(name,MetaData(name,name)) 291 metadata._setObject(name,MetaData(name,name))
168 mdObj=getattr(metadata,name) 292 mdObj=getattr(metadata,name)
169 mdObj.createMappingFromDom(mn) 293 mdObj.createMappingFromDom(mn)
170 294
def getMDFromPathOrUrl(self, path):
    # Fetches the metadata document for path and returns what getHttpData
    # delivers. A path with a URL scheme (e.g. http) is used as is; any other
    # path is inserted into self.metaDataServerUrl, with 'index.meta'
    # appended unless the path already ends with it.
    # (Kept without a docstring, like the original.)
    scheme = urlparse.urlparse(path)[0]
    if scheme != "":
        # has schema (e.g. http)
        target = path
    elif path.endswith("index.meta"):
        # path only, already points to the index.meta file
        target = self.metaDataServerUrl%path
    else:
        # path only, points to the document directory
        # NOTE(review): os.path.join uses the platform separator for a URL
        target = os.path.join(self.metaDataServerUrl%path,'index.meta')

    #logging.debug("get Metadata: %s"%target)
    return getHttpData(target)
186
def getDCFormatted(self, path):
    """Get the metadata as DC set.

    Fetches the metadata document for path through getMDFromPathOrUrl,
    determines its type (bib/@type, the legacy meta/archimedes element, or
    mpiwg:bib/@type) and uses the mapping for that type together with the
    "dc" mapping, both looked up in self.main.meta.bib, to build a <bib>
    XML string with one dc:* element per mapped field that has text in the
    document.

    Returns "" when no type is found or when either mapping is missing.
    """
    logging.debug("getDCFormatted(path=%s)"%path)
    namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
    namespaceUsed = False
    archimedes = False

    md = self.getMDFromPathOrUrl(path)
    logging.debug("MD in XML"+md)
    im = amara.parse(md, prefixes=namespace)

    typePaths = im.xml_xpath('//bib/@type')
    if len(typePaths) > 0:
        mdType = unicode(typePaths[0])
    else:
        # special case for outdated index.meta files of type archimedes
        typePaths = im.xml_xpath('//meta/archimedes')
        if len(typePaths) > 0:
            mdType = "archimedes"
            archimedes = True
        else:
            # last resort: bib tag in the mpiwg namespace
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""

            namespaceUsed = True
            mdType = unicode(typePaths[0])

    logging.info("got type:"+mdType)

    # getattr with a default does not raise for a missing mapping, so the
    # result has to be checked for None as well
    try:
        mapping = getattr(self.main.meta.bib, mdType.lower(), None)
    except Exception:
        mapping = None
    if mapping is None:
        logging.error("getMetaDataFromServer no mapping for type: %s"%mdType)
        return ""

    try:
        dcMapping = getattr(self.main.meta.bib, "dc", None)
    except Exception:
        dcMapping = None
    if dcMapping is None:
        logging.error("getMetaDataFromServer no dc in meta/bib")
        return ""

    # get the mapping generic field --> field in the respective type
    mds = mapping.generateMappingHash()
    dcMds = dcMapping.generateMappingHash()

    mdHash = []
    logging.debug("Value: %s"%repr(mds))

    for key, valueTriple in mds.items():
        value = valueTriple[0]
        logging.debug("Value: %s"%repr(value))
        logging.debug("Key: %s"%repr(key))
        if value == "":
            continue

        if archimedes:
            v = im.xml_xpath('//archimedes/%s/text()'%value)
        elif namespaceUsed:
            try:
                v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
            except Exception:
                logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
                # skip this field; v would be undefined or left over from
                # the previous field otherwise
                continue
        else:
            v = im.xml_xpath('//bib/%s/text()'%value)

        if len(v) > 0:
            dc = dcMds[key][0]
            if dc != "":
                logging.debug("%s--> : %s"%(repr(value),dc))
                mdHash.append((dc, unicode(v[0])))

    parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
    # the type comes from the document, so escape it like the field values
    parts.append("<dc:type>%s</dc:type>"%xml.sax.saxutils.escape(mdType))
    for dc, text in mdHash:
        parts.append("""<dc:%s>%s</dc:%s>"""%(dc, xml.sax.saxutils.escape(text), dc))
    parts.append("</bib>")
    return "".join(parts)
263
264
def getBibMapping(self, bibtype):
    """Return the MetadataMapping for bibtype from main/meta/bib, or None.

    The bibtype is first tried as an attribute name of the bib folder.
    Failing that, the folder's MetadataMapping children are searched for one
    whose title equals bibtype, either literally or after normalizeBibField.
    """
    bibFolder = self.main.meta.bib
    # try type as id
    direct = getattr(bibFolder, bibtype, None)
    if direct is not None:
        return direct

    # try manually
    for candidate in bibFolder.objectValues():
        if candidate.meta_type != "MetadataMapping":
            continue

        # real type is in title
        title = candidate.title
        if title == bibtype:
            # type matches as is
            return candidate

        if normalizeBibField(title, underscore=True) == normalizeBibField(bibtype, underscore=True):
            # type matches after normalisation
            return candidate

    return None
285
def getBibFields(self, bibdata):
    """Return a dict with the metadata field descriptions for bibdata.

    The mapping is selected by bibdata['@type'] through getBibMapping. The
    result is a copy of the mapping's field descriptions with the mapping's
    field list added under '@fieldList', or an empty dict (with an error
    logged) when there is no mapping for the type.
    """
    bibtype = bibdata['@type']
    # get mapping from main/meta/bib
    mapping = self.getBibMapping(bibtype)
    if mapping is None:
        logging.error("getBibFields: no mapping for type: %s"%bibtype)
        return {}

    # copy the field descriptions so the caller can change them
    described = mapping.getFields().copy()
    # add field list
    described['@fieldList'] = mapping.getFieldList()
    return described
302
def getBibMappedData(self, bibdata, allFields=False):
    """Return a dict with metadata descriptions and values for bibdata.

    Every key from getBibFields that has a non-empty value in bibdata yields
    a copy of its description with the value added under 'value'. With
    allFields, the remaining non-empty entries of bibdata are added with a
    generic description (tag and label are the key). The keys that were
    added are listed, in order, under '@fieldList'.
    """
    descriptions = self.getBibFields(bibdata)
    result = {}
    order = []
    for key in descriptions.keys():
        # ignore descriptions without data
        value = bibdata.get(key, None)
        if not value:
            continue

        # copy the description so it can be extended with the value
        entry = descriptions[key].copy()
        entry['value'] = value
        result[key] = entry
        order.append(key)

    if allFields and len(result) < len(bibdata):
        # add fields that were not in the descriptions
        for key in bibdata.keys():
            if key in result:
                continue

            value = bibdata[key]
            if not value:
                continue

            result[key] = {'tag':key, 'label':key, 'value':value}
            order.append(key)

    result['@fieldList'] = order
    return result
331
def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False):
    """Return a string with document data formatted according to template.

    The data comes from pre-parsed bibdata if given, else from dom, else the
    metadata document for path is fetched through getMDFromPathOrUrl and
    parsed. The template is the attribute "<template>_<lower-cased type>" of
    this object or, failing that, "<template>_generic". It is called with
    mdmap (result of getBibMappedData) and md (the bibdata dict). Returns ""
    when neither template exists.
    """
    logging.debug("getFormatted(template=%s)"%(template))

    if bibdata is None:
        if dom is None:
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online",""))
            dom = ET.fromstring(md)

        # get contents of bib tag
        bibdata = getBibdataFromDom(dom)

    bibtype = bibdata['@type']

    # look for the type-specific template first
    renderer = getattr(self, "%s_%s"%(template, bibtype.lower()), None)
    if renderer is None:
        logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
        # try generic
        renderer = getattr(self, "%s_generic"%(template), None)

    if renderer is None:
        logging.error("getFormatted: no generic template either: %s"%(template))
        return ""

    # put bib field descriptions in mdHash
    described = self.getBibMappedData(bibdata, allFields=allFields)

    return renderer(mdmap=described, md=bibdata)
364
365
def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
    """Return the metadata formatted with the 'metadata_template' templates."""
    logging.debug("getFormattedMetaData(path=%s)"%path)
    source = dict(path=path, dom=dom, bibdata=bibdata)
    return self.getFormatted('metadata_template', **source)
370
def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
    """Return the short metadata, formatted with the 'metadata_template' templates."""
    logging.debug("getFormattedMetaDataShort(path=%s)"%path)
    source = dict(path=path, dom=dom, bibdata=bibdata)
    return self.getFormatted('metadata_template', **source)
375
def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
    """Return all metadata fields, formatted with the 'metadata_extended_template' templates."""
    logging.debug("getFormattedMetaDataExtended(path=%s)"%path)
    source = dict(path=path, dom=dom, bibdata=bibdata)
    return self.getFormatted('metadata_extended_template', allFields=True, **source)
380
def getFormattedLabel(self,path=None, dom=None, bibdata=None):
    """Return the document label, formatted with the 'label_template' templates."""
    logging.debug("getFormattedLabel(%s)"%path)
    source = dict(path=path, dom=dom, bibdata=bibdata)
    return self.getFormatted('label_template', **source)
385
def getFormattedMetaDataShortFromServer(self,path):
    """Return the metadata for path, formatted with the 'metadata_template' templates."""
    logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path)
    templateName = 'metadata_template'
    return self.getFormatted(templateName, path)
390
def getFormattedMetaDataExtendedFromServer(self,path):
    """Return all metadata fields for path, formatted with the 'metadata_extended_template' templates."""
    logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path)
    templateName = 'metadata_extended_template'
    return self.getFormatted(templateName, path=path, allFields=True)
395
def getFormattedLabelFromServer(self,path):
    """Return the label for path, formatted with the 'label_template' templates."""
    logging.debug("getFormattedLabelFromServer(%s)"%path)
    templateName = 'label_template'
    return self.getFormatted(templateName, path)
400
401 295
402 security.declarePublic('changeMetaDataForm') 296 security.declarePublic('changeMetaDataForm')
403 def changeMetaDataForm(self): 297 def changeMetaDataForm(self):
404 """Main configuration""" 298 """Main configuration"""
405 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetaData.zpt')).__of__(self) 299 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMetadata.zpt')).__of__(self)
406 return pt() 300 return pt()
407 301
408 security.declarePublic('changeMetaData') 302 security.declarePublic('changeMetaData')
409 def changeMetaData(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None): 303 def changeMetaData(self,shortDescription,description,fields,metaDataServerUrl,RESPONSE=None):
410 """Change Metadata""" 304 """Change Metadata"""
416 RESPONSE.redirect('manage_main') 310 RESPONSE.redirect('manage_main')
417 311
418 312
419 def manage_addMetaDataForm(self): 313 def manage_addMetaDataForm(self):
420 """interface for adding the OSAS_add_Metadata""" 314 """interface for adding the OSAS_add_Metadata"""
421 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMetaDataForm.zpt')).__of__(self) 315 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMetadataForm.zpt')).__of__(self)
422 return pt() 316 return pt()
423 317
424 def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None): 318 def manage_addMetaData(self,id,shortDescription,description,fields,RESPONSE=None):
425 """a metadata objekt""" 319 """a metadata objekt"""
426 newObj=MetaData(id,shortDescription,description,fields) 320 newObj=MetaData(id,shortDescription,description,fields)