comparison MetaData.py @ 0:9f9d9be26e53

first checkin in Mercurial (see history in SVN)
author casties
date Mon, 25 Jul 2011 16:50:48 +0200
parents
children e4bae49e657b
comparison
equal deleted inserted replaced
-1:000000000000 0:9f9d9be26e53
1 from OFS.Folder import Folder
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
3 from Globals import package_home
4 from AccessControl import ClassSecurityInfo
5 import os.path
6 import urllib
7 import logging
8 import urlparse
9
10 # TODO: which xml toolkit?
11 import amara
12 import xml.sax.saxutils
13 import xml.dom.minidom
14 import xml.etree.ElementTree as ET
15
16
17 # TODO: do we need this?
18 #from Products.OSA_system2 import OSAS_helpers
19 #from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping
20
21 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping
22
23
24 from SrvTxtUtils import getHttpData, getText
25
26
27 # TODO: get rid of this
def getTextFromNode(nodelist):
    """Return the concatenated character data of the text nodes in nodelist.

    Only the members of nodelist itself are inspected; members that are not
    text nodes (e.g. elements) are skipped and not descended into.
    """
    chunks = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(chunks)
35
36
def normalizeBibtype(bt):
    """Return the canonical form of bib type bt used for looking up mappings.

    Surrounding whitespace is removed, remaining spaces are replaced by
    hyphens and the result is lower-cased.
    """
    trimmed = bt.strip()
    hyphenated = trimmed.replace(' ', '-')
    return hyphenated.lower()
41
def toString(list):
    """Return the unicode forms of all items of list joined without separator.

    The parameter name shadows the builtin; it is kept unchanged so that
    existing callers are not affected.
    """
    return u"".join([unicode(item) for item in list])
49
def dcMetaDataToHash(mdSet):
    """Convenience function that creates a dict from a DC metadata set.

    @param mdSet: string containing the DC metadata; the xpaths used here
        expect dc:title, dc:creator and dc:date elements (presumably the
        output of getDCFormatted of this module - verify against callers).
    @return: dict with the keys "title", "creator" and "date"; if mdSet
        cannot be parsed the dict only holds mdSet under the key "error".
    """
    import StringIO
    import sys

    prefixes = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    source = StringIO.StringIO(mdSet)
    try:
        doc = amara.parse(source, prefixes=prefixes)
    except:
        # any parser failure is reported to the caller via the "error" key
        errType, errValue = sys.exc_info()[:2]
        logging.error("Error: %s (%s)" % (errType, errValue))
        return {"error": mdSet}

    lookups = (
        ("title", "//dc:title/text()"),
        ("creator", "//dc:creator/text()"),
        ("date", "//dc:date/text()"),
    )
    result = {}
    for field, xpath in lookups:
        result[field] = toString(doc.xml_xpath(xpath))
    return result
78
def getBibdataFromDom(dom):
    """Return a dict with all elements from the bib tag of dom.

    @param dom: object with an ElementTree-style find(); the bib element is
        looked up with the path ".//meta/bib".
    @return: dict mapping the tag of every child of the bib element to the
        text extracted by getText. The normalised value of the bib
        element's type attribute is stored under the key '@type'. The dict
        is empty if there is no bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type; a missing type attribute is treated as an empty
    # type so that normalizeBibtype is never handed None
    bibinfo['@type'] = normalizeBibtype(bib.get('type') or '')
    # put all subelements in dict
    for child in bib:
        bibinfo[child.tag] = getText(child)

    return bibinfo
92
93
94
95
class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        {'label': 'Select Fields for Display', 'action': 'indicateDisplayFieldsForm'},
    )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """initialize a new instance

        @param id: id of the new object
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of field names
        """
        self.id = id
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # has to be set with changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """convenience method for modifying a path

        @param path: the path to modify
        @param remove: substring that is removed from path (every occurrence)
        @param prefix: path put in front of path using os.path.join
        @param cut: number of trailing "/"-separated parts that are dropped
        @return: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            parts = path.split("/")
            path = "/".join(parts[0:len(parts) - cut])
        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """imports metadata from the metadataexportxml file

        Without importFile the import form is rendered and returned.
        Otherwise the file is parsed and the first metadataExport element is
        handed to createMappingFromDom.
        """
        if importFile is None:
            pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom = xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """create the mappings described by the metadatanode of the xml format

        @param metadatanode: minidom node whose "set" and "metadata" child
            elements are evaluated
        @param metadata: metadata object that receives the field list and
            the created objects (default: self)
        """
        if metadata is None:
            metadata = self

        for node in metadatanode.childNodes:
            # text and comment nodes (e.g. the whitespace of pretty-printed
            # XML) have no tagName attribute and carry no mapping data
            if node.nodeType != node.ELEMENT_NODE:
                continue

            logging.debug("node: %s" % node.tagName)
            if node.tagName == "set":
                setName = node.getAttribute('name')
                entries = node.getElementsByTagName('entry')
                if setName == 'generic':
                    # the generic set defines the field list
                    metadata.fieldList = [entry.getAttribute('genericName') for entry in entries]
                else:
                    # every other set becomes a MetaDataMapping object
                    mappingId = setName.encode('utf-8')
                    argList = {}
                    for entry in entries:
                        genericName = entry.getAttribute('genericName')
                        tag = entry.getAttribute('tag')
                        label = entry.getAttribute('label')
                        description = getTextFromNode(entry.childNodes)  # TODO: clean
                        argList[genericName] = (tag, label, description)
                    metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tagName == "metadata":
                # nested metadata object: create it and let it read its own subtree
                name = node.getAttribute('name').encode('utf-8')
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    # NOTE: this method is documented with comments only, on purpose. The
    # original has no docstring and Zope's publisher refuses to publish
    # objects without one; adding a docstring would change what is reachable
    # through the web.
    def getMDFromPathOrUrl(self, path):
        # Returns whatever getHttpData delivers for the index.meta of path.
        # path is used unchanged if it has a URL scheme; otherwise it is
        # inserted into the metaDataServerUrl format string and 'index.meta'
        # is appended unless path already ends with it.
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        elif path.endswith("index.meta"):
            # path only, already pointing at the index.meta
            url = self.metaDataServerUrl % path
        else:
            # path only, pointing at the directory
            url = os.path.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        return getHttpData(url)

    def getDCFormatted(self, path):
        """get the metadata as dc set

        @param path: path or URL handed to getMDFromPathOrUrl
        @return: string with a bib element that contains a dc:type element
            and one dc element per mapped, non-empty field; "" if no bib
            type is found or a required mapping is missing
        """
        logging.debug("getDCFormatted(path=%s)" % path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML" + md)
        im = amara.parse(md, prefixes=namespace)

        # determine the bib type: plain bib, outdated archimedes or
        # namespaced bib
        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) > 0:
            bibtype = unicode(typePaths[0])
        elif len(im.xml_xpath('//meta/archimedes')) > 0:
            # special case for outdated index.meta files of type archimedes
            bibtype = "archimedes"
            archimedes = True
        else:
            typePaths = im.xml_xpath('//mpiwg:bib/@type')
            if len(typePaths) < 1:
                return ""
            namespaceUsed = True
            bibtype = unicode(typePaths[0])
        logging.info("got type:" + bibtype)

        # getattr with a default does not raise for a missing mapping, so a
        # None result has to be treated like a failed lookup
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s" % bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdHash = []
        logging.debug("Value: %s" % repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s" % repr(value))
            logging.debug("Key: %s" % repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()' % value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()' % value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()' % value)
                    # skip the field: there is no result to use (v would be
                    # unbound or left over from the previous field)
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()' % value)

            if len(v) < 1:
                continue

            dc = dcMds[key][0]
            if dc != "":
                logging.debug("%s--> : %s" % (repr(value), dc))
                mdHash.append([dc, unicode(v[0])])

        ret = """<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # the type comes from the document and is escaped like the values
        ret += "<dc:type>%s</dc:type>" % xml.sax.saxutils.escape(bibtype)
        for dcName, dcValue in mdHash:
            ret += """<dc:%s>%s</dc:%s>""" % (dcName, xml.sax.saxutils.escape(dcValue), dcName)
        ret += "</bib>"
        return ret

    def getStdMappedHash(self, bibdata):
        """returns dict with metadata from bibdata mapped according to standard mapping

        @param bibdata: dict with the bib type under '@type' and the bib
            fields (as built by getBibdataFromDom)
        @return: dict with one entry per generic field that has a mapped
            field name; the value is taken from bibdata ('' if bibdata has
            no such field). Empty if there is no mapping for the type.
        """
        mdHash = {}
        bibtype = bibdata.get('@type')
        # get mapping from main/meta/bib; a missing type ends up in the
        # same error path as a missing mapping
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower())
        except Exception:
            logging.error("getStdMappedHash: no mapping for type: %s" % bibtype)
            return mdHash

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()

        for field, entry in mds.items():
            # get mapped field name
            mf = entry[0]
            if not mf:
                continue
            logging.debug("mapping: %s = %s" % (field, mf))
            mdHash[field] = bibdata.get(mf, '')

        return mdHash

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """returns string with document data formatted according to template.
        gets data from server or dom or pre-parsed bibdata.

        @param template: prefix of the template name; the template is looked
            up on self as "<template>_<bibtype>" and then "<template>_generic"
        @param path: path of the document, only used if neither dom nor
            bibdata is given
        @param dom: parsed metadata, only used if bibdata is not given
        @param bibdata: dict with the contents of the bib tag
        @return: result of the template or "" if there is no bib type or no
            template
        """
        logging.debug("getFormatted(template=%s)" % (template))

        if bibdata is None:
            if dom is None:
                # get from server
                md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online", ""))
                #logging.debug("md:"+md)
                dom = ET.fromstring(md)

            # get contents of bib tag
            bibdata = getBibdataFromDom(dom)

        bibtype = bibdata.get('@type')
        if bibtype is None:
            # happens when the metadata contains no bib element
            logging.error("getFormatted: no bib type in metadata")
            return ""

        # get template
        tp = getattr(self, "%s_%s" % (template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s" % (template, bibtype))
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s" % (template))
                return ""

        # put mapped data in mdHash
        mdHash = self.getStdMappedHash(bibdata)

        return tp(stdmd=mdHash, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_template"""
        logging.debug("getFormattedMetaData(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_template"""
        # NOTE(review): uses the same template as getFormattedMetaData; this
        # class references no separate short template - confirm intended
        logging.debug("getFormattedMetaDataShort(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_extended_template"""
        logging.debug("getFormattedMetaDataExtended(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the label_template"""
        logging.debug("getFormattedLabel(%s)" % path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """get the metadata from the server, formatted with the metadata_template"""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)" % path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """get the metadata from the server, formatted with the metadata_extended_template"""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self, path):
        """get the metadata from the server, formatted with the label_template"""
        logging.debug("getFormattedLabelFromServer(%s)" % path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata

        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of field names
        @param metaDataServerUrl: format string with one placeholder for the
            path, used by getMDFromPathOrUrl
        """
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
384
385
def manage_addMetaDataForm(self):
    """Render and return the form for adding a MetaData object."""
    templatePath = os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')
    form = PageTemplateFile(templatePath).__of__(self)
    return form()
390
def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """Create a MetaData object and add it to the destination under id.

    The arguments id, shortDescription, description and fields are handed
    to the MetaData constructor unchanged. If RESPONSE is given the browser
    is redirected to manage_main afterwards.
    """
    created = MetaData(id, shortDescription, description, fields)
    self.Destination()._setObject(id, created)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
397
class MetaDataMapping(OSAS_MetadataMapping):
    # Mapping object that createMappingFromDom stores inside a MetaData
    # object. Everything is inherited from OSAS_MetadataMapping; only the
    # meta type is overridden. Documented with a comment so that the
    # inherited __doc__ stays untouched.
    meta_type = "MetadataMapping"
400
def manage_addMetaDataMappingForm(self):
    """Render and return the form for adding a MetaDataMapping object."""
    templatePath = os.path.join(package_home(globals()), 'zpt', 'addMetadataMappingForm.zpt')
    form = PageTemplateFile(templatePath).__of__(self)
    return form()
406
def manage_addMetaDataMapping(self, idOfObject, titleOfObject, RESPONSE=None):
    """Create a MetaDataMapping from the request form and add it to self.

    For every name in self.fieldList (except 'idOfObject' and
    'titleOfObject') the form values <name>, label_<name>,
    explanation_<name>, status_<name> and values_<name> are collected into
    a tuple. If RESPONSE is given the browser is redirected to manage_main
    afterwards.
    """
    reserved = ('idOfObject', 'titleOfObject')
    argList = {}
    for arg in self.fieldList:
        if arg in reserved:
            continue
        form = self.REQUEST.form
        argList[arg] = (
            form[arg],
            form['label_' + arg],
            form['explanation_' + arg],
            form['status_' + arg],
            form['values_' + arg],
        )

    self._setObject(idOfObject, MetaDataMapping(idOfObject, titleOfObject, argList))
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
419