Mercurial > hg > MetaDataProvider
comparison MetaData.py @ 0:9f9d9be26e53
first checkin in Mercurial (see history in SVN)
author | casties |
---|---|
date | Mon, 25 Jul 2011 16:50:48 +0200 |
parents | |
children | e4bae49e657b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9f9d9be26e53 |
---|---|
1 from OFS.Folder import Folder | |
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
3 from Globals import package_home | |
4 from AccessControl import ClassSecurityInfo | |
5 import os.path | |
6 import urllib | |
7 import logging | |
8 import urlparse | |
9 | |
10 # TODO: which xml toolkit? | |
11 import amara | |
12 import xml.sax.saxutils | |
13 import xml.dom.minidom | |
14 import xml.etree.ElementTree as ET | |
15 | |
16 | |
17 # TODO: do we need this? | |
18 #from Products.OSA_system2 import OSAS_helpers | |
19 #from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping | |
20 | |
21 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping | |
22 | |
23 | |
24 from SrvTxtUtils import getHttpData, getText | |
25 | |
26 | |
27 # TODO: get rid of this | |
def getTextFromNode(nodelist):
    """returns the concatenated data of all text nodes in nodelist.

    Only direct members of nodelist whose nodeType is TEXT_NODE contribute;
    element nodes (and their descendants) are ignored.

    @param nodelist: iterable of DOM nodes (e.g. element.childNodes)
    @return: string with the text of all text nodes in document order
    """
    return "".join([node.data
                    for node in nodelist
                    if node.nodeType == node.TEXT_NODE])
35 | |
36 | |
def normalizeBibtype(bt):
    """returns normalised bib type for looking up mappings.

    Surrounding whitespace is removed, every remaining blank is turned
    into a dash and the result is lower-cased.

    @param bt: bib type string as found in the metadata
    @return: normalised bib type string
    """
    trimmed = bt.strip()
    dashed = trimmed.replace(' ', '-')
    return dashed.lower()
41 | |
def toString(list):
    # Concatenates the unicode representation of every item of the given
    # iterable into one unicode string (empty iterable -> u"").
    # The parameter keeps its original name because callers may pass it
    # by keyword.
    return u"".join(unicode(item) for item in list)
49 | |
def dcMetaDataToHash(mdSet):
    """Convenience function that creates a hash from a DC metadata set.

    @param mdSet: string containing DC metadata information, currently only
        in the format produced by getDCFormatted of this module
    @return: dict with the keys "title", "creator" and "date" (each the
        concatenated text of the matching dc elements); if the input cannot
        be parsed the dict contains only the key "error" holding the
        unparsed input
    """
    # local imports, as in the original module (Python 2 StringIO module)
    import StringIO
    import sys

    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    source = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(source, prefixes=NSS)
    except Exception:
        # Best effort: report the problem and hand the raw input back to
        # the caller. Only real errors are caught, so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        excType, excValue = sys.exc_info()[:2]
        logging.error("Error: %s (%s)" % (excType, excValue))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))

    return ret
78 | |
def getBibdataFromDom(dom):
    """returns dict with all elements from bib-tag.

    @param dom: ElementTree element that is searched for ".//meta/bib"
    @return: dict mapping the tag of every direct child of the bib element
        to its text (as returned by getText), plus the key '@type' holding
        the normalised value of the bib element's type attribute;
        an empty dict if there is no bib element
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is None:
        return bibinfo

    # put type in @type
    # (fix: the helper is called normalizeBibtype; the former name
    # "normalizedBibtype" does not exist and raised a NameError.
    # A missing type attribute is treated as empty type.)
    bibinfo['@type'] = normalizeBibtype(bib.get('type') or '')
    # put all subelements in dict
    for e in bib:
        bibinfo[e.tag] = getText(e)

    return bibinfo
92 | |
93 | |
94 | |
95 | |
class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        {'label': 'Select Fields for Display', 'action': 'indicateDisplayFieldsForm'},
    )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """initialize a new instance

        @param id: id of the new object
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of generic field names
        """
        self.id = id
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # must be set with changeMetadata before metadata can be fetched
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """convenience method for modifying a path

        @param path: the path to modify
        @param remove: substring that is removed from path (all occurrences)
        @param prefix: path that is joined in front of path
        @param cut: number of trailing "/"-separated segments to drop
        @return: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            segments = path.split("/")
            # clamp at 0: a cut larger than the number of segments used to
            # produce a negative slice end and kept segments by accident
            keep = max(len(segments) - cut, 0)
            path = "/".join(segments[:keep])
        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """imports metadata from the metadataexportxml file

        Without importFile the upload form is rendered and returned.

        @param importFile: file (name or object) with the metadata export XML
        @param RESPONSE: if given, redirected to manage_main after the import
        """
        if importFile is None:
            pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom = xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """creates the mappings from the metadata node of the XML format

        Child elements named "set" either define the generic field list
        (name="generic") or become a MetaDataMapping object; child elements
        named "metadata" become nested MetaData objects which are filled
        recursively.

        @param metadatanode: minidom element whose children are processed
        @param metadata: metadata object that receives the mappings
            (default: self)
        """
        if metadata is None:
            metadata = self

        for node in metadatanode.childNodes:
            # only element nodes have a tagName; whitespace text nodes and
            # comments between the elements must be skipped
            if node.nodeType != node.ELEMENT_NODE:
                continue

            logging.debug("node: %s" % node.tagName)
            if node.tagName == "set":
                setName = node.getAttribute('name')
                entries = node.getElementsByTagName('entry')
                if setName == 'generic':
                    # first step: create the field list
                    metadata.fieldList = [entry.getAttribute('genericName') for entry in entries]
                else:
                    mappingId = setName.encode('utf-8')
                    argList = {}
                    for entry in entries:
                        genericName = entry.getAttribute('genericName')
                        tag = entry.getAttribute('tag')
                        label = entry.getAttribute('label')
                        description = getTextFromNode(entry.childNodes)  # TODO: clean
                        argList[genericName] = (tag, label, description)
                    metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tagName == "metadata":
                name = node.getAttribute('name').encode('utf-8')
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    # Fetches the metadata document for a path or URL via getHttpData.
    # If path has a scheme (e.g. http) it is used as URL unchanged,
    # otherwise it is inserted into self.metaDataServerUrl (a %-format
    # string) and "index.meta" is appended unless path already ends with it.
    # NOTE(review): documented with comments on purpose. The original has no
    # docstring, and ZPublisher only publishes objects that have one, so
    # adding a docstring could expose this fetcher through the web -- confirm
    # before converting this into a docstring.
    def getMDFromPathOrUrl(self, path):
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        elif path.endswith("index.meta"):
            # path only, already pointing to the index.meta file
            url = self.metaDataServerUrl % path
        else:
            # path only
            url = os.path.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        return getHttpData(url)

    def getDCFormatted(self, path):
        """get the metadata as dc set

        @param path: path or URL of the index.meta document
        @return: string with a bib element containing dc:type and one dc
            element per mapped field that has a value; empty string if no
            bib type, no mapping for the type or no dc mapping is found
        """
        logging.debug("getDCFormatted(path=%s)" % path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML" + md)
        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) >= 1:
            bibtype = unicode(typePaths[0])
        else:
            # special case for outdated index.meta files of type archimedes
            typePaths = im.xml_xpath('//meta/archimedes')
            if len(typePaths) > 0:
                bibtype = "archimedes"
                archimedes = True
            else:
                typePaths = im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths) < 1:
                    return ""
                namespaceUsed = True
                bibtype = unicode(typePaths[0])

        logging.info("got type:" + bibtype)

        # getattr with a default never raises for a missing mapping, so the
        # None result has to be checked explicitly; the try only guards the
        # lookup of main/meta/bib itself
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s" % bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        mdList = []
        logging.debug("Value: %s" % repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s" % repr(value))
            logging.debug("Key: %s" % repr(key))
            if value == "":
                continue

            if archimedes:
                v = im.xml_xpath('//archimedes/%s/text()' % value)
            elif namespaceUsed:
                try:
                    v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()' % value)
                except Exception:
                    logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()' % value)
                    # skip this field; going on would use an unbound or
                    # stale result from a previous field
                    continue
            else:
                v = im.xml_xpath('//bib/%s/text()' % value)

            if len(v) > 0:
                dc = dcMds[key][0]
                if dc != "":
                    logging.debug("%s--> : %s" % (repr(value), dc))
                    mdList.append([dc, unicode(v[0])])

        escape = xml.sax.saxutils.escape
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        # the type comes from the fetched document, so it is escaped like
        # the field values
        parts.append("<dc:type>%s</dc:type>" % escape(bibtype))
        for dc, text in mdList:
            parts.append("""<dc:%s>%s</dc:%s>""" % (dc, escape(text), dc))
        parts.append("</bib>")
        return "".join(parts)

    def getStdMappedHash(self, bibdata):
        """returns dict with metadata from bibdata mapped according to standard mapping

        @param bibdata: dict with bib data, the bib type under the key '@type'
        @return: dict generic field name --> value from bibdata ('' if the
            mapped field is missing); empty dict if bibdata has no type or
            there is no mapping for the type
        """
        mdHash = {}
        bibtype = bibdata.get('@type')
        if bibtype is None:
            logging.error("getStdMappedHash: no type in bibdata")
            return mdHash

        # get mapping from main/meta/bib
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower())
        except Exception:
            logging.error("getStdMappedHash: no mapping for type: %s" % bibtype)
            return mdHash

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()

        for field in mds.keys():
            # get mapped field name
            mf = mds[field][0]
            if not mf:
                continue
            logging.debug("mapping: %s = %s" % (field, mf))
            mdHash[field] = bibdata.get(mf, '')

        return mdHash

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """returns string with document data formatted according to template.
        gets data from server or dom or pre-parsed bibdata.

        The template object is looked up on self as "<template>_<bibtype>",
        falling back to "<template>_generic".

        @param template: name prefix of the template
        @param path: path of the document (used if neither dom nor bibdata
            is given)
        @param dom: ElementTree element of the index.meta document
        @param bibdata: dict as returned by getBibdataFromDom
        @return: result of the template called with stdmd (mapped data) and
            md (bibdata); empty string if no data source, no bib type or no
            template is available
        """
        logging.debug("getFormatted(template=%s)" % (template))

        if dom is None and bibdata is None:
            if path is None:
                logging.error("getFormatted: neither path nor dom nor bibdata given")
                return ""
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online", ""))
            #logging.debug("md:"+md)
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        bibtype = bibdata.get('@type')
        if bibtype is None:
            # e.g. document without bib element
            logging.error("getFormatted: no bib type in metadata")
            return ""

        # get template
        tp = getattr(self, "%s_%s" % (template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s" % (template, bibtype))
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s" % (template))
                return ""

        # put mapped data in mdHash
        mdHash = self.getStdMappedHash(bibdata)

        return tp(stdmd=mdHash, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_template"""
        logging.debug("getFormattedMetaData(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_template"""
        logging.debug("getFormattedMetaDataShort(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the metadata_extended_template"""
        logging.debug("getFormattedMetaDataExtended(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """get the metadata formatted with the label_template"""
        logging.debug("getFormattedLabel(%s)" % path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """get the metadata from the server, formatted with the metadata_template"""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)" % path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """get the metadata from the server, formatted with the metadata_extended_template"""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self, path):
        """get the metadata from the server, formatted with the label_template"""
        logging.debug("getFormattedLabelFromServer(%s)" % path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    # NOTE(review): this method changes the configuration (including the
    # server URL used for fetching metadata) but is declared public.
    # Presumably a management permission is intended -- confirm and
    # consider security.declareProtected instead.
    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata

        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of generic field names
        @param metaDataServerUrl: %-format string that takes the document
            path and yields the URL of the metadata
        @param RESPONSE: if given, redirected to manage_main afterwards
        """
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
384 | |
385 | |
def manage_addMetaDataForm(self):
    """interface for adding the OSAS_add_Metadata"""
    # render the add form from the product's zpt directory in the context
    # of the calling object
    templatePath = os.path.join(package_home(globals()), 'zpt', 'addMetadataForm.zpt')
    form = PageTemplateFile(templatePath).__of__(self)
    return form()
390 | |
def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """adds a new MetaData object to the destination container.

    The arguments id, shortDescription, description and fields are passed
    on to the MetaData constructor; if RESPONSE is given it is redirected
    to manage_main afterwards.
    """
    metaDataObj = MetaData(id, shortDescription, description, fields)
    container = self.Destination()
    container._setObject(id, metaDataObj)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
397 | |
# Mapping object of this product; all behaviour is inherited from
# OSAS_MetadataMapping, only the Zope meta type differs.
# NOTE(review): no class docstring is added on purpose, because the presence
# of a docstring is observable through __doc__ and may affect how Zope
# treats the object -- confirm before adding one.
class MetaDataMapping(OSAS_MetadataMapping):
    meta_type = "MetadataMapping"
400 | |
def manage_addMetaDataMappingForm(self):
    """interface for adding a metadata mapping (renders addMetadataMappingForm.zpt)"""
    templatePath = os.path.join(package_home(globals()), 'zpt', 'addMetadataMappingForm.zpt')
    form = PageTemplateFile(templatePath).__of__(self)
    return form()
406 | |
def manage_addMetaDataMapping(self, idOfObject, titleOfObject, RESPONSE=None):
    """adds a new MetaDataMapping object to self.

    For every name in self.fieldList (except 'idOfObject' and
    'titleOfObject') the request form values <name>, label_<name>,
    explanation_<name>, status_<name> and values_<name> are collected
    into a tuple; if RESPONSE is given it is redirected to manage_main
    afterwards.
    """
    reserved = ['idOfObject', 'titleOfObject']
    argList = {}
    for fieldName in self.fieldList:
        if fieldName in reserved:
            continue
        form = self.REQUEST.form
        argList[fieldName] = (
            form[fieldName],
            form['label_' + fieldName],
            form['explanation_' + fieldName],
            form['status_' + fieldName],
            form['values_' + fieldName],
        )

    mappingObj = MetaDataMapping(idOfObject, titleOfObject, argList)
    self._setObject(idOfObject, mappingObj)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
419 |