0
|
1 from OFS.Folder import Folder
|
|
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
|
|
3 from Globals import package_home
|
|
4 from AccessControl import ClassSecurityInfo
|
|
5 import os.path
|
|
6 import urllib
|
|
7 import logging
|
|
8 import urlparse
|
|
9
|
|
10 # TODO: which xml toolkit?
|
|
11 import amara
|
|
12 import xml.sax.saxutils
|
|
13 import xml.dom.minidom
|
|
14 import xml.etree.ElementTree as ET
|
|
15
|
|
16
|
|
17 # TODO: do we need this?
|
|
18 #from Products.OSA_system2 import OSAS_helpers
|
|
19 #from Products.OSA_system2.OSAS_metadata import OSAS_Metadata,OSAS_MetadataMapping
|
|
20
|
|
21 from OSAS_metadata import OSAS_Metadata, OSAS_MetadataMapping
|
|
22
|
|
23
|
|
24 from SrvTxtUtils import getHttpData, getText
|
|
25
|
|
26
|
|
27 # TODO: get rid of this
|
|
def getTextFromNode(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Nodes whose nodeType is not TEXT_NODE are skipped; child nodes are
    not descended into.
    """
    return "".join(
        node.data for node in nodelist if node.nodeType == node.TEXT_NODE
    )
|
|
35
|
|
36
|
|
def normalizeBibtype(bt):
    """Return the normalised form of bib type bt for looking up mappings.

    Surrounding whitespace is stripped, remaining spaces are replaced by
    hyphens and the result is lower-cased.
    """
    trimmed = bt.strip()
    hyphenated = trimmed.replace(' ', '-')
    return hyphenated.lower()
|
|
41
|
|
def toString(list):
    """Return the unicode representations of all items of list, concatenated.

    An empty list yields the empty unicode string.
    """
    return u"".join(unicode(item) for item in list)
|
|
49
|
|
def dcMetaDataToHash(mdSet):
    """Convenience function that creates a dict from a DC metadata set.

    @param mdSet: string containing the DC metadata as XML. The original
        note says "currently only in the format getDCMetadata of this
        module"; no function of that name is visible here -- presumably the
        output of MetaData.getDCFormatted is meant (TODO confirm).
    @return: dict with the keys "title", "creator" and "date", each holding
        the concatenated text of all matching dc elements. If mdSet cannot
        be parsed, a dict with the single key "error" holding mdSet.
    """
    import StringIO
    import sys

    NSS = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'dc': 'http://dublincore.org/documents/dcmi-namespace/',
        'owl': "http://www.w3.org/2002/07/owl#",
        'rdfs': "http://www.w3.org/2000/01/rdf-schema#",
    }
    ret = {}
    stream = StringIO.StringIO(mdSet)
    try:
        md = amara.parse(stream, prefixes=NSS)
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit; parse failures are still reported via the "error" key.
        logging.error("Error: %s (%s)" % (sys.exc_info()[0], sys.exc_info()[1]))
        ret["error"] = mdSet
        return ret

    ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
    ret["creator"] = toString(md.xml_xpath("//dc:creator/text()"))
    ret["date"] = toString(md.xml_xpath("//dc:date/text()"))

    return ret
|
|
78
|
|
def getBibdataFromDom(dom):
    """Return a dict with all elements from the bib tag.

    @param dom: element supporting find(), e.g. an ElementTree element
    @return: dict mapping the tag of every child of the first ".//meta/bib"
        element to the result of getText() for that child. The normalised
        value of the bib element's type attribute is stored under the key
        '@type' (empty string if the attribute is missing). An empty dict
        is returned if there is no bib element.
    """
    bibinfo = {}
    bib = dom.find(".//meta/bib")
    if bib is not None:
        # put type in @type
        # (the original called the undefined name "normalizedBibtype" and
        # would also have passed None on for a missing type attribute)
        bibinfo['@type'] = normalizeBibtype(bib.get('type', ''))
        # put all subelements in dict
        for e in bib:
            bibinfo[e.tag] = getText(e)

    return bibinfo
|
|
92
|
|
93
|
|
94
|
|
95
|
|
class MetaData(OSAS_Metadata):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetadataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        {'label': 'Select Fields for Display', 'action': 'indicateDisplayFieldsForm'},
    )

    def __init__(self, id, shortDescription='', description='', fields=''):
        """Initialize a new instance.

        @param id: id of the object
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma-separated field names, stored as list fieldList
        """
        self.id = id
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        # has to be set later through changeMetadata
        self.metaDataServerUrl = ""

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """Convenience method for modifying a path.

        @param path: the path to modify
        @param remove: if not None, every occurrence of it is removed from path
        @param prefix: if not None, joined in front of path with os.path.join
        @param cut: if > 0, number of trailing "/"-separated parts to drop
        @return: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            splitted = path.split("/")
            path = "/".join(splitted[0:len(splitted) - cut])
        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """Import metadata from a metadataexportxml file.

        Without importFile the upload form (importMetaDataExportXML.zpt) is
        rendered and returned. Otherwise the file is parsed and the first
        metadataExport element is handed to createMappingFromDom; if
        RESPONSE is given the request is redirected to manage_main.
        """
        if importFile is None:
            pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'importMetaDataExportXML.zpt')).__of__(self)
            return pt()

        dom = xml.dom.minidom.parse(importFile)
        self.createMappingFromDom(dom.getElementsByTagName("metadataExport")[0])

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """Create mappings from the metadata node of the XML format.

        @param metadatanode: minidom node whose child elements are processed
        @param metadata: metadata object that receives the field list and
            the created sub-objects; defaults to self

        A "set" child named "generic" replaces metadata.fieldList with the
        genericName attributes of its entry elements. Any other "set" child
        becomes a MetaDataMapping object. A "metadata" child becomes a
        nested MetaData object that is filled recursively.
        """
        if metadata is None:
            metadata = self

        for node in metadatanode.childNodes:
            # Whitespace text and comment nodes between the elements have no
            # tagName attribute; the original raised AttributeError on them.
            if node.nodeType != node.ELEMENT_NODE:
                continue

            logging.debug("node: %s" % node.tagName)
            if node.tagName == "set":
                if node.getAttribute('name') == 'generic':
                    # first step: create the field list
                    metadata.fieldList = [
                        entry.getAttribute('genericName')
                        for entry in node.getElementsByTagName('entry')
                    ]
                else:
                    mappingId = node.getAttribute('name').encode('utf-8')
                    argList = {}
                    for entry in node.getElementsByTagName('entry'):
                        genericName = entry.getAttribute('genericName')
                        tag = entry.getAttribute('tag')
                        label = entry.getAttribute('label')
                        description = getTextFromNode(entry.childNodes)  # TODO: clean
                        argList[genericName] = (tag, label, description)
                    metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tagName == "metadata":
                name = node.getAttribute('name').encode('utf-8')
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    # NOTE(review): this method had no docstring and is documented with
    # comments on purpose -- Zope 2's publisher reportedly only publishes
    # objects that have a docstring, so adding one could expose the method
    # through the web. Confirm before converting this into a docstring.
    #
    # Returns the result of getHttpData() for the metadata belonging to path.
    # If path has a URL scheme (e.g. http) it is fetched as is. Otherwise it
    # is inserted into self.metaDataServerUrl with the % operator (so that
    # attribute has to contain a format placeholder) and "index.meta" is
    # appended unless path already ends with it.
    def getMDFromPathOrUrl(self, path):
        parsedurl = urlparse.urlparse(path)
        if parsedurl[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            if path.endswith("index.meta"):
                url = self.metaDataServerUrl % path
            else:
                url = os.path.join(self.metaDataServerUrl % path, 'index.meta')

        #logging.debug("get Metadata: %s"%url)
        md = getHttpData(url)
        return md

    def getDCFormatted(self, path):
        """Get the metadata for path as a DC set.

        @param path: path or URL, resolved with getMDFromPathOrUrl
        @return: string with a bib element containing one dc:type element
            and one dc:* element per mapped field that has a value; the
            empty string if no bib type can be determined or if the mapping
            for the type or the "dc" mapping is missing.
        """
        logging.debug("getDCFormatted(path=%s)" % path)
        namespace = {'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
        namespaceUsed = False
        archimedes = False

        md = self.getMDFromPathOrUrl(path)
        logging.debug("MD in XML" + md)
        im = amara.parse(md, prefixes=namespace)

        typePaths = im.xml_xpath('//bib/@type')
        if len(typePaths) < 1:
            # special case for outdated index.meta files of type archimedes
            typePaths = im.xml_xpath('//meta/archimedes')
            if len(typePaths) > 0:
                bibtype = "archimedes"
                archimedes = True
            else:
                typePaths = im.xml_xpath('//mpiwg:bib/@type')
                if len(typePaths) < 1:
                    return ""
                namespaceUsed = True
                bibtype = unicode(typePaths[0])
        else:
            bibtype = unicode(typePaths[0])
        logging.info("got type:" + bibtype)

        # getattr() with a default does not raise for a missing mapping, so
        # None has to be checked explicitly; the original went on and failed
        # with AttributeError on mapping.generateMappingHash().
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower(), None)
        except Exception:
            mapping = None
        if mapping is None:
            logging.error("getMetaDataFromServer no mapping for type: %s" % bibtype)
            return ""

        try:
            dcMapping = getattr(self.main.meta.bib, "dc", None)
        except Exception:
            dcMapping = None
        if dcMapping is None:
            logging.error("getMetaDataFromServer no dc in meta/bib")
            return ""

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()
        dcMds = dcMapping.generateMappingHash()

        if archimedes:
            xpathPattern = '//archimedes/%s/text()'
        elif namespaceUsed:
            xpathPattern = '//mpiwg:bib/mpiwg:%s/text()'
        else:
            xpathPattern = '//bib/%s/text()'

        mdHash = []
        logging.debug("Value: %s" % repr(mds))

        for key, valueTriple in mds.items():
            value = valueTriple[0]
            logging.debug("Value: %s" % repr(value))
            logging.debug("Key: %s" % repr(key))
            if value == "":
                continue

            xpath = xpathPattern % value
            if namespaceUsed:
                try:
                    v = im.xml_xpath(xpath)
                except Exception:
                    logging.error('cannot do: %s' % xpath)
                    # skip this field; the original carried on with an
                    # unbound or stale result from the previous iteration
                    continue
            else:
                v = im.xml_xpath(xpath)

            if len(v) < 1:
                continue

            # fields without an entry in the dc mapping are skipped instead
            # of raising KeyError
            dcEntry = dcMds.get(key)
            if not dcEntry:
                continue
            dc = dcEntry[0]
            if dc != "":
                logging.debug("%s--> : %s" % (repr(value), dc))
                mdHash.append([dc, unicode(v[0])])

        ret = """<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
        # the type comes from the document, so it is escaped like the values
        ret += "<dc:type>%s</dc:type>" % xml.sax.saxutils.escape(bibtype)
        for dcName, dcValue in mdHash:
            ret += """<dc:%s>%s</dc:%s>""" % (dcName, xml.sax.saxutils.escape(dcValue), dcName)
        ret += "</bib>"
        return ret

    def getStdMappedHash(self, bibdata):
        """Return dict with metadata from bibdata mapped by the standard mapping.

        @param bibdata: dict as returned by getBibdataFromDom; the bib type
            is read from its '@type' entry
        @return: dict mapping every generic field that has a mapped field
            name to the value of that field in bibdata ('' if missing); an
            empty dict if there is no mapping for the bib type.
        """
        mdHash = {}
        # a missing '@type' is treated like an unknown type (logged below)
        bibtype = bibdata.get('@type', '')
        # get mapping from main/meta/bib
        try:
            mapping = getattr(self.main.meta.bib, bibtype.lower())
        except Exception:
            logging.error("getStdMappedHash: no mapping for type: %s" % bibtype)
            return mdHash

        # get the mapping generic field --> field in the respective type
        mds = mapping.generateMappingHash()

        for field in mds.keys():
            # get mapped field name
            mf = mds[field][0]
            if not mf:
                continue
            logging.debug("mapping: %s = %s" % (field, mf))
            mdHash[field] = bibdata.get(mf, '')

        return mdHash

    def getFormatted(self, template, path=None, dom=None, bibdata=None):
        """Return string with document data formatted according to template.

        The data is taken from bibdata if given, else extracted from dom,
        else fetched from the server for path (with "/mpiwg/online" removed).

        @param template: template name prefix; the attribute
            "<template>_<bibtype>" is used, falling back to
            "<template>_generic"
        @return: result of calling the template with stdmd (the mapped
            data) and md (the bib data); the empty string if no data source
            is given, the data has no bib type or no template exists.
        """
        logging.debug("getFormatted(template=%s)" % (template))

        if dom is None and bibdata is None:
            if path is None:
                # the original failed with AttributeError on None.replace
                logging.error("getFormatted: neither path nor dom nor bibdata given")
                return ""
            # get from server
            md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online", ""))
            dom = ET.fromstring(md)

        # get contents of bib tag
        if bibdata is None:
            bibdata = getBibdataFromDom(dom)

        bibtype = bibdata.get('@type')
        if bibtype is None:
            # e.g. a document without bib element; was a KeyError before
            logging.error("getFormatted: no bib type in metadata")
            return ""

        # get template
        tp = getattr(self, "%s_%s" % (template, bibtype.lower()), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s" % (template, bibtype))
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s" % (template))
                return ""

        # put mapped data in mdHash
        mdHash = self.getStdMappedHash(bibdata)

        return tp(stdmd=mdHash, md=bibdata)

    def getFormattedMetaData(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template'."""
        logging.debug("getFormattedMetaData(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_template'.

        NOTE(review): uses the same template as getFormattedMetaData --
        confirm that no separate short template is intended.
        """
        logging.debug("getFormattedMetaDataShort(path=%s)" % path)
        return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtended(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedLabel(self, path=None, dom=None, bibdata=None):
        """Return the metadata formatted with 'label_template'."""
        logging.debug("getFormattedLabel(%s)" % path)
        return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata)

    def getFormattedMetaDataShortFromServer(self, path):
        """Return the metadata for path formatted with 'metadata_template'."""
        logging.debug("getFormattedMetaDataShortFromServer(path=%s)" % path)
        return self.getFormatted('metadata_template', path)

    def getFormattedMetaDataExtendedFromServer(self, path):
        """Return the metadata for path formatted with 'metadata_extended_template'."""
        logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)" % path)
        return self.getFormatted('metadata_extended_template', path)

    def getFormattedLabelFromServer(self, path):
        """Return the metadata for path formatted with 'label_template'."""
        logging.debug("getFormattedLabelFromServer(%s)" % path)
        return self.getFormatted('label_template', path)

    security.declarePublic('changeMetadataForm')
    def changeMetadataForm(self):
        """Main configuration"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'changeMetadata.zpt')).__of__(self)
        return pt()

    # NOTE(review): this method changes the configuration but is declared
    # public -- confirm that no stricter permission is intended.
    security.declarePublic('changeMetadata')
    def changeMetadata(self, shortDescription, description, fields, metaDataServerUrl, RESPONSE=None):
        """Change Metadata

        Stores the given values on the object (fields is a comma-separated
        string that is split into fieldList) and redirects to manage_main
        if RESPONSE is given.
        """
        self.shortDescription = shortDescription
        self.description = description
        self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
|
|
384
|
|
385
|
|
def manage_addMetaDataForm(self):
    """Render the form (addMetadataForm.zpt) for adding a MetaData object."""
    template_path = os.path.join(
        package_home(globals()), 'zpt', 'addMetadataForm.zpt')
    form = PageTemplateFile(template_path).__of__(self)
    return form()
|
|
390
|
|
def manage_addMetaData(self, id, shortDescription, description, fields, RESPONSE=None):
    """Create a MetaData object and store it under id in self.Destination().

    Redirects to manage_main if RESPONSE is given.
    """
    self.Destination()._setObject(
        id, MetaData(id, shortDescription, description, fields))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
|
|
397
|
|
# Mapping object created by createMappingFromDom and
# manage_addMetaDataMapping in this file; everything except meta_type is
# inherited from OSAS_MetadataMapping.
# NOTE(review): documented with comments instead of a class docstring on
# purpose -- a docstring may change whether Zope 2 publishes the object;
# confirm before adding one.
class MetaDataMapping(OSAS_MetadataMapping):
    meta_type="MetadataMapping"
|
|
400
|
|
def manage_addMetaDataMappingForm(self):
    """Render the form (addMetadataMappingForm.zpt) for adding a mapping."""
    template_path = os.path.join(
        package_home(globals()), 'zpt', 'addMetadataMappingForm.zpt')
    form = PageTemplateFile(template_path).__of__(self)
    return form()
|
|
406
|
|
def manage_addMetaDataMapping(self, idOfObject, titleOfObject, RESPONSE=None):
    """Create a MetaDataMapping from the request form and store it in self.

    For every name in self.fieldList (except 'idOfObject' and
    'titleOfObject') the form values <name>, label_<name>,
    explanation_<name>, status_<name> and values_<name> are collected into
    a tuple. Redirects to manage_main if RESPONSE is given.
    """
    reserved = ['idOfObject', 'titleOfObject']
    prefixes = ('', 'label_', 'explanation_', 'status_', 'values_')

    argList = {}
    for fieldName in self.fieldList:
        if fieldName in reserved:
            continue
        # the form is looked up per field, so the request is not touched
        # when there is nothing to collect
        form = self.REQUEST.form
        argList[fieldName] = tuple([form[prefix + fieldName] for prefix in prefixes])

    self._setObject(idOfObject, MetaDataMapping(idOfObject, titleOfObject, argList))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
|
|
419
|