comparison MetaDataFolder.py @ 6:00147a1ab4ac

division between MetaDataFolder and Metadata looks good now (to me :-)
author casties
date Wed, 27 Jul 2011 21:08:01 +0200
parents c1dbf78cc036
children 4cd862bf37a3
comparison
equal deleted inserted replaced
5:c1dbf78cc036 6:00147a1ab4ac
14 if underscore: 14 if underscore:
15 bt = bt.replace('_', '-') 15 bt = bt.replace('_', '-')
16 16
17 return bt 17 return bt
18 18
19 def getBibdataFromDom(dom): 19 def OLDgetBibdataFromDom(dom):
20 """returns dict with all elements from bib-tag""" 20 """returns dict with all elements from bib-tag"""
21 bibinfo = {} 21 bibinfo = {}
22 bib = dom.find(".//meta/bib") 22 bib = dom.find(".//meta/bib")
23 if bib is not None: 23 if bib is not None:
24 # put type in @type 24 # put type in @type
27 # put all subelements in dict 27 # put all subelements in dict
28 for e in bib: 28 for e in bib:
29 bibinfo[normalizeBibField(e.tag)] = getText(e) 29 bibinfo[normalizeBibField(e.tag)] = getText(e)
30 30
31 return bibinfo 31 return bibinfo
32
33 def toString(list):
34 ret=u""
35
36 for l in list:
37 ret+=unicode(l)
38
39 return ret
40
41 def dcMetaDataToHash(mdSet):
42 """Convenience Function for creates a hash from the DCMetadataset
43 @param mdSet: String containing DCMetadata informmation
44 currently only in the format getDCMetadata of this module"""
45
46 NSS = {
47 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
48 'dc': 'http://dublincore.org/documents/dcmi-namespace/',
49 'owl':"http://www.w3.org/2002/07/owl#",
50 'rdfs':"http://www.w3.org/2000/01/rdf-schema#"
51 }
52 ret={}
53 import StringIO
54 import sys
55 buffer= StringIO.StringIO(mdSet)
56 try:
57 md = amara.parse(buffer,prefixes=NSS)
58 except:
59 logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1]))
60
61 ret["error"]=mdSet
62 return ret
63
64 ret["title"] = toString(md.xml_xpath("//dc:title/text()"))
65 ret["creator"] =toString(md.xml_xpath("//dc:creator/text()"))
66 ret["date"] = toString(md.xml_xpath("//dc:date/text()"))
67
68 return ret
69
32 70
33 class MetaDataFolder(Folder): 71 class MetaDataFolder(Folder):
34 """provides methods for managing complete metadata structures""" 72 """provides methods for managing complete metadata structures"""
35 meta_type='MetaDataFolder' 73 meta_type='MetaDataFolder'
36 security=ClassSecurityInfo() 74 security=ClassSecurityInfo()
59 97
60 #logging.debug("get Metadata: %s"%url) 98 #logging.debug("get Metadata: %s"%url)
61 md = getHttpData(url) 99 md = getHttpData(url)
62 return md 100 return md
63 101
102 def getBibdataFromDom(self, dom):
103 """returns contents of bib tag as dict"""
104 return self.resource.meta.bib.getDataFromDom(dom)
105
64 def getBibMapping(self, bibtype): 106 def getBibMapping(self, bibtype):
65 """returns MetaDataMapping for bibtype""" 107 """returns MetaDataMapping for resource/meta/bib of bibtype"""
66 # try type as id 108 return self.resource.meta.bib.getMapping(bibtype)
67 mapping = getattr(self.main.meta.bib, bibtype, None)
68 if mapping is None:
69 # try manually
70 mapFolder = self.main.meta.bib
71 for obj in mapFolder.objectValues():
72 if obj.meta_type == "MetadataMapping":
73 # real type is in title
74 mapType = obj.title
75 if mapType == bibtype:
76 # try type as is
77 return obj
78
79 if normalizeBibField(mapType, underscore=True) == normalizeBibField(bibtype, underscore=True):
80 # try normalized type without underscore
81 return obj
82
83 return mapping
84 109
85 def getBibFields(self, bibdata): 110 def getBibFields(self, bibdata):
86 """returns dict with metadata description for bibdata""" 111 """returns dict with metadata description for bibdata"""
87 bibfields = {} 112 return self.resource.meta.bib.getMapFields(bibdata)
88 bibtype = bibdata['@type']
89 # get mapping from main/meta/bib
90 mapping = self.getBibMapping(bibtype)
91 if mapping is None:
92 logging.error("getBibFields: no mapping for type: %s"%bibtype)
93 return bibfields
94
95 # get field descriptions (copy so we can change it)
96 bibfields = mapping.getFields().copy()
97 # add field list
98 bibfields['@fieldList'] = mapping.getFieldList()
99
100 return bibfields
101 113
102 def getBibMappedData(self, bibdata, allFields=False): 114 def getBibMappedData(self, bibdata, allFields=False):
103 """returns dict with metadata descriptions and data for bibdata""" 115 """returns dict with metadata descriptions and data for bibdata"""
104 bibfields = self.getBibFields(bibdata) 116 return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)
105 mappedData = {} 117
106 mappedList = [] 118 def getDCMappedData(self, bibdata):
107 for bk in bibfields.keys(): 119 """returns dict with DC keys and data from bibdata"""
108 # ignore descriptions without data 120 return self.resource.meta.bib.getDCMappedData(bibdata)
109 if not bibdata.get(bk, None): 121
110 continue 122
123 def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None):
124 """get the metadafrom server"""
125 logging.debug("getBibFormattedMetaData(path=%s)"%path)
126 return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata)
127
128 def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None):
129 """get the metadafrom server"""
130 logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path)
131 return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True)
111 132
112 # field description (copy so we can change it) 133 def getBibFormattedLabel(self,path=None, dom=None, bibdata=None):
113 bf = bibfields[bk].copy() 134 """get the metadafrom server"""
114 # add value 135 logging.debug("getBibFormattedLabel(%s)"%path)
115 bf['value'] = bibdata[bk] 136 return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)
116 mappedData[bk] = bf 137
117 mappedList.append(bk) 138 getFormattedMetaData = getBibFormattedMetaData
118 139 getFormattedMetaDataShort = getBibFormattedMetaData
119 if allFields and len(mappedData) < len(bibdata): 140 getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
120 # add fields that were not in bibfields 141 getFormattedLabel = getBibFormattedLabel
121 for bk in bibdata.keys():
122 if bk in mappedData or not bibdata[bk]:
123 continue
124 142
125 mappedData[bk] = {'tag':bk, 'label':bk, 'value':bibdata[bk]} 143 def getDCFormatted(self,path):
126 mappedList.append(bk) 144 """get the metadata as dc set"""
127 145 logging.debug("getDCFormatted(path=%s)"%path)
128 mappedData['@fieldList'] = mappedList 146 namespace={ 'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"}
129 return mappedData 147 namespaceUsed=False
130 148
131 def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False): 149 md = self.getMDFromPathOrUrl(path)
132 """returns string with document data formatted according to template. 150 logging.debug("MD in XML"+md)
133 gets data from server or dom or pre-parsed bibdata.""" 151 im = amara.parse(md, prefixes=namespace)
134 logging.debug("getFormatted(template=%s)"%(template)) 152
135 153 typePaths=im.xml_xpath('//bib/@type')
136 if dom is None and bibdata is None: 154 archimedes=False
137 # get from server 155
138 md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online","")) 156 if len(typePaths)<1:
139 #logging.debug("md:"+md) 157 typePaths=im.xml_xpath('//meta/archimedes') # sinderfall fuer veraltete index.meta files vom typ archimedes
140 #dom = amara.parse(md) 158 if len(typePaths)>0:
141 dom = ET.fromstring(md) 159 type = "archimedes"
142 160 archimedes=True
143 # get contents of bib tag 161 else:
144 if bibdata is None: 162 typePaths=im.xml_xpath('//mpiwg:bib/@type')
145 bibdata = getBibdataFromDom(dom) 163 if len(typePaths)<1:
146
147 bibtype = bibdata['@type']
148
149 # get template
150 tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None)
151 if tp is None:
152 logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype))
153 # try generic
154 tp=getattr(self,"%s_generic"%(template), None)
155 if tp is None:
156 logging.error("getFormatted: no generic template either: %s"%(template))
157 return "" 164 return ""
158 165 else:
159 # put bib field descriptions in mdHash 166 namespaceUsed=True
160 bibfields = self.getBibMappedData(bibdata, allFields=allFields) 167
161 168 type=unicode(typePaths[0])
162 return tp(mdmap=bibfields, md=bibdata) 169 else:
163 170 type=unicode(typePaths[0])
164 171 logging.info("got type:"+type)
165 def getFormattedMetaData(self, path=None, dom=None, bibdata=None): 172 try:
166 """get the metadafrom server""" 173 mapping=getattr(self.main.meta.bib,type.lower(),None)
167 logging.debug("getFormattedMetaData(path=%s)"%path) 174 except:
168 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) 175 logging.error("getMetaDataFromServer no mapping for type: %s"%type)
169 176 return ""
170 def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None): 177
171 """get the metadafrom server""" 178 try:
172 logging.debug("getFormattedMetaDataShort(path=%s)"%path) 179 dcMapping=getattr(self.main.meta.bib,"dc",None)
173 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) 180 except:
174 181 logging.error("getMetaDataFromServer no dc in meta/bib")
175 def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): 182 return ""
176 """get the metadafrom server""" 183
177 logging.debug("getFormattedMetaDataExtended(path=%s)"%path) 184 mds=mapping.generateMappingHash() # Hole das Mapping generisches Feld --> Feld im entsprechenden Typ
178 return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata, allFields=True) 185 dcMds=dcMapping.generateMappingHash()
179 186
180 def getFormattedLabel(self,path=None, dom=None, bibdata=None): 187 mdHash=[]
181 """get the metadafrom server""" 188 logging.debug("Value: %s"%repr(mds))
182 logging.debug("getFormattedLabel(%s)"%path) 189
183 return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata) 190 for key,valueTriple in mds.items():
191 value=valueTriple[0]
192 logging.debug("Value: %s"%repr(value))
193 logging.debug("Key: %s"%repr(key))
194 if value!="":
195 if not archimedes:
196 if namespaceUsed:
197 try:
198 v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value)
199 except:
200 logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value)
201 else:
202 v = im.xml_xpath('//bib/%s/text()'%value)
203 else:
204 v = im.xml_xpath('//archimedes/%s/text()'%value)
205 if len(v) > 0:
206 dc=dcMds[key][0]
184 207
185 def getFormattedMetaDataShortFromServer(self,path): 208 if (dc !="") and (value !=""):
186 """get the metadafrom server""" 209 logging.debug("%s--> : %s"%(repr(value),dc))
187 logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path) 210 mdHash.append([dc,unicode(v[0])])
188 return self.getFormatted('metadata_template', path) 211
189 212 ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """
190 def getFormattedMetaDataExtendedFromServer(self,path): 213 ret+="<dc:type>%s</dc:type>"%type
191 """get the metadafrom server""" 214 for md in mdHash:
192 logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path) 215
193 return self.getFormatted('metadata_extended_template', path=path, allFields=True) 216 ret+="""<dc:%s>%s</dc:%s>"""%(md[0],xml.sax.saxutils.escape(md[1]),md[0])
194 217 ret+="</bib>"
195 def getFormattedLabelFromServer(self,path): 218 return ret
196 """get the metadafrom server"""
197 logging.debug("getFormattedLabelFromServer(%s)"%path)
198 return self.getFormatted('label_template', path)
199 219
200 220
201 changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals()) 221 changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals())
202 222
203 security.declarePublic('changeMetaDataFolder') 223 security.declarePublic('changeMetaDataFolder')
207 self.metaDataServerUrl=metaDataServerUrl 227 self.metaDataServerUrl=metaDataServerUrl
208 if RESPONSE is not None: 228 if RESPONSE is not None:
209 RESPONSE.redirect('manage_main') 229 RESPONSE.redirect('manage_main')
210 230
211 231
212 manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetaDataFolderForm',globals()) 232 manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetadataFolderForm',globals())
213 233
214 def manage_addMetaDataFolder(self,id,title,RESPONSE=None): 234 def manage_addMetaDataFolder(self,id,title,RESPONSE=None):
215 """a MetaDataFolder objekt""" 235 """a MetaDataFolder objekt"""
216 newObj=MetaDataFolder(id,title) 236 newObj=MetaDataFolder(id,title)
217 self.Destination()._setObject(id,newObj) 237 self.Destination()._setObject(id,newObj)