Mercurial > hg > MetaDataProvider
comparison MetaDataFolder.py @ 6:00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
author | casties |
---|---|
date | Wed, 27 Jul 2011 21:08:01 +0200 |
parents | c1dbf78cc036 |
children | 4cd862bf37a3 |
comparison
equal
deleted
inserted
replaced
5:c1dbf78cc036 | 6:00147a1ab4ac |
---|---|
14 if underscore: | 14 if underscore: |
15 bt = bt.replace('_', '-') | 15 bt = bt.replace('_', '-') |
16 | 16 |
17 return bt | 17 return bt |
18 | 18 |
19 def getBibdataFromDom(dom): | 19 def OLDgetBibdataFromDom(dom): |
20 """returns dict with all elements from bib-tag""" | 20 """returns dict with all elements from bib-tag""" |
21 bibinfo = {} | 21 bibinfo = {} |
22 bib = dom.find(".//meta/bib") | 22 bib = dom.find(".//meta/bib") |
23 if bib is not None: | 23 if bib is not None: |
24 # put type in @type | 24 # put type in @type |
27 # put all subelements in dict | 27 # put all subelements in dict |
28 for e in bib: | 28 for e in bib: |
29 bibinfo[normalizeBibField(e.tag)] = getText(e) | 29 bibinfo[normalizeBibField(e.tag)] = getText(e) |
30 | 30 |
31 return bibinfo | 31 return bibinfo |
32 | |
33 def toString(list): | |
34 ret=u"" | |
35 | |
36 for l in list: | |
37 ret+=unicode(l) | |
38 | |
39 return ret | |
40 | |
41 def dcMetaDataToHash(mdSet): | |
42 """Convenience Function for creates a hash from the DCMetadataset | |
43 @param mdSet: String containing DCMetadata informmation | |
44 currently only in the format getDCMetadata of this module""" | |
45 | |
46 NSS = { | |
47 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', | |
48 'dc': 'http://dublincore.org/documents/dcmi-namespace/', | |
49 'owl':"http://www.w3.org/2002/07/owl#", | |
50 'rdfs':"http://www.w3.org/2000/01/rdf-schema#" | |
51 } | |
52 ret={} | |
53 import StringIO | |
54 import sys | |
55 buffer= StringIO.StringIO(mdSet) | |
56 try: | |
57 md = amara.parse(buffer,prefixes=NSS) | |
58 except: | |
59 logging.error("Error: %s (%s)"%(sys.exc_info()[0],sys.exc_info()[1])) | |
60 | |
61 ret["error"]=mdSet | |
62 return ret | |
63 | |
64 ret["title"] = toString(md.xml_xpath("//dc:title/text()")) | |
65 ret["creator"] =toString(md.xml_xpath("//dc:creator/text()")) | |
66 ret["date"] = toString(md.xml_xpath("//dc:date/text()")) | |
67 | |
68 return ret | |
69 | |
32 | 70 |
33 class MetaDataFolder(Folder): | 71 class MetaDataFolder(Folder): |
34 """provides methods for managing complete metadata structures""" | 72 """provides methods for managing complete metadata structures""" |
35 meta_type='MetaDataFolder' | 73 meta_type='MetaDataFolder' |
36 security=ClassSecurityInfo() | 74 security=ClassSecurityInfo() |
59 | 97 |
60 #logging.debug("get Metadata: %s"%url) | 98 #logging.debug("get Metadata: %s"%url) |
61 md = getHttpData(url) | 99 md = getHttpData(url) |
62 return md | 100 return md |
63 | 101 |
102 def getBibdataFromDom(self, dom): | |
103 """returns contents of bib tag as dict""" | |
104 return self.resource.meta.bib.getDataFromDom(dom) | |
105 | |
64 def getBibMapping(self, bibtype): | 106 def getBibMapping(self, bibtype): |
65 """returns MetaDataMapping for bibtype""" | 107 """returns MetaDataMapping for resource/meta/bib of bibtype""" |
66 # try type as id | 108 return self.resource.meta.bib.getMapping(bibtype) |
67 mapping = getattr(self.main.meta.bib, bibtype, None) | |
68 if mapping is None: | |
69 # try manually | |
70 mapFolder = self.main.meta.bib | |
71 for obj in mapFolder.objectValues(): | |
72 if obj.meta_type == "MetadataMapping": | |
73 # real type is in title | |
74 mapType = obj.title | |
75 if mapType == bibtype: | |
76 # try type as is | |
77 return obj | |
78 | |
79 if normalizeBibField(mapType, underscore=True) == normalizeBibField(bibtype, underscore=True): | |
80 # try normalized type without underscore | |
81 return obj | |
82 | |
83 return mapping | |
84 | 109 |
85 def getBibFields(self, bibdata): | 110 def getBibFields(self, bibdata): |
86 """returns dict with metadata description for bibdata""" | 111 """returns dict with metadata description for bibdata""" |
87 bibfields = {} | 112 return self.resource.meta.bib.getMapFields(bibdata) |
88 bibtype = bibdata['@type'] | |
89 # get mapping from main/meta/bib | |
90 mapping = self.getBibMapping(bibtype) | |
91 if mapping is None: | |
92 logging.error("getBibFields: no mapping for type: %s"%bibtype) | |
93 return bibfields | |
94 | |
95 # get field descriptions (copy so we can change it) | |
96 bibfields = mapping.getFields().copy() | |
97 # add field list | |
98 bibfields['@fieldList'] = mapping.getFieldList() | |
99 | |
100 return bibfields | |
101 | 113 |
102 def getBibMappedData(self, bibdata, allFields=False): | 114 def getBibMappedData(self, bibdata, allFields=False): |
103 """returns dict with metadata descriptions and data for bibdata""" | 115 """returns dict with metadata descriptions and data for bibdata""" |
104 bibfields = self.getBibFields(bibdata) | 116 return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields) |
105 mappedData = {} | 117 |
106 mappedList = [] | 118 def getDCMappedData(self, bibdata): |
107 for bk in bibfields.keys(): | 119 """returns dict with DC keys and data from bibdata""" |
108 # ignore descriptions without data | 120 return self.resource.meta.bib.getDCMappedData(bibdata) |
109 if not bibdata.get(bk, None): | 121 |
110 continue | 122 |
123 def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None): | |
124 """get the metadafrom server""" | |
125 logging.debug("getBibFormattedMetaData(path=%s)"%path) | |
126 return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata) | |
127 | |
128 def getBibFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): | |
129 """get the metadafrom server""" | |
130 logging.debug("getBibFormattedMetaDataExtended(path=%s)"%path) | |
131 return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, allFields=True) | |
111 | 132 |
112 # field description (copy so we can change it) | 133 def getBibFormattedLabel(self,path=None, dom=None, bibdata=None): |
113 bf = bibfields[bk].copy() | 134 """get the metadafrom server""" |
114 # add value | 135 logging.debug("getBibFormattedLabel(%s)"%path) |
115 bf['value'] = bibdata[bk] | 136 return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata) |
116 mappedData[bk] = bf | 137 |
117 mappedList.append(bk) | 138 getFormattedMetaData = getBibFormattedMetaData |
118 | 139 getFormattedMetaDataShort = getBibFormattedMetaData |
119 if allFields and len(mappedData) < len(bibdata): | 140 getFormattedMetaDataExtended = getBibFormattedMetaDataExtended |
120 # add fields that were not in bibfields | 141 getFormattedLabel = getBibFormattedLabel |
121 for bk in bibdata.keys(): | |
122 if bk in mappedData or not bibdata[bk]: | |
123 continue | |
124 | 142 |
125 mappedData[bk] = {'tag':bk, 'label':bk, 'value':bibdata[bk]} | 143 def getDCFormatted(self,path): |
126 mappedList.append(bk) | 144 """get the metadata as dc set""" |
127 | 145 logging.debug("getDCFormatted(path=%s)"%path) |
128 mappedData['@fieldList'] = mappedList | 146 namespace={ 'mpiwg': "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"} |
129 return mappedData | 147 namespaceUsed=False |
130 | 148 |
131 def getFormatted(self, template, path=None, dom=None, bibdata=None, allFields=False): | 149 md = self.getMDFromPathOrUrl(path) |
132 """returns string with document data formatted according to template. | 150 logging.debug("MD in XML"+md) |
133 gets data from server or dom or pre-parsed bibdata.""" | 151 im = amara.parse(md, prefixes=namespace) |
134 logging.debug("getFormatted(template=%s)"%(template)) | 152 |
135 | 153 typePaths=im.xml_xpath('//bib/@type') |
136 if dom is None and bibdata is None: | 154 archimedes=False |
137 # get from server | 155 |
138 md = self.getMDFromPathOrUrl(path.replace("/mpiwg/online","")) | 156 if len(typePaths)<1: |
139 #logging.debug("md:"+md) | 157 typePaths=im.xml_xpath('//meta/archimedes') # sinderfall fuer veraltete index.meta files vom typ archimedes |
140 #dom = amara.parse(md) | 158 if len(typePaths)>0: |
141 dom = ET.fromstring(md) | 159 type = "archimedes" |
142 | 160 archimedes=True |
143 # get contents of bib tag | 161 else: |
144 if bibdata is None: | 162 typePaths=im.xml_xpath('//mpiwg:bib/@type') |
145 bibdata = getBibdataFromDom(dom) | 163 if len(typePaths)<1: |
146 | |
147 bibtype = bibdata['@type'] | |
148 | |
149 # get template | |
150 tp=getattr(self,"%s_%s"%(template, bibtype.lower()), None) | |
151 if tp is None: | |
152 logging.warning("getFormatted: no template for: %s_%s"%(template, bibtype)) | |
153 # try generic | |
154 tp=getattr(self,"%s_generic"%(template), None) | |
155 if tp is None: | |
156 logging.error("getFormatted: no generic template either: %s"%(template)) | |
157 return "" | 164 return "" |
158 | 165 else: |
159 # put bib field descriptions in mdHash | 166 namespaceUsed=True |
160 bibfields = self.getBibMappedData(bibdata, allFields=allFields) | 167 |
161 | 168 type=unicode(typePaths[0]) |
162 return tp(mdmap=bibfields, md=bibdata) | 169 else: |
163 | 170 type=unicode(typePaths[0]) |
164 | 171 logging.info("got type:"+type) |
165 def getFormattedMetaData(self, path=None, dom=None, bibdata=None): | 172 try: |
166 """get the metadafrom server""" | 173 mapping=getattr(self.main.meta.bib,type.lower(),None) |
167 logging.debug("getFormattedMetaData(path=%s)"%path) | 174 except: |
168 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) | 175 logging.error("getMetaDataFromServer no mapping for type: %s"%type) |
169 | 176 return "" |
170 def getFormattedMetaDataShort(self, path=None, dom=None, bibdata=None): | 177 |
171 """get the metadafrom server""" | 178 try: |
172 logging.debug("getFormattedMetaDataShort(path=%s)"%path) | 179 dcMapping=getattr(self.main.meta.bib,"dc",None) |
173 return self.getFormatted('metadata_template', path=path, dom=dom, bibdata=bibdata) | 180 except: |
174 | 181 logging.error("getMetaDataFromServer no dc in meta/bib") |
175 def getFormattedMetaDataExtended(self,path=None, dom=None, bibdata=None): | 182 return "" |
176 """get the metadafrom server""" | 183 |
177 logging.debug("getFormattedMetaDataExtended(path=%s)"%path) | 184 mds=mapping.generateMappingHash() # Hole das Mapping generisches Feld --> Feld im entsprechenden Typ |
178 return self.getFormatted('metadata_extended_template', path=path, dom=dom, bibdata=bibdata, allFields=True) | 185 dcMds=dcMapping.generateMappingHash() |
179 | 186 |
180 def getFormattedLabel(self,path=None, dom=None, bibdata=None): | 187 mdHash=[] |
181 """get the metadafrom server""" | 188 logging.debug("Value: %s"%repr(mds)) |
182 logging.debug("getFormattedLabel(%s)"%path) | 189 |
183 return self.getFormatted('label_template', path=path, dom=dom, bibdata=bibdata) | 190 for key,valueTriple in mds.items(): |
191 value=valueTriple[0] | |
192 logging.debug("Value: %s"%repr(value)) | |
193 logging.debug("Key: %s"%repr(key)) | |
194 if value!="": | |
195 if not archimedes: | |
196 if namespaceUsed: | |
197 try: | |
198 v = im.xml_xpath('//mpiwg:bib/mpiwg:%s/text()'%value) | |
199 except: | |
200 logging.error('cannot do: //mpiwg:bib/mpiwg:%s/text()'%value) | |
201 else: | |
202 v = im.xml_xpath('//bib/%s/text()'%value) | |
203 else: | |
204 v = im.xml_xpath('//archimedes/%s/text()'%value) | |
205 if len(v) > 0: | |
206 dc=dcMds[key][0] | |
184 | 207 |
185 def getFormattedMetaDataShortFromServer(self,path): | 208 if (dc !="") and (value !=""): |
186 """get the metadafrom server""" | 209 logging.debug("%s--> : %s"%(repr(value),dc)) |
187 logging.debug("getFormattedMetaDataShortFromServer(path=%s)"%path) | 210 mdHash.append([dc,unicode(v[0])]) |
188 return self.getFormatted('metadata_template', path) | 211 |
189 | 212 ret="""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """ |
190 def getFormattedMetaDataExtendedFromServer(self,path): | 213 ret+="<dc:type>%s</dc:type>"%type |
191 """get the metadafrom server""" | 214 for md in mdHash: |
192 logging.debug("getFormattedMetaDataExtendedFromServer(path=%s)"%path) | 215 |
193 return self.getFormatted('metadata_extended_template', path=path, allFields=True) | 216 ret+="""<dc:%s>%s</dc:%s>"""%(md[0],xml.sax.saxutils.escape(md[1]),md[0]) |
194 | 217 ret+="</bib>" |
195 def getFormattedLabelFromServer(self,path): | 218 return ret |
196 """get the metadafrom server""" | |
197 logging.debug("getFormattedLabelFromServer(%s)"%path) | |
198 return self.getFormatted('label_template', path) | |
199 | 219 |
200 | 220 |
201 changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals()) | 221 changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder',globals()) |
202 | 222 |
203 security.declarePublic('changeMetaDataFolder') | 223 security.declarePublic('changeMetaDataFolder') |
207 self.metaDataServerUrl=metaDataServerUrl | 227 self.metaDataServerUrl=metaDataServerUrl |
208 if RESPONSE is not None: | 228 if RESPONSE is not None: |
209 RESPONSE.redirect('manage_main') | 229 RESPONSE.redirect('manage_main') |
210 | 230 |
211 | 231 |
212 manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetaDataFolderForm',globals()) | 232 manage_addMetaDataFolderForm = PageTemplateFile('zpt/addMetadataFolderForm',globals()) |
213 | 233 |
214 def manage_addMetaDataFolder(self,id,title,RESPONSE=None): | 234 def manage_addMetaDataFolder(self,id,title,RESPONSE=None): |
215 """a MetaDataFolder objekt""" | 235 """a MetaDataFolder objekt""" |
216 newObj=MetaDataFolder(id,title) | 236 newObj=MetaDataFolder(id,title) |
217 self.Destination()._setObject(id,newObj) | 237 self.Destination()._setObject(id,newObj) |