Mercurial > hg > MetaDataProvider
annotate MetaDataFolder.py @ 28:40508e672841
richer metadata for bib. still not sufficient though.
| author | casties |
|---|---|
| date | Mon, 20 Aug 2012 20:14:45 +0200 |
| parents | a0d273542509 |
| children | b3428e281ee2 |
| rev | line source |
|---|---|
| 4 | 1 from OFS.Folder import Folder |
| 2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
| 3 from Globals import package_home | |
| 4 from AccessControl import ClassSecurityInfo | |
| 5 import os.path | |
| 9 | 6 import urlparse |
| 4 | 7 import logging |
| 8 | |
| 12 | 9 import xml.etree.ElementTree as ET |
| 10 | |
| 4 | 11 from MetaDataMapping import MetaDataMapping |
| 12 from MetaData import MetaData | |
| 12 | 13 from SrvTxtUtils import getHttpData, getText |
| 4 | 14 |
def normalizeBibField(bt, underscore=True):
    """Return the normalised form of a bib type used for looking up mappings.

    bt -- bib type (or field) name as a string.
    underscore -- if true (default), underscores are turned into hyphens
        as well as spaces.

    The name is stripped of surrounding whitespace, lower-cased, and has
    every space replaced by a hyphen.
    """
    normalized = bt.strip().lower()
    # characters that are mapped onto '-'
    separators = ' _' if underscore else ' '
    for sep in separators:
        normalized = normalized.replace(sep, '-')

    return normalized
| 22 | |
|
6
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
def toString(list):
    """Return the concatenated unicode representations of all items.

    list -- iterable of objects; each item is converted with unicode().

    Returns a unicode string (empty if the iterable is empty).
    """
    # NOTE: the parameter name shadows the builtin; it is kept unchanged so
    # that callers passing it by keyword keep working.
    # join() builds the result in one pass instead of repeated += copies.
    return u"".join(unicode(item) for item in list)
|
00147a1ab4ac
division between MetaDataFolder and Metadata looks good now (to me :-)
casties
parents:
5
diff
changeset
|
30 |
class MetaDataFolder(Folder):
    """Folder providing methods for managing complete metadata structures.

    Metadata is read from XML files (index.meta) that are fetched either
    from an explicit URL or from the configured metadata server. Most
    accessors delegate to the object found by traversing an XML path such
    as 'resource/meta/bib' below this folder.
    """
    meta_type = 'MetaDataFolder'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataFolderForm'},
        )

    # URL of metadata server. %s replaced by file path.
    metaDataServerUrl = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=%s"

    def __init__(self, id, title='', metaDataServerUrl=None):
        """Initialize a new instance.

        id -- Zope id of the folder.
        title -- optional title.
        metaDataServerUrl -- optional URL template overriding the class
            default; '%s' is replaced by the file path.
        """
        self.id = id
        self.title = title
        if metaDataServerUrl:
            self.metaDataServerUrl = metaDataServerUrl

    def getMDFromPathOrUrl(self, path):
        """Return the contents of a metadata file fetched from the server.

        path -- either a URL with a scheme (e.g. http), which is fetched
            as given, or a file path, which is inserted into
            metaDataServerUrl after every occurrence of '/mpiwg/online/'
            has been removed. 'index.meta' is appended to the resulting
            URL unless the path already ends with it.

        Returns the result of getHttpData(), or None if path is empty or
        the data could not be fetched.
        """
        if not path:
            logging.error("getMDFromPathOrUrl: empty path!")
            return None

        if urlparse.urlparse(path)[0] != "":
            # has schema (e.g. http)
            url = path
        else:
            # path only
            path = path.replace('/mpiwg/online/', '')
            url = self.metaDataServerUrl % path
            if not path.endswith("index.meta"):
                # join with '/' explicitly: os.path.join would use the
                # platform's path separator, which is wrong for URLs
                if url and not url.endswith('/'):
                    url += '/'
                url += 'index.meta'

        try:
            return getHttpData(url)
        except Exception:
            # best effort: report the failure (with traceback) and signal
            # it to the caller by returning None
            logging.error("getMDFromPathOrUrl: unable to get data!", exc_info=True)
            return None

    def getDomFromPathOrUrl(self, path):
        """Return the DOM (ElementTree element) of the metadata file at path.

        Returns None if no data could be fetched or the data could not be
        parsed; this method does not raise.
        """
        data = self.getMDFromPathOrUrl(path)
        if not data:
            return None

        try:
            return ET.fromstring(data)
        except Exception:
            # the exception type raised for malformed XML depends on the
            # ElementTree version, so catch broadly at this boundary
            logging.error("getDomFromPathOrUrl: unable to parse data!", exc_info=True)
            return None

    def getXmlPathObj(self, xmlpath):
        """Return the object at xmlpath, or None if there is none.

        xmlpath -- path like 'resource/meta/bib'; a leading '/' is removed
            so that the path is traversed relative to this folder.
        """
        if not xmlpath:
            # nothing to traverse (also avoids indexing an empty string)
            return None

        # make xmlpath relative for Zope
        if xmlpath.startswith('/'):
            xmlpath = xmlpath[1:]

        return self.restrictedTraverse(xmlpath, None)

    def getXmlPathData(self, xmlpath, path=None, dom=None, recursive=0, all=False, allText=False):
        """Return the contents of the element at xmlpath.

        The arguments path, dom, recursive, all and allText are passed on
        unchanged to getData() of the object found at xmlpath.

        Returns the result of getData(), or None if no object exists at
        xmlpath.
        """
        logging.debug("getXmlPathData(%s)", xmlpath)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathData: MetaData object for '%s' not found!", xmlpath)
            return None

        return mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

    def getXmlPathFormatted(self, xmlpath, template, path=None, dom=None, data=None, allFields=False, recursive=0, all=False, allText=False):
        """Return the contents of the element at xmlpath formatted with template.

        data -- already loaded data; if None it is read with getData() of
            the object at xmlpath.
        all -- if true, data is treated as a list of elements; every
            element is formatted separately and followed by a newline.

        Returns the formatted string, or '' if there is no object at
        xmlpath or no data.
        """
        logging.debug("getXmlPathFormatted(xmlpath=%s, template=%s)", xmlpath, template)
        mdObj = self.getXmlPathObj(xmlpath)
        if mdObj is None:
            logging.error("getXmlPathFormatted: MetaData object for '%s' not found!", xmlpath)
            return ''

        if data is None:
            data = mdObj.getData(path=path, dom=dom, recursive=recursive, all=all, allText=allText)

        if data is None:
            return ''

        if all:
            # data is list of elements: concatenate formatted strings
            return ''.join(
                mdObj.getFormatted(template, path=path, dom=dom, data=d, allFields=allFields) + '\n'
                for d in data)

        return mdObj.getFormatted(template, path=path, dom=dom, data=data, allFields=allFields)

    def getResourceData(self, path=None, dom=None):
        """Return the contents of the resource tag."""
        return self.getXmlPathData('resource', path=path, dom=dom)

    def getTexttoolData(self, path=None, dom=None):
        """Return the contents of the texttool tag."""
        return self.getXmlPathData('resource/meta/texttool', path=path, dom=dom)

    def getAccessData(self, path=None, dom=None):
        """Return the contents of the access tag."""
        return self.getXmlPathData('resource/meta/access-conditions/access', path=path, dom=dom)

    def getAttributionData(self, path=None, dom=None, all=True):
        """Return the contents of the attribution tag(s)."""
        return self.getXmlPathData('resource/meta/access-conditions/attribution', path=path, dom=dom, all=all)

    def getAttributionFormatted(self, template, path=None, dom=None, data=None, all=True):
        """Return the formatted contents of the attribution tag(s)."""
        return self.getXmlPathFormatted('resource/meta/access-conditions/attribution', template, path=path, dom=dom, data=data, all=all)

    def getCopyrightData(self, path=None, dom=None, recursive=1, all=True):
        """Return the contents of the copyright tag(s)."""
        # information is two tags deep - recursive=1
        return self.getXmlPathData('resource/meta/access-conditions/copyright', path=path, dom=dom, recursive=recursive, all=all)

    def getCopyrightFormatted(self, template, path=None, dom=None, data=None, recursive=1, all=True):
        """Return the formatted contents of the copyright tag(s)."""
        # information is two tags deep - recursive=1
        return self.getXmlPathFormatted('resource/meta/access-conditions/copyright', template, path=path, dom=dom, data=data, recursive=recursive, all=all)

    def getContextData(self, path=None, dom=None, all=True):
        """Return the contents of the context tag(s)."""
        return self.getXmlPathData('resource/meta/context', path=path, dom=dom, all=all)

    def getDRI(self, path=None, dom=None, type="escidoc"):
        """Return the DRI of the document.

        type -- value of the 'type' attribute the dri element must have;
            if None, the text of the first dri element is returned.

        Returns the '@text' entry of the selected dri element, or None if
        there is no dri data or no element of the requested type.
        """
        dris = self.getXmlPathData('resource/meta/dri', path=path, dom=dom, all=True)
        if dris is None:
            return None

        for dri in dris:
            if type is None:
                # no type -- take the first one
                return dri.get('@text', None)

            # use element with matching @type
            att = dri.get('@attr', None)
            if att is not None and att.get('type', None) == type:
                return dri.get('@text', None)

        return None

    def getBibData(self, path=None, dom=None, all=False, recursive=0):
        """Return the contents of the bib tag."""
        return self.resource.meta.bib.getData(path=path, dom=dom, all=all, recursive=recursive)

    def getBibMapping(self, bibtype):
        """Return the MetaDataMapping for resource/meta/bib of bibtype."""
        return self.resource.meta.bib.getMapping(bibtype)

    def getBibFields(self, bibdata):
        """Return the metadata description for bibdata."""
        return self.resource.meta.bib.getMapFields(bibdata)

    def getBibMappedData(self, bibdata, allFields=False):
        """Return the metadata descriptions and data for bibdata."""
        return self.resource.meta.bib.getMappedData(bibdata, allFields=allFields)

    def getDCMappedData(self, bibdata):
        """Return the DC keys and data from bibdata."""
        return self.resource.meta.bib.getDCMappedData(bibdata)

    def getBibFormattedMetaData(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'metadata_template'."""
        logging.debug("getBibFormattedMetaData(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_template', path=path, dom=dom, data=bibdata, xdata=bibxdata)

    def getBibFormattedMetaDataExtended(self, path=None, dom=None, bibdata=None, bibxdata=None):
        """Return the bib metadata formatted with 'metadata_extended_template'.

        Always requests all fields (allFields=True).
        """
        logging.debug("getBibFormattedMetaDataExtended(path=%s)", path)
        return self.resource.meta.bib.getFormatted('metadata_extended_template', path=path, dom=dom, data=bibdata, xdata=bibxdata, allFields=True)

    def getBibFormattedLabel(self, path=None, dom=None, bibdata=None):
        """Return the bib metadata formatted with 'label_template'."""
        logging.debug("getBibFormattedLabel(%s)", path)
        return self.resource.meta.bib.getFormatted('label_template', path=path, dom=dom, data=bibdata)

    # compatibility: old method names kept as aliases
    getFormattedMetaData = getBibFormattedMetaData
    getFormattedMetaDataShort = getBibFormattedMetaData
    getFormattedMetaDataExtended = getBibFormattedMetaDataExtended
    getFormattedLabel = getBibFormattedLabel

    changeMetaDataFolderForm = PageTemplateFile('zpt/changeMetaDataFolder', globals())

    # NOTE(review): this declares a configuration-changing method public;
    # confirm whether it should be protected by a management permission.
    security.declarePublic('changeMetaDataFolder')
    def changeMetaDataFolder(self, title, metaDataServerUrl, RESPONSE=None):
        """Change title and metadata server URL of this MetaDataFolder.

        Redirects to 'manage_main' if RESPONSE is given.
        """
        self.title = title
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
| 241 | |
| 242 | |
def manage_addMetaDataFolderForm(self):
    """Render the form for adding a MetaDataFolder."""
    # wrap the template in the context of the calling object before rendering
    form = PageTemplateFile('zpt/addMetadataFolderForm', globals())
    return form.__of__(self)()
| 4 | 247 |
def manage_addMetaDataFolder(self, id, title, RESPONSE=None):
    """Add a new MetaDataFolder object with the given id and title.

    Redirects to 'manage_main' if RESPONSE is given.
    """
    folder = MetaDataFolder(id, title)
    self.Destination()._setObject(id, folder)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
| 254 |
