Mercurial > hg > MetaDataProvider
view MetaData.py @ 38:67115536b7ec default tip
DC as JSON export added
author | dwinter |
---|---|
date | Thu, 22 May 2014 12:09:20 +0200 |
parents | f6a8055f6798 |
children |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
import logging
import xml.etree.ElementTree as ET
import xml.sax.saxutils

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getText


def normalizeFieldName(bt, underscore=True):
    """returns normalised field type for looking up mappings

    Strips surrounding whitespace, replaces spaces with '-' and lowercases.
    If underscore is true, '_' is replaced with '-' as well.
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')
    return bt


def putAppend(hash, key, value):
    """puts value in dict hash at key if it doesn't exist or adds value to a list

    The first value for a key is stored as is; a second value turns the
    entry into a list [old, new]; further values are appended to that list.
    Returns the (modified) dict.
    """
    if key in hash:
        # key exists
        oldval = hash[key]
        if isinstance(oldval, list):
            # is list already - append
            oldval.append(value)
        else:
            # needs list
            hash[key] = [oldval, value]
    else:
        # key doesn't exist
        hash[key] = value

    return hash


class MetaData(Folder):
    """provides basic methods for managing metadata structures"""
    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
        )

    # the name of the attribute that can be used to select a mapping
    # (if applicable)
    mappingSelectAttribute = None

    def __init__(self, id, title=None, shortDescription='', description='', fields=''):
        """initialize a new instance"""
        self.id = id
        # title is tag name; without a title assume id is the tag name
        if title:
            self.title = title
        else:
            self.title = id

        self.shortDescription = shortDescription  # label for the link on the add page
        self.description = description  # description of the method for the link page
        if fields:
            self.fieldList = fields.split(",")[0:]
        #self.metaDataServerUrl=""     # must be set with changeMetaData

    def getFieldList(self):
        """returns fieldList as comma-separated string

        Returns an empty string if no field list has been set (it is only
        set by __init__ and changeMetaData when fields is non-empty).
        """
        return ','.join(getattr(self, 'fieldList', None) or [])

    def getTagName(self):
        """returns the tag name of this element"""
        if self.title:
            return self.title
        else:
            return self.shortDescription

    def getXmlPath(self, omitRoot=False):
        """returns the xpath to this element

        The path is built from the tag names of this element and all parents
        with the same meta_type. With omitRoot the topmost element
        contributes an empty string instead of its tag name.
        """
        path = '/%s' % self.getTagName()
        parent = self.aq_parent
        if parent.meta_type == self.meta_type:
            # add parent
            path = parent.getXmlPath(omitRoot=omitRoot) + path
        elif omitRoot:
            return ''

        return path

    def getSubDom(self, path=None, dom=None, all=False):
        """returns the subtree (list) of the dom rooted in this element

        If dom is None it is fetched with self.getDomFromPathOrUrl(path).
        With all=True the result of findall() (a list) is returned,
        otherwise the result of find() (an element or None).
        """
        if dom is None:
            # get from server
            dom = self.getDomFromPathOrUrl(path)

        # ElementTree doesn't like absolute paths
        # lets assume dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s"%(xpath, dom))
        if all:
            elem = dom.findall(xpath)
        else:
            elem = dom.find(xpath)

        return elem

    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
        """helper method for getData()

        Returns a dict with the attributes of elem in '@attr', its text in
        '@text' and one entry per child tag. While recursive > 0 child
        values are dicts built by this method, otherwise they are the result
        of getText().
        """
        data = {}
        attr = {}
        # put attributes in @attr
        for attname in elem.keys():
            attr[attname] = elem.get(attname)

        if attr:
            data['@attr'] = attr

        # put text in @text
        if elem.text:
            data['@text'] = elem.text

        for e in elem:
            # put all child elements in data
            if normalizeNames:
                # normalize key names
                key = normalizeFieldName(e.tag)
            else:
                key = e.tag

            if recursive > 0:
                # more recursive - call _getData on element
                val = self._getData(e, recursive=recursive-1, normalizeNames=normalizeNames, all=all, allText=allText)
            else:
                val = getText(e, recursive=allText)

            if all:
                # add multiple tags as list
                putAppend(data, key, val)
            elif key not in data:
                # don't overwrite element data
                data[key] = val

        return data

    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
        """Returns dict with attributes and child elements from corresponding tag.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param normalizeNames: normalize tag names
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag);
            a list of such dicts if all is true; None if neither path nor dom
            is given or the element is not found
        """
        if path is None and dom is None:
            return None

        elem = self.getSubDom(path=path, dom=dom, all=all)
        if elem is None:
            return None

        if all:
            # subdom is list - return list
            data = [self._getData(e, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)
                    for e in elem]
        else:
            # subdom is element
            data = self._getData(elem, recursive=recursive, normalizeNames=normalizeNames, all=all, allText=allText)

        if self.mappingSelectAttribute:
            # put type in @type (for lists only in the first entry)
            if all:
                # findall() may have matched nothing - then there is no entry to tag
                target = data[0] if data else None
            else:
                target = data

            if target is not None:
                attr = target.get('@attr', None)
                if attr:
                    selType = attr.get(self.mappingSelectAttribute, None)
                    if selType is not None:
                        target['@type'] = normalizeFieldName(selType)

        return data

    def getDCDataFromPath(self, path):
        """returns DC mapped data from path to index.meta

        Returns a string with a <bib> element containing one <dc:KEY> element
        per DC-mapped field. If a REQUEST is available, CORS headers are set
        on its response.
        """
        data = self.getData(path=path)
        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        for key, value in self.getDCMappedData(data).items():
            parts.append("""<dc:%s>%s</dc:%s>"""%(key, xml.sax.saxutils.escape(value), key))

        parts.append("</bib>")
        ret = "".join(parts)

        # REQUEST may be missing when called outside of a request context
        REQUEST = getattr(self, 'REQUEST', None)
        if REQUEST is not None:
            origin = REQUEST.getHeader('Origin')
            if origin:
                # NOTE(review): this reflects any Origin together with
                # Allow-Credentials: true - confirm that this is intended.
                REQUEST.response.setHeader('Access-Control-Allow-Origin', origin)
                REQUEST.response.setHeader('Access-Control-Allow-Credentials', 'true')
            else:
                REQUEST.response.setHeader('Access-Control-Allow-Origin', '*')

            REQUEST.response.setHeader('Access-Control-Allow-Methods', 'GET, PUT, POST, DELETE, OPTIONS, HEAD')
            REQUEST.response.setHeader('Access-Control-Allow-Headers', 'Origin, Content-Type, Cache-Control, X-Requested-With')

        return ret

    def getMapping(self, type):
        """returns MetaDataMapping for type

        Tries type as attribute name first, then searches the contained
        objects with meta_type "MetadataMapping" for a matching title
        (literally or normalized). Returns None if nothing matches.
        """
        # try type as id
        mapping = getattr(self, type, None)
        if mapping is None:
            # try manually
            normType = normalizeFieldName(type, underscore=True)
            for obj in self.objectValues():
                if obj.meta_type == "MetadataMapping":
                    # real type is in title
                    mapType = obj.title
                    if mapType == type:
                        # try type as is
                        return obj

                    if normalizeFieldName(mapType, underscore=True) == normType:
                        # try normalized type
                        return obj

        return mapping

    def getMapFields(self, data):
        """returns dict with metadata description for data

        The dict contains a copy of the field descriptions of the mapping
        selected by data['@type'] plus the mapping's field list in
        '@fieldList'. Returns an empty dict if data is empty, has no @type
        or no mapping exists for the type.
        """
        fields = {}
        # data may be None if getData() found nothing
        mdType = data.get('@type', None) if data else None
        if not mdType:
            logging.error("getMapFields: no @type!")
            return fields

        # get mapping from main/meta/bib
        mapping = self.getMapping(mdType)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s"%mdType)
            return fields

        # get field descriptions (copy so we can change it)
        fields = mapping.getFields().copy()
        # add field list
        fields['@fieldList'] = mapping.getFieldList()

        return fields

    def getMappedData(self, data, allFields=False):
        """returns dict with metadata descriptions and data for data

        Every field of the mapping that has data gets a copy of its
        description with the data added as 'value'. With allFields data keys
        without description are added too (except keys starting with '@').
        '@fieldList' contains the keys in order, '@type' the type of data.
        """
        fields = self.getMapFields(data)
        fieldList = fields.get('@fieldList', [])
        mappedData = {}
        mappedList = []
        for bk in fieldList:
            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = fields[bk].copy()
            # add value
            bf['value'] = data[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(data):
            # add fields that were not in fields
            for bk in data.keys():
                if bk in mappedData or not data[bk] or bk[0]=='@':
                    continue

                mappedData[bk] = {'tag':bk, 'label':bk, 'value':data[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        mappedData['@type'] = data.get('@type', None)
        return mappedData

    def getDCMappedData(self, data, allFields=False):
        """returns dict with DC keys and data from data

        Uses the 'dcmap' entry of each field description as key. Values of
        several fields mapped to the same DC key are joined with '/'.
        allFields is accepted but not used.
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk in fields.keys():
            # skip bookkeeping entries like @fieldList (not field descriptions)
            if bk.startswith('@'):
                continue

            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description
            dc = fields[bk].get('dcmap', None)
            if dc:
                # add value
                if dc in dcData:
                    # key exists - append
                    dcData[dc] += '/' + data[bk]
                else:
                    dcData[dc] = data[bk]

        return dcData

    def getFormatted(self, template, path=None, dom=None, data=None, xdata=None, allFields=False):
        """returns string with document data formatted according to template.
           gets data from server or dom or pre-parsed data.

        The template is looked up as attribute TEMPLATE_TYPE and then
        TEMPLATE_generic. Returns an empty string if there is no data or no
        template.
        """
        logging.debug("getFormatted(template=%s, allFields=%s)"%(template,allFields))

        # get contents of tag
        if data is None:
            data = self.getData(path=path, dom=dom)
            if data is None:
                # no data
                logging.error("getFormatted: no data for template: %s"%(template))
                return ""

        mdType = data.get('@type', '')

        # get template
        tp = getattr(self, "%s_%s"%(template, normalizeFieldName(mdType)), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s!"%(template, normalizeFieldName(mdType)))
            # try generic
            tp = getattr(self, "%s_generic"%(template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s"%(template))
                return ""

        if mdType:
            # put field descriptions in mdHash
            fields = self.getMappedData(data, allFields=allFields)
        else:
            fields = {}

        return tp(mdmap=fields, md=data, mdx=xdata)

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """convenience method to modify a path

        Removes all occurrences of remove, prepends prefix (joined with '/')
        and cuts off the last cut path segments, in this order.
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = "%s/%s"%(prefix, path)

        if cut > 0:
            splitted = path.split("/")
            path = "/".join(splitted[0:len(splitted)-cut])

        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """imports metadata from the metadataexportxml file

        Without importFile the import form is returned. Raises ValueError if
        the file contains no metadataExport element.
        """
        if importFile is None:
            pt = PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")

        if node is None:
            # fail with a clear message instead of a TypeError further down
            logging.error("importMetaDataExportXML: no metadataExport element found!")
            raise ValueError("no metadataExport element in import file")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """creates mappings from the metadata node of the XML format

        Every child "set" element becomes a MetaDataMapping and every child
        "metadata" element becomes a nested MetaData object (processed
        recursively). metadata is the MetaData object the new objects are
        added to (default: self).
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s"%repr(node))
            if node.tag == "set":
                setName = node.get('name')
                argList = {}
                for entry in node:
                    genericName = entry.get('genericName')
                    if setName == 'generic':
                        # generic mapping doesn't have labels
                        tag = genericName
                        label = genericName
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    description = getText(entry)
                    argList[tag] = {'tag':tag,'label':label,'explanation':description,'status':'optional'}

                logging.debug("createMappingFromDom: new mapping=%s"%repr(argList))
                metadata._setObject(setName, MetaDataMapping(setName, setName, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s"%repr(name))
                metadata._setObject(name, MetaData(name, name))
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())

    # NOTE(review): declarePublic makes this configuration method callable
    # without permission checks - confirm that this is intended.
    security.declarePublic('changeMetaData')
    def changeMetaData(self, title=None, shortDescription=None, description=None, mappingSelectAttribute=None, fields=None, metaDataServerUrl=None, RESPONSE=None):
        """Change Metadata

        Overwrites title, shortDescription, description,
        mappingSelectAttribute and metaDataServerUrl with the given values;
        fieldList is only replaced if fields is non-empty.
        """
        self.title = title
        self.shortDescription = shortDescription
        self.description = description
        self.mappingSelectAttribute = mappingSelectAttribute
        if fields:
            self.fieldList = fields.split(",")[0:]

        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """interface for adding the Metadata"""
    pt = PageTemplateFile('zpt/addMetadataForm', globals()).__of__(self)
    return pt()


def manage_addMetaData(self, id, title=None, shortDescription=None, description=None, fields=None, RESPONSE=None):
    """adds a new MetaData object with the given id to self.Destination()"""
    newObj = MetaData(id, title=title, shortDescription=shortDescription, description=description, fields=fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')