Mercurial > hg > MetaDataProvider
view MetaData.py @ 31:ab58edfc0707
more parameters and more comments.
author | casties |
---|---|
date | Mon, 01 Oct 2012 18:17:29 +0200 |
parents | 1cb439acd1e1 |
children | a0a147409f67 |
line wrap: on
line source
from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo
import logging
import os
import xml.etree.ElementTree as ET
import xml.sax.saxutils

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getHttpData, getText


def normalizeFieldName(bt, underscore=True):
    """Return the normalised form of a field/type name for mapping lookups.

    The name is stripped, spaces are replaced by '-', and it is lower-cased.

    @param bt: field or type name
    @param underscore: if true, '_' is replaced by '-' as well
    @returns: normalised name
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')
    return bt


def putAppend(hash, key, value):
    """Store value in dict hash at key, collecting repeated keys in a list.

    If key does not exist the plain value is stored. If it exists, the
    existing value is turned into a list (if it is not one already) and
    value is appended.

    @param hash: dict to modify (modified in place)
    @param key: key to set
    @param value: value to set or append
    @returns: the same dict
    """
    if key in hash:
        oldval = hash[key]
        if isinstance(oldval, list):
            # is list already - append
            oldval.append(value)
        else:
            # second value for this key - start a list
            hash[key] = [oldval, value]
    else:
        hash[key] = value
    return hash


class MetaData(Folder):
    """Provides basic methods for managing metadata structures.

    A MetaData object stands for one XML tag (its title is the tag name).
    Nested MetaData objects describe nested tags; contained mapping
    objects describe the fields of a metadata type.
    """

    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
    )

    # the name of the attribute that can be used to select a mapping
    # (if applicable)
    mappingSelectAttribute = None

    def __init__(self, id, title=None, shortDescription='', description='', fields=''):
        """Initialize a new instance.

        @param id: object id
        @param title: tag name; defaults to id
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of field names
        """
        self.id = id
        # title is tag name; assume id is tag name if no title is given
        if title:
            self.title = title
        else:
            self.title = id

        self.shortDescription = shortDescription
        self.description = description
        # fieldList is deliberately left unset when no fields are given
        # (same as before); getFieldList() copes with the missing attribute
        if fields:
            self.fieldList = fields.split(",")
        # metaDataServerUrl has to be set through changeMetaData()

    def getFieldList(self):
        """Return the field list as a comma separated string.

        Returns an empty string if no field list has been set (previously
        this raised AttributeError).
        """
        return ','.join(getattr(self, 'fieldList', None) or [])

    def getTagName(self):
        """Return the tag name of this element (title or shortDescription)."""
        if self.title:
            return self.title
        return self.shortDescription

    def getXmlPath(self, omitRoot=False):
        """Return the xpath to this element.

        The path is built by walking up the parents as long as they have
        the same meta_type as this object.

        @param omitRoot: if true, the topmost MetaData element contributes
            an empty string instead of its tag name
        """
        path = '/%s' % self.getTagName()
        parent = self.aq_parent
        if parent.meta_type == self.meta_type:
            # parent is a MetaData element too - prepend its path
            path = parent.getXmlPath(omitRoot=omitRoot) + path
        elif omitRoot:
            # this is the root element
            return ''
        return path

    def getSubDom(self, path=None, dom=None, all=False):
        """Return the subtree (or list of subtrees) of the dom rooted in this element.

        @param path: file or url path to the metadata file (used if dom is None)
        @param dom: dom of metadata
        @param all: return list of all matching elements instead of the first
        @returns: element, list of elements, or None
        """
        if dom is None:
            # get from server
            # NOTE(review): getDomFromPathOrUrl is not defined in this file;
            # presumably it is acquired from a parent object - confirm.
            dom = self.getDomFromPathOrUrl(path)
            if dom is None:
                logging.error("getSubDom: no dom for path: %s", path)
                return None

        # ElementTree doesn't like absolute paths
        # lets assume dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s", xpath, dom)
        if all:
            return dom.findall(xpath)
        return dom.find(xpath)

    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
        """Helper method for getData(): convert one element into a dict.

        Attributes go into key '@attr', the element's text into '@text' and
        every child element into a key named after its tag.
        """
        data = {}
        # put attributes in @attr
        attr = {}
        for attname in elem.keys():
            attr[attname] = elem.get(attname)
        if attr:
            data['@attr'] = attr

        # put text in @text
        if elem.text:
            data['@text'] = elem.text

        # put all child elements in data
        for child in elem:
            if normalizeNames:
                key = normalizeFieldName(child.tag)
            else:
                key = child.tag

            if recursive > 0:
                # more recursion - call _getData on child element
                val = self._getData(child, recursive=recursive - 1,
                                    normalizeNames=normalizeNames,
                                    all=all, allText=allText)
            else:
                val = getText(child, recursive=allText)

            if all:
                # add multiple tags as list
                putAppend(data, key, val)
            else:
                # last tag with the same name wins
                data[key] = val

        return data

    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
        """Return dict with attributes and child elements from corresponding tag.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param normalizeNames: normalize tag names
        @param all: put contents of tags with the same name in list value
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            a list of such dicts if all is true, or None if there is no data
        """
        if path is None and dom is None:
            return None

        elem = self.getSubDom(path=path, dom=dom, all=all)
        if elem is None:
            return None

        if all:
            # subdom is list - return list
            data = [self._getData(e, recursive=recursive,
                                  normalizeNames=normalizeNames,
                                  all=all, allText=allText)
                    for e in elem]
        else:
            # subdom is element
            data = self._getData(elem, recursive=recursive,
                                 normalizeNames=normalizeNames,
                                 all=all, allText=allText)

        # "and data" guards against an empty result list (data[0] used to
        # raise IndexError when findall() found nothing)
        if self.mappingSelectAttribute and data:
            # put type in @type (of the first element if data is a list)
            if all:
                target = data[0]
            else:
                target = data
            attr = target.get('@attr', None)
            if attr:
                selector = attr.get(self.mappingSelectAttribute, None)
                if selector is not None:
                    target['@type'] = normalizeFieldName(selector)

        return data

    def getDCDataFromPath(self, path):
        """Return DC mapped data from path to index.meta as XML string.

        Also sets CORS headers on the response if a request is available.
        """
        data = self.getData(path=path)
        if data is None:
            # no metadata found - produce an empty <bib> instead of failing
            logging.error("getDCDataFromPath: no data for path: %s", path)
            data = {}

        parts = ["""<bib xmlns:dc="http://dublincore.org/documents/dcmi-namespace/"> """]
        for key, value in self.getDCMappedData(data).items():
            parts.append("""<dc:%s>%s</dc:%s>""" % (key, xml.sax.saxutils.escape(value), key))
        parts.append("</bib>")
        ret = ''.join(parts)

        # REQUEST may not be available outside of a web request
        REQUEST = getattr(self, 'REQUEST', None)
        if REQUEST is not None:
            origin = REQUEST.getHeader('Origin')
            if origin:
                # credentials are only allowed with an explicit origin
                REQUEST.response.setHeader('Access-Control-Allow-Origin', origin)
                REQUEST.response.setHeader('Access-Control-Allow-Credentials', 'true')
            else:
                REQUEST.response.setHeader('Access-Control-Allow-Origin', '*')

            REQUEST.response.setHeader('Access-Control-Allow-Methods', 'GET, PUT, POST, DELETE, OPTIONS, HEAD')
            REQUEST.response.setHeader('Access-Control-Allow-Headers', 'Origin, Content-Type, Cache-Control, X-Requested-With')

        return ret

    def getMapping(self, type):
        """Return the mapping object for type (or None).

        Tries type as an object id first, then compares type with the
        titles of all contained mapping objects, both literally and
        normalized.
        """
        # try type as id
        mapping = getattr(self, type, None)
        if mapping is not None:
            return mapping

        # try manually
        wanted = normalizeFieldName(type, underscore=True)
        for obj in self.objectValues():
            if obj.meta_type != "MetadataMapping":
                continue
            # real type is in title
            mapType = obj.title
            if mapType == type:
                # type as is
                return obj
            if normalizeFieldName(mapType, underscore=True) == wanted:
                # normalized type
                return obj

        return None

    def getMapFields(self, data):
        """Return dict with metadata descriptions for data.

        The mapping is selected by data['@type']. The result contains the
        field descriptions of the mapping and the mapping's field list in
        key '@fieldList'. Returns an empty dict if there is no type or no
        mapping for the type.
        """
        fields = {}
        mdType = data.get('@type', None)
        if not mdType:
            logging.error("getMapFields: no @type!")
            return fields

        # get mapping from main/meta/bib
        mapping = self.getMapping(mdType)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s", mdType)
            return fields

        # get field descriptions (copy so we can change it)
        fields = mapping.getFields().copy()
        # add field list
        fields['@fieldList'] = mapping.getFieldList()
        return fields

    def getMappedData(self, data, allFields=False):
        """Return dict with metadata descriptions and data for data.

        @param data: dict as returned by getData()
        @param allFields: also add fields from data that are not in the mapping
        @returns: dict of field descriptions with additional key 'value';
            '@fieldList' holds the list of keys and '@type' the type
        """
        fields = self.getMapFields(data)
        # getMapFields returns {} without a mapping (used to be a KeyError)
        fieldList = fields.get('@fieldList', None) or []
        mappedData = {}
        mappedList = []
        for bk in fieldList:
            # ignore descriptions without data
            if not data.get(bk, None):
                continue
            # field description (copy so we can change it)
            bf = fields[bk].copy()
            # add value
            bf['value'] = data[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(data):
            # add fields that were not in fields
            for bk in data.keys():
                if bk in mappedData or not data[bk] or bk.startswith('@'):
                    continue
                mappedData[bk] = {'tag': bk, 'label': bk, 'value': data[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        mappedData['@type'] = data.get('@type', None)
        return mappedData

    def getDCMappedData(self, data, allFields=False):
        """Return dict with DC keys and data from data.

        Values of several fields mapped to the same DC key are joined
        with '/'.

        @param data: dict as returned by getData()
        @param allFields: currently unused
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk in fields.keys():
            # skip special keys like @fieldList (value is not a description)
            if bk.startswith('@'):
                continue
            # ignore descriptions without data
            if not data.get(bk, None):
                continue
            # field description
            dc = fields[bk].get('dcmap', None)
            if not dc:
                continue
            # look up the actual DC key (the code used the literal 'dc'
            # here, so values were overwritten instead of appended)
            if dcData.get(dc, None):
                # key exists - append
                dcData[dc] += '/' + data[bk]
            else:
                dcData[dc] = data[bk]

        return dcData

    def getFormatted(self, template, path=None, dom=None, data=None, xdata=None, allFields=False):
        """Return string with document data formatted according to template.

        Gets data from server or dom or pre-parsed data. The template is
        looked up as attribute "<template>_<type>" with a fallback to
        "<template>_generic".

        @returns: output of the template or "" if there is no data or template
        """
        logging.debug("getFormatted(template=%s)", template)

        # get contents of tag
        if data is None:
            data = self.getData(path=path, dom=dom)
            if data is None:
                # no data
                logging.error("getFormatted: no data for template: %s", template)
                return ""

        mdType = data.get('@type', '')

        # get template
        tpName = "%s_%s" % (template, normalizeFieldName(mdType))
        tp = getattr(self, tpName, None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s!", tpName)
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s", template)
                return ""

        if mdType:
            # put field descriptions in mdHash
            fields = self.getMappedData(data, allFields=allFields)
        else:
            fields = {}

        return tp(mdmap=fields, md=data, mdx=xdata)

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """Convenience method for modifying a path.

        @param path: the path
        @param remove: string that is removed from path (all occurrences)
        @param prefix: path that is prepended with os.path.join
        @param cut: number of trailing '/'-separated segments to remove
        @returns: the modified path
        """
        if remove is not None:
            path = path.replace(remove, '')
        if prefix is not None:
            path = os.path.join(prefix, path)
        if cut > 0:
            parts = path.split("/")
            # clamp so that cutting more segments than exist gives ""
            # instead of a negative slice index
            path = "/".join(parts[0:max(len(parts) - cut, 0)])
        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """Import metadata mappings from a metadataExport XML file.

        Shows the import form if importFile is None.

        @raises ValueError: if the file contains no metadataExport element
        """
        if importFile is None:
            pt = PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")
            if node is None:
                # fail with a clear message instead of a TypeError later
                raise ValueError("importMetaDataExportXML: no metadataExport element in import file")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """Create mappings from the metadata node of the XML format.

        Every "set" child becomes a MetaDataMapping object and every
        "metadata" child becomes a nested MetaData object (recursively).

        @param metadatanode: element whose children are processed
        @param metadata: MetaData object that gets the new objects
            (defaults to self)
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s", repr(node))

            if node.tag == "set":
                mappingId = node.get('name')
                # generic mapping doesn't have labels
                isGeneric = (mappingId == 'generic')
                argList = {}
                for entry in node:
                    if isGeneric:
                        tag = label = entry.get('genericName')
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    description = getText(entry)
                    argList[tag] = {'tag': tag, 'label': label, 'explanation': description, 'status': 'optional'}

                logging.debug("createMappingFromDom: new mapping=%s", repr(argList))
                metadata._setObject(mappingId, MetaDataMapping(mappingId, mappingId, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s", repr(name))
                metadata._setObject(name, MetaData(name, name))
                # fetch the stored object again to continue below it
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    # NOTE(review): the change form and method are declared public -
    # confirm that this is intended for a configuration method.
    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())

    security.declarePublic('changeMetaData')
    def changeMetaData(self, title=None, shortDescription=None, description=None, mappingSelectAttribute=None, fields=None, metaDataServerUrl=None, RESPONSE=None):
        """Change the configuration of this MetaData object.

        All attributes are overwritten with the given values, except
        fieldList, which only changes if fields is not empty.
        """
        self.title = title
        self.shortDescription = shortDescription
        self.description = description
        self.mappingSelectAttribute = mappingSelectAttribute
        if fields:
            self.fieldList = fields.split(",")
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """Return the form for adding a MetaData object."""
    pt = PageTemplateFile('zpt/addMetadataForm', globals()).__of__(self)
    return pt()


def manage_addMetaData(self, id, title=None, shortDescription=None, description=None, fields=None, RESPONSE=None):
    """Add a new MetaData object to the destination folder."""
    newObj = MetaData(id, title=title, shortDescription=shortDescription, description=description, fields=fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')