Mercurial > hg > MetaDataProvider
view MetaData.py @ 25:64b703d1b8a4
Merge with d036de7fd78d45d9558dfccb88042cbec83d4b5e
author | casties |
---|---|
date | Fri, 27 Jul 2012 12:53:51 +0200 |
parents | d036de7fd78d |
children | a19575be96e8 |
line wrap: on
line source
import logging
import os
import xml.etree.ElementTree as ET

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Globals import package_home
from AccessControl import ClassSecurityInfo

from MetaDataMapping import MetaDataMapping
from SrvTxtUtils import getHttpData, getText


def normalizeFieldName(bt, underscore=True):
    """Return the normalised field type used for looking up mappings.

    Surrounding whitespace is stripped, blanks become '-', and the result is
    lower-cased. When *underscore* is true, '_' is replaced by '-' as well.
    """
    bt = bt.strip().replace(' ', '-').lower()
    if underscore:
        bt = bt.replace('_', '-')
    return bt


def putAppend(hash, key, value):
    """Store value in dict hash at key, collecting repeated keys in a list.

    If key is not present the value is stored as is. If key is present and
    already holds a list the value is appended; otherwise the old and the new
    value are put into a new two-element list. The dict is modified in place
    and also returned.
    """
    if key in hash:
        oldval = hash[key]
        if isinstance(oldval, list):
            # is a list already - append
            oldval.append(value)
        else:
            # second value for this key - start a list
            hash[key] = [oldval, value]
    else:
        # first value for this key
        hash[key] = value
    return hash


class MetaData(Folder):
    """provides basic methods for managing metadata structures"""

    meta_type = 'MetaData'
    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'Main Config', 'action': 'changeMetaDataForm'},
        {'label': 'Import XML Schema', 'action': 'importMetaDataExportXML'},
        #{'label':'Select Fields for Display','action':'indicateDisplayFieldsForm'},
    )

    # the name of the attribute that can be used to select a mapping
    # (if applicable)
    mappingSelectAttribute = None

    def __init__(self, id, title=None, shortDescription='', description='', fields=''):
        """initialize a new instance

        @param id: object id; also used as title (tag name) if no title is given
        @param title: tag name of the element
        @param shortDescription: label for the link on the add page
        @param description: description of the method for the link page
        @param fields: comma separated list of field names
        """
        self.id = id
        # title is the tag name; fall back to the id
        if title:
            self.title = title
        else:
            self.title = id
        self.shortDescription = shortDescription
        self.description = description
        if fields:
            self.fieldList = fields.split(",")[0:]
        # metaDataServerUrl is not set here; it has to be set via changeMetaData

    def getFieldList(self):
        """returns fieldList as comma separated string"""
        # fieldList is only set when fields were given to __init__ or
        # changeMetaData, so fall back to an empty list
        fieldList = getattr(self, 'fieldList', None) or []
        return ','.join(fieldList)

    def getTagName(self):
        """returns the tag name of this element"""
        if self.title:
            return self.title
        else:
            return self.shortDescription

    def getXmlPath(self, omitRoot=False):
        """returns the xpath to this element

        The path is built by walking up through parents with the same
        meta_type. With omitRoot the topmost element contributes an empty
        string instead of its own tag name.
        """
        path = '/%s' % self.getTagName()
        parent = self.aq_parent
        if parent.meta_type == self.meta_type:
            # parent is a MetaData object too - prepend its path
            path = parent.getXmlPath(omitRoot=omitRoot) + path
        elif omitRoot:
            # this is the root element and it should be left out
            return ''
        return path

    def getSubDom(self, path=None, dom=None, all=False):
        """returns the subtree (list) of the dom rooted in this element

        @param path: passed to self.getDomFromPathOrUrl() if dom is None
        @param dom: dom to search in
        @param all: return list of all matching elements (findall) instead
            of the first match (find)
        """
        if dom is None:
            # get from server
            # NOTE(review): getDomFromPathOrUrl is not defined in this class;
            # presumably provided by a subclass or acquisition - confirm
            dom = self.getDomFromPathOrUrl(path)

        # ElementTree doesn't like absolute paths
        # lets assume dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True)
        logging.debug("getSubDom looking for %s in %s", xpath, dom)
        if all:
            return dom.findall(xpath)
        return dom.find(xpath)

    def _getData(self, elem, recursive, normalizeNames=False, all=False, allText=False):
        """helper method for getData()

        Returns a dict for elem with its attributes in '@attr', its text in
        '@text' and one entry per child element (keyed by tag name).
        """
        data = {}
        # put attributes in @attr
        attr = {}
        for attname in elem.keys():
            attr[attname] = elem.get(attname)
        if attr:
            data['@attr'] = attr

        # put text in @text
        if elem.text:
            data['@text'] = elem.text

        # put all child elements in data
        for e in elem:
            if normalizeNames:
                key = normalizeFieldName(e.tag)
            else:
                key = e.tag

            if recursive > 0:
                # more recursion levels left - descend into the child
                val = self._getData(e, recursive=recursive - 1,
                                    normalizeNames=normalizeNames,
                                    all=all, allText=allText)
            else:
                val = getText(e, recursive=allText)

            if all:
                # collect multiple tags with the same name in a list
                putAppend(data, key, val)
            else:
                # later tags with the same name overwrite earlier ones
                data[key] = val

        return data

    def getDRI(self, path=None, type="escidoc", dom=None):
        """returns the DRI of an document, i.e. the content of the dri tag
        with the given type.

        Returns None if neither path nor dom is given or if no matching dri
        tag is found.
        """
        if path is None and dom is None:
            return None

        if dom is None:
            # get from server
            dom = self.getDomFromPathOrUrl(path)

        # ElementTree doesn't like absolute paths
        # lets assume dom is rooted in the first element
        xpath = '.' + self.getXmlPath(omitRoot=True) + '/dri[@type="%s"]' % type
        logging.debug("getDRI looking for %s in %s", xpath, dom)
        elem = dom.find(xpath)
        if elem is None:
            return None
        return getText(elem)

    def _addSelectType(self, record):
        """helper for getData(): copies the normalised value of the
        mappingSelectAttribute from record['@attr'] to record['@type']"""
        attr = record.get('@attr', None)
        if attr:
            typeName = attr.get(self.mappingSelectAttribute, None)
            if typeName is not None:
                record['@type'] = normalizeFieldName(typeName)

    def getData(self, path=None, dom=None, normalizeNames=True, all=False, recursive=0, allText=0):
        """Returns dict with attributes and child elements from corresponding tag.

        @param path: file or url path to metadata file
        @param dom: dom of metadata
        @param normalizeNames: normalize tag names
        @param all: put contents of tags with the same name in list value
            (the result is then a list with one dict per matching element)
        @param recursive: number of recursions. 0=just children
        @param allText: get text content of all subelements
        @returns: dict with attributes (key=@attr) and child elements (key=tag),
            or None if there is no input or no matching element
        """
        if path is None and dom is None:
            return None

        elem = self.getSubDom(path=path, dom=dom, all=all)
        if elem is None:
            return None

        if all:
            # subdom is list - return list
            data = [self._getData(e, recursive=recursive,
                                  normalizeNames=normalizeNames,
                                  all=all, allText=allText)
                    for e in elem]
            records = data
        else:
            # subdom is element
            data = self._getData(elem, recursive=recursive,
                                 normalizeNames=normalizeNames,
                                 all=all, allText=allText)
            records = [data]

        if self.mappingSelectAttribute:
            # put type in @type; done per record because data is a list
            # (which has no .get) when all is set
            for record in records:
                self._addSelectType(record)

        return data

    def getMapping(self, type):
        """returns MetaDataMapping for type

        Tries an attribute with the name type first, then looks through the
        contained objects with meta_type "MetadataMapping" for one whose
        title matches type, either literally or after normalisation.
        Returns None if nothing matches.
        """
        # try type as id
        mapping = getattr(self, type, None)
        if mapping is not None:
            return mapping

        # try manually
        normType = normalizeFieldName(type, underscore=True)
        for obj in self.objectValues():
            if obj.meta_type != "MetadataMapping":
                continue
            # real type is in title
            mapType = obj.title
            if mapType == type:
                # type matches as is
                return obj
            if normalizeFieldName(mapType, underscore=True) == normType:
                # type matches after normalisation
                return obj

        return None

    def getMapFields(self, data):
        """returns dict with metadata description for data

        The mapping is selected by data['@type']. The result is a copy of the
        field descriptions of the mapping plus the field list in
        '@fieldList'. Returns an empty dict (and logs an error) if data has
        no @type or there is no mapping for it.
        """
        fields = {}
        mapType = data.get('@type', None)
        if not mapType:
            logging.error("getMapFields: no @type!")
            return fields

        # get mapping from main/meta/bib
        mapping = self.getMapping(mapType)
        if mapping is None:
            logging.error("getMapFields: no mapping for type: %s", mapType)
            return fields

        # get field descriptions (copy so we can change it)
        fields = mapping.getFields().copy()
        # add field list
        fields['@fieldList'] = mapping.getFieldList()
        return fields

    def getMappedData(self, data, allFields=False):
        """returns dict with metadata descriptions and data for data

        Each entry is a copy of the field description with the data added as
        'value'. The keys in display order are in '@fieldList' and the type
        of data in '@type'. With allFields the non-empty fields of data that
        have no description are added with generic descriptions.
        """
        fields = self.getMapFields(data)
        # getMapFields returns an empty dict if there is no type or mapping
        fieldList = fields.get('@fieldList', [])
        mappedData = {}
        mappedList = []
        for bk in fieldList:
            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description (copy so we can change it)
            bf = fields[bk].copy()
            # add value
            bf['value'] = data[bk]
            mappedData[bk] = bf
            mappedList.append(bk)

        if allFields and len(mappedData) < len(data):
            # add fields that were not in fields
            for bk in data.keys():
                if bk in mappedData or not data[bk] or bk.startswith('@'):
                    # already mapped, empty, or internal key
                    continue

                mappedData[bk] = {'tag': bk, 'label': bk, 'value': data[bk]}
                mappedList.append(bk)

        mappedData['@fieldList'] = mappedList
        mappedData['@type'] = data.get('@type', None)
        return mappedData

    def getDCMappedData(self, data, allFields=False):
        """returns dict with DC keys and data from data

        Uses the 'dcmap' entry of each field description as key. Values of
        several fields that map to the same DC key are joined with '/'.
        (allFields is currently not used.)
        """
        fields = self.getMapFields(data)
        dcData = {}
        for bk in fields.keys():
            if bk.startswith('@'):
                # internal key like @fieldList - not a field description
                continue

            # ignore descriptions without data
            if not data.get(bk, None):
                continue

            # field description
            dc = fields[bk].get('dcmap', None)
            if dc:
                # add value
                if dcData.get(dc, None):
                    # key exists - append
                    dcData[dc] += '/' + data[bk]
                else:
                    dcData[dc] = data[bk]

        return dcData

    def getFormatted(self, template, path=None, dom=None, data=None, allFields=False):
        """returns string with document data formatted according to template.
        gets data from server or dom or pre-parsed data.

        The template is looked up as attribute "<template>_<type>" with
        "<template>_generic" as fallback. Returns an empty string if there is
        no data or no template.
        """
        logging.debug("getFormatted(template=%s)", template)
        logging.debug(self.absolute_url())
        logging.debug(self.__dict__)

        # get contents of tag
        if data is None:
            data = self.getData(path=path, dom=dom)
            if data is None:
                # no data
                logging.error("getFormatted: no data for template: %s", template)
                return ""

        mdType = data.get('@type', '')

        # get template
        tp = getattr(self, "%s_%s" % (template, normalizeFieldName(mdType)), None)
        if tp is None:
            logging.warning("getFormatted: no template for: %s_%s!",
                            template, normalizeFieldName(mdType))
            # try generic
            tp = getattr(self, "%s_generic" % (template), None)
            if tp is None:
                logging.error("getFormatted: no generic template either: %s", template)
                return ""

        if mdType:
            # put field descriptions in mdmap
            fields = self.getMappedData(data, allFields=allFields)
        else:
            fields = {}

        logging.debug("XXXX")
        logging.debug(tp)
        return tp(mdmap=fields, md=data)

    def correctPath(self, path, remove=None, prefix=None, cut=0):
        """convenience method to modify a path

        @param remove: substring that is removed from path (all occurrences)
        @param prefix: path that is put in front of path (os.path.join)
        @param cut: number of trailing '/'-separated parts to cut off
        """
        if remove is not None:
            path = path.replace(remove, '')

        if prefix is not None:
            path = os.path.join(prefix, path)

        if cut > 0:
            parts = path.split("/")
            path = "/".join(parts[0:len(parts) - cut])

        return path

    def importMetaDataExportXML(self, importFile=None, RESPONSE=None):
        """imports metadata from the metadataexportxml file

        Shows the import form if no importFile is given. Raises ValueError
        if the file contains no metadataExport element.
        """
        if importFile is None:
            pt = PageTemplateFile('zpt/importMetaDataExportXML', globals()).__of__(self)
            return pt()

        dom = ET.parse(importFile)
        node = dom.getroot()
        if node.tag != 'metadataExport':
            node = dom.find("metadataExport")
            if node is None:
                # fail with a clear message instead of iterating over None
                logging.error("importMetaDataExportXML: no metadataExport element!")
                raise ValueError("no metadataExport element in import file")

        self.createMappingFromDom(node)

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def createMappingFromDom(self, metadatanode, metadata=None):
        """creates mappings from the children of metadatanode (in the
        metadata export XML format); metadata is the MetaData object the new
        objects are added to (default: self)

        A "set" child becomes a MetaDataMapping, a "metadata" child becomes a
        nested MetaData object that is filled recursively.
        """
        if metadata is None:
            metadata = self

        for node in metadatanode:
            logging.debug("node: %s", repr(node))

            if node.tag == "set":
                setName = node.get('name')
                argList = {}
                for entry in node:
                    genericName = entry.get('genericName')
                    if setName == 'generic':
                        # generic mapping doesn't have labels
                        tag = genericName
                        label = genericName
                    else:
                        tag = entry.get('tag')
                        label = entry.get('label')

                    if not tag:
                        # ignore empty tags
                        continue

                    description = getText(entry)
                    argList[tag] = {'tag': tag, 'label': label,
                                    'explanation': description,
                                    'status': 'optional'}

                logging.debug("createMappingFromDom: new mapping=%s", repr(argList))
                metadata._setObject(setName, MetaDataMapping(setName, setName, argList))

            elif node.tag == "metadata":
                name = node.get('name')
                logging.debug("createMappingFromDom: new metadata=%s", repr(name))
                metadata._setObject(name, MetaData(name, name))
                # fetch the object again from its container before filling it
                mdObj = getattr(metadata, name)
                mdObj.createMappingFromDom(node)

    security.declarePublic('changeMetaDataForm')
    changeMetaDataForm = PageTemplateFile('zpt/changeMetadata', globals())

    # NOTE(review): changeMetaData modifies the object but is declared public - confirm this is intended
    security.declarePublic('changeMetaData')
    def changeMetaData(self, title=None, shortDescription=None, description=None, mappingSelectAttribute=None, fields=None, metaDataServerUrl=None, RESPONSE=None):
        """Change Metadata"""
        self.title = title
        self.shortDescription = shortDescription
        self.description = description
        self.mappingSelectAttribute = mappingSelectAttribute
        if fields:
            # an empty fields value leaves the existing fieldList unchanged
            self.fieldList = fields.split(",")[0:]
        self.metaDataServerUrl = metaDataServerUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_addMetaDataForm(self):
    """interface for adding the Metadata"""
    pt = PageTemplateFile('zpt/addMetadataForm', globals()).__of__(self)
    return pt()


def manage_addMetaData(self, id, title=None, shortDescription=None, description=None, fields=None, RESPONSE=None):
    """adds a new MetaData object with the given id to the destination folder"""
    newObj = MetaData(id, title=title, shortDescription=shortDescription,
                      description=description, fields=fields)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')