Mercurial > hg > openmind
view src/main/java/org/mpi/openmind/repository/utils/ImportOM3Util.java @ 127:3e772f7f43e0 default tip
ismi-date with long month names in xml dump.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 11 May 2023 18:15:45 +0200 |
parents | 615d27dce9b3 |
children |
line wrap: on
line source
package org.mpi.openmind.repository.utils; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.mpi.openmind.cache.WrapperService; import org.mpi.openmind.repository.bo.Attribute; import org.mpi.openmind.repository.bo.Entity; import org.mpi.openmind.repository.bo.Relation; import org.mpi.openmind.repository.services.ServiceRegistry; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * * @author jurzua */ public class ImportOM3Util { private static Map<String, Entity> assertions; public static void importAssertions(WrapperService ontologyService, String fileName, Boolean dropAssertions) { Long start = System.currentTimeMillis(); System.out.println("Importing Assertions: "); assertions = new HashMap<String, Entity>(); int entitiesInput = 0; int entitiesSaved = 0; DecimalFormat df = new DecimalFormat("#.##"); List<Entity> assertions = new ArrayList<Entity>(); List<ImportEntityRelation> relations = new ArrayList<ImportEntityRelation>(); Map<String, Entity> conceptCache = new HashMap<String, Entity>(); Map<Long, Entity> assertionCache = new HashMap<Long, Entity>(); try { if (dropAssertions) { ontologyService.getPS().dropAssertions(); } Document doc = XMLUtil.getDocument(fileName); NodeList list = doc.getElementsByTagName(XMLUtil.OPENMIND_DATA); if (list.getLength() > 0) { Node rootElement = list.item(0); // importing entities Node entsNode = XMLUtil.getNodeByName(rootElement .getChildNodes(), XMLUtil.ENTITIES); if (entsNode != null) { int counter = 0; for (int i = 0; i < entsNode.getChildNodes().getLength() ; i++) { counter++; double percent = ((double) counter / (double) entsNode .getChildNodes().getLength()) * 100.0; // if ((counter % 20) == 0) { // System.out.print("*"); // } // if ((percent % 10) < 0.005) { // System.out.println("\n[" + df.format(percent) // + " %] counter: " + counter + " / " // + entsNode.getChildNodes().getLength()); // } Node entNode = entsNode.getChildNodes().item(i); if (entNode.getNodeName().equals(XMLUtil.ENTITY)) { entitiesInput++; Entity assertion = getEntityFromNode( ontologyService, entNode, conceptCache, assertionCache); if (assertion != null) { // System.out.println("Assertion built: " + // ontologyService.saveAssertion(assertion)); // ontologyService.saveAssertion(assertion); assertions.add(assertion); //now the relations come an the end of the file //getRelationsOfAssertion(entNode, relations); entitiesSaved++; // concepts.put(concept.getOwnValue(), concept); } } } // System.out.println("---------------------------"); // this call improves the performace of the insertion, // cause it is executed by a single transaction. ontologyService.saveEntityList(assertions, "import-om3"); } Node relsNode = XMLUtil.getNodeByName(rootElement .getChildNodes(), XMLUtil.RELATIONS); getRelationsOfAssertion(relsNode, relations); List<org.mpi.openmind.repository.bo.Node> relationList = new ArrayList<org.mpi.openmind.repository.bo.Node>(); for (ImportEntityRelation importRel : relations) { // iff there exist the target and the source entity, this // relation will be saved if (assertionCache.get(importRel.getSourceId()) != null && assertionCache.get(importRel.getTargetId()) != null) { Relation relation = new Relation(); relation.setOwnValue(importRel.getLabel()); relation.setSystemStatus(org.mpi.openmind.repository.bo.Node.SYS_STATUS_CURRENT_VERSION); Entity src = assertionCache.get(importRel.getSourceId()); relation.setSourceId(src.getId()); relation.setSourceModif(src.getModificationTime()); relation.setSourceObjectClass(src.getObjectClass()); Entity tar = assertionCache.get(importRel.getTargetId()); relation.setTargetId(tar.getId()); relation.setTargetModif(tar.getModificationTime()); relation.setTargetObjectClass(tar.getObjectClass()); relation.setType(org.mpi.openmind.repository.bo.Node.TYPE_ABOX); relationList.add(relation); //System.out.println("Saving: " + importRel); } else { System.err .println("ImportEntityRelation could not be saved." + importRel); } } // this second save will make persistent the associations. //ontologyService.saveEntityList(assertions); ontologyService.saveNodeListOnlyForScripts(relationList); } else { throw new IllegalArgumentException( "The document is not well formed"); } } catch (Exception e) { e.printStackTrace(); } Long end = System.currentTimeMillis(); Long diff = end - start; System.out.println("\n###################################"); System.out.println("Summary Entities Importing:"); System.out.println("Entities input: " + entitiesInput); System.out.println("Entities saved: " + entitiesSaved); System.out.println("Time execution: " + diff / (60 * 60 * 1000) + "[hours] " + diff / (60 * 1000) + "[min] " + diff / 1000 + "[sec]"); System.out.println("###################################\n"); } private static Entity getEntityFromNode(WrapperService ontologyService, Node node, Map<String, Entity> conceptCache, Map<Long, Entity> assertionCache) throws Exception { Entity assertion = null; Long id = null; if (node.getAttributes() != null) { assertion = new Entity( org.mpi.openmind.repository.bo.Node.TYPE_ABOX, false); String objectClass = node.getAttributes().getNamedItem( XMLUtil.OBJECT_CLASS).getNodeValue(); if (StringUtils.isNotEmpty(objectClass)) { assertion.setObjectClass(objectClass); /* Entity concept = conceptCache.get(objectClass); if (concept == null) { concept = ontologyService.getConceptByOwnValue(objectClass); conceptCache.put(objectClass, concept); assertion.setObjectClass(concept.getOwnValue()); } // concept.setOwnValue(node.getAttributes().getNamedItem(XMLUtil.OBJECT_CLASS).getNodeValue()); if (concept == null) { System.out.println("Concept no found: " + node.getAttributes().getNamedItem( XMLUtil.OBJECT_CLASS)); } else { assertion.setConcept(concept); } */ } String ownValue = node.getAttributes().getNamedItem( XMLUtil.OWN_VALUE).getNodeValue(); assertion.setOwnValue(ownValue); if (node.getAttributes().getNamedItem("id") != null) { String stringId = node.getAttributes().getNamedItem("id") .getNodeValue(); id = new Long(stringId); } for (int i = 0; i < node.getChildNodes().getLength(); i++) { Node nodeAtt = node.getChildNodes().item(i); if (nodeAtt.getNodeName().equals(XMLUtil.ATTRIBUTES)) { addAttributesToAssertion(assertion, nodeAtt); } // TODO ask MPI, if instance an insertion of ownvalue, here will // be created a new att. if (nodeAtt.getNodeName().equals(XMLUtil.ATTRIBUTE_VALUE)) { assertion.setOwnValue(nodeAtt.getFirstChild() .getNodeValue()); } } } if (id != null) { assertionCache.put(id, assertion); } return assertion; } private static void getRelationsOfAssertion(Node node, List<ImportEntityRelation> relations) { if (node.getAttributes() != null) { for (int i = 0; i < node.getChildNodes().getLength(); i++) { Node relsNode = node.getChildNodes().item(i); if (relsNode.getNodeName().equals(XMLUtil.RELATIONS)) { for (int j = 0; j < relsNode.getChildNodes().getLength(); j++) { Node relNode = relsNode.getChildNodes().item(j); String label = null; String sourceId = null; String targetId = null; if (relNode.getAttributes().getNamedItem(XMLUtil.LABEL) != null) { label = relNode.getAttributes().getNamedItem( XMLUtil.LABEL).getNodeValue(); } if (relNode.getAttributes().getNamedItem( XMLUtil.RELATION_SOURCE_ID) != null) { sourceId = relNode.getAttributes().getNamedItem( XMLUtil.RELATION_SOURCE_ID).getNodeValue(); } if (relNode.getAttributes().getNamedItem( XMLUtil.RELATION_TARGET_ID) != null) { targetId = relNode.getAttributes().getNamedItem( XMLUtil.RELATION_TARGET_ID).getNodeValue(); } if (StringUtils.isNotEmpty(label) && StringUtils.isNotEmpty(sourceId) && StringUtils.isNotEmpty(targetId)) { try { ImportEntityRelation r = new ImportEntityRelation(); r.setSourceId(new Long(sourceId)); r.setTargetId(new Long(targetId)); r.setLabel(label); relations.add(r); } catch (Exception e) { System.err.println(e.getMessage()); } } } } } } } private static Entity addAttributesToAssertion(Entity assertion, Node nodeAtts) { if (nodeAtts.getAttributes() != null) { for (int i = 0; i < nodeAtts.getChildNodes().getLength(); i++) { Node attNode = nodeAtts.getChildNodes().item(i); Attribute att = new Attribute(); //if(attNode.getFirstChild() != null) // att.setOwnValue(attNode.getFirstChild().getNodeValue()); if(attNode.getAttributes() != null){ if (attNode.getAttributes().getNamedItem(XMLUtil.ATTRIBUTE_VALUE) != null) { att.setOwnValue(attNode.getAttributes().getNamedItem( XMLUtil.ATTRIBUTE_VALUE).getNodeValue()); } if (attNode.getAttributes().getNamedItem(XMLUtil.CONTENT_TYPE) != null) { att.setContentType(attNode.getAttributes().getNamedItem( XMLUtil.CONTENT_TYPE).getNodeValue()); } if (attNode.getAttributes() .getNamedItem(XMLUtil.ATTRIBUTE_NAME) != null) { att.setObjectClass(attNode.getAttributes().getNamedItem( XMLUtil.ATTRIBUTE_NAME).getNodeValue()); } } assertion.addAttribute(att); } } return assertion; } private static Entity getConceptFromNode(Node node) { Entity concept = null; if (node.getAttributes() != null) { concept = new Entity(); concept.setType(org.mpi.openmind.repository.bo.Node.TYPE_TBOX); concept.setObjectClass(org.mpi.openmind.repository.bo.Node.TYPE_TBOX); if (node.getAttributes().getNamedItem(XMLUtil.OBJECT_CLASS) != null) { concept.setOwnValue(node.getAttributes().getNamedItem( XMLUtil.OBJECT_CLASS).getNodeValue()); } /* if (node.getAttributes().getNamedItem(XMLUtil.OWN_VALUE) != null) { concept.setOwnValue(node.getAttributes().getNamedItem( XMLUtil.OWN_VALUE).getNodeValue()); } */ /* for (int i = 0; i < node.getChildNodes().getLength(); i++) { Node nodeAtt = node.getChildNodes().item(i); if (nodeAtt.getNodeName().equals(XMLUtil.ATTRIBUTES)) { addAttributesToAssertion(concept, nodeAtt); } }*/ // if(atts.getNamedItem(XMLUtil.MAIN_LABEL) != null){ // def.setObjectClass(atts.getNamedItem(XMLUtil.OBJECT_CLASS).getNodeValue()); // } for (int i = 0; i < node.getChildNodes().getLength(); i++) { Node labelNode = node.getChildNodes().item(i); if (labelNode.getNodeName().equals(XMLUtil.LABEL)) { addAttributeToConcept(concept, labelNode); } } } return concept; } // ##################################################### // ##################################################### // ##################################################### // ##################################################### // ##################################################### // ##################################################### // ##################################################### private static Map<String, Entity> concepts; static public void importConcepts(WrapperService ontologyService, String fileName, Boolean dropConcepts) { Long start = System.currentTimeMillis(); System.out.println("Importing Concepts:"); concepts = new HashMap<String, Entity>(); int conceptsInput = 0; int conceptsSaved = 0; try { if (dropConcepts) { ontologyService.getPS().dropDefinitions(); } Document doc = XMLUtil.getDocument(fileName); NodeList list = doc.getElementsByTagName(XMLUtil.META_DATA); if (list.getLength() > 0) { Node rootElement = list.item(0); // importing definitions Node defsNode = XMLUtil.getNodeByName(rootElement .getChildNodes(), XMLUtil.DEFINITIONS); if (defsNode != null) { // conceptsInput = defsNode.getChildNodes().getLength(); for (int i = 0; i < defsNode.getChildNodes().getLength(); i++) { Node defNode = defsNode.getChildNodes().item(i); // System.out.println("\t" + defNode.getNodeName()); if (defNode.getNodeName().equals(XMLUtil.DEFINITION)) { conceptsInput++; Entity concept = getConceptFromNode(defNode); if (concept != null) { // System.out.println("Concept built: " + // ontologyService.saveConcept(concept)); //ontologyService.saveConcept(concept); concepts.put(concept.getOwnValue(), concept); conceptsSaved++; } } } for(Entity e : concepts.values()){ ontologyService.saveConcept(e); } } // importing relation between defintions // note: only if definitions were found, has to be imported the // relations. List<org.mpi.openmind.repository.bo.Node> relationList = new ArrayList<org.mpi.openmind.repository.bo.Node>(); Node relsNode = XMLUtil.getNodeByName(rootElement .getChildNodes(), XMLUtil.RELATIONS); if (relsNode != null) { for (int j = 0; j < relsNode.getChildNodes().getLength(); j++) { Node nodeRel = relsNode.getChildNodes().item(j); Relation relation = getRelationFromNode(nodeRel); if(relation != null) relationList.add(relation); } ontologyService.saveNodeListOnlyForScripts(relationList, "import"); } } else { throw new IllegalArgumentException( "The document is not well formed"); } } catch (Exception e) { e.printStackTrace(); } Long end = System.currentTimeMillis(); Long diff = end - start; System.out.println("###################################"); System.out.println("Summary Concepts Importing:"); System.out.println("Concepts input: " + conceptsInput); System.out.println("Concepts saved: " + conceptsSaved); System.out.println("Time execution: " + diff / (60 * 60 * 1000) + "[hours] " + diff / (60 * 1000) + "[min] " + diff / 1000 + "[sec]"); System.out.println("###################################"); } private static Relation getRelationFromNode(Node node) { Relation relation = null; if (node.getAttributes() != null) { Entity source = null; Entity target = null; String ownValue = null; if (node.getAttributes().getNamedItem(XMLUtil.RELATION_SOURCE) != null) { source = concepts.get(node.getAttributes().getNamedItem( XMLUtil.RELATION_SOURCE).getNodeValue()); } if (node.getAttributes().getNamedItem(XMLUtil.RELATION_TARGET) != null) { target = concepts.get(node.getAttributes().getNamedItem( XMLUtil.RELATION_TARGET).getNodeValue()); } if (node.getAttributes().getNamedItem(XMLUtil.RELATION_LABEL) != null) { ownValue = node.getAttributes().getNamedItem( XMLUtil.RELATION_LABEL).getNodeValue(); } if (source != null && target != null && StringUtils.isNotEmpty(ownValue)) { relation = new Relation(); relation.setSource(source); relation.setTarget(target); relation.setOwnValue(ownValue); source.addSourceRelation(relation); } } return relation; } private static Entity addAttributeToConcept(Entity concept, Node label) { if (label.getAttributes() != null) { Attribute att = new Attribute(); if (label.getAttributes().getNamedItem(XMLUtil.LABEL_NAME) != null) { att.setOwnValue(label.getAttributes().getNamedItem( XMLUtil.LABEL_NAME).getNodeValue()); } if (label.getAttributes().getNamedItem(XMLUtil.CONTENT_TYPE) != null) { att.setContentType(label.getAttributes().getNamedItem( XMLUtil.CONTENT_TYPE).getNodeValue()); } concept.addAttribute(att); } return concept; } public static class ImportEntityRelation { String label; Long sourceId; Long targetId; public String getLabel() { return label; } public void setLabel(String label) { this.label = label; } public Long getSourceId() { return sourceId; } public void setSourceId(Long sourceId) { this.sourceId = sourceId; } public Long getTargetId() { return targetId; } public void setTargetId(Long targetId) { this.targetId = targetId; } @Override public String toString() { return sourceId + " <" + label + "> " + targetId; } } public static void main(String[] args) { ServiceRegistry registry = new ServiceRegistry(); registry.getPS().setImportModus(true); //importAssertions(registry.getOntologyService(), "/Users/jurzua/Projects/max-planck/openmind4/trunk/data/ismi-data-100414/country-colated-om4.xml", false); //importAssertions(registry.getOntologyService(), "/Users/jurzua/Projects/max-planck/openmind4/trunk/data/ismi-data-100414/output2.xml", false); importConcepts(registry.getWrapper(), "/Users/jurzua/Projects/max-planck/openmind4/trunk/data/ismi-data-100414/definitions-om3.xml", true); //importAssertions(registry.getOntologyService(), "/Users/jurzua/2010.10.14[06.00]-ENT.xml", true); registry.getPS().setImportModus(false); System.exit(0); } }