/*
 * view src/main/java/org/mpi/openmind/repository/utils/ImportOM3Util.java @ 75:e0be7c0030f5
 *
 * cleanup and better comments.
 * author casties
 * date Thu, 23 Feb 2017 19:05:47 +0100
 * parents 615d27dce9b3
 * children
 * line wrap: on
 * line source
 */

package org.mpi.openmind.repository.utils;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Relation;
import org.mpi.openmind.repository.services.ServiceRegistry;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

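/**
 * Command-line utility that reads OpenMind XML files and stores their content
 * through a {@link WrapperService}: concepts (definitions) with the relations
 * between them, and assertions (entities) with the relations between them.
 * 
 * @author jurzua
 */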
public class ImportOM3Util {

	private static Map<String, Entity> assertions;

	/**
	 * Imports the assertions (ABox entities) of an OpenMind XML file and the
	 * relations between them, then prints a summary to standard output.
	 * <p>
	 * Entities are saved first with a single call to
	 * {@code saveEntityList}; relations are built afterwards from the saved
	 * entities (their id, modification time and object class) and stored with
	 * {@code saveNodeListOnlyForScripts}. A relation whose source or target id
	 * does not match an imported entity is reported on standard error and
	 * skipped.
	 * <p>
	 * Any exception raised while importing is printed and swallowed here; the
	 * summary is still written.
	 * 
	 * @param ontologyService
	 *            service used to drop and to save entities and relations
	 * @param fileName
	 *            location of the XML file, handed to {@code XMLUtil.getDocument}
	 * @param dropAssertions
	 *            when {@code TRUE}, existing assertions are dropped before the
	 *            import; {@code null} is treated as {@code FALSE}
	 */
	public static void importAssertions(WrapperService ontologyService,
		String fileName, Boolean dropAssertions) {
		long start = System.currentTimeMillis();
		System.out.println("Importing Assertions: ");
		int entitiesInput = 0;
		int entitiesSaved = 0;

		List<Entity> entityList = new ArrayList<Entity>();
		List<ImportEntityRelation> relations = new ArrayList<ImportEntityRelation>();
		Map<String, Entity> conceptCache = new HashMap<String, Entity>();
		// entities by the id they carry in the XML file
		Map<Long, Entity> assertionCache = new HashMap<Long, Entity>();

		try {
			// Boolean.TRUE.equals avoids an unboxing NPE on a null flag
			if (Boolean.TRUE.equals(dropAssertions)) {
				ontologyService.getPS().dropAssertions();
			}
			Document doc = XMLUtil.getDocument(fileName);
			NodeList list = doc.getElementsByTagName(XMLUtil.OPENMIND_DATA);
			if (list.getLength() == 0) {
				throw new IllegalArgumentException(
						"The document is not well formed");
			}
			Node rootElement = list.item(0);

			// importing entities
			Node entsNode = XMLUtil.getNodeByName(rootElement.getChildNodes(),
					XMLUtil.ENTITIES);
			if (entsNode != null) {
				NodeList entNodes = entsNode.getChildNodes();
				for (int i = 0; i < entNodes.getLength(); i++) {
					Node entNode = entNodes.item(i);
					if (entNode.getNodeName().equals(XMLUtil.ENTITY)) {
						entitiesInput++;
						Entity assertion = getEntityFromNode(ontologyService,
								entNode, conceptCache, assertionCache);
						if (assertion != null) {
							entityList.add(assertion);
						}
					}
				}
				// one call for the whole list instead of one save per entity
				ontologyService.saveEntityList(entityList, "import-om3");
				// counted only after the save returned, so a failed save is
				// not reported as saved entities
				entitiesSaved = entityList.size();
			}

			// the relations are listed in their own section of the file
			Node relsNode = XMLUtil.getNodeByName(rootElement.getChildNodes(),
					XMLUtil.RELATIONS);
			if (relsNode != null) {
				getRelationsOfAssertion(relsNode, relations);
			}

			List<org.mpi.openmind.repository.bo.Node> relationList = 
				new ArrayList<org.mpi.openmind.repository.bo.Node>();
			for (ImportEntityRelation importRel : relations) {
				Entity src = assertionCache.get(importRel.getSourceId());
				Entity tar = assertionCache.get(importRel.getTargetId());
				// a relation is kept only if both of its ends were imported
				if (src != null && tar != null) {
					relationList.add(createAboxRelation(importRel.getLabel(),
							src, tar));
				} else {
					System.err
							.println("ImportEntityRelation could not be saved."
									+ importRel);
				}
			}
			ontologyService.saveNodeListOnlyForScripts(relationList);
		} catch (Exception e) {
			e.printStackTrace();
		}

		// break the elapsed time down into hours, minutes and seconds
		long diff = System.currentTimeMillis() - start;
		long hours = diff / (60 * 60 * 1000);
		long minutes = (diff / (60 * 1000)) % 60;
		long seconds = (diff / 1000) % 60;
		System.out.println("\n###################################");
		System.out.println("Summary Entities Importing:");
		System.out.println("Entities input: " + entitiesInput);
		System.out.println("Entities saved: " + entitiesSaved);
		System.out.println("Time execution: " + hours + "[hours] " + minutes
				+ "[min] " + seconds + "[sec]");
		System.out.println("###################################\n");
	}

	/**
	 * Builds an ABox relation with the given label between two entities,
	 * copying id, modification time and object class from each end.
	 */
	private static Relation createAboxRelation(String label, Entity src,
			Entity tar) {
		Relation relation = new Relation();

		relation.setOwnValue(label);
		relation.setSystemStatus(org.mpi.openmind.repository.bo.Node.SYS_STATUS_CURRENT_VERSION);

		relation.setSourceId(src.getId());
		relation.setSourceModif(src.getModificationTime());
		relation.setSourceObjectClass(src.getObjectClass());

		relation.setTargetId(tar.getId());
		relation.setTargetModif(tar.getModificationTime());
		relation.setTargetObjectClass(tar.getObjectClass());
		relation.setType(org.mpi.openmind.repository.bo.Node.TYPE_ABOX);

		return relation;
	}

	/**
	 * Builds an ABox entity from an entity node of the import file.
	 * <p>
	 * The object class and own value are read from the node's XML attributes
	 * when present; an own value found in a child element named
	 * {@code XMLUtil.ATTRIBUTE_VALUE} overrides the one from the attribute.
	 * Children named {@code XMLUtil.ATTRIBUTES} are handed to
	 * {@link #addAttributesToAssertion(Entity, Node)}. If the node carries an
	 * {@code id} attribute, the entity is registered under that id in
	 * {@code assertionCache}.
	 * 
	 * @param ontologyService
	 *            not used by this method
	 * @param node
	 *            the entity node
	 * @param conceptCache
	 *            not used by this method
	 * @param assertionCache
	 *            receives the new entity keyed by its id from the file
	 * @return the new entity, or {@code null} if the node has no attribute
	 *         map (i.e. it is not an element)
	 * @throws NumberFormatException
	 *             if the {@code id} attribute is not a valid long
	 */
	private static Entity getEntityFromNode(WrapperService ontologyService,
			Node node, Map<String, Entity> conceptCache,
			Map<Long, Entity> assertionCache) throws Exception {
		if (node.getAttributes() == null) {
			return null;
		}
		Entity assertion = new Entity(
				org.mpi.openmind.repository.bo.Node.TYPE_ABOX, false);

		// a missing attribute is treated like an empty one instead of
		// aborting the import with a NullPointerException
		Node objectClassAttr = node.getAttributes().getNamedItem(
				XMLUtil.OBJECT_CLASS);
		if (objectClassAttr != null
				&& StringUtils.isNotEmpty(objectClassAttr.getNodeValue())) {
			assertion.setObjectClass(objectClassAttr.getNodeValue());
		}

		Node ownValueAttr = node.getAttributes().getNamedItem(
				XMLUtil.OWN_VALUE);
		if (ownValueAttr != null) {
			assertion.setOwnValue(ownValueAttr.getNodeValue());
		}

		Long id = null;
		Node idAttr = node.getAttributes().getNamedItem("id");
		if (idAttr != null) {
			id = Long.valueOf(idAttr.getNodeValue());
		}

		NodeList children = node.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (child.getNodeName().equals(XMLUtil.ATTRIBUTES)) {
				addAttributesToAssertion(assertion, child);
			}
			// TODO ask MPI whether an own value given as a child element
			// should create a new attribute instead.
			// An empty value element has no first child; in that case the
			// own value read from the attribute is kept.
			if (child.getNodeName().equals(XMLUtil.ATTRIBUTE_VALUE)
					&& child.getFirstChild() != null) {
				assertion.setOwnValue(child.getFirstChild().getNodeValue());
			}
		}

		if (id != null) {
			assertionCache.put(id, assertion);
		}
		return assertion;
	}

	/**
	 * Collects the relations declared below {@code node} into
	 * {@code relations}.
	 * <p>
	 * Every child of {@code node} named {@code XMLUtil.RELATIONS} is scanned;
	 * each of its element children that has a non-empty label, source id and
	 * target id yields one {@link ImportEntityRelation}. Entries with a
	 * missing value are skipped silently; entries whose ids are not valid
	 * longs are reported on standard error and skipped.
	 * <p>
	 * NOTE(review): importAssertions passes the node it looked up by the name
	 * {@code XMLUtil.RELATIONS}, while this method searches the
	 * <em>children</em> of its argument for that same name. Confirm that the
	 * file format really nests the relation list one level deeper.
	 * 
	 * @param node
	 *            parent node to scan; {@code null} is accepted and ignored
	 * @param relations
	 *            list the relations found are appended to
	 */
	private static void getRelationsOfAssertion(Node node,
			List<ImportEntityRelation> relations) {
		if (node == null || node.getAttributes() == null) {
			return;
		}
		NodeList children = node.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node relsNode = children.item(i);
			if (!relsNode.getNodeName().equals(XMLUtil.RELATIONS)) {
				continue;
			}
			NodeList relNodes = relsNode.getChildNodes();
			for (int j = 0; j < relNodes.getLength(); j++) {
				Node relNode = relNodes.item(j);
				// text and comment nodes (e.g. indentation whitespace) have
				// no attribute map; dereferencing it would throw
				if (relNode.getAttributes() == null) {
					continue;
				}
				String label = readAttributeValue(relNode, XMLUtil.LABEL);
				String sourceId = readAttributeValue(relNode,
						XMLUtil.RELATION_SOURCE_ID);
				String targetId = readAttributeValue(relNode,
						XMLUtil.RELATION_TARGET_ID);

				if (StringUtils.isNotEmpty(label)
						&& StringUtils.isNotEmpty(sourceId)
						&& StringUtils.isNotEmpty(targetId)) {
					try {
						ImportEntityRelation r = new ImportEntityRelation();
						r.setSourceId(Long.valueOf(sourceId));
						r.setTargetId(Long.valueOf(targetId));
						r.setLabel(label);
						relations.add(r);
					} catch (NumberFormatException e) {
						System.err.println(e.getMessage());
					}
				}
			}
		}
	}

	/**
	 * Returns the value of the named XML attribute of {@code element}, or
	 * {@code null} if the attribute is absent.
	 */
	private static String readAttributeValue(Node element, String name) {
		Node attr = element.getAttributes().getNamedItem(name);
		return (attr == null) ? null : attr.getNodeValue();
	}

	/**
	 * Adds one {@link Attribute} to {@code assertion} for every element child
	 * of {@code nodeAtts}.
	 * <p>
	 * The attribute's own value, content type and object class are taken from
	 * the XML attributes {@code XMLUtil.ATTRIBUTE_VALUE},
	 * {@code XMLUtil.CONTENT_TYPE} and {@code XMLUtil.ATTRIBUTE_NAME} of the
	 * child, each only when present. Children without an attribute map (text
	 * and comment nodes, such as the whitespace of an indented file) are
	 * skipped, so they no longer produce empty attributes.
	 * 
	 * @param assertion
	 *            entity the attributes are added to
	 * @param nodeAtts
	 *            the node containing the attribute elements
	 * @return the same {@code assertion} instance
	 */
	private static Entity addAttributesToAssertion(Entity assertion,
			Node nodeAtts) {
		if (nodeAtts.getAttributes() == null) {
			return assertion;
		}
		NodeList children = nodeAtts.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node attNode = children.item(i);
			if (attNode.getAttributes() == null) {
				// not an element: nothing to import from it
				continue;
			}
			Attribute att = new Attribute();

			Node value = attNode.getAttributes().getNamedItem(
					XMLUtil.ATTRIBUTE_VALUE);
			if (value != null) {
				att.setOwnValue(value.getNodeValue());
			}
			Node contentType = attNode.getAttributes().getNamedItem(
					XMLUtil.CONTENT_TYPE);
			if (contentType != null) {
				att.setContentType(contentType.getNodeValue());
			}
			// the attribute's name is stored as its object class
			Node name = attNode.getAttributes().getNamedItem(
					XMLUtil.ATTRIBUTE_NAME);
			if (name != null) {
				att.setObjectClass(name.getNodeValue());
			}

			assertion.addAttribute(att);
		}
		return assertion;
	}

	/**
	 * Builds a TBox entity (a concept) from a definition node.
	 * <p>
	 * Type and object class are both set to {@code Node.TYPE_TBOX}. The own
	 * value is taken from the XML attribute {@code XMLUtil.OBJECT_CLASS} when
	 * it is present. Every child named {@code XMLUtil.LABEL} is passed to
	 * {@link #addAttributeToConcept(Entity, Node)}.
	 * 
	 * @param node
	 *            the definition node
	 * @return the new concept, or {@code null} if the node has no attribute
	 *         map
	 */
	private static Entity getConceptFromNode(Node node) {
		if (node.getAttributes() == null) {
			return null;
		}

		Entity result = new Entity();
		result.setType(org.mpi.openmind.repository.bo.Node.TYPE_TBOX);
		result.setObjectClass(org.mpi.openmind.repository.bo.Node.TYPE_TBOX);

		// the concept's own value is the object class named in the file
		Node objectClassAttr = node.getAttributes().getNamedItem(
				XMLUtil.OBJECT_CLASS);
		if (objectClassAttr != null) {
			result.setOwnValue(objectClassAttr.getNodeValue());
		}

		NodeList children = node.getChildNodes();
		for (int idx = 0; idx < children.getLength(); idx++) {
			Node child = children.item(idx);
			boolean isLabel = child.getNodeName().equals(XMLUtil.LABEL);
			if (isLabel) {
				addAttributeToConcept(result, child);
			}
		}
		return result;
	}

	// #####################################################
	// #####################################################
	// #####################################################
	// #####################################################
	// #####################################################
	// #####################################################
	// #####################################################

	private static Map<String, Entity> concepts;

	/**
	 * Imports the concepts (definitions) of an OpenMind XML file and the
	 * relations between them, then prints a summary to standard output.
	 * <p>
	 * Concepts are collected by own value, so a later definition with the
	 * same own value replaces an earlier one, and are saved one by one with
	 * {@code saveConcept}. Relations are read afterwards; those whose source
	 * or target concept is unknown are dropped by
	 * {@link #getRelationFromNode(Node)}.
	 * <p>
	 * Any exception raised while importing is printed and swallowed here; the
	 * summary is still written.
	 * 
	 * @param ontologyService
	 *            service used to drop and to save concepts and relations
	 * @param fileName
	 *            location of the XML file, handed to {@code XMLUtil.getDocument}
	 * @param dropConcepts
	 *            when {@code TRUE}, existing definitions are dropped before
	 *            the import; {@code null} is treated as {@code FALSE}
	 */
	public static void importConcepts(WrapperService ontologyService,
			String fileName, Boolean dropConcepts) {
		long start = System.currentTimeMillis();
		System.out.println("Importing Concepts:");
		concepts = new HashMap<String, Entity>();

		int conceptsInput = 0;
		int conceptsSaved = 0;
		try {
			// Boolean.TRUE.equals avoids an unboxing NPE on a null flag
			if (Boolean.TRUE.equals(dropConcepts)) {
				ontologyService.getPS().dropDefinitions();
			}
			Document doc = XMLUtil.getDocument(fileName);
			NodeList list = doc.getElementsByTagName(XMLUtil.META_DATA);
			if (list.getLength() == 0) {
				throw new IllegalArgumentException(
						"The document is not well formed");
			}
			Node rootElement = list.item(0);

			// importing definitions
			Node defsNode = XMLUtil.getNodeByName(rootElement.getChildNodes(),
					XMLUtil.DEFINITIONS);
			if (defsNode != null) {
				NodeList defNodes = defsNode.getChildNodes();
				for (int i = 0; i < defNodes.getLength(); i++) {
					Node defNode = defNodes.item(i);
					if (defNode.getNodeName().equals(XMLUtil.DEFINITION)) {
						conceptsInput++;
						Entity concept = getConceptFromNode(defNode);
						if (concept != null) {
							concepts.put(concept.getOwnValue(), concept);
						}
					}
				}
				for (Entity e : concepts.values()) {
					ontologyService.saveConcept(e);
					// counted per completed save, so neither replaced
					// duplicates nor a failed save inflate the summary
					conceptsSaved++;
				}
			}

			// importing relations between definitions; a relation is only
			// created when both of its concepts were imported above
			List<org.mpi.openmind.repository.bo.Node> relationList = new ArrayList<org.mpi.openmind.repository.bo.Node>();
			Node relsNode = XMLUtil.getNodeByName(rootElement.getChildNodes(),
					XMLUtil.RELATIONS);
			if (relsNode != null) {
				NodeList relNodes = relsNode.getChildNodes();
				for (int j = 0; j < relNodes.getLength(); j++) {
					Relation relation = getRelationFromNode(relNodes.item(j));
					if (relation != null) {
						relationList.add(relation);
					}
				}
				ontologyService.saveNodeListOnlyForScripts(relationList, "import");
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		// break the elapsed time down into hours, minutes and seconds
		long diff = System.currentTimeMillis() - start;
		long hours = diff / (60 * 60 * 1000);
		long minutes = (diff / (60 * 1000)) % 60;
		long seconds = (diff / 1000) % 60;
		System.out.println("###################################");
		System.out.println("Summary Concepts Importing:");
		System.out.println("Concepts input: " + conceptsInput);
		System.out.println("Concepts saved: " + conceptsSaved);
		System.out.println("Time execution: " + hours + "[hours] " + minutes
				+ "[min] " + seconds + "[sec]");
		System.out.println("###################################");
	}

	/**
	 * Builds the relation between two imported concepts described by a
	 * relation node.
	 * <p>
	 * Source and target are looked up in the {@code concepts} map by the
	 * values of the XML attributes {@code XMLUtil.RELATION_SOURCE} and
	 * {@code XMLUtil.RELATION_TARGET}; the relation's own value comes from
	 * {@code XMLUtil.RELATION_LABEL}. The new relation is also registered on
	 * the source concept through {@code addSourceRelation}.
	 * 
	 * @param node
	 *            the relation node
	 * @return the relation, or {@code null} if the node has no attribute map,
	 *         either concept is unknown, or the label is empty
	 */
	private static Relation getRelationFromNode(Node node) {
		if (node.getAttributes() == null) {
			return null;
		}

		Node sourceAttr = node.getAttributes().getNamedItem(
				XMLUtil.RELATION_SOURCE);
		Entity from = (sourceAttr == null) ? null : concepts.get(sourceAttr
				.getNodeValue());

		Node targetAttr = node.getAttributes().getNamedItem(
				XMLUtil.RELATION_TARGET);
		Entity to = (targetAttr == null) ? null : concepts.get(targetAttr
				.getNodeValue());

		Node labelAttr = node.getAttributes().getNamedItem(
				XMLUtil.RELATION_LABEL);
		String label = (labelAttr == null) ? null : labelAttr.getNodeValue();

		// all three parts are required
		if (from == null || to == null || !StringUtils.isNotEmpty(label)) {
			return null;
		}

		Relation result = new Relation();
		result.setSource(from);
		result.setTarget(to);
		result.setOwnValue(label);
		from.addSourceRelation(result);
		return result;
	}

	/**
	 * Adds one {@link Attribute} built from a label node to {@code concept}.
	 * <p>
	 * The attribute's own value is read from the XML attribute
	 * {@code XMLUtil.LABEL_NAME} and its content type from
	 * {@code XMLUtil.CONTENT_TYPE}, each only when present. Nothing is added
	 * if the label node has no attribute map.
	 * 
	 * @param concept
	 *            concept the attribute is added to
	 * @param label
	 *            the label node
	 * @return the same {@code concept} instance
	 */
	private static Entity addAttributeToConcept(Entity concept, Node label) {
		if (label.getAttributes() == null) {
			return concept;
		}

		Attribute created = new Attribute();

		Node nameAttr = label.getAttributes().getNamedItem(XMLUtil.LABEL_NAME);
		if (nameAttr != null) {
			created.setOwnValue(nameAttr.getNodeValue());
		}

		Node typeAttr = label.getAttributes().getNamedItem(
				XMLUtil.CONTENT_TYPE);
		if (typeAttr != null) {
			created.setContentType(typeAttr.getNodeValue());
		}

		concept.addAttribute(created);
		return concept;
	}

	/**
	 * Plain holder for one relation as it appears in the import file: a label
	 * plus the file-local ids of the source and target entities.
	 */
	public static class ImportEntityRelation {
		/** Label of the relation. */
		String label;
		/** Id of the source entity as given in the file. */
		Long sourceId;
		/** Id of the target entity as given in the file. */
		Long targetId;

		/** @return the relation label */
		public String getLabel() {
			return this.label;
		}

		/** @param label the relation label */
		public void setLabel(String label) {
			this.label = label;
		}

		/** @return the id of the source entity */
		public Long getSourceId() {
			return this.sourceId;
		}

		/** @param sourceId the id of the source entity */
		public void setSourceId(Long sourceId) {
			this.sourceId = sourceId;
		}

		/** @return the id of the target entity */
		public Long getTargetId() {
			return this.targetId;
		}

		/** @param targetId the id of the target entity */
		public void setTargetId(Long targetId) {
			this.targetId = targetId;
		}

		/**
		 * Renders the relation as {@code sourceId <label> targetId}.
		 */
		@Override
		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(this.sourceId);
			text.append(" <").append(this.label).append("> ");
			text.append(this.targetId);
			return text.toString();
		}
	}
	
	/**
	 * Command-line entry point: imports the concepts of a definitions file,
	 * dropping the existing definitions first, and then exits the JVM.
	 * 
	 * @param args
	 *            optional; {@code args[0]} is the path of the definitions
	 *            file. Without arguments the original hard-coded development
	 *            path is used.
	 */
	public static void main(String[] args) {
		String defaultFile = "/Users/jurzua/Projects/max-planck/openmind4/trunk/data/ismi-data-100414/definitions-om3.xml";
		String fileName = (args != null && args.length > 0) ? args[0]
				: defaultFile;

		ServiceRegistry registry = new ServiceRegistry();
		registry.getPS().setImportModus(true);
		try {
			importConcepts(registry.getWrapper(), fileName, true);
		} finally {
			// leave import mode even if the import ends abnormally
			registry.getPS().setImportModus(false);
		}
		System.exit(0);
	}
}