view src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java @ 79:b0aebac0780a

put statistics about number of entities, relations and attributes in xml dump. tags: entity-statistics, relation-statistics.
author casties
date Fri, 03 Mar 2017 18:59:20 +0100
parents a59984fd3c3f
children d4b456623d43
line wrap: on
line source

package org.mpi.openmind.repository.utils;

//JAXP
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import java.io.*;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;



/**
 *
 * @author Jorge Urzúa
 */
public class XMLUtil {

    /** Shared factory (default settings) used when parsing XML that is held in a string. */
    private static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

    // Names used by the own-value rule configuration (presumably XML tag/attribute names).
    public static final String OWN_VALUE_CONFIG = "own-value-configuration";
    public static final String OWN_VALUE_RULES = "own-value-rules";
    public static final String OWN_VALUE_RULE = "own-value-rule";
    public static final String TARGET_RELATION = "target-relation";
    public static final String SOURCE_RELATION = "source-relation";
    public static final String SUBSTRING = "substring";
    public static final String NAME = "name";
    public static final String SOURCE = "source";
    public static final String TARGET = "target";
    public static final String END_NODE = "end-node";
    public static final String XOR = "xor";
    public static final String RULE = "rule";
    public static final String REF = "ref";
    public static final String VALUE = "value";
    public static final String TYPE = "type";
    public static final String CONSTANT = "constant";

    // Names used by the print rules.
    public static final String PRINT_RULES = "print-rules";
    public static final String PRINT_RULE = "print-rule";

    // Names used for the data part (entities, attributes, relations, views, ...).
    public static final String OPENMIND_DATA = "openmind-data";
    public static final String ENTITIES = "entities";
    public static final String ENTITY = "entity";
    public static final String ENTITY_ID = "id";
    public static final String ATTRIBUTES = "attributes";
    public static final String ATTRIBUTE = "attribute";
    public static final String ATTRIBUTE_NAME = "name";
    public static final String ATTRIBUTE_VALUE = "value";
    public static final String RELATION_SOURCE_ID = "source-id";
    public static final String RELATION_TARGET_ID = "target-id";
    public static final String RELATION_ID = "id";
    public static final String VIEW = "view";
    public static final String VIEWS = "views";
    public static final String MODIFICATION_TIME = "mtime";
    public static final String VERSION = "version";
    public static final String ID = "id";
    public static final String ROW_ID = "row-id";
    public static final String USER = "user";
    public static final String PUBLIC = "public";
    public static final String CONCEPTS = "concepts";
    public static final String CONCEPT = "concept";
    public static final String ASSERTIONS = "assertions";
    public static final String ASSERTION = "assertion";
    public static final String NORMALIZED = "norm";

    // Names used for the statistics sections.
    public static final String ENTITY_STATS = "entity-statistics";
    public static final String RELATION_STATS = "relation-statistics";
    public static final String COUNT = "count";

    // Names used for the meta-data part (definitions, labels, relation classes, ...).
    public static final String META_DATA = "openmind-meta";
    public static final String DEFINITIONS = "definitions";
    public static final String DEFINITION = "definition";
    public static final String LABEL = "label";
    public static final String MAIN_LABEL = "main-label";
    public static final String OBJECT_CLASS = "object-class";
    public static final String OWN_VALUE = "own-value";
    public static final String LABEL_NAME = "name";
    public static final String RELATIONS = "relations";
    public static final String RELATION = "relation";
    public static final String RELATION_LABEL = "label";
    public static final String RELATION_SOURCE = "source-class";
    public static final String RELATION_TARGET = "target-class";
    public static final String CONTENT_TYPE = "content-type";

    // Names used for the insertion mode and its values.
    public static final String INSERTION_MODE = "insertion-mode";
    public static final String MERGE = "merge";
    public static final String REPLACE = "replace";
    public static final String INSERT = "insert";

    /**
     * Returns the first node of the list whose node name equals the given name.
     *
     * @param nodeList nodes to search; may be <B>null</B>
     * @param name node name to look for
     * @return the first matching node, or <B>null</B> if the list is null or
     *         contains no node with that name
     */
    public static Node getNodeByName(NodeList nodeList, String name) {
        if (nodeList == null) {
            return null;
        }
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node candidate = nodeList.item(i);
            if (candidate.getNodeName().equals(name)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Creates the parser shared by the file-based and class-path-based loaders.
     *
     * @return a new builder, or <B>null</B> (after reporting the problem) if
     *         the parser could not be configured
     */
    private static DocumentBuilder newWhitespaceIgnoringBuilder() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringElementContentWhitespace(true);
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            System.out.println("Wrong parser configuration: " + e.getMessage());
            e.printStackTrace();
            return null;
        }
    }

    /** Closes an input resource, ignoring failures since its content was already consumed. */
    private static void closeQuietly(Closeable resource) {
        try {
            resource.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close of a read-only stream
        }
    }

    /** Parses XML file and returns XML document.
     * @param fileName XML file to parse
     * @return XML document or <B>null</B> if error occured
     */
    public static Document getDocument(String fileName) {
        DocumentBuilder docBuilder = newWhitespaceIgnoringBuilder();
        if (docBuilder == null) {
            return null;
        }
        try {
            return docBuilder.parse(new File(fileName));
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses an XML resource that is looked up by name through the context
     * class loader of the current thread.
     *
     * @param fileName name of the resource to load
     * @return XML document or <B>null</B> if the resource does not exist or
     *         could not be read or parsed
     */
    public static Document getDocumentFromPathContext(String fileName) {
        DocumentBuilder docBuilder = newWhitespaceIgnoringBuilder();
        if (docBuilder == null) {
            return null;
        }

        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        InputStream stream = classLoader.getResourceAsStream(fileName);
        if (stream == null) {
            // A missing resource yields null here; handing null to the parser
            // would fail with an unchecked IllegalArgumentException.
            System.out.println("Could not read source file: resource not found: " + fileName);
            return null;
        }
        try {
            return docBuilder.parse(stream);
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
            return null;
        } finally {
            // this method opened the stream, so it is also responsible for closing it
            closeQuietly(stream);
        }
    }

    /** Saves XML Document into XML file.
     * @param fileName XML file name
     * @param doc XML document to save
     * @return <B>true</B> if the document was written and the file closed
     *         successfully, <B>false</B> otherwise
     */
    public static boolean saveXMLDocument(String fileName, Document doc) {
        // Create the transformer before touching the file, so that a
        // configuration problem does not leave a truncated file behind.
        Transformer transformer;
        try {
            transformer = TransformerFactory.newInstance().newTransformer();
        } catch (TransformerConfigurationException e) {
            System.out.println("Transformer configuration error: " + e.getMessage());
            return false;
        }

        FileOutputStream fos;
        try {
            fos = new FileOutputStream(new File(fileName));
        } catch (FileNotFoundException e) {
            System.out.println("Error occured: " + e.getMessage());
            return false;
        }

        boolean saved = false;
        try {
            // transforming the DOM source into the stream result performs the save
            transformer.transform(new DOMSource(doc), new StreamResult(fos));
            saved = true;
        } catch (TransformerException e) {
            System.out.println("Error transform: " + e.getMessage());
        } finally {
            try {
                fos.close();
            } catch (IOException e) {
                // a failed close of an output file can mean lost data
                System.out.println("Error occured: " + e.getMessage());
                saved = false;
            }
        }

        if (saved) {
            System.out.println("XML file saved.");
        }
        return saved;
    }

    /**
     * Serializes a document to its XML text using a default (identity) transformer.
     *
     * @param doc document to serialize
     * @return the XML text, or <B>null</B> if the serialization failed
     */
    public static String transformateDocumentToString(Document doc) {
        try {
            StringWriter writer = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
            return writer.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Instance variant of {@link #transformateDocumentToString(Document)}.
     *
     * @param doc document to serialize
     * @return the XML text, or <B>null</B> if the serialization failed
     */
    public String ptransformateDocumentToString(Document doc) {
        return transformateDocumentToString(doc);
    }

    /**
     * Parses XML text into a document.
     *
     * <p>The text is handed to the parser as characters, so an encoding named
     * in the XML declaration of the text cannot cause the (already decoded)
     * content to be decoded a second time with the wrong charset.</p>
     *
     * @param text XML text to parse
     * @return the parsed document, or <B>null</B> if the text is null or could
     *         not be parsed
     */
    public static Document transformateStringToDocument(String text) {
        if (text == null) {
            return null;
        }
        // A byte order mark that survived decoding is not markup; drop it so
        // that the parser sees the document start.
        String xml = text;
        if (xml.length() > 0 && xml.charAt(0) == 0xFEFF) {
            xml = xml.substring(1);
        }
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            return db.parse(new org.xml.sax.InputSource(new StringReader(xml)));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * <p>This method transforms the input document
     * using the stylesheet given as input.</p>
     *
     * @param xsl is the stylesheet document used to transform the input document.
     * @param input is the document which will be transformed.
     * @return the document transformed by the xsl stylesheet, or <B>null</B>
     *         if the stylesheet could not be serialized or the transformation failed.
     */
    public static Document xmlToXmlTransformation(Document xsl, Document input) {
        try {
            // The stylesheet is serialized and re-read as text instead of being
            // passed as a DOMSource. NOTE(review): presumably because a DOM tree
            // built by a non-namespace-aware parser cannot be compiled as a
            // stylesheet directly -- confirm before simplifying.
            String stylesheetText = transformateDocumentToString(xsl);
            if (stylesheetText == null) {
                return null;
            }
            TransformerFactory factory = TransformerFactory.newInstance();
            Transformer transformer =
                    factory.newTransformer(new StreamSource(new StringReader(stylesheetText)));
            DOMResult result = new DOMResult();
            transformer.transform(new DOMSource(input), result);

            return (Document) result.getNode();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}