view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children 2396a569e446
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.util;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.TimeZone;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import net.sf.saxon.om.NodeInfo;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;

/**
 * Helper methods for parsing, validating, querying (XPath) and serializing XML.
 *
 * <p>The class holds no instance state; every method creates the JAXP objects
 * it needs on each call.
 *
 * <p>NOTE(review): all parsers are created with the JAXP default settings, so
 * DTDs and external entities are processed. Harden the factories before using
 * these methods on untrusted input.
 */
public class XmlUtil {
  static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
  static final String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
  static final String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI;

  /** Date pattern used by {@link #toXsDate(Date)} and {@link #toDate(String)}; the trailing Z is a literal. */
  private static final String XS_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";

  /** System property through which JAXP looks up the schema factory for RELAX NG. */
  private static final String RELAXNG_FACTORY_PROPERTY =
      SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI;

  /**
   * Schema factory registered for RELAX NG. The sibling class
   * com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory would be the
   * alternative for schemas written in XML syntax.
   */
  private static final String RELAXNG_COMPACT_FACTORY =
      "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory";

  /**
   * Returns a new {@code XmlUtil}. A fresh object is created on every call.
   *
   * @return a new instance
   */
  public static XmlUtil getInstance() {
    return new XmlUtil();
  }

  /**
   * Parses the XML document found at the given system identifier.
   *
   * @param url system identifier (URL) of the document
   * @return the root element of the document
   * @throws ApplicationException if the document cannot be read or parsed
   */
  public Node doc(String url) throws ApplicationException {
    try {
      DocumentBuilder db = newNamespaceAwareFactory().newDocumentBuilder();
      Document doc = db.parse(new InputSource(url));
      // getDocumentElement() instead of getFirstChild(): the first child of a
      // document may be a DOCTYPE, a comment or a processing instruction.
      return doc.getDocumentElement();
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Parses the XML file with the given name.
   *
   * @param xmlFileName path of the XML file
   * @return the root element of the document
   * @throws ApplicationException with a "not valid" message if parsing fails
   */
  public Node parse(String xmlFileName) throws ApplicationException {
    File xmlFile = new File(xmlFileName);
    try {
      return doc(xmlFile);
    } catch (ApplicationException e) {
      throw new ApplicationException("Your source file is not valid: " + e.getMessage());
    }
  }

  /**
   * Parses the given XML file.
   *
   * @param xmlFile the file to parse
   * @return the root element of the document
   * @throws ApplicationException if the file cannot be read or parsed
   */
  public Node doc(File xmlFile) throws ApplicationException {
    try {
      DocumentBuilder db = newNamespaceAwareFactory().newDocumentBuilder();
      Document doc = db.parse(xmlFile);
      // See doc(String): the root element, not merely the first child node.
      return doc.getDocumentElement();
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Validates an XML file against a RELAX NG schema.
   *
   * <p>Side effect: sets a JVM-wide system property that registers the
   * RELAX NG schema factory.
   *
   * @param xmlFile the file to validate
   * @param schemaUrl location of the RELAX NG schema
   * @throws ApplicationException if the schema cannot be loaded, the file
   *         cannot be read, or the file is not valid
   */
  public void validateByRelaxNG(File xmlFile, URL schemaUrl) throws ApplicationException {
    Validator validator = newRelaxNGValidator(schemaUrl);
    // A system identifier has to be a URI; a raw file path breaks on relative
    // paths, on blanks and on Windows path separators.
    InputSource inputSource = new InputSource(xmlFile.toURI().toASCIIString());
    Source source = new SAXSource(inputSource);
    try {
      validator.validate(source);
    } catch (SAXException e) {
      String message = e.getMessage();
      String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
      throw new ApplicationException(text + ":\n" + message);
    } catch (IOException e) {
      String message = e.getMessage();
      String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
      throw new ApplicationException(text + ": " + message);
    }
  }

  /**
   * Returns the value of a node, falling back to its text content.
   *
   * @param node the node to read
   * @return {@code node.getNodeValue()}, or {@code node.getTextContent()} if
   *         the node value is {@code null}
   */
  public String getNodeValue(Node node) {
    return textOf(node);
  }

  /**
   * Returns the value of the named attribute of a node.
   *
   * @param node the node carrying the attribute
   * @param attrName name of the attribute
   * @return the attribute value, or {@code null} if the node has no
   *         attributes or no attribute of that name
   */
  public String getNodeAttributeValue(Node node, String attrName) {
    NamedNodeMap attrs = node.getAttributes();
    if (attrs == null) {
      return null;
    }
    Node attN = attrs.getNamedItem(attrName);
    if (attN == null) {
      return null;
    }
    return attN.getNodeValue();
  }

  /**
   * Collects the node values of all nodes in a list. Unlike
   * {@link #getNodeValue(Node)} there is no fallback to the text content, so
   * the result may contain {@code null} entries.
   *
   * @param nodes the nodes to read
   * @return the node values in list order, or {@code null} if the list is empty
   */
  public ArrayList<String> toStringArray(NodeList nodes) {
    int length = nodes.getLength();
    if (length == 0) {
      return null;
    }
    ArrayList<String> nodeValues = new ArrayList<String>(length);
    for (int i = 0; i < length; i++) {
      nodeValues.add(nodes.item(i).getNodeValue());
    }
    return nodeValues;
  }

  /**
   * Formats a date as {@code yyyy-MM-dd'T'HH:mm:ss.SSS'Z'} in UTC.
   *
   * @param date the date to format
   * @return the formatted date string
   */
  public String toXsDate(Date date) {
    return newXsDateFormat().format(date);
  }

  /**
   * Parses a date string of the form {@code yyyy-MM-dd'T'HH:mm:ss.SSS'Z'},
   * interpreted as UTC.
   *
   * @param xsDateStr the string to parse, may be {@code null}
   * @return the parsed date, or {@code null} if the argument is {@code null}
   * @throws ApplicationException if the string does not match the pattern
   */
  public Date toDate(String xsDateStr) throws ApplicationException {
    if (xsDateStr == null) {
      return null;
    }
    try {
      return newXsDateFormat().parse(xsDateStr);
    } catch (ParseException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Evaluates an XPath expression against an XML string and returns the first result.
   *
   * @param xmlString the XML document as a string
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the first result string, or {@code null} if there is none
   * @throws ApplicationException if parsing or evaluation fails
   */
  public String evaluateToString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    return firstOrNull(evaluateToStringArray(xmlString, xpathExpression, nsContext));
  }

  /**
   * Evaluates an XPath expression against an input source and returns the first result.
   *
   * @param inputSource the XML document
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the first result string, or {@code null} if there is none
   * @throws ApplicationException if parsing or evaluation fails
   */
  public String evaluateToString(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    return firstOrNull(evaluateToStringArray(inputSource, xpathExpression, nsContext));
  }

  /**
   * Evaluates an XPath expression against a node and returns the first result.
   *
   * @param node the context node
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the first result string, or {@code null} if there is none
   * @throws ApplicationException if evaluation fails
   */
  public String evaluateToString(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    return firstOrNull(evaluateToStringArray(node, xpathExpression, nsContext));
  }

  /**
   * Evaluates an XPath expression against an XML string.
   *
   * @param xmlString the XML document as a string
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the string values of the selected nodes; may be {@code null} or
   *         empty if nothing was selected
   * @throws ApplicationException if parsing or evaluation fails
   */
  public ArrayList<String> evaluateToStringArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    Reader stringReader = new StringReader(xmlString);
    InputSource inputSource = new InputSource(stringReader);
    return evaluateToStringArray(inputSource, xpathExpression, nsContext);
  }

  /**
   * Evaluates an XPath expression against an input source.
   *
   * @param inputSource the XML document
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the string values of the selected nodes; may be {@code null} or
   *         empty if nothing was selected
   * @throws ApplicationException if parsing or evaluation fails
   */
  public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    try {
      XPath xpath = newXPath(nsContext);
      Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
      if (resultObjects == null) {
        return null;
      }
      return nodesetToStringArray(resultObjects);
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Evaluates an XPath expression against an input source and returns the selected DOM nodes.
   *
   * @param inputSource the XML document
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the selected nodes; may be {@code null} or empty if nothing was selected
   * @throws ApplicationException if parsing or evaluation fails
   */
  public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    try {
      XPath xpath = newXPath(nsContext);
      Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
      if (resultObjects == null) {
        return null;
      }
      return nodesetToNodeArray(resultObjects);
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Evaluates an XPath expression against a node.
   *
   * @param node the context node
   * @param xpathExpression the XPath expression, evaluated as a node set
   * @param nsContext namespace bindings for the expression, may be {@code null}
   * @return the string values of the selected nodes; may be {@code null} or
   *         empty if nothing was selected
   * @throws ApplicationException if evaluation fails
   */
  public ArrayList<String> evaluateToStringArray(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
    try {
      XPath xpath = newXPath(nsContext);
      Object resultObjects = xpath.evaluate(xpathExpression, node, XPathConstants.NODESET);
      if (resultObjects == null) {
        return null;
      }
      return nodesetToStringArray(resultObjects);
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Parses an XML file with a schema source attribute set on the parser factory.
   *
   * <p>TODO not used yet, test it.
   *
   * <p>NOTE(review): only the schema source is set; the schema language is not
   * set and validation is not switched on, so the parser presumably does not
   * validate against the schema - confirm before relying on this method.
   *
   * @param xmlFile the file to parse
   * @param schemaFile the schema handed to the parser factory
   * @return the root element of the document
   * @throws ApplicationException if the file cannot be read or parsed
   */
  public Node doc(File xmlFile, File schemaFile) throws ApplicationException {
    try {
      DocumentBuilderFactory dbf = newNamespaceAwareFactory();
      // dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);  // TODO maybe switch this back on after all
      dbf.setAttribute(JAXP_SCHEMA_SOURCE, schemaFile);
      DocumentBuilder db = dbf.newDocumentBuilder();
      Document doc = db.parse(xmlFile);
      // See doc(String): the root element, not merely the first child node.
      return doc.getDocumentElement();
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Validates a DOM node against a RELAX NG schema.
   *
   * <p>TODO not used yet, test it.
   *
   * <p>Side effect: sets a JVM-wide system property that registers the
   * RELAX NG schema factory.
   *
   * @param docNode the node to validate
   * @param schemaUrl location of the RELAX NG schema
   * @throws ApplicationException if the schema cannot be loaded or validation fails
   */
  public void validateByRelaxNG(Node docNode, URL schemaUrl) throws ApplicationException {
    Validator validator = newRelaxNGValidator(schemaUrl);
    DOMSource source = new DOMSource(docNode);
    try {
      validator.validate(source);
    } catch (SAXException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Serializes a node to an indented XML string with an identity transformer.
   *
   * <p>TODO not used yet, test it.
   *
   * @param doc the node to serialize
   * @return the XML text
   * @throws ApplicationException if the transformer cannot be created or the
   *         transformation fails
   */
  public String toStringByTransformer(Node doc) throws ApplicationException {
    try {
      Transformer transformer = TransformerFactory.newInstance().newTransformer();
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");
      // The result is collected in memory and returned as a string.
      StringWriter writer = new StringWriter();
      transformer.transform(new DOMSource(doc), new StreamResult(writer));
      return writer.toString();
    } catch (TransformerException e) {
      // Also covers TransformerConfigurationException, which is a subclass.
      throw new ApplicationException(e);
    }
  }

  /** Creates a namespace-aware DOM parser factory. */
  private static DocumentBuilderFactory newNamespaceAwareFactory() {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    return dbf;
  }

  /** Registers the RELAX NG schema factory and builds a validator for the given schema. */
  private static Validator newRelaxNGValidator(URL schemaUrl) throws ApplicationException {
    System.setProperty(RELAXNG_FACTORY_PROPERTY, RELAXNG_COMPACT_FACTORY);
    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
    Schema schema;
    try {
      schema = factory.newSchema(schemaUrl);
    } catch (SAXException e) {
      throw new ApplicationException(e);
    }
    return schema.newValidator();
  }

  /**
   * Creates the date format shared by toXsDate and toDate. The pattern ends
   * in a literal Z, the UTC designator, so the format is pinned to UTC;
   * otherwise the JVM default time zone would be used and the string would
   * denote the wrong instant. A new object is built per call because
   * SimpleDateFormat is not thread-safe.
   */
  private static DateFormat newXsDateFormat() {
    DateFormat dateFormat = new SimpleDateFormat(XS_DATE_PATTERN);
    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    return dateFormat;
  }

  /** Creates an XPath evaluator and applies the namespace context if one is given. */
  private static XPath newXPath(NamespaceContext nsContext) {
    XPath xpath = XPathFactory.newInstance().newXPath();
    if (nsContext != null) {
      xpath.setNamespaceContext(nsContext);
    }
    return xpath;
  }

  /** Returns the first element of the list, or null if the list is null or empty. */
  private static String firstOrNull(ArrayList<String> values) {
    if (values == null || values.isEmpty()) {
      return null;
    }
    return values.get(0);
  }

  /** Returns the node value, or the text content if the node value is null. */
  private static String textOf(Node node) {
    String value = node.getNodeValue();
    if (value == null) {
      value = node.getTextContent();
    }
    return value;
  }

  /*
   * Converts an XPath node-set result to strings. Handles both the javax and
   * the Saxon XPath implementation:
   * javax XPath evaluation returns a NodeList,
   * Saxon's XPath evaluation returns an ArrayList whose items are handled
   * here as DOM nodes or as Saxon NodeInfo objects.
   * An empty NodeList yields null, an empty ArrayList yields an empty list;
   * any other result type yields null.
   */
  private static ArrayList<String> nodesetToStringArray(Object nodesetObjects) {
    if (nodesetObjects instanceof NodeList) {
      NodeList resultNodeList = (NodeList) nodesetObjects;
      int length = resultNodeList.getLength();
      if (length == 0) {
        return null;
      }
      ArrayList<String> values = new ArrayList<String>(length);
      for (int i = 0; i < length; i++) {
        String nodeValue = textOf(resultNodeList.item(i));
        if (nodeValue != null) {
          values.add(nodeValue);
        }
      }
      return values;
    }
    if (nodesetObjects instanceof ArrayList) {
      ArrayList<?> items = (ArrayList<?>) nodesetObjects;
      ArrayList<String> values = new ArrayList<String>(items.size());
      for (Object item : items) {
        if (item instanceof Node) {
          String nodeValue = textOf((Node) item);
          if (nodeValue != null) {
            values.add(nodeValue);
          }
        } else if (item instanceof NodeInfo) {
          values.add(((NodeInfo) item).getStringValue());
        }
      }
      return values;
    }
    return null;
  }

  /*
   * Converts an XPath node-set result to DOM nodes. Handles both the javax
   * and the Saxon XPath implementation:
   * javax XPath evaluation returns a NodeList,
   * Saxon's XPath evaluation returns an ArrayList.
   * An empty NodeList yields null, an empty ArrayList yields an empty list;
   * any other result type yields null.
   */
  private static ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) {
    if (nodesetObjects instanceof NodeList) {
      NodeList resultNodeList = (NodeList) nodesetObjects;
      int length = resultNodeList.getLength();
      if (length == 0) {
        return null;
      }
      ArrayList<Node> nodes = new ArrayList<Node>(length);
      for (int i = 0; i < length; i++) {
        nodes.add(resultNodeList.item(i));
      }
      return nodes;
    }
    if (nodesetObjects instanceof ArrayList) {
      ArrayList<?> items = (ArrayList<?>) nodesetObjects;
      ArrayList<Node> nodes = new ArrayList<Node>(items.size());
      for (Object item : items) {
        if (item instanceof Node) {
          nodes.add((Node) item);
        }
        // TODO provide clean return value: items that are Saxon NodeInfo
        // objects but not DOM nodes are currently left out of the result.
      }
      return nodes;
    }
    return null;
  }
}