Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java @ 9:1ec29fdd0db8
neue .lex Dateien für Normalisierung / externe Objekte update
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 22 Feb 2011 16:03:45 +0100 |
parents | 2396a569e446 |
children | 257f67be5c00 |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.util; import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; import java.net.URL; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamResult; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; import net.sf.saxon.om.NodeInfo; import net.sf.saxon.query.QueryResult; import net.sf.saxon.trans.XPathException; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; public class XmlUtil { static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; static String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI; private NamespaceContext namespaceContext; public static XmlUtil getInstance() { return new XmlUtil(); } public void setNsContext(String nsName) { 
if (nsName.equals("general")) namespaceContext = getNsContextGeneral(); } public NamespaceContext getNsContextGeneral() { NamespaceContext nsContext = new NamespaceContext() { public String getNamespaceURI(String prefix) { String uri; if (prefix.equals("xlink")) uri = "http://www.w3.org/1999/xlink"; else if (prefix.equals("xml")) uri = "http://www.w3.org/XML/1998/namespace"; else if (prefix.equals("dc")) uri = "http://purl.org/dc/elements/1.1/"; else if (prefix.equals("mpiwg")) uri = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"; else uri = null; return uri; } public String getPrefix(String uri) { if (uri.equals("http://www.w3.org/1999/xlink")) return "xlink"; else if (uri.equals("http://www.w3.org/XML/1998/namespace")) return "xml"; else if (uri.equals("http://purl.org/dc/elements/1.1/")) return "dc"; else if (uri.equals("http://www.mpiwg-berlin.mpg.de/ns/mpiwg")) return "mpiwg"; else return null; } public Iterator getPrefixes(String namespace) { return null; } }; return nsContext; } public Node doc(String url) throws ApplicationException { Node root = null; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource inputSource = new InputSource(url); Document doc = db.parse(inputSource); root = doc.getFirstChild(); } catch (Exception e) { throw new ApplicationException(e); } return root; } public Node parse(String xmlFileName) throws ApplicationException { File xmlFile = new File(xmlFileName); XmlUtil xmlUtil = XmlUtil.getInstance(); Node retNode = null; try { retNode = xmlUtil.doc(xmlFile); } catch (ApplicationException e) { throw new ApplicationException("Your source file is not valid: " + e.getMessage()); } return retNode; } public Node doc(File xmlFile) throws ApplicationException { Node root = null; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = 
db.parse(xmlFile); root = doc.getFirstChild(); } catch (Exception e) { throw new ApplicationException(e); } return root; } public void validateByRelaxNG(File xmlFile, URL schemaUrl) throws ApplicationException { System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory"); SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI); Schema schema = null; try { schema = factory.newSchema(schemaUrl); } catch (SAXException e) { throw new ApplicationException(e); } Validator validator = schema.newValidator(); InputSource inputSource = new InputSource(xmlFile.getPath()); Source source = new SAXSource(inputSource); try { validator.validate(source); } catch (SAXException e) { String message = e.getMessage(); String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl; throw new ApplicationException(text + ":\n" + message); } catch (IOException e) { String message = e.getMessage(); String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl; throw new ApplicationException(text + ": " + message); } } public String getNodeValue(Node node) { String nodeValueStr = node.getNodeValue(); if (nodeValueStr == null) nodeValueStr = node.getTextContent(); return nodeValueStr; } public String getNodeAttributeValue(Node node, String attrName) { NamedNodeMap attrs = node.getAttributes(); if (attrs == null) { return null; } Node attN = attrs.getNamedItem(attrName); if (attN == null) { return null; } return attN.getNodeValue(); } public ArrayList<String> toStringArray(NodeList nodes) { ArrayList<String> nodeValues = null; for (int i=0; i< nodes.getLength(); i++) { Node node = nodes.item(i); if (nodeValues == null) nodeValues = new ArrayList<String>(); String nodeValue = node.getNodeValue(); nodeValues.add(nodeValue); } return nodeValues; } public String toXsDate(Date date) { DateFormat dateFormat = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); String xsDateStr = dateFormat.format(date); return xsDateStr; } public Date toDate(String xsDateStr) throws ApplicationException { Date retDate = null; if (xsDateStr == null) return null; try { DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); retDate = dateFormat.parse(xsDateStr); } catch (ParseException e) { throw new ApplicationException(e); } return retDate; } public String evaluateToString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { String resultStr = null; ArrayList<String> strArray = evaluateToStringArray(xmlString, xpathExpression, nsContext); if (strArray != null && strArray.size() > 0) resultStr = strArray.get(0); return resultStr; } public String evaluateToString(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { String resultStr = null; ArrayList<String> strArray = evaluateToStringArray(inputSource, xpathExpression, nsContext); if (strArray != null && strArray.size() > 0) resultStr = strArray.get(0); return resultStr; } public String evaluateToString(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { String resultStr = null; ArrayList<String> strArray = evaluateToStringArray(node, xpathExpression, nsContext); if (strArray != null && strArray.size() > 0) resultStr = strArray.get(0); return resultStr; } public ArrayList<String> evaluateToStringArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { Reader stringReader = new StringReader(xmlString); InputSource inputSource = new InputSource(stringReader); ArrayList<String> retStrArray = evaluateToStringArray(inputSource, xpathExpression, nsContext); return retStrArray; } public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { 
ArrayList<String> retStrArray = null; try { XPath xpath = XPathFactory.newInstance().newXPath(); if (namespaceContext != null) xpath.setNamespaceContext(namespaceContext); if (nsContext != null) xpath.setNamespaceContext(nsContext); Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); if (resultObjects != null) { retStrArray = nodesetToStringArray(resultObjects); } } catch (Exception e) { throw new ApplicationException(e); } return retStrArray; } public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { ArrayList<Node> retArray = null; try { XPath xpath = XPathFactory.newInstance().newXPath(); if (namespaceContext != null) xpath.setNamespaceContext(namespaceContext); if (nsContext != null) xpath.setNamespaceContext(nsContext); Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); if (resultObjects != null) { retArray = nodesetToNodeArray(resultObjects); } } catch (Exception e) { throw new ApplicationException(e); } return retArray; } public ArrayList<String> evaluateToStringArray(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { ArrayList<String> retStrArray = null; try { XPath xpath = XPathFactory.newInstance().newXPath(); if (nsContext != null) xpath.setNamespaceContext(nsContext); Object resultObjects = xpath.evaluate(xpathExpression, node, XPathConstants.NODESET); if (resultObjects != null) { retStrArray = nodesetToStringArray(resultObjects); } } catch (Exception e) { throw new ApplicationException(e); } return retStrArray; } /* * XPath evaluation: handles both, javax and also Saxon's implementation * javax XPath evaluation: returns a NodeList * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node) */ private ArrayList<String> nodesetToStringArray(Object 
nodesetObjects) { ArrayList<String> retStrArray = null; if (nodesetObjects instanceof NodeList) { NodeList resultNodeList = (NodeList) nodesetObjects; int length = resultNodeList.getLength(); if (length > 0) { retStrArray = new ArrayList<String>(); for (int i=0; i<length; i++) { Node n = resultNodeList.item(i); String nodeValue = n.getNodeValue(); if (nodeValue == null) nodeValue = n.getTextContent(); if (nodeValue != null) retStrArray.add(nodeValue); } } } else if (nodesetObjects instanceof ArrayList) { ArrayList arrayListNodes = (ArrayList) nodesetObjects; retStrArray = new ArrayList<String>(); for (int i=0; i<arrayListNodes.size(); i++) { Object arrayListNode = arrayListNodes.get(i); if (arrayListNode instanceof Node) { Node n = (Node) arrayListNode; String nodeValue = n.getNodeValue(); if (nodeValue == null) nodeValue = n.getTextContent(); if (nodeValue != null) retStrArray.add(nodeValue); } else if (arrayListNode instanceof NodeInfo) { NodeInfo n = (NodeInfo) arrayListNode; String nodeValue = n.getStringValue(); retStrArray.add(nodeValue); } } } return retStrArray; } /* * XPath evaluation: handles both, javax and also Saxon's implementation * javax XPath evaluation: returns a NodeList * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node) */ private ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) throws ApplicationException { ArrayList<Node> retArray = null; if (nodesetObjects instanceof NodeList) { NodeList resultNodeList = (NodeList) nodesetObjects; int length = resultNodeList.getLength(); if (length > 0) { retArray = new ArrayList<Node>(); for (int i=0; i<length; i++) { Node n = resultNodeList.item(i); retArray.add(n); } } } else if (nodesetObjects instanceof ArrayList) { ArrayList arrayListNodes = (ArrayList) nodesetObjects; retArray = new ArrayList<Node>(); for (int i=0; i<arrayListNodes.size(); i++) { Object arrayListNode = arrayListNodes.get(i); if 
(arrayListNode instanceof Node) { Node n = (Node) arrayListNode; retArray.add(n); } else if (arrayListNode instanceof NodeInfo) { NodeInfo n = (NodeInfo) arrayListNode; String xmlStr = ""; try { xmlStr = QueryResult.serialize(n); DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); Node domNode = docBuilder.parse(new InputSource(new StringReader(xmlStr))).getDocumentElement(); retArray.add(domNode); } catch (Exception e) { throw new ApplicationException(e); } } } } return retArray; } /* * XPath evaluation: handles both, javax and also Saxon's implementation * javax XPath evaluation: returns a NodeList * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node) */ private String nodesetToXmlString(Object nodesetObjects) throws ApplicationException { Writer writer = new StringWriter(); try { if (nodesetObjects instanceof NodeList) { NodeList resultNodeList = (NodeList) nodesetObjects; int length = resultNodeList.getLength(); if (length > 0) { for (int i=0; i<length; i++) { Node n = resultNodeList.item(i); serializeNode(n, writer, ""); } } } else if (nodesetObjects instanceof ArrayList) { ArrayList arrayListNodes = (ArrayList) nodesetObjects; for (int i=0; i<arrayListNodes.size(); i++) { Object arrayListNode = arrayListNodes.get(i); if (arrayListNode instanceof Node) { Node n = (Node) arrayListNode; serializeNode(n, writer, ""); } else if (arrayListNode instanceof NodeInfo) { NodeInfo n = (NodeInfo) arrayListNode; String xmlStr = QueryResult.serialize(n); writer.write(xmlStr); } } } writer.flush(); } catch (XPathException e) { throw new ApplicationException(e); } catch (IOException e) { throw new ApplicationException(e); } return writer.toString(); } public String evaluateToXmlString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { String resultStr = null; try 
{ XPathFactory xpathFactory = net.sf.saxon.xpath.XPathFactoryImpl.newInstance(); XPath xpath = xpathFactory.newXPath(); if (namespaceContext != null) xpath.setNamespaceContext(namespaceContext); if (nsContext != null) xpath.setNamespaceContext(nsContext); Reader stringReader = new StringReader(xmlString); InputSource inputSource = new InputSource(stringReader); Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); if (resultObjects != null) { resultStr = nodesetToXmlString(resultObjects); } } catch (Exception e) { throw new ApplicationException(e); } return resultStr; } public ArrayList<Node> evaluateToNodeArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { ArrayList<Node> result = null; try { XPathFactory xpathFactory = net.sf.saxon.xpath.XPathFactoryImpl.newInstance(); XPath xpath = xpathFactory.newXPath(); if (namespaceContext != null) xpath.setNamespaceContext(namespaceContext); if (nsContext != null) xpath.setNamespaceContext(nsContext); Reader stringReader = new StringReader(xmlString); InputSource inputSource = new InputSource(stringReader); Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); if (resultObjects != null) { result = nodesetToNodeArray(resultObjects); } } catch (Exception e) { throw new ApplicationException(e); } return result; } public String insertAtCharPos(String xmlFragment, String charPosStr, String newXmlNodeStr) { Integer charPos = new Integer(charPosStr); int strCharIndex = getCharIndex(xmlFragment, charPos); if (charPos == 0) strCharIndex = getCharIndex(xmlFragment, charPos + 1) - 1; String resultStr = xmlFragment.substring(0, strCharIndex) + newXmlNodeStr + xmlFragment.substring(strCharIndex); return resultStr; } private int getCharIndex(String xmlFragment, int charPos) { int size = xmlFragment.length(); int counter = 0; int charCounter = 0; int counterLastChar = -1; boolean isEntity = false; boolean 
isElement = false; while (counter < size) { char c = xmlFragment.charAt(counter); switch (c) { case '<': isElement = true; break; case '>': isElement = false; break; case '&': isEntity = true; break; case ';': isEntity = false; break; } // count all chars which are not inside elements and entities // if element closing char ">" is found it should not be counted as a char // if an entity closing char ";" is found it should be counted cause the entity itself is one char long if (! isEntity && ! isElement && !(c == '>')) { charCounter++; counterLastChar = counter; } if (charCounter == charPos) { break; } counter++; } // input charPos was bigger than available chars: return the last available charPos if (counter == size) return counterLastChar + 1; return counter + 1; } /** * <p> This will serialize a DOM <code>Node</code> to * the supplied <code>Writer</code>. </p> * * @param node DOM <code>Node</code> to serialize. * @param writer <code>Writer</code> to write to. * @param indentLevel current indentation. 
*/ private void serializeNode(Node node, Writer writer, String indentLevel) throws ApplicationException { try { // Determine action based on node type switch (node.getNodeType()) { case Node.DOCUMENT_NODE: writer.write("<?xml version=\"1.0\"?>"); writer.write("\n"); // recurse on each child NodeList nodes = node.getChildNodes(); if (nodes != null) { for (int i=0; i<nodes.getLength(); i++) { serializeNode(nodes.item(i), writer, ""); } } break; case Node.ELEMENT_NODE: String name = node.getNodeName(); writer.write(indentLevel + "<" + name); NamedNodeMap attributes = node.getAttributes(); for (int i=0; i<attributes.getLength(); i++) { Node current = attributes.item(i); writer.write(" " + current.getNodeName() + "=\"" + current.getNodeValue() + "\""); } writer.write(">"); // recurse on each child NodeList children = node.getChildNodes(); if (children != null) { if ((children.item(0) != null) && (children.item(0).getNodeType() == Node.ELEMENT_NODE)) { writer.write("\n"); } for (int i=0; i<children.getLength(); i++) { serializeNode(children.item(i), writer, indentLevel + " "); } if ((children.item(0) != null) && (children.item(children.getLength()-1).getNodeType() == Node.ELEMENT_NODE)) { writer.write(indentLevel); } } writer.write("</" + name + ">"); writer.write("\n"); break; case Node.TEXT_NODE: writer.write(node.getNodeValue()); break; case Node.CDATA_SECTION_NODE: writer.write("<![CDATA[" + node.getNodeValue() + "]]>"); break; case Node.COMMENT_NODE: writer.write(indentLevel + "<!-- " + node.getNodeValue() + " -->"); writer.write("\n"); break; case Node.PROCESSING_INSTRUCTION_NODE: writer.write("<?" 
+ node.getNodeName() + " " + node.getNodeValue() + "?>"); writer.write("\n"); break; case Node.ENTITY_REFERENCE_NODE: writer.write("&" + node.getNodeName() + ";"); break; case Node.DOCUMENT_TYPE_NODE: DocumentType docType = (DocumentType)node; writer.write("<!DOCTYPE " + docType.getName()); if (docType.getPublicId() != null) { System.out.print(" PUBLIC \"" + docType.getPublicId() + "\" "); } else { writer.write(" SYSTEM "); } writer.write("\"" + docType.getSystemId() + "\">"); writer.write("\n"); break; } } catch (IOException e) { throw new ApplicationException(e); } } // TODO not used yet, test it public Node doc(File xmlFile, File schemaFile) throws ApplicationException { Node root = null; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); // dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); // TODO vielleicht doch wieder anschalten dbf.setAttribute(JAXP_SCHEMA_SOURCE, schemaFile); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(xmlFile); root = doc.getFirstChild(); } catch (Exception e) { throw new ApplicationException(e); } return root; } // TODO not used yet, test it public void validateByRelaxNG(Node docNode, URL schemaUrl) throws ApplicationException { // System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory"); System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory"); SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI); Schema schema = null; try { schema = factory.newSchema(schemaUrl); } catch (SAXException e) { throw new ApplicationException(e); } Validator validator = schema.newValidator(); DOMSource source = new DOMSource(docNode); try { validator.validate(source); } catch (SAXException e) { throw new ApplicationException(e); } catch (IOException e) { throw new 
ApplicationException(e); } } // TODO not used yet, test it public String toStringByTransformer(Node doc) throws ApplicationException { String xmlString = null; try { Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); //initialize StreamResult with File object to save to file StreamResult result = new StreamResult(new StringWriter()); DOMSource source = new DOMSource(doc); transformer.transform(source, result); xmlString = result.getWriter().toString(); } catch (TransformerConfigurationException e) { throw new ApplicationException(e); } catch (TransformerException e) { throw new ApplicationException(e); } return xmlString; } }