comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children 2396a569e446
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.util;
2
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.TimeZone;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import net.sf.saxon.om.NodeInfo;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
45
46 public class XmlUtil {
47 static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
48 static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
49 static String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI;
50
51 public static XmlUtil getInstance() {
52 return new XmlUtil();
53 }
54
55 public Node doc(String url) throws ApplicationException {
56 Node root = null;
57 try {
58 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
59 dbf.setNamespaceAware(true);
60 DocumentBuilder db = dbf.newDocumentBuilder();
61 InputSource inputSource = new InputSource(url);
62 Document doc = db.parse(inputSource);
63 root = doc.getFirstChild();
64 } catch (Exception e) {
65 throw new ApplicationException(e);
66 }
67 return root;
68 }
69
70 public Node parse(String xmlFileName) throws ApplicationException {
71 File xmlFile = new File(xmlFileName);
72 XmlUtil xmlUtil = XmlUtil.getInstance();
73 Node retNode = null;
74 try {
75 retNode = xmlUtil.doc(xmlFile);
76 } catch (ApplicationException e) {
77 throw new ApplicationException("Your source file is not valid: " + e.getMessage());
78 }
79 return retNode;
80 }
81
82 public Node doc(File xmlFile) throws ApplicationException {
83 Node root = null;
84 try {
85 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
86 dbf.setNamespaceAware(true);
87 DocumentBuilder db = dbf.newDocumentBuilder();
88 Document doc = db.parse(xmlFile);
89 root = doc.getFirstChild();
90 } catch (Exception e) {
91 throw new ApplicationException(e);
92 }
93 return root;
94 }
95
96 public void validateByRelaxNG(File xmlFile, URL schemaUrl) throws ApplicationException {
97 System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
98 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
99 Schema schema = null;
100 try {
101 schema = factory.newSchema(schemaUrl);
102 } catch (SAXException e) {
103 throw new ApplicationException(e);
104 }
105 Validator validator = schema.newValidator();
106 InputSource inputSource = new InputSource(xmlFile.getPath());
107 Source source = new SAXSource(inputSource);
108 try {
109 validator.validate(source);
110 } catch (SAXException e) {
111 String message = e.getMessage();
112 String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
113 throw new ApplicationException(text + ":\n" + message);
114 } catch (IOException e) {
115 String message = e.getMessage();
116 String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
117 throw new ApplicationException(text + ": " + message);
118 }
119 }
120
121 public String getNodeValue(Node node) {
122 String nodeValueStr = node.getNodeValue();
123 if (nodeValueStr == null)
124 nodeValueStr = node.getTextContent();
125 return nodeValueStr;
126 }
127
128 public String getNodeAttributeValue(Node node, String attrName) {
129 NamedNodeMap attrs = node.getAttributes();
130 if (attrs == null) {
131 return null;
132 }
133 Node attN = attrs.getNamedItem(attrName);
134 if (attN == null) {
135 return null;
136 }
137 return attN.getNodeValue();
138 }
139
140 public ArrayList<String> toStringArray(NodeList nodes) {
141 ArrayList<String> nodeValues = null;
142 for (int i=0; i< nodes.getLength(); i++) {
143 Node node = nodes.item(i);
144 if (nodeValues == null)
145 nodeValues = new ArrayList<String>();
146 String nodeValue = node.getNodeValue();
147 nodeValues.add(nodeValue);
148 }
149 return nodeValues;
150 }
151
152 public String toXsDate(Date date) {
153 DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
154 String xsDateStr = dateFormat.format(date);
155 return xsDateStr;
156 }
157
158 public Date toDate(String xsDateStr) throws ApplicationException {
159 Date retDate = null;
160 if (xsDateStr == null)
161 return null;
162 try {
163 DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
164 retDate = dateFormat.parse(xsDateStr);
165 } catch (ParseException e) {
166 throw new ApplicationException(e);
167 }
168 return retDate;
169 }
170
171 public String evaluateToString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
172 String resultStr = null;
173 ArrayList<String> strArray = evaluateToStringArray(xmlString, xpathExpression, nsContext);
174 if (strArray != null && strArray.size() > 0)
175 resultStr = strArray.get(0);
176 return resultStr;
177 }
178
179 public String evaluateToString(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
180 String resultStr = null;
181 ArrayList<String> strArray = evaluateToStringArray(inputSource, xpathExpression, nsContext);
182 if (strArray != null && strArray.size() > 0)
183 resultStr = strArray.get(0);
184 return resultStr;
185 }
186
187 public String evaluateToString(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
188 String resultStr = null;
189 ArrayList<String> strArray = evaluateToStringArray(node, xpathExpression, nsContext);
190 if (strArray != null && strArray.size() > 0)
191 resultStr = strArray.get(0);
192 return resultStr;
193 }
194
195 public ArrayList<String> evaluateToStringArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
196 Reader stringReader = new StringReader(xmlString);
197 InputSource inputSource = new InputSource(stringReader);
198 ArrayList<String> retStrArray = evaluateToStringArray(inputSource, xpathExpression, nsContext);
199 return retStrArray;
200 }
201
202 public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
203 ArrayList<String> retStrArray = null;
204 try {
205 XPath xpath = XPathFactory.newInstance().newXPath();
206 if (nsContext != null)
207 xpath.setNamespaceContext(nsContext);
208 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
209 if (resultObjects != null) {
210 retStrArray = nodesetToStringArray(resultObjects);
211 }
212 } catch (Exception e) {
213 throw new ApplicationException(e);
214 }
215 return retStrArray;
216 }
217
218 public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
219 ArrayList<Node> retArray = null;
220 try {
221 XPath xpath = XPathFactory.newInstance().newXPath();
222 if (nsContext != null)
223 xpath.setNamespaceContext(nsContext);
224 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
225 if (resultObjects != null) {
226 retArray = nodesetToNodeArray(resultObjects);
227 }
228 } catch (Exception e) {
229 throw new ApplicationException(e);
230 }
231 return retArray;
232 }
233
234 public ArrayList<String> evaluateToStringArray(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
235 ArrayList<String> retStrArray = null;
236 try {
237 XPath xpath = XPathFactory.newInstance().newXPath();
238 if (nsContext != null)
239 xpath.setNamespaceContext(nsContext);
240 Object resultObjects = xpath.evaluate(xpathExpression, node, XPathConstants.NODESET);
241 if (resultObjects != null) {
242 retStrArray = nodesetToStringArray(resultObjects);
243 }
244 } catch (Exception e) {
245 throw new ApplicationException(e);
246 }
247 return retStrArray;
248 }
249
250 /*
251 * XPath evaluation: handles both, javax and also Saxon's implementation
252 * javax XPath evaluation: returns a NodeList
253 * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node)
254 */
255 private ArrayList<String> nodesetToStringArray(Object nodesetObjects) {
256 ArrayList<String> retStrArray = null;
257 if (nodesetObjects instanceof NodeList) {
258 NodeList resultNodeList = (NodeList) nodesetObjects;
259 int length = resultNodeList.getLength();
260 if (length > 0) {
261 retStrArray = new ArrayList<String>();
262 for (int i=0; i<length; i++) {
263 Node n = resultNodeList.item(i);
264 String nodeValue = n.getNodeValue();
265 if (nodeValue == null)
266 nodeValue = n.getTextContent();
267 if (nodeValue != null)
268 retStrArray.add(nodeValue);
269 }
270 }
271 } else if (nodesetObjects instanceof ArrayList) {
272 ArrayList arrayListNodes = (ArrayList) nodesetObjects;
273 retStrArray = new ArrayList<String>();
274 for (int i=0; i<arrayListNodes.size(); i++) {
275 Object arrayListNode = arrayListNodes.get(i);
276 if (arrayListNode instanceof Node) {
277 Node n = (Node) arrayListNode;
278 String nodeValue = n.getNodeValue();
279 if (nodeValue == null)
280 nodeValue = n.getTextContent();
281 if (nodeValue != null)
282 retStrArray.add(nodeValue);
283 } else if (arrayListNode instanceof NodeInfo) {
284 NodeInfo n = (NodeInfo) arrayListNode;
285 String nodeValue = n.getStringValue();
286 retStrArray.add(nodeValue);
287 }
288 }
289 }
290 return retStrArray;
291 }
292
293 /*
294 * XPath evaluation: handles both, javax and also Saxon's implementation
295 * javax XPath evaluation: returns a NodeList
296 * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node)
297 */
298 private ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) {
299 ArrayList<Node> retArray = null;
300 if (nodesetObjects instanceof NodeList) {
301 NodeList resultNodeList = (NodeList) nodesetObjects;
302 int length = resultNodeList.getLength();
303 if (length > 0) {
304 retArray = new ArrayList<Node>();
305 for (int i=0; i<length; i++) {
306 Node n = resultNodeList.item(i);
307 retArray.add(n);
308 }
309 }
310 } else if (nodesetObjects instanceof ArrayList) {
311 ArrayList arrayListNodes = (ArrayList) nodesetObjects;
312 retArray = new ArrayList<Node>();
313 for (int i=0; i<arrayListNodes.size(); i++) {
314 Object arrayListNode = arrayListNodes.get(i);
315 if (arrayListNode instanceof Node) {
316 Node n = (Node) arrayListNode;
317 retArray.add(n);
318 } else if (arrayListNode instanceof NodeInfo) {
319 NodeInfo n = (NodeInfo) arrayListNode;
320 // TODO provide clean return value
321 }
322 }
323 }
324 return retArray;
325 }
326
327 // TODO not used yet, test it
328 public Node doc(File xmlFile, File schemaFile) throws ApplicationException {
329 Node root = null;
330 try {
331 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
332 dbf.setNamespaceAware(true);
333 // dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); // TODO vielleicht doch wieder anschalten
334 dbf.setAttribute(JAXP_SCHEMA_SOURCE, schemaFile);
335 DocumentBuilder db = dbf.newDocumentBuilder();
336 Document doc = db.parse(xmlFile);
337 root = doc.getFirstChild();
338 } catch (Exception e) {
339 throw new ApplicationException(e);
340 }
341 return root;
342 }
343
344 // TODO not used yet, test it
345 public void validateByRelaxNG(Node docNode, URL schemaUrl) throws ApplicationException {
346 // System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory");
347 System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
348 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
349 Schema schema = null;
350 try {
351 schema = factory.newSchema(schemaUrl);
352 } catch (SAXException e) {
353 throw new ApplicationException(e);
354 }
355 Validator validator = schema.newValidator();
356 DOMSource source = new DOMSource(docNode);
357 try {
358 validator.validate(source);
359 } catch (SAXException e) {
360 throw new ApplicationException(e);
361 } catch (IOException e) {
362 throw new ApplicationException(e);
363 }
364 }
365
366 // TODO not used yet, test it
367 public String toStringByTransformer(Node doc) throws ApplicationException {
368 String xmlString = null;
369 try {
370 Transformer transformer = TransformerFactory.newInstance().newTransformer();
371 transformer.setOutputProperty(OutputKeys.INDENT, "yes");
372 //initialize StreamResult with File object to save to file
373 StreamResult result = new StreamResult(new StringWriter());
374 DOMSource source = new DOMSource(doc);
375 transformer.transform(source, result);
376 xmlString = result.getWriter().toString();
377 } catch (TransformerConfigurationException e) {
378 throw new ApplicationException(e);
379 } catch (TransformerException e) {
380 throw new ApplicationException(e);
381 }
382 return xmlString;
383 }
384 }