comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java @ 9:1ec29fdd0db8

neue .lex Dateien für Normalisierung / externe Objekte update
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 22 Feb 2011 16:03:45 +0100
parents 2396a569e446
children 257f67be5c00
comparison
equal deleted inserted replaced
8:d2a1c14fde31 9:1ec29fdd0db8
10 import java.text.DateFormat; 10 import java.text.DateFormat;
11 import java.text.ParseException; 11 import java.text.ParseException;
12 import java.text.SimpleDateFormat; 12 import java.text.SimpleDateFormat;
13 import java.util.ArrayList; 13 import java.util.ArrayList;
14 import java.util.Date; 14 import java.util.Date;
15 import java.util.Iterator;
15 16
16 import javax.xml.XMLConstants; 17 import javax.xml.XMLConstants;
17 import javax.xml.namespace.NamespaceContext; 18 import javax.xml.namespace.NamespaceContext;
18 import javax.xml.parsers.DocumentBuilder; 19 import javax.xml.parsers.DocumentBuilder;
19 import javax.xml.parsers.DocumentBuilderFactory; 20 import javax.xml.parsers.DocumentBuilderFactory;
32 import javax.xml.xpath.XPath; 33 import javax.xml.xpath.XPath;
33 import javax.xml.xpath.XPathConstants; 34 import javax.xml.xpath.XPathConstants;
34 import javax.xml.xpath.XPathFactory; 35 import javax.xml.xpath.XPathFactory;
35 36
36 import net.sf.saxon.om.NodeInfo; 37 import net.sf.saxon.om.NodeInfo;
38 import net.sf.saxon.query.QueryResult;
39 import net.sf.saxon.trans.XPathException;
37 40
38 import org.w3c.dom.Document; 41 import org.w3c.dom.Document;
39 import org.w3c.dom.DocumentType; 42 import org.w3c.dom.DocumentType;
40 import org.w3c.dom.NamedNodeMap; 43 import org.w3c.dom.NamedNodeMap;
41 import org.w3c.dom.Node; 44 import org.w3c.dom.Node;
48 public class XmlUtil { 51 public class XmlUtil {
49 static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; 52 static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
50 static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; 53 static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
51 static String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI; 54 static String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI;
52 55
56 private NamespaceContext namespaceContext;
57
53 public static XmlUtil getInstance() { 58 public static XmlUtil getInstance() {
54 return new XmlUtil(); 59 return new XmlUtil();
60 }
61
62 public void setNsContext(String nsName) {
63 if (nsName.equals("general"))
64 namespaceContext = getNsContextGeneral();
65 }
66
67 public NamespaceContext getNsContextGeneral() {
68 NamespaceContext nsContext = new NamespaceContext() {
69 public String getNamespaceURI(String prefix) {
70 String uri;
71 if (prefix.equals("xlink"))
72 uri = "http://www.w3.org/1999/xlink";
73 else if (prefix.equals("xml"))
74 uri = "http://www.w3.org/XML/1998/namespace";
75 else if (prefix.equals("dc"))
76 uri = "http://purl.org/dc/elements/1.1/";
77 else if (prefix.equals("mpiwg"))
78 uri = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg";
79 else
80 uri = null;
81 return uri;
82 }
83 public String getPrefix(String uri) {
84 if (uri.equals("http://www.w3.org/1999/xlink"))
85 return "xlink";
86 else if (uri.equals("http://www.w3.org/XML/1998/namespace"))
87 return "xml";
88 else if (uri.equals("http://purl.org/dc/elements/1.1/"))
89 return "dc";
90 else if (uri.equals("http://www.mpiwg-berlin.mpg.de/ns/mpiwg"))
91 return "mpiwg";
92 else
93 return null;
94 }
95 public Iterator getPrefixes(String namespace) {
96 return null;
97 }
98 };
99 return nsContext;
55 } 100 }
56 101
57 public Node doc(String url) throws ApplicationException { 102 public Node doc(String url) throws ApplicationException {
58 Node root = null; 103 Node root = null;
59 try { 104 try {
203 248
204 public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { 249 public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
205 ArrayList<String> retStrArray = null; 250 ArrayList<String> retStrArray = null;
206 try { 251 try {
207 XPath xpath = XPathFactory.newInstance().newXPath(); 252 XPath xpath = XPathFactory.newInstance().newXPath();
253 if (namespaceContext != null)
254 xpath.setNamespaceContext(namespaceContext);
208 if (nsContext != null) 255 if (nsContext != null)
209 xpath.setNamespaceContext(nsContext); 256 xpath.setNamespaceContext(nsContext);
210 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); 257 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
211 if (resultObjects != null) { 258 if (resultObjects != null) {
212 retStrArray = nodesetToStringArray(resultObjects); 259 retStrArray = nodesetToStringArray(resultObjects);
219 266
220 public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { 267 public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
221 ArrayList<Node> retArray = null; 268 ArrayList<Node> retArray = null;
222 try { 269 try {
223 XPath xpath = XPathFactory.newInstance().newXPath(); 270 XPath xpath = XPathFactory.newInstance().newXPath();
271 if (namespaceContext != null)
272 xpath.setNamespaceContext(namespaceContext);
224 if (nsContext != null) 273 if (nsContext != null)
225 xpath.setNamespaceContext(nsContext); 274 xpath.setNamespaceContext(nsContext);
226 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); 275 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
227 if (resultObjects != null) { 276 if (resultObjects != null) {
228 retArray = nodesetToNodeArray(resultObjects); 277 retArray = nodesetToNodeArray(resultObjects);
295 /* 344 /*
296 * XPath evaluation: handles both, javax and also Saxon's implementation 345 * XPath evaluation: handles both, javax and also Saxon's implementation
297 * javax XPath evaluation: returns a NodeList 346 * javax XPath evaluation: returns a NodeList
298 * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node) 347 * Saxon's XPath evaluation: returns an ArrayList of TinyTextImpl (which could be casted to NodeInfo which could be handled as if it was a dom node)
299 */ 348 */
300 private ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) { 349 private ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) throws ApplicationException {
301 ArrayList<Node> retArray = null; 350 ArrayList<Node> retArray = null;
302 if (nodesetObjects instanceof NodeList) { 351 if (nodesetObjects instanceof NodeList) {
303 NodeList resultNodeList = (NodeList) nodesetObjects; 352 NodeList resultNodeList = (NodeList) nodesetObjects;
304 int length = resultNodeList.getLength(); 353 int length = resultNodeList.getLength();
305 if (length > 0) { 354 if (length > 0) {
317 if (arrayListNode instanceof Node) { 366 if (arrayListNode instanceof Node) {
318 Node n = (Node) arrayListNode; 367 Node n = (Node) arrayListNode;
319 retArray.add(n); 368 retArray.add(n);
320 } else if (arrayListNode instanceof NodeInfo) { 369 } else if (arrayListNode instanceof NodeInfo) {
321 NodeInfo n = (NodeInfo) arrayListNode; 370 NodeInfo n = (NodeInfo) arrayListNode;
322 // TODO provide clean return value 371 String xmlStr = "";
372 try {
373 xmlStr = QueryResult.serialize(n);
374 DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
375 DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
376 Node domNode = docBuilder.parse(new InputSource(new StringReader(xmlStr))).getDocumentElement();
377 retArray.add(domNode);
378 } catch (Exception e) {
379 throw new ApplicationException(e);
380 }
323 } 381 }
324 } 382 }
325 } 383 }
326 return retArray; 384 return retArray;
327 } 385 }
351 if (arrayListNode instanceof Node) { 409 if (arrayListNode instanceof Node) {
352 Node n = (Node) arrayListNode; 410 Node n = (Node) arrayListNode;
353 serializeNode(n, writer, ""); 411 serializeNode(n, writer, "");
354 } else if (arrayListNode instanceof NodeInfo) { 412 } else if (arrayListNode instanceof NodeInfo) {
355 NodeInfo n = (NodeInfo) arrayListNode; 413 NodeInfo n = (NodeInfo) arrayListNode;
356 writer.write(n.getStringValue()); // TODO if that really happens 414 String xmlStr = QueryResult.serialize(n);
415 writer.write(xmlStr);
357 } 416 }
358 } 417 }
359 } 418 }
360 writer.flush(); 419 writer.flush();
420 } catch (XPathException e) {
421 throw new ApplicationException(e);
361 } catch (IOException e) { 422 } catch (IOException e) {
362 throw new ApplicationException(e); 423 throw new ApplicationException(e);
363 } 424 }
364 return writer.toString(); 425 return writer.toString();
365 } 426 }
366 427
367 public String evaluateToXmlString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException { 428 public String evaluateToXmlString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
368 String resultStr = null; 429 String resultStr = null;
369 try { 430 try {
370 XPath xpath = XPathFactory.newInstance().newXPath(); 431 XPathFactory xpathFactory = net.sf.saxon.xpath.XPathFactoryImpl.newInstance();
432 XPath xpath = xpathFactory.newXPath();
433 if (namespaceContext != null)
434 xpath.setNamespaceContext(namespaceContext);
371 if (nsContext != null) 435 if (nsContext != null)
372 xpath.setNamespaceContext(nsContext); 436 xpath.setNamespaceContext(nsContext);
373 Reader stringReader = new StringReader(xmlString); 437 Reader stringReader = new StringReader(xmlString);
374 InputSource inputSource = new InputSource(stringReader); 438 InputSource inputSource = new InputSource(stringReader);
375 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET); 439 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
379 } catch (Exception e) { 443 } catch (Exception e) {
380 throw new ApplicationException(e); 444 throw new ApplicationException(e);
381 } 445 }
382 return resultStr; 446 return resultStr;
383 } 447 }
448
449 public ArrayList<Node> evaluateToNodeArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
450 ArrayList<Node> result = null;
451 try {
452 XPathFactory xpathFactory = net.sf.saxon.xpath.XPathFactoryImpl.newInstance();
453 XPath xpath = xpathFactory.newXPath();
454 if (namespaceContext != null)
455 xpath.setNamespaceContext(namespaceContext);
456 if (nsContext != null)
457 xpath.setNamespaceContext(nsContext);
458 Reader stringReader = new StringReader(xmlString);
459 InputSource inputSource = new InputSource(stringReader);
460 Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
461 if (resultObjects != null) {
462 result = nodesetToNodeArray(resultObjects);
463 }
464 } catch (Exception e) {
465 throw new ApplicationException(e);
466 }
467 return result;
468 }
469
470 public String insertAtCharPos(String xmlFragment, String charPosStr, String newXmlNodeStr) {
471 Integer charPos = new Integer(charPosStr);
472 int strCharIndex = getCharIndex(xmlFragment, charPos);
473 if (charPos == 0)
474 strCharIndex = getCharIndex(xmlFragment, charPos + 1) - 1;
475 String resultStr = xmlFragment.substring(0, strCharIndex) + newXmlNodeStr + xmlFragment.substring(strCharIndex);
476 return resultStr;
477 }
478
479 private int getCharIndex(String xmlFragment, int charPos) {
480 int size = xmlFragment.length();
481 int counter = 0;
482 int charCounter = 0;
483 int counterLastChar = -1;
484 boolean isEntity = false;
485 boolean isElement = false;
486 while (counter < size) {
487 char c = xmlFragment.charAt(counter);
488 switch (c) {
489 case '<': isElement = true; break;
490 case '>': isElement = false; break;
491 case '&': isEntity = true; break;
492 case ';': isEntity = false; break;
493 }
494 // count all chars which are not inside elements and entities
495 // if element closing char ">" is found it should not be counted as a char
496 // if an entity closing char ";" is found it should be counted cause the entity itself is one char long
497 if (! isEntity && ! isElement && !(c == '>')) {
498 charCounter++;
499 counterLastChar = counter;
500 }
501 if (charCounter == charPos) {
502 break;
503 }
504 counter++;
505 }
506 // input charPos was bigger than available chars: return the last available charPos
507 if (counter == size)
508 return counterLastChar + 1;
509 return counter + 1;
510 }
511
384 512
385 /** 513 /**
386 * <p> This will serialize a DOM <code>Node</code> to 514 * <p> This will serialize a DOM <code>Node</code> to
387 * the supplied <code>Writer</code>. </p> 515 * the supplied <code>Writer</code>. </p>
388 * 516 *