annotate src/de/mpiwg/anteater/xml/impl/JDOMParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.xml.impl;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.io.FileNotFoundException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.io.FileOutputStream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.io.IOException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import java.io.InputStream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import java.io.OutputStreamWriter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import java.io.StringReader;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import java.nio.charset.Charset;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import org.jdom2.Content;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 import org.jdom2.Document;
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 import org.jdom2.Element;
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 import org.jdom2.JDOMException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 import org.jdom2.Namespace;
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 import org.jdom2.Text;
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 import org.jdom2.filter.ElementFilter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 import org.jdom2.input.SAXBuilder;
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 import org.jdom2.output.Format;
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 import org.jdom2.output.XMLOutputter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 import org.jdom2.xpath.XPathExpression;
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 import org.jdom2.xpath.XPathFactory;
036535fcd179 anteater
jdamerow
parents:
diff changeset
26
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 import de.mpiwg.anteater.xml.IXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
28
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 public class JDOMParser implements IXMLParser {
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 private Document doc;
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 public JDOMParser(String pathOrContent, boolean loadFromFile) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 if (loadFromFile) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 init(pathOrContent);
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 } else {
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 SAXBuilder builder = new SAXBuilder();
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 doc = builder.build(new StringReader(pathOrContent));
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 } catch (JDOMException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
50
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 public JDOMParser(InputStream stream) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 init(stream);
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
54
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 public JDOMParser() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 doc = new Document();
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
58
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 protected void init(String filepath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 SAXBuilder builder = new SAXBuilder();
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 File file = new File(filepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 if (file.exists()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
64 doc = builder.build(file);
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 } catch (JDOMException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
74
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 protected void init(InputStream stream) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 SAXBuilder builder = new SAXBuilder();
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 doc = builder.build(stream);
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 } catch (JDOMException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
87
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 public Element getRoot() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 return doc.getRootElement();
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
92
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 public List<Element> executeXPath(String xpath, String defaultNSPrefix) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 if (doc == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 return null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
97
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 XPathFactory factory = XPathFactory.instance();
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 // get namespaces but remove empty prefix to prevent xpath exception
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 List<Namespace> namespaces = doc.getRootElement()
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 .getNamespacesInScope();
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 List<Namespace> finalNamespaces = new ArrayList<Namespace>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 for (Namespace ns : namespaces) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 if (!ns.getPrefix().isEmpty())
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 finalNamespaces.add(ns);
036535fcd179 anteater
jdamerow
parents:
diff changeset
106 else if (!ns.getURI().trim().isEmpty() && defaultNSPrefix != null && !defaultNSPrefix.trim().isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 finalNamespaces.add(Namespace.getNamespace(defaultNSPrefix, ns.getURI()));
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 List<Element> results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 // evaluate xpath
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 XPathExpression<Element> expression = factory.compile(xpath,
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 new ElementFilter(), null, finalNamespaces);
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 results = expression.evaluate(doc.getRootElement());
036535fcd179 anteater
jdamerow
parents:
diff changeset
115
036535fcd179 anteater
jdamerow
parents:
diff changeset
116 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
118
036535fcd179 anteater
jdamerow
parents:
diff changeset
119 protected String stripText(Element node) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 StringBuffer text = new StringBuffer();
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 stripTextRecursivie(node, text);
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 return text.toString().trim();
036535fcd179 anteater
jdamerow
parents:
diff changeset
123 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
124
036535fcd179 anteater
jdamerow
parents:
diff changeset
125 protected void stripTextRecursivie(Element node, StringBuffer text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 for (Content child : node.getContent()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
127 if (child instanceof Text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
128 text.append(((Text) child).getText() != null ? ((Text) child)
036535fcd179 anteater
jdamerow
parents:
diff changeset
129 .getTextNormalize() + " " : "");
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 } else if (child instanceof Element) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
131 stripTextRecursivie((Element) child, text);
036535fcd179 anteater
jdamerow
parents:
diff changeset
132 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
133 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
135
036535fcd179 anteater
jdamerow
parents:
diff changeset
136 public boolean save(String filepath, boolean prettyprint) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
137 XMLOutputter out = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 if (prettyprint)
036535fcd179 anteater
jdamerow
parents:
diff changeset
139 out = new XMLOutputter(Format.getPrettyFormat());
036535fcd179 anteater
jdamerow
parents:
diff changeset
140 else
036535fcd179 anteater
jdamerow
parents:
diff changeset
141 out = new XMLOutputter();
036535fcd179 anteater
jdamerow
parents:
diff changeset
142 //FileWriter writer;
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 FileOutputStream stream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
144 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
145 stream = new FileOutputStream(new File(filepath));
036535fcd179 anteater
jdamerow
parents:
diff changeset
146 } catch (FileNotFoundException e1) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
147 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
148 e1.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
149 return false;
036535fcd179 anteater
jdamerow
parents:
diff changeset
150 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
151 OutputStreamWriter osWriter = new OutputStreamWriter(stream, Charset.forName("UTF-8"));
036535fcd179 anteater
jdamerow
parents:
diff changeset
152 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
153 //writer = new FileWriter(filepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
154 out.output(doc, osWriter);
036535fcd179 anteater
jdamerow
parents:
diff changeset
155 osWriter.flush();
036535fcd179 anteater
jdamerow
parents:
diff changeset
156 osWriter.close();
036535fcd179 anteater
jdamerow
parents:
diff changeset
157 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
158 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
159 return false;
036535fcd179 anteater
jdamerow
parents:
diff changeset
160 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
161 return true;
036535fcd179 anteater
jdamerow
parents:
diff changeset
162 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
163 }