annotate src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java @ 79:b0aebac0780a

put statistics about number of entities, relations and attributes in xml dump. tags: entity-statistics, relation-statistics.
author casties
date Fri, 03 Mar 2017 18:59:20 +0100
parents a59984fd3c3f
children d4b456623d43
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package org.mpi.openmind.repository.utils;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 //JAXP
jurzua
parents:
diff changeset
4 import javax.xml.parsers.DocumentBuilder;
jurzua
parents:
diff changeset
5 import javax.xml.parsers.DocumentBuilderFactory;
jurzua
parents:
diff changeset
6 import javax.xml.parsers.ParserConfigurationException;
jurzua
parents:
diff changeset
7 import javax.xml.transform.*;
jurzua
parents:
diff changeset
8 import javax.xml.transform.dom.*;
jurzua
parents:
diff changeset
9 import javax.xml.transform.stream.*;
jurzua
parents:
diff changeset
10 import org.w3c.dom.Document;
jurzua
parents:
diff changeset
11 import org.xml.sax.SAXException;
jurzua
parents:
diff changeset
12 import java.io.*;
jurzua
parents:
diff changeset
13
jurzua
parents:
diff changeset
14 import org.w3c.dom.Node;
jurzua
parents:
diff changeset
15 import org.w3c.dom.NodeList;
jurzua
parents:
diff changeset
16
jurzua
parents:
diff changeset
17
jurzua
parents:
diff changeset
18
jurzua
parents:
diff changeset
/**
 * Static helpers for parsing, serializing and transforming XML documents,
 * together with the element and attribute names used when reading and
 * writing the XML formats of this project.
 *
 * <p>All parsing/serializing helpers report failures on standard output and
 * signal them to the caller through a {@code null} (or {@code false}) result
 * instead of throwing.</p>
 *
 * @author Jorge Urz\u00faa
 */
public class XMLUtil {

    /** Factory shared by {@link #transformateStringToDocument(String)}. */
    private static DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

    // NOTE(review): the fields below are used as constants but were declared
    // non-final; they are left non-final so that existing callers keep compiling.

    public static String OWN_VALUE_CONFIG = "own-value-configuration";
    public static String OWN_VALUE_RULES = "own-value-rules";
    public static String OWN_VALUE_RULE = "own-value-rule";
    public static String TARGET_RELATION = "target-relation";
    public static String SOURCE_RELATION = "source-relation";
    public static String SUBSTRING = "substring";
    public static String NAME = "name";
    public static String SOURCE = "source";
    public static String TARGET = "target";
    public static String END_NODE = "end-node";
    public static String XOR = "xor";
    public static String RULE = "rule";
    public static String REF = "ref";
    public static String VALUE = "value";
    public static String TYPE = "type";
    public static String CONSTANT = "constant";

    public static String PRINT_RULES = "print-rules";
    public static String PRINT_RULE = "print-rule";

    public static String OPENMIND_DATA = "openmind-data";
    public static String ENTITIES = "entities";
    public static String ENTITY = "entity";
    public static String ENTITY_ID = "id";
    public static String ATTRIBUTES = "attributes";
    public static String ATTRIBUTE = "attribute";
    public static String ATTRIBUTE_NAME = "name";
    public static String ATTRIBUTE_VALUE = "value";
    public static String RELATION_SOURCE_ID = "source-id";
    public static String RELATION_TARGET_ID = "target-id";
    public static String RELATION_ID = "id";
    public static String VIEW = "view";
    public static String VIEWS = "views";
    public static String MODIFICATION_TIME = "mtime";
    public static String VERSION = "version";
    public static String ID = "id";
    public static String ROW_ID = "row-id";
    public static String USER = "user";
    public static String PUBLIC = "public";
    public static String CONCEPTS = "concepts";
    public static String CONCEPT = "concept";
    public static String ASSERTIONS = "assertions";
    public static String ASSERTION = "assertion";
    public static String NORMALIZED = "norm";

    public static String ENTITY_STATS = "entity-statistics";
    public static String RELATION_STATS = "relation-statistics";
    public static String COUNT = "count";

    public static String META_DATA = "openmind-meta";
    public static String DEFINITIONS = "definitions";
    public static String DEFINITION = "definition";
    public static String LABEL = "label";
    public static String MAIN_LABEL = "main-label";
    public static String OBJECT_CLASS = "object-class";
    public static String OWN_VALUE = "own-value";
    public static String LABEL_NAME = "name";
    public static String RELATIONS = "relations";
    public static String RELATION = "relation";
    public static String RELATION_LABEL = "label";
    public static String RELATION_SOURCE = "source-class";
    public static String RELATION_TARGET = "target-class";
    public static String CONTENT_TYPE = "content-type";

    public static String INSERTION_MODE = "insertion-mode";
    public static String MERGE = "merge";
    public static String REPLACE = "replace";
    public static String INSERT = "insert";

    /**
     * Returns the first node of the list whose node name equals the given name.
     *
     * @param nodeList nodes to search; may be {@code null}
     * @param name node name to look for
     * @return the first matching node, or {@code null} if the list is
     *         {@code null} or contains no node with that name
     */
    public static Node getNodeByName(NodeList nodeList, String name) {
        if (nodeList == null) {
            return null;
        }
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node candidate = nodeList.item(i);
            if (candidate.getNodeName().equals(name)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Parses an XML file and returns the resulting document.
     *
     * @param fileName path of the XML file to parse
     * @return the parsed document, or {@code null} if the parser could not be
     *         configured, the file could not be read or it is not well-formed
     */
    public static Document getDocument(String fileName) {
        DocumentBuilder docBuilder = newDocumentBuilder();
        if (docBuilder == null) {
            return null;
        }
        try {
            return docBuilder.parse(new File(fileName));
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Parses an XML resource that is looked up by name through the context
     * class loader of the current thread.
     *
     * @param fileName resource name to look up
     * @return the parsed document, or {@code null} if the parser could not be
     *         configured, the resource does not exist, could not be read or is
     *         not well-formed
     */
    public static Document getDocumentFromPathContext(String fileName) {
        DocumentBuilder docBuilder = newDocumentBuilder();
        if (docBuilder == null) {
            return null;
        }

        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        InputStream stream = classLoader.getResourceAsStream(fileName);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null; handing that to
            // parse() would raise an IllegalArgumentException.
            System.out.println("Could not read source file: resource not found: " + fileName);
            return null;
        }
        try {
            return docBuilder.parse(stream);
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
        } finally {
            closeQuietly(stream);
        }
        return null;
    }

    /**
     * Saves an XML document into a file.
     *
     * @param fileName name of the file to write
     * @param doc XML document to save
     * @return {@code true} if the document was written, {@code false} if the
     *         transformer could not be created, the file could not be opened
     *         or the serialization failed
     */
    static public boolean saveXMLDocument(String fileName, Document doc) {
        // Create the transformer before touching the file, so that a
        // configuration problem does not leave a truncated file behind.
        Transformer transformer;
        try {
            transformer = TransformerFactory.newInstance().newTransformer();
        } catch (TransformerConfigurationException e) {
            System.out.println("Transformer configuration error: " + e.getMessage());
            return false;
        }

        FileOutputStream fos;
        try {
            fos = new FileOutputStream(new File(fileName));
        } catch (FileNotFoundException e) {
            System.out.println("Error occured: " + e.getMessage());
            return false;
        }

        boolean saved = false;
        try {
            // Transforming the DOM source into the stream result performs the save.
            transformer.transform(new DOMSource(doc), new StreamResult(fos));
            saved = true;
        } catch (TransformerException e) {
            System.out.println("Error transform: " + e.getMessage());
        } finally {
            closeQuietly(fos);
        }
        if (saved) {
            System.out.println("XML file saved.");
        }
        return saved;
    }

    /**
     * Serializes a document into a string using an identity transformation.
     *
     * @param doc document to serialize
     * @return the XML text, or {@code null} if the serialization failed
     */
    public static String transformateDocumentToString(Document doc) {
        try {
            StringWriter writer = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
            return writer.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Instance variant of {@link #transformateDocumentToString(Document)}.
     *
     * @param doc document to serialize
     * @return the XML text, or {@code null} if the serialization failed
     */
    public String ptransformateDocumentToString(Document doc) {
        return transformateDocumentToString(doc);
    }

    /**
     * Parses XML text into a document. The text is handed to the parser as
     * UTF-8 encoded bytes.
     *
     * @param text XML text to parse; may be {@code null}
     * @return the parsed document, or {@code null} if the text is
     *         {@code null} or could not be parsed
     */
    public static Document transformateStringToDocument(String text) {
        if (text == null) {
            return null;
        }
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            InputStream stream = new ByteArrayInputStream(text.getBytes("UTF-8"));
            return db.parse(stream);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Transforms the input document with the given stylesheet.
     *
     * @param xsl stylesheet document used to transform the input document
     * @param input document to be transformed
     * @return the document produced by the stylesheet, or {@code null} if the
     *         transformation failed
     */
    public static Document xmlToXmlTransformation(Document xsl, Document input) {
        try {
            TransformerFactory factory = TransformerFactory.newInstance();

            // The stylesheet is serialized and re-read as a stream rather than
            // passed as a DOMSource.
            // NOTE(review): presumably because the stylesheet DOM is not
            // namespace-aware - confirm before switching to DOMSource.
            InputStream stream = new ByteArrayInputStream(
                    XMLUtil.transformateDocumentToString(xsl).getBytes("UTF-8"));
            Transformer transformer = factory.newTransformer(new StreamSource(stream));
            DOMResult result = new DOMResult();
            transformer.transform(new DOMSource(input), result);

            return (Document) result.getNode();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Creates a document builder from a fresh factory that has
     * {@code ignoringElementContentWhitespace} switched on.
     *
     * @return the builder, or {@code null} if the parser configuration is invalid
     */
    private static DocumentBuilder newDocumentBuilder() {
        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        docBuilderFactory.setIgnoringElementContentWhitespace(true);
        try {
            return docBuilderFactory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            System.out.println("Wrong parser configuration: " + e.getMessage());
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the resource, reporting (but not propagating) a failure to close. */
    private static void closeQuietly(Closeable resource) {
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println("Could not close stream: " + e.getMessage());
        }
    }
}