annotate src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java @ 127:3e772f7f43e0 default tip

ismi-date with long month names in xml dump.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Thu, 11 May 2023 18:15:45 +0200
parents 8d79021099a4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package org.mpi.openmind.repository.utils;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 //JAXP
jurzua
parents:
diff changeset
4 import javax.xml.parsers.DocumentBuilder;
jurzua
parents:
diff changeset
5 import javax.xml.parsers.DocumentBuilderFactory;
jurzua
parents:
diff changeset
6 import javax.xml.parsers.ParserConfigurationException;
jurzua
parents:
diff changeset
7 import javax.xml.transform.*;
jurzua
parents:
diff changeset
8 import javax.xml.transform.dom.*;
jurzua
parents:
diff changeset
9 import javax.xml.transform.stream.*;
jurzua
parents:
diff changeset
10 import org.w3c.dom.Document;
jurzua
parents:
diff changeset
11 import org.xml.sax.SAXException;
jurzua
parents:
diff changeset
12 import java.io.*;
jurzua
parents:
diff changeset
13
jurzua
parents:
diff changeset
14 import org.w3c.dom.Node;
jurzua
parents:
diff changeset
15 import org.w3c.dom.NodeList;
jurzua
parents:
diff changeset
16
jurzua
parents:
diff changeset
17
jurzua
parents:
diff changeset
18
jurzua
parents:
diff changeset
/**
 * Static helpers for reading, writing, serializing and transforming XML
 * documents, plus the string constants used as names in the OpenMind
 * XML formats.
 *
 * <p>All methods report failures by printing to standard output / standard
 * error and returning {@code null} or {@code false}; none of them propagate
 * checked exceptions to the caller.</p>
 *
 * @author Jorge Urzúa
 */
public class XMLUtil {

    // Shared factory used by transformateStringToDocument(String).
    private static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

    // NOTE(review): the constants below are deliberately left non-final so the
    // public interface of this class stays exactly as callers know it.
    // They are presumably XML element/attribute names; their usage lies
    // outside this file.

    // Own-value rule configuration.
    public static String OWN_VALUE_CONFIG = "own-value-configuration";
    public static String OWN_VALUE_RULES = "own-value-rules";
    public static String OWN_VALUE_RULE = "own-value-rule";
    public static String TARGET_RELATION = "target-relation";
    public static String SOURCE_RELATION = "source-relation";
    public static String SUBSTRING = "substring";
    public static String NAME = "name";
    public static String SOURCE = "source";
    public static String TARGET = "target";
    public static String END_NODE = "end-node";
    public static String XOR = "xor";
    public static String RULE = "rule";
    public static String REF = "ref";
    public static String VALUE = "value";
    public static String TYPE = "type";
    public static String CONSTANT = "constant";

    // Print rules.
    public static String PRINT_RULES = "print-rules";
    public static String PRINT_RULE = "print-rule";

    // Data section.
    public static String OPENMIND_DATA = "openmind-data";
    public static String ENTITIES = "entities";
    public static String ENTITY = "entity";
    public static String ENTITY_ID = "id";
    public static String ATTRIBUTES = "attributes";
    public static String ATTRIBUTE = "attribute";
    public static String ATTRIBUTE_NAME = "name";
    public static String ATTRIBUTE_VALUE = "value";
    public static String RELATION_SOURCE_ID = "source-id";
    public static String RELATION_TARGET_ID = "target-id";
    public static String RELATION_ID = "id";
    public static String VIEW = "view";
    public static String VIEWS = "views";
    public static String MODIFICATION_TIME = "mtime";
    public static String VERSION = "version";
    public static String ID = "id";
    public static String ROW_ID = "row-id";
    public static String USER = "user";
    public static String PUBLIC = "public";
    public static String CONCEPTS = "concepts";
    public static String CONCEPT = "concept";
    public static String ASSERTIONS = "assertions";
    public static String ASSERTION = "assertion";
    public static String NORMALIZED = "norm";
    public static String STATUS = "status";

    // Statistics.
    public static String ENTITY_STATS = "entity-statistics";
    public static String RELATION_STATS = "relation-statistics";
    public static String COUNT = "count";

    // Meta-data / definitions section.
    public static String META_DATA = "openmind-meta";
    public static String DEFINITIONS = "definitions";
    public static String DEFINITION = "definition";
    public static String LABEL = "label";
    public static String MAIN_LABEL = "main-label";
    public static String OBJECT_CLASS = "object-class";
    public static String OWN_VALUE = "own-value";
    public static String LABEL_NAME = "name";
    public static String RELATIONS = "relations";
    public static String INVRELATIONS = "invrelations";
    public static String RELATION = "relation";
    public static String RELATION_LABEL = "label";
    public static String RELATION_SOURCE = "source-class";
    public static String RELATION_TARGET = "target-class";
    public static String CONTENT_TYPE = "content-type";

    // Insertion modes.
    public static String INSERTION_MODE = "insertion-mode";
    public static String MERGE = "merge";
    public static String REPLACE = "replace";
    public static String INSERT = "insert";

    // Additional tags.
    public static String ISODATE = "isodate";
    public static String BIBID = "bibid";
    public static String ISMIDATE = "ismi-date";

    /**
     * Returns the first node of the list whose node name equals the given name.
     *
     * @param nodeList list to search; {@code null} is treated as an empty list
     * @param name node name to look for
     * @return the first matching node, or {@code null} if there is none
     */
    public static Node getNodeByName(NodeList nodeList, String name) {
        if (nodeList == null) {
            return null;
        }
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node candidate = nodeList.item(i);
            if (candidate.getNodeName().equals(name)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Creates a fresh document builder with "ignore element content
     * whitespace" switched on.
     *
     * <p>Per the JAXP documentation that setting only takes effect when the
     * parser is validating, which this class never enables.</p>
     *
     * @return a new builder, or {@code null} if the parser configuration is
     *         not supported (the error is printed)
     */
    private static DocumentBuilder newDocumentBuilder() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringElementContentWhitespace(true);
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            System.out.println("Wrong parser configuration: " + e.getMessage());
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses an XML file and returns the XML document.
     *
     * @param fileName path of the XML file to parse
     * @return the parsed document, or {@code null} if an error occurred
     */
    public static Document getDocument(String fileName) {
        DocumentBuilder docBuilder = newDocumentBuilder();
        if (docBuilder == null) {
            return null;
        }
        try {
            return docBuilder.parse(new File(fileName));
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Looks up a resource with the given name through the context class
     * loader of the current thread and parses it as XML.
     *
     * @param fileName resource name, resolved by the context class loader
     * @return the parsed document, or {@code null} if the resource does not
     *         exist or could not be parsed
     */
    public static Document getDocumentFromPathContext(String fileName) {
        DocumentBuilder docBuilder = newDocumentBuilder();
        if (docBuilder == null) {
            return null;
        }

        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        InputStream stream = classLoader.getResourceAsStream(fileName);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null; handing that
            // to parse() would raise an unchecked IllegalArgumentException.
            System.out.println("Could not read source file: " + fileName);
            return null;
        }
        try {
            return docBuilder.parse(stream);
        } catch (SAXException e) {
            System.out.println("Wrong XML file structure: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("Could not read source file: " + e.getMessage());
            e.printStackTrace();
        } finally {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return null;
    }

    /**
     * Saves an XML document into a file.
     *
     * <p>The output stream is always closed, and the method only reports
     * success when the document was actually written.</p>
     *
     * @param fileName name of the XML file to write
     * @param doc XML document to save
     * @return {@code true} if the document was written, {@code false} otherwise
     */
    static public boolean saveXMLDocument(String fileName, Document doc) {
        FileOutputStream fos;
        try {
            fos = new FileOutputStream(new File(fileName));
        } catch (FileNotFoundException e) {
            System.out.println("Error occured: " + e.getMessage());
            return false;
        }

        boolean saved = false;
        try {
            // Use a Transformer for output; transforming source into result does the save.
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(fos));
            saved = true;
        } catch (TransformerConfigurationException e) {
            System.out.println("Transformer configuration error: " + e.getMessage());
        } catch (TransformerException e) {
            System.out.println("Error transform: " + e.getMessage());
        } finally {
            try {
                fos.close();
            } catch (IOException e) {
                // A failed close may mean the content never reached the disk.
                e.printStackTrace();
                saved = false;
            }
        }

        if (saved) {
            System.out.println("XML file saved.");
        }
        return saved;
    }

    /**
     * Serializes a document to a string using a default (identity) transformer.
     *
     * @param doc document to serialize
     * @return the serialized XML, or {@code null} if serialization failed
     */
    public static String transformateDocumentToString(Document doc) {
        try {
            StringWriter writer = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
            return writer.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Instance variant of {@link #transformateDocumentToString(Document)};
     * it produces exactly the same result.
     *
     * @param doc document to serialize
     * @return the serialized XML, or {@code null} if serialization failed
     */
    public String ptransformateDocumentToString(Document doc) {
        return transformateDocumentToString(doc);
    }

    /**
     * Parses XML text into a document. The text is encoded as UTF-8 before
     * it is handed to the parser.
     *
     * @param text XML text to parse
     * @return the parsed document, or {@code null} if parsing failed
     */
    public static Document transformateStringToDocument(String text) {
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            InputStream stream = new ByteArrayInputStream(text.getBytes("UTF-8"));
            return db.parse(stream);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Transforms the input document using the given stylesheet.
     *
     * @param xsl the stylesheet used to transform the input document
     * @param input the document to be transformed
     * @return the document produced by the stylesheet, or {@code null} if
     *         the transformation failed
     */
    public static Document xmlToXmlTransformation(Document xsl, Document input) {
        try {
            TransformerFactory factory = TransformerFactory.newInstance();

            // The stylesheet is serialized and re-read as a stream instead of
            // being passed as a DOMSource.
            // NOTE(review): presumably because the stylesheet DOM is not
            // namespace-aware - confirm before simplifying.
            InputStream stream = new ByteArrayInputStream(
                    XMLUtil.transformateDocumentToString(xsl).getBytes("UTF-8"));
            Transformer transformer = factory.newTransformer(new StreamSource(stream));

            DOMResult result = new DOMResult();
            transformer.transform(new DOMSource(input), result);

            return (Document) result.getNode();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}