annotate src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children 1c2b4f5e2c05
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.xml.impl;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.io.IOException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.io.InputStream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.io.StringWriter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import org.jdom2.Element;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import org.jdom2.output.XMLOutputter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.IXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 public class AnalysisXMLManager extends JDOMParser {
036535fcd179 anteater
jdamerow
parents:
diff changeset
17
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 private String analysisFilepath;
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 public AnalysisXMLManager(String analysisFilepath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 super();
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 this.analysisFilepath = analysisFilepath;
036535fcd179 anteater
jdamerow
parents:
diff changeset
23
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 File analysisFile = new File(analysisFilepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 if (analysisFile.exists())
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 init(analysisFilepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 else {
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 InputStream stream = getClass().getResourceAsStream("templates/analysisFile.xml");
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 init(stream);
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 public List<String> getSummaryTexts() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 return getResults("/analysis/texts/summaries/summary");
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 public List<String> getSupplementaryInfoTexts() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 return getResults("/analysis/texts/supplementary_information/supplInfo");
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
40
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 public List<String> getSummaryNamesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 return getResults("/analysis/scientificNames/summaries/result");
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
44
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 public List<String> getSupplementaryInfoNamesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 return getResults("/analysis/scientificNames/supplementary_information/result");
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 public List<String> getSummaryPlacesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 return getResultsViaParent("/analysis/places/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 public List<String> getSupplementaryInfoPlacesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 return getResultsViaParent("/analysis/places/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
56
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 public List<String> getSummaryPersonsResult() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 return getResults("/analysis/persons/summaries/stanford_ner");
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
60
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 public List<String> getSupplementaryInfoPersonsResult() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 return getResults("/analysis/persons/supplementary_information/stanford_ner");
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 public List<String> getResults(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 if (results != null && !results.isEmpty())
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 return getResultsString(results);
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
71
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 public List<String> getResultsContents(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 if (results != null && !results.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 List<String> resultStrings = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 for (Element elem : results) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 resultStrings.add(elem.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 return resultStrings;
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
83
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 public List<String> getResultsViaParent(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 if (results != null && !results.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 Element parent = results.get(0);
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 List<Element> children = parent.getChildren();
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 return getResultsString(children);
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
92 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
93
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 protected List<String> getResultsString(List<Element> roots) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 List<String> results = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
96
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 for (Element rootNode : roots) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 XMLOutputter output = new XMLOutputter();
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 StringWriter writer = new StringWriter();
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 output.output(rootNode, writer);
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
106 results.add(writer.toString());
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
108
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
111
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 public void addSummaryNamesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 addElementToDoc(result, "/analysis/scientificNames/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
115
036535fcd179 anteater
jdamerow
parents:
diff changeset
116 public void addSupplInfNamesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 addElementToDoc(result, "/analysis/scientificNames/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
119
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 private void addElementToDoc(String result, String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 IXMLParser parser = new JDOMParser(result, false);
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 Element root = parser.getRoot();
036535fcd179 anteater
jdamerow
parents:
diff changeset
123
036535fcd179 anteater
jdamerow
parents:
diff changeset
124 List<Element> nodes = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
125
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 if (nodes != null && nodes.size() > 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
127 nodes.get(0).addContent(root.clone());
036535fcd179 anteater
jdamerow
parents:
diff changeset
128 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
129
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 save(analysisFilepath, true);
036535fcd179 anteater
jdamerow
parents:
diff changeset
131 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
132
036535fcd179 anteater
jdamerow
parents:
diff changeset
133 public void addSummaryPlacesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 addElementToDoc(result, "/analysis/places/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
135 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
136
036535fcd179 anteater
jdamerow
parents:
diff changeset
137 public void addSupplInfPlacesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 addElementToDoc(result, "/analysis/places/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
139 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
140
036535fcd179 anteater
jdamerow
parents:
diff changeset
141 public void addSummaryPersonsResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
142 addElementToDoc(result, "/analysis/persons/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
144
036535fcd179 anteater
jdamerow
parents:
diff changeset
145 public void addSupplInfPersonsResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
146 addElementToDoc(result, "/analysis/persons/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
147 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
148
036535fcd179 anteater
jdamerow
parents:
diff changeset
149 public void addSummaryText(String text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
150 addElementToDoc(text, "/analysis/texts/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
151 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
152
036535fcd179 anteater
jdamerow
parents:
diff changeset
153 public void addSupplInfText(String text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
154 addElementToDoc(text, "/analysis/texts/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
155 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
156 }