annotate src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java @ 2:1c2b4f5e2c05

linnaeus for finding species
author jdamerow
date Mon, 22 Oct 2012 13:46:54 -0700
parents 036535fcd179
children ae96e4bc7fb2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.xml.impl;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.io.IOException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.io.InputStream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.io.StringWriter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import org.jdom2.Element;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import org.jdom2.output.XMLOutputter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.IXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 public class AnalysisXMLManager extends JDOMParser {
036535fcd179 anteater
jdamerow
parents:
diff changeset
17
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 private String analysisFilepath;
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 public AnalysisXMLManager(String analysisFilepath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 super();
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 this.analysisFilepath = analysisFilepath;
036535fcd179 anteater
jdamerow
parents:
diff changeset
23
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 File analysisFile = new File(analysisFilepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 if (analysisFile.exists())
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 init(analysisFilepath);
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 else {
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 InputStream stream = getClass().getResourceAsStream("templates/analysisFile.xml");
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 init(stream);
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 public List<String> getSummaryTexts() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 return getResults("/analysis/texts/summaries/summary");
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 public List<String> getSupplementaryInfoTexts() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 return getResults("/analysis/texts/supplementary_information/supplInfo");
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
40
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 public List<String> getSummaryNamesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 return getResults("/analysis/scientificNames/summaries/result");
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
44
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 public List<String> getSupplementaryInfoNamesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 return getResults("/analysis/scientificNames/supplementary_information/result");
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
2
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
49 public List<String> getSummaryCommonNamesResults() {
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
50 return getResults("/analysis/commonNames/summaries/result");
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
51 }
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
52
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
53 public List<String> getSupplementaryInfoCommonNamesResults() {
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
54 return getResults("/analysis/commonNames/supplementary_information/result");
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
55 }
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
56
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 public List<String> getSummaryPlacesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 return getResultsViaParent("/analysis/places/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
60
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 public List<String> getSupplementaryInfoPlacesResults() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 return getResultsViaParent("/analysis/places/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 public List<String> getSummaryPersonsResult() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 return getResults("/analysis/persons/summaries/stanford_ner");
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
68
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 public List<String> getSupplementaryInfoPersonsResult() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 return getResults("/analysis/persons/supplementary_information/stanford_ner");
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
72
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 public List<String> getResults(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 if (results != null && !results.isEmpty())
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 return getResultsString(results);
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
79
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 public List<String> getResultsContents(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 if (results != null && !results.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 List<String> resultStrings = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 for (Element elem : results) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 resultStrings.add(elem.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 return resultStrings;
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
91
036535fcd179 anteater
jdamerow
parents:
diff changeset
92 public List<String> getResultsViaParent(String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 List<Element> results = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 if (results != null && !results.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 Element parent = results.get(0);
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 List<Element> children = parent.getChildren();
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 return getResultsString(children);
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 return new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
101
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 protected List<String> getResultsString(List<Element> roots) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 List<String> results = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
104
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 for (Element rootNode : roots) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
106 XMLOutputter output = new XMLOutputter();
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 StringWriter writer = new StringWriter();
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 output.output(rootNode, writer);
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 } catch (IOException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 results.add(writer.toString());
036535fcd179 anteater
jdamerow
parents:
diff changeset
115 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
116
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
119
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 public void addSummaryNamesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 addElementToDoc(result, "/analysis/scientificNames/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
123
036535fcd179 anteater
jdamerow
parents:
diff changeset
124 public void addSupplInfNamesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
125 addElementToDoc(result, "/analysis/scientificNames/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
127
036535fcd179 anteater
jdamerow
parents:
diff changeset
128 private void addElementToDoc(String result, String xpath) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
129 IXMLParser parser = new JDOMParser(result, false);
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 Element root = parser.getRoot();
036535fcd179 anteater
jdamerow
parents:
diff changeset
131
036535fcd179 anteater
jdamerow
parents:
diff changeset
132 List<Element> nodes = executeXPath(xpath, null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
133
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 if (nodes != null && nodes.size() > 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
135 nodes.get(0).addContent(root.clone());
036535fcd179 anteater
jdamerow
parents:
diff changeset
136 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
137
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 save(analysisFilepath, true);
036535fcd179 anteater
jdamerow
parents:
diff changeset
139 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
140
036535fcd179 anteater
jdamerow
parents:
diff changeset
141 public void addSummaryPlacesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
142 addElementToDoc(result, "/analysis/places/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
144
036535fcd179 anteater
jdamerow
parents:
diff changeset
145 public void addSupplInfPlacesResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
146 addElementToDoc(result, "/analysis/places/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
147 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
148
036535fcd179 anteater
jdamerow
parents:
diff changeset
149 public void addSummaryPersonsResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
150 addElementToDoc(result, "/analysis/persons/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
151 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
152
036535fcd179 anteater
jdamerow
parents:
diff changeset
153 public void addSupplInfPersonsResult(String result) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
154 addElementToDoc(result, "/analysis/persons/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
155 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
156
036535fcd179 anteater
jdamerow
parents:
diff changeset
157 public void addSummaryText(String text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
158 addElementToDoc(text, "/analysis/texts/summaries");
036535fcd179 anteater
jdamerow
parents:
diff changeset
159 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
160
036535fcd179 anteater
jdamerow
parents:
diff changeset
161 public void addSupplInfText(String text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
162 addElementToDoc(text, "/analysis/texts/supplementary_information");
036535fcd179 anteater
jdamerow
parents:
diff changeset
163 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
164 }