0
|
1 package de.mpiwg.anteater.results;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.util.ArrayList;
|
|
5 import java.util.Collections;
|
|
6 import java.util.Comparator;
|
|
7 import java.util.List;
|
|
8 import java.util.Map;
|
|
9 import java.util.Stack;
|
|
10
|
|
11 import org.apache.commons.lang3.StringEscapeUtils;
|
|
12
|
|
13 import de.mpiwg.anteater.AnteaterConfiguration;
|
|
14 import de.mpiwg.anteater.results.filter.FilterController;
|
|
15 import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
|
|
16 import de.mpiwg.anteater.results.impl.ApplicantResultManager;
|
|
17 import de.mpiwg.anteater.results.impl.LocationResultFinder;
|
|
18 import de.mpiwg.anteater.results.impl.LocationResultManager;
|
|
19 import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
|
|
20 import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
|
|
21 import de.mpiwg.anteater.text.Paragraph;
|
|
22 import de.mpiwg.anteater.text.TextInformation;
|
|
23 import de.mpiwg.anteater.text.TextPart;
|
|
24 import de.mpiwg.anteater.xml.IResultFileManager;
|
|
25 import de.mpiwg.anteater.xml.impl.ResultXMLManager;
|
|
26
|
|
27 public class ResultController {
|
|
28
|
|
29 public final static String SUMMARY_TAG = "<summary>";
|
|
30 public final static String SUMMARY_TAG_CLOSE = "</summary>";
|
|
31 public final static String SUPPLINFO_TAG = "<supplInfo>";
|
|
32 public final static String SUPPLINFO_TAG_CLOSE = "</supplInfo>";
|
|
33
|
|
34 public final static String COMPONENT_NAME = ResultController.class.getSimpleName();
|
|
35
|
|
36 private AnteaterConfiguration configuration;
|
|
37
|
|
38 public ResultController(AnteaterConfiguration configuration) {
|
|
39 this.configuration = configuration;
|
|
40 }
|
|
41
|
|
42 public List<ResultsCarrier> saveResults(List<TextInformation> infos) {
|
|
43
|
|
44 String resultFolder = configuration.getResultPath();
|
|
45 List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();
|
|
46
|
|
47 ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
|
|
48 ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
|
|
49 LocationResultManager locationManager = new LocationResultManager(configuration);
|
|
50
|
|
51 // find applicants
|
|
52 IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
|
|
53 List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
|
|
54 Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants);
|
|
55
|
|
56 // find species
|
|
57 IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
|
|
58 List<SpeciesScientificResult> names = namesFinder.getResults(infos);
|
|
59 Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names);
|
|
60
|
|
61 // find locations
|
|
62 IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants);
|
|
63 List<LocationResult> locations = locationFinder.getResults(infos);
|
|
64 Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations);
|
|
65
|
|
66 FilterController filterController = new FilterController();
|
|
67
|
|
68 for (TextInformation info : infos) {
|
|
69 File textfile = new File(info.getFilepath());
|
|
70 String resultFile = resultFolder + File.separator + textfile.getName();
|
|
71 IResultFileManager resultManager = new ResultXMLManager(resultFile);
|
|
72
|
|
73 // annotate summaries
|
|
74 List<TextPart> summaries = info.getSummaries();
|
|
75 List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info));
|
|
76 List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info));
|
|
77 List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));
|
|
78
|
|
79 filterController.runFilters(info, applicantResults, namesResults, locResults);
|
|
80
|
|
81 // create carrier
|
|
82 ResultsCarrier carrier = new ResultsCarrier(info, applicantResults, namesResults, locResults);
|
|
83 resultCarriers.add(carrier);
|
|
84
|
|
85 for (TextPart sum : summaries) {
|
|
86
|
|
87 int textIndex = sum.getTextIdx();
|
|
88 List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
|
|
89 tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
|
|
90 tags.addAll(locationManager.getSummaryTags(locResults, textIndex));
|
|
91 tags.addAll(getParagraphTags(sum));
|
|
92
|
|
93 StringBuffer annotatedSummary = new StringBuffer(sum.getText());
|
|
94 annotatedSummary = tagText(annotatedSummary, tags);
|
|
95
|
|
96 resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE);
|
|
97 }
|
|
98
|
|
99 // annotate supplementary information
|
|
100 List<TextPart> suppleInfs = info.getSupplInfos();
|
|
101
|
|
102 for (TextPart suppleInf : suppleInfs) {
|
|
103 int textIndex = suppleInf.getTextIdx();
|
|
104 List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
|
|
105 tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
|
|
106 tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));
|
|
107 tags.addAll(getParagraphTags(suppleInf));
|
|
108
|
|
109 StringBuffer annotatedSuppleInf = new StringBuffer(suppleInf.getText());
|
|
110 annotatedSuppleInf = tagText(annotatedSuppleInf, tags);
|
|
111
|
|
112 resultManager.addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE);
|
|
113 }
|
|
114 }
|
|
115
|
|
116 return resultCarriers;
|
|
117 }
|
|
118
|
|
119
|
|
120 protected void checkTags(List<AnnotationTag> tags) {
|
|
121 AnnotationTag lastTag = null;
|
|
122
|
|
123 List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();
|
|
124 for (AnnotationTag tag : tags) {
|
|
125 if (lastTag == null)
|
|
126 {
|
|
127 lastTag = tag;
|
|
128 continue;
|
|
129 }
|
|
130 if ((tag.getClosingPosition() < lastTag.getPosition()) || (tag.getPosition() >= lastTag.getPosition() && tag.getClosingPosition() <= lastTag.getClosingPosition())) {
|
|
131 lastTag = tag;
|
|
132 continue;
|
|
133 }
|
|
134 if (tag.getTag().startsWith("<p"))
|
|
135 toBeRemoved.add(lastTag);
|
|
136 else {
|
|
137 lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
|
|
138 lastTag.setClosingTag("</error>");
|
|
139 lastTag.setPosition(tag.getClosingPosition());
|
|
140 }
|
|
141 }
|
|
142
|
|
143 tags.removeAll(toBeRemoved);
|
|
144 }
|
|
145
|
|
146 protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
|
|
147 Collections.sort(tags, new Comparator<AnnotationTag>() {
|
|
148
|
|
149 @Override
|
|
150 public int compare(AnnotationTag o1, AnnotationTag o2) {
|
|
151 if (o2.getClosingPosition() - o1.getClosingPosition() != 0)
|
|
152 return o2.getClosingPosition() - o1.getClosingPosition();
|
|
153 return o1.getPosition() - o2.getPosition();
|
|
154 }
|
|
155 });
|
|
156
|
|
157 checkTags(tags);
|
|
158
|
|
159 Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
|
|
160 StringBuffer finalText = new StringBuffer();
|
|
161
|
|
162 int end = text.length();
|
|
163 for (AnnotationTag tag : tags) {
|
|
164 if (text.length() >= tag.getPosition()) {
|
|
165 while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
|
|
166 AnnotationTag topTag = tagStack.pop();
|
|
167 if (topTag.getPosition() < end) {
|
|
168 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
|
|
169 }
|
|
170 finalText.insert(0, topTag.getTag());
|
|
171 end = topTag.getPosition();
|
|
172 // text.insert(topTag.getPosition(), topTag.getTag());
|
|
173 }
|
|
174 if (tag.getClosingPosition() < end) {
|
|
175 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end)));
|
|
176 }
|
|
177 finalText.insert(0, tag.getClosingTag());
|
|
178 end = tag.getClosingPosition();
|
|
179 //text.insert(tag.getClosingPosition(), tag.getClosingTag());
|
|
180 tagStack.push(tag);
|
|
181 }
|
|
182 else
|
|
183 configuration.getLogger().logMessage(COMPONENT_NAME,"Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
|
|
184 }
|
|
185 while (!tagStack.isEmpty()) {
|
|
186 AnnotationTag topTag = tagStack.pop();
|
|
187 //text.insert(topTag.getPosition(), topTag.getTag());
|
|
188 if (topTag.getPosition() < end) {
|
|
189 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
|
|
190 }
|
|
191 finalText.insert(0, topTag.getTag());
|
|
192 end = topTag.getPosition();
|
|
193 }
|
|
194 finalText.insert(0, text.substring(0, end));
|
|
195 return finalText;
|
|
196 }
|
|
197
|
|
198
|
|
199 protected List<AnnotationTag> getParagraphTags(TextPart part) {
|
|
200 int counter = 0;
|
|
201 List<AnnotationTag> tags = new ArrayList<AnnotationTag>();
|
|
202
|
|
203 for (Paragraph para : part.getParagraphsOfInterest()) {
|
|
204 AnnotationTag opentag = new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", counter, "</p>", counter + para.getParagraphText().length(), "paragraph");
|
|
205 tags.add(opentag);
|
|
206 // add 1 for "\n"
|
|
207 counter += para.getParagraphText().length() + 1;
|
|
208 }
|
|
209
|
|
210 return tags;
|
|
211 }
|
|
212
|
|
213 }
|