comparison src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 package de.mpiwg.anteater.results;
2
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Comparator;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Stack;
10
11 import org.apache.commons.lang3.StringEscapeUtils;
12
13 import de.mpiwg.anteater.AnteaterConfiguration;
14 import de.mpiwg.anteater.results.filter.FilterController;
15 import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
16 import de.mpiwg.anteater.results.impl.ApplicantResultManager;
17 import de.mpiwg.anteater.results.impl.LocationResultFinder;
18 import de.mpiwg.anteater.results.impl.LocationResultManager;
19 import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
20 import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
21 import de.mpiwg.anteater.text.Paragraph;
22 import de.mpiwg.anteater.text.TextInformation;
23 import de.mpiwg.anteater.text.TextPart;
24 import de.mpiwg.anteater.xml.IResultFileManager;
25 import de.mpiwg.anteater.xml.impl.ResultXMLManager;
26
27 public class ResultController {
28
29 public final static String SUMMARY_TAG = "<summary>";
30 public final static String SUMMARY_TAG_CLOSE = "</summary>";
31 public final static String SUPPLINFO_TAG = "<supplInfo>";
32 public final static String SUPPLINFO_TAG_CLOSE = "</supplInfo>";
33
34 public final static String COMPONENT_NAME = ResultController.class.getSimpleName();
35
36 private AnteaterConfiguration configuration;
37
38 public ResultController(AnteaterConfiguration configuration) {
39 this.configuration = configuration;
40 }
41
42 public List<ResultsCarrier> saveResults(List<TextInformation> infos) {
43
44 String resultFolder = configuration.getResultPath();
45 List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();
46
47 ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
48 ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
49 LocationResultManager locationManager = new LocationResultManager(configuration);
50
51 // find applicants
52 IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
53 List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
54 Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants);
55
56 // find species
57 IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
58 List<SpeciesScientificResult> names = namesFinder.getResults(infos);
59 Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names);
60
61 // find locations
62 IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants);
63 List<LocationResult> locations = locationFinder.getResults(infos);
64 Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations);
65
66 FilterController filterController = new FilterController();
67
68 for (TextInformation info : infos) {
69 File textfile = new File(info.getFilepath());
70 String resultFile = resultFolder + File.separator + textfile.getName();
71 IResultFileManager resultManager = new ResultXMLManager(resultFile);
72
73 // annotate summaries
74 List<TextPart> summaries = info.getSummaries();
75 List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info));
76 List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info));
77 List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));
78
79 filterController.runFilters(info, applicantResults, namesResults, locResults);
80
81 // create carrier
82 ResultsCarrier carrier = new ResultsCarrier(info, applicantResults, namesResults, locResults);
83 resultCarriers.add(carrier);
84
85 for (TextPart sum : summaries) {
86
87 int textIndex = sum.getTextIdx();
88 List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
89 tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
90 tags.addAll(locationManager.getSummaryTags(locResults, textIndex));
91 tags.addAll(getParagraphTags(sum));
92
93 StringBuffer annotatedSummary = new StringBuffer(sum.getText());
94 annotatedSummary = tagText(annotatedSummary, tags);
95
96 resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE);
97 }
98
99 // annotate supplementary information
100 List<TextPart> suppleInfs = info.getSupplInfos();
101
102 for (TextPart suppleInf : suppleInfs) {
103 int textIndex = suppleInf.getTextIdx();
104 List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
105 tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
106 tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));
107 tags.addAll(getParagraphTags(suppleInf));
108
109 StringBuffer annotatedSuppleInf = new StringBuffer(suppleInf.getText());
110 annotatedSuppleInf = tagText(annotatedSuppleInf, tags);
111
112 resultManager.addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE);
113 }
114 }
115
116 return resultCarriers;
117 }
118
119
120 protected void checkTags(List<AnnotationTag> tags) {
121 AnnotationTag lastTag = null;
122
123 List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();
124 for (AnnotationTag tag : tags) {
125 if (lastTag == null)
126 {
127 lastTag = tag;
128 continue;
129 }
130 if ((tag.getClosingPosition() < lastTag.getPosition()) || (tag.getPosition() >= lastTag.getPosition() && tag.getClosingPosition() <= lastTag.getClosingPosition())) {
131 lastTag = tag;
132 continue;
133 }
134 if (tag.getTag().startsWith("<p"))
135 toBeRemoved.add(lastTag);
136 else {
137 lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
138 lastTag.setClosingTag("</error>");
139 lastTag.setPosition(tag.getClosingPosition());
140 }
141 }
142
143 tags.removeAll(toBeRemoved);
144 }
145
146 protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
147 Collections.sort(tags, new Comparator<AnnotationTag>() {
148
149 @Override
150 public int compare(AnnotationTag o1, AnnotationTag o2) {
151 if (o2.getClosingPosition() - o1.getClosingPosition() != 0)
152 return o2.getClosingPosition() - o1.getClosingPosition();
153 return o1.getPosition() - o2.getPosition();
154 }
155 });
156
157 checkTags(tags);
158
159 Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
160 StringBuffer finalText = new StringBuffer();
161
162 int end = text.length();
163 for (AnnotationTag tag : tags) {
164 if (text.length() >= tag.getPosition()) {
165 while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
166 AnnotationTag topTag = tagStack.pop();
167 if (topTag.getPosition() < end) {
168 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
169 }
170 finalText.insert(0, topTag.getTag());
171 end = topTag.getPosition();
172 // text.insert(topTag.getPosition(), topTag.getTag());
173 }
174 if (tag.getClosingPosition() < end) {
175 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end)));
176 }
177 finalText.insert(0, tag.getClosingTag());
178 end = tag.getClosingPosition();
179 //text.insert(tag.getClosingPosition(), tag.getClosingTag());
180 tagStack.push(tag);
181 }
182 else
183 configuration.getLogger().logMessage(COMPONENT_NAME,"Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
184 }
185 while (!tagStack.isEmpty()) {
186 AnnotationTag topTag = tagStack.pop();
187 //text.insert(topTag.getPosition(), topTag.getTag());
188 if (topTag.getPosition() < end) {
189 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
190 }
191 finalText.insert(0, topTag.getTag());
192 end = topTag.getPosition();
193 }
194 finalText.insert(0, text.substring(0, end));
195 return finalText;
196 }
197
198
199 protected List<AnnotationTag> getParagraphTags(TextPart part) {
200 int counter = 0;
201 List<AnnotationTag> tags = new ArrayList<AnnotationTag>();
202
203 for (Paragraph para : part.getParagraphsOfInterest()) {
204 AnnotationTag opentag = new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", counter, "</p>", counter + para.getParagraphText().length(), "paragraph");
205 tags.add(opentag);
206 // add 1 for "\n"
207 counter += para.getParagraphText().length() + 1;
208 }
209
210 return tags;
211 }
212
213 }