Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:036535fcd179 |
---|---|
1 package de.mpiwg.anteater.results; | |
2 | |
3 import java.io.File; | |
4 import java.util.ArrayList; | |
5 import java.util.Collections; | |
6 import java.util.Comparator; | |
7 import java.util.List; | |
8 import java.util.Map; | |
9 import java.util.Stack; | |
10 | |
11 import org.apache.commons.lang3.StringEscapeUtils; | |
12 | |
13 import de.mpiwg.anteater.AnteaterConfiguration; | |
14 import de.mpiwg.anteater.results.filter.FilterController; | |
15 import de.mpiwg.anteater.results.impl.ApplicantResultFinder; | |
16 import de.mpiwg.anteater.results.impl.ApplicantResultManager; | |
17 import de.mpiwg.anteater.results.impl.LocationResultFinder; | |
18 import de.mpiwg.anteater.results.impl.LocationResultManager; | |
19 import de.mpiwg.anteater.results.impl.ScientificNameResultFinder; | |
20 import de.mpiwg.anteater.results.impl.ScientificNameResultManager; | |
21 import de.mpiwg.anteater.text.Paragraph; | |
22 import de.mpiwg.anteater.text.TextInformation; | |
23 import de.mpiwg.anteater.text.TextPart; | |
24 import de.mpiwg.anteater.xml.IResultFileManager; | |
25 import de.mpiwg.anteater.xml.impl.ResultXMLManager; | |
26 | |
27 public class ResultController { | |
28 | |
29 public final static String SUMMARY_TAG = "<summary>"; | |
30 public final static String SUMMARY_TAG_CLOSE = "</summary>"; | |
31 public final static String SUPPLINFO_TAG = "<supplInfo>"; | |
32 public final static String SUPPLINFO_TAG_CLOSE = "</supplInfo>"; | |
33 | |
34 public final static String COMPONENT_NAME = ResultController.class.getSimpleName(); | |
35 | |
36 private AnteaterConfiguration configuration; | |
37 | |
38 public ResultController(AnteaterConfiguration configuration) { | |
39 this.configuration = configuration; | |
40 } | |
41 | |
42 public List<ResultsCarrier> saveResults(List<TextInformation> infos) { | |
43 | |
44 String resultFolder = configuration.getResultPath(); | |
45 List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>(); | |
46 | |
47 ApplicantResultManager applicantManager = new ApplicantResultManager(configuration); | |
48 ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration); | |
49 LocationResultManager locationManager = new LocationResultManager(configuration); | |
50 | |
51 // find applicants | |
52 IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration); | |
53 List<ApplicantResult> applicants = applicantsFinder.getResults(infos); | |
54 Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants); | |
55 | |
56 // find species | |
57 IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder(); | |
58 List<SpeciesScientificResult> names = namesFinder.getResults(infos); | |
59 Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names); | |
60 | |
61 // find locations | |
62 IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants); | |
63 List<LocationResult> locations = locationFinder.getResults(infos); | |
64 Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations); | |
65 | |
66 FilterController filterController = new FilterController(); | |
67 | |
68 for (TextInformation info : infos) { | |
69 File textfile = new File(info.getFilepath()); | |
70 String resultFile = resultFolder + File.separator + textfile.getName(); | |
71 IResultFileManager resultManager = new ResultXMLManager(resultFile); | |
72 | |
73 // annotate summaries | |
74 List<TextPart> summaries = info.getSummaries(); | |
75 List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info)); | |
76 List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info)); | |
77 List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info)); | |
78 | |
79 filterController.runFilters(info, applicantResults, namesResults, locResults); | |
80 | |
81 // create carrier | |
82 ResultsCarrier carrier = new ResultsCarrier(info, applicantResults, namesResults, locResults); | |
83 resultCarriers.add(carrier); | |
84 | |
85 for (TextPart sum : summaries) { | |
86 | |
87 int textIndex = sum.getTextIdx(); | |
88 List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex); | |
89 tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex)); | |
90 tags.addAll(locationManager.getSummaryTags(locResults, textIndex)); | |
91 tags.addAll(getParagraphTags(sum)); | |
92 | |
93 StringBuffer annotatedSummary = new StringBuffer(sum.getText()); | |
94 annotatedSummary = tagText(annotatedSummary, tags); | |
95 | |
96 resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE); | |
97 } | |
98 | |
99 // annotate supplementary information | |
100 List<TextPart> suppleInfs = info.getSupplInfos(); | |
101 | |
102 for (TextPart suppleInf : suppleInfs) { | |
103 int textIndex = suppleInf.getTextIdx(); | |
104 List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex); | |
105 tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex)); | |
106 tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex)); | |
107 tags.addAll(getParagraphTags(suppleInf)); | |
108 | |
109 StringBuffer annotatedSuppleInf = new StringBuffer(suppleInf.getText()); | |
110 annotatedSuppleInf = tagText(annotatedSuppleInf, tags); | |
111 | |
112 resultManager.addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE); | |
113 } | |
114 } | |
115 | |
116 return resultCarriers; | |
117 } | |
118 | |
119 | |
120 protected void checkTags(List<AnnotationTag> tags) { | |
121 AnnotationTag lastTag = null; | |
122 | |
123 List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>(); | |
124 for (AnnotationTag tag : tags) { | |
125 if (lastTag == null) | |
126 { | |
127 lastTag = tag; | |
128 continue; | |
129 } | |
130 if ((tag.getClosingPosition() < lastTag.getPosition()) || (tag.getPosition() >= lastTag.getPosition() && tag.getClosingPosition() <= lastTag.getClosingPosition())) { | |
131 lastTag = tag; | |
132 continue; | |
133 } | |
134 if (tag.getTag().startsWith("<p")) | |
135 toBeRemoved.add(lastTag); | |
136 else { | |
137 lastTag.setTag("<error type=\"" + lastTag.getType() + "\">"); | |
138 lastTag.setClosingTag("</error>"); | |
139 lastTag.setPosition(tag.getClosingPosition()); | |
140 } | |
141 } | |
142 | |
143 tags.removeAll(toBeRemoved); | |
144 } | |
145 | |
146 protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) { | |
147 Collections.sort(tags, new Comparator<AnnotationTag>() { | |
148 | |
149 @Override | |
150 public int compare(AnnotationTag o1, AnnotationTag o2) { | |
151 if (o2.getClosingPosition() - o1.getClosingPosition() != 0) | |
152 return o2.getClosingPosition() - o1.getClosingPosition(); | |
153 return o1.getPosition() - o2.getPosition(); | |
154 } | |
155 }); | |
156 | |
157 checkTags(tags); | |
158 | |
159 Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>(); | |
160 StringBuffer finalText = new StringBuffer(); | |
161 | |
162 int end = text.length(); | |
163 for (AnnotationTag tag : tags) { | |
164 if (text.length() >= tag.getPosition()) { | |
165 while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) { | |
166 AnnotationTag topTag = tagStack.pop(); | |
167 if (topTag.getPosition() < end) { | |
168 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end))); | |
169 } | |
170 finalText.insert(0, topTag.getTag()); | |
171 end = topTag.getPosition(); | |
172 // text.insert(topTag.getPosition(), topTag.getTag()); | |
173 } | |
174 if (tag.getClosingPosition() < end) { | |
175 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end))); | |
176 } | |
177 finalText.insert(0, tag.getClosingTag()); | |
178 end = tag.getClosingPosition(); | |
179 //text.insert(tag.getClosingPosition(), tag.getClosingTag()); | |
180 tagStack.push(tag); | |
181 } | |
182 else | |
183 configuration.getLogger().logMessage(COMPONENT_NAME,"Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition()); | |
184 } | |
185 while (!tagStack.isEmpty()) { | |
186 AnnotationTag topTag = tagStack.pop(); | |
187 //text.insert(topTag.getPosition(), topTag.getTag()); | |
188 if (topTag.getPosition() < end) { | |
189 finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end))); | |
190 } | |
191 finalText.insert(0, topTag.getTag()); | |
192 end = topTag.getPosition(); | |
193 } | |
194 finalText.insert(0, text.substring(0, end)); | |
195 return finalText; | |
196 } | |
197 | |
198 | |
199 protected List<AnnotationTag> getParagraphTags(TextPart part) { | |
200 int counter = 0; | |
201 List<AnnotationTag> tags = new ArrayList<AnnotationTag>(); | |
202 | |
203 for (Paragraph para : part.getParagraphsOfInterest()) { | |
204 AnnotationTag opentag = new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", counter, "</p>", counter + para.getParagraphText().length(), "paragraph"); | |
205 tags.add(opentag); | |
206 // add 1 for "\n" | |
207 counter += para.getParagraphText().length() + 1; | |
208 } | |
209 | |
210 return tags; | |
211 } | |
212 | |
213 } |