annotate src/de/mpiwg/anteater/results/filter/NestedResultsFilter.java @ 9:51ed79e28b45

annotate texts with results and build events with linnaeus
author jdamerow
date Mon, 19 Nov 2012 16:36:15 -0700
parents 036535fcd179
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.results.filter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import de.mpiwg.anteater.results.ApplicantResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import de.mpiwg.anteater.results.IResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.results.LocationResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.results.SpeciesScientificResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 public class NestedResultsFilter implements IResultFilter {
036535fcd179 anteater
jdamerow
parents:
diff changeset
13
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 public void filterElements(TextInformation info,
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 List<ApplicantResult> applicantResults,
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 List<SpeciesScientificResult> speciesResults,
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 List<LocationResult> locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 List<ApplicantResult> applicantsToBeRemoved = new ArrayList<ApplicantResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 List<SpeciesScientificResult> speciesToBeRemoved = new ArrayList<SpeciesScientificResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 List<LocationResult> locationsToBeRemoved = new ArrayList<LocationResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 for (ApplicantResult applicant : applicantResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
24
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 // check for nested applicants
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 for (ApplicantResult applicant2 : applicantResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 if (applicant == applicant2)
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
29
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 int checked = checkResults(applicant, applicant2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 if (!applicantsToBeRemoved.contains(applicant2))
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 applicantsToBeRemoved.add(applicant2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 if (!applicantsToBeRemoved.contains(applicant))
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 applicantsToBeRemoved.add(applicant);
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
45
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 // check for nested species
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 for (SpeciesScientificResult species : speciesResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 int checked = checkResults(applicant, species);
036535fcd179 anteater
jdamerow
parents:
diff changeset
49
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 if (!speciesToBeRemoved.contains(species))
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 speciesToBeRemoved.add(species);
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 if (!applicantsToBeRemoved.contains(applicant))
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 applicantsToBeRemoved.add(applicant);
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 // check for nested places
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 for (LocationResult location : locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 int checked = checkResults(applicant, location);
036535fcd179 anteater
jdamerow
parents:
diff changeset
68
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 if (!locationsToBeRemoved.contains(location))
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 locationsToBeRemoved.add(location);
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 if (!applicantsToBeRemoved.contains(applicant))
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 applicantsToBeRemoved.add(applicant);
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
84
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 // check species
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 for (SpeciesScientificResult species : speciesResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
87
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 // check for nested species
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 for (SpeciesScientificResult species2 : speciesResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 if (species == species2)
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
92
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 int checked = checkResults(species, species2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
94
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 if (!speciesToBeRemoved.contains(species2))
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 speciesToBeRemoved.add(species2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 if (!speciesToBeRemoved.contains(species))
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 speciesToBeRemoved.add(species);
036535fcd179 anteater
jdamerow
parents:
diff changeset
106 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
109
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 // check for nested location
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 for (LocationResult location : locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 int checked = checkResults(species, location);
036535fcd179 anteater
jdamerow
parents:
diff changeset
113
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
115 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
116 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 if (!locationsToBeRemoved.contains(location))
036535fcd179 anteater
jdamerow
parents:
diff changeset
119 locationsToBeRemoved.add(location);
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
123 if (!speciesToBeRemoved.contains(species))
036535fcd179 anteater
jdamerow
parents:
diff changeset
124 speciesToBeRemoved.add(species);
036535fcd179 anteater
jdamerow
parents:
diff changeset
125 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
127 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
128 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
129
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 // check for locations nested in locations
036535fcd179 anteater
jdamerow
parents:
diff changeset
131 for (LocationResult location : locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
132
036535fcd179 anteater
jdamerow
parents:
diff changeset
133 for (LocationResult location2 : locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 if (location == location2)
036535fcd179 anteater
jdamerow
parents:
diff changeset
135 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
136
036535fcd179 anteater
jdamerow
parents:
diff changeset
137
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 int checked = checkResults(location, location2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
139
036535fcd179 anteater
jdamerow
parents:
diff changeset
140 switch (checked) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
141 case 0:
036535fcd179 anteater
jdamerow
parents:
diff changeset
142 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 case -1: {
036535fcd179 anteater
jdamerow
parents:
diff changeset
144 if (!locationsToBeRemoved.contains(location2))
036535fcd179 anteater
jdamerow
parents:
diff changeset
145 locationsToBeRemoved.add(location2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
146 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
147 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
148 case 1 : {
036535fcd179 anteater
jdamerow
parents:
diff changeset
149 if (!locationsToBeRemoved.contains(location))
036535fcd179 anteater
jdamerow
parents:
diff changeset
150 locationsToBeRemoved.add(location);
036535fcd179 anteater
jdamerow
parents:
diff changeset
151 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
152 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
153 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
154 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
155
036535fcd179 anteater
jdamerow
parents:
diff changeset
156 applicantResults.removeAll(applicantsToBeRemoved);
036535fcd179 anteater
jdamerow
parents:
diff changeset
157 speciesResults.removeAll(speciesToBeRemoved);
036535fcd179 anteater
jdamerow
parents:
diff changeset
158 locationResults.removeAll(locationsToBeRemoved);
036535fcd179 anteater
jdamerow
parents:
diff changeset
159 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
160
036535fcd179 anteater
jdamerow
parents:
diff changeset
161 /**
036535fcd179 anteater
jdamerow
parents:
diff changeset
162 * Method for checking if two results are nested. If yes, than:
036535fcd179 anteater
jdamerow
parents:
diff changeset
163 * <ul>
9
51ed79e28b45 annotate texts with results and build events with linnaeus
jdamerow
parents: 0
diff changeset
164 * <li>The result with the higher prediction is the right one (e.g. Regex has prediction 2.0)</li>
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
165 * <li>Otherwise the outer one is assumed to be correct.</li>
036535fcd179 anteater
jdamerow
parents:
diff changeset
166 * </ul>
036535fcd179 anteater
jdamerow
parents:
diff changeset
167 *
036535fcd179 anteater
jdamerow
parents:
diff changeset
168 * @param result1
036535fcd179 anteater
jdamerow
parents:
diff changeset
169 * @param result2
036535fcd179 anteater
jdamerow
parents:
diff changeset
170 * @return -1 if first one is correct, 1 if second one is correct, 0 if they
036535fcd179 anteater
jdamerow
parents:
diff changeset
171 * are not nesting.
036535fcd179 anteater
jdamerow
parents:
diff changeset
172 */
036535fcd179 anteater
jdamerow
parents:
diff changeset
173 protected int checkResults(IResult<?, ?> result1, IResult<?, ?> result2) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
174 // if result 1 is outer one
036535fcd179 anteater
jdamerow
parents:
diff changeset
175 if (result1.getFinding().getStart() <= result2.getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
176 && (result1.getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
177 + result1.getFinding().getLength() >= result2
036535fcd179 anteater
jdamerow
parents:
diff changeset
178 .getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
179 + result2.getFinding().getLength())) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
180
9
51ed79e28b45 annotate texts with results and build events with linnaeus
jdamerow
parents: 0
diff changeset
181 if (result1.getPrediction() > result2.getPrediction())
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
182 return -1;
9
51ed79e28b45 annotate texts with results and build events with linnaeus
jdamerow
parents: 0
diff changeset
183 if (result2.getPrediction() > result1.getPrediction())
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
184 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
185
036535fcd179 anteater
jdamerow
parents:
diff changeset
186 return -1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
187 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
188
036535fcd179 anteater
jdamerow
parents:
diff changeset
189 // if result 2 is outer one
036535fcd179 anteater
jdamerow
parents:
diff changeset
190 if (result2.getFinding().getStart() <= result1.getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
191 && (result2.getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
192 + result2.getFinding().getLength() >= result1
036535fcd179 anteater
jdamerow
parents:
diff changeset
193 .getFinding().getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
194 + result1.getFinding().getLength())) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
195
9
51ed79e28b45 annotate texts with results and build events with linnaeus
jdamerow
parents: 0
diff changeset
196 if (result1.getPrediction() > result2.getPrediction())
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
197 return -1;
9
51ed79e28b45 annotate texts with results and build events with linnaeus
jdamerow
parents: 0
diff changeset
198 if (result2.getPrediction() > result1.getPrediction())
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
199 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
200
036535fcd179 anteater
jdamerow
parents:
diff changeset
201 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
202 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
203
036535fcd179 anteater
jdamerow
parents:
diff changeset
204 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
205 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
206 }