0
|
1 package de.mpiwg.anteater.results.filter;
|
|
2
|
|
3 import java.util.ArrayList;
|
|
4 import java.util.List;
|
|
5
|
|
6 import de.mpiwg.anteater.results.ApplicantResult;
|
|
7 import de.mpiwg.anteater.results.IResult;
|
|
8 import de.mpiwg.anteater.results.LocationResult;
|
|
9 import de.mpiwg.anteater.results.SpeciesScientificResult;
|
|
10 import de.mpiwg.anteater.text.TextInformation;
|
|
11
|
|
12 public class NestedResultsFilter implements IResultFilter {
|
|
13
|
|
14 @Override
|
|
15 public void filterElements(TextInformation info,
|
|
16 List<ApplicantResult> applicantResults,
|
|
17 List<SpeciesScientificResult> speciesResults,
|
|
18 List<LocationResult> locationResults) {
|
|
19
|
|
20 List<ApplicantResult> applicantsToBeRemoved = new ArrayList<ApplicantResult>();
|
|
21 List<SpeciesScientificResult> speciesToBeRemoved = new ArrayList<SpeciesScientificResult>();
|
|
22 List<LocationResult> locationsToBeRemoved = new ArrayList<LocationResult>();
|
|
23 for (ApplicantResult applicant : applicantResults) {
|
|
24
|
|
25 // check for nested applicants
|
|
26 for (ApplicantResult applicant2 : applicantResults) {
|
|
27 if (applicant == applicant2)
|
|
28 continue;
|
|
29
|
|
30 int checked = checkResults(applicant, applicant2);
|
|
31 switch (checked) {
|
|
32 case 0:
|
|
33 continue;
|
|
34 case -1: {
|
|
35 if (!applicantsToBeRemoved.contains(applicant2))
|
|
36 applicantsToBeRemoved.add(applicant2);
|
|
37 break;
|
|
38 }
|
|
39 case 1 : {
|
|
40 if (!applicantsToBeRemoved.contains(applicant))
|
|
41 applicantsToBeRemoved.add(applicant);
|
|
42 }
|
|
43 }
|
|
44 }
|
|
45
|
|
46 // check for nested species
|
|
47 for (SpeciesScientificResult species : speciesResults) {
|
|
48 int checked = checkResults(applicant, species);
|
|
49
|
|
50 switch (checked) {
|
|
51 case 0:
|
|
52 continue;
|
|
53 case -1: {
|
|
54 if (!speciesToBeRemoved.contains(species))
|
|
55 speciesToBeRemoved.add(species);
|
|
56 break;
|
|
57 }
|
|
58 case 1 : {
|
|
59 if (!applicantsToBeRemoved.contains(applicant))
|
|
60 applicantsToBeRemoved.add(applicant);
|
|
61 }
|
|
62 }
|
|
63 }
|
|
64
|
|
65 // check for nested places
|
|
66 for (LocationResult location : locationResults) {
|
|
67 int checked = checkResults(applicant, location);
|
|
68
|
|
69 switch (checked) {
|
|
70 case 0:
|
|
71 continue;
|
|
72 case -1: {
|
|
73 if (!locationsToBeRemoved.contains(location))
|
|
74 locationsToBeRemoved.add(location);
|
|
75 break;
|
|
76 }
|
|
77 case 1 : {
|
|
78 if (!applicantsToBeRemoved.contains(applicant))
|
|
79 applicantsToBeRemoved.add(applicant);
|
|
80 }
|
|
81 }
|
|
82 }
|
|
83 }
|
|
84
|
|
85 // check species
|
|
86 for (SpeciesScientificResult species : speciesResults) {
|
|
87
|
|
88 // check for nested species
|
|
89 for (SpeciesScientificResult species2 : speciesResults) {
|
|
90 if (species == species2)
|
|
91 continue;
|
|
92
|
|
93 int checked = checkResults(species, species2);
|
|
94
|
|
95 switch (checked) {
|
|
96 case 0:
|
|
97 continue;
|
|
98 case -1: {
|
|
99 if (!speciesToBeRemoved.contains(species2))
|
|
100 speciesToBeRemoved.add(species2);
|
|
101 break;
|
|
102 }
|
|
103 case 1 : {
|
|
104 if (!speciesToBeRemoved.contains(species))
|
|
105 speciesToBeRemoved.add(species);
|
|
106 }
|
|
107 }
|
|
108 }
|
|
109
|
|
110 // check for nested location
|
|
111 for (LocationResult location : locationResults) {
|
|
112 int checked = checkResults(species, location);
|
|
113
|
|
114 switch (checked) {
|
|
115 case 0:
|
|
116 continue;
|
|
117 case -1: {
|
|
118 if (!locationsToBeRemoved.contains(location))
|
|
119 locationsToBeRemoved.add(location);
|
|
120 break;
|
|
121 }
|
|
122 case 1 : {
|
|
123 if (!speciesToBeRemoved.contains(species))
|
|
124 speciesToBeRemoved.add(species);
|
|
125 }
|
|
126 }
|
|
127 }
|
|
128 }
|
|
129
|
|
130 // check for locations nested in locations
|
|
131 for (LocationResult location : locationResults) {
|
|
132
|
|
133 for (LocationResult location2 : locationResults) {
|
|
134 if (location == location2)
|
|
135 continue;
|
|
136
|
|
137
|
|
138 int checked = checkResults(location, location2);
|
|
139
|
|
140 switch (checked) {
|
|
141 case 0:
|
|
142 continue;
|
|
143 case -1: {
|
|
144 if (!locationsToBeRemoved.contains(location2))
|
|
145 locationsToBeRemoved.add(location2);
|
|
146 break;
|
|
147 }
|
|
148 case 1 : {
|
|
149 if (!locationsToBeRemoved.contains(location))
|
|
150 locationsToBeRemoved.add(location);
|
|
151 }
|
|
152 }
|
|
153 }
|
|
154 }
|
|
155
|
|
156 applicantResults.removeAll(applicantsToBeRemoved);
|
|
157 speciesResults.removeAll(speciesToBeRemoved);
|
|
158 locationResults.removeAll(locationsToBeRemoved);
|
|
159 }
|
|
160
|
|
161 /**
|
|
162 * Method for checking if two results are nested. If yes, than:
|
|
163 * <ul>
|
|
164 * <li>If one result was found by Regex, this one is the correct one.</li>
|
|
165 * <li>Otherwise the outer one is assumed to be correct.</li>
|
|
166 * </ul>
|
|
167 *
|
|
168 * @param result1
|
|
169 * @param result2
|
|
170 * @return -1 if first one is correct, 1 if second one is correct, 0 if they
|
|
171 * are not nesting.
|
|
172 */
|
|
173 protected int checkResults(IResult<?, ?> result1, IResult<?, ?> result2) {
|
|
174 // if result 1 is outer one
|
|
175 if (result1.getFinding().getStart() <= result2.getFinding().getStart()
|
|
176 && (result1.getFinding().getStart()
|
|
177 + result1.getFinding().getLength() >= result2
|
|
178 .getFinding().getStart()
|
|
179 + result2.getFinding().getLength())) {
|
|
180
|
|
181 if (result1.getPrediction() == 2.0)
|
|
182 return -1;
|
|
183
|
|
184 if (result2.getPrediction() == 2.0)
|
|
185 return 1;
|
|
186
|
|
187 return -1;
|
|
188 }
|
|
189
|
|
190 // if result 2 is outer one
|
|
191 if (result2.getFinding().getStart() <= result1.getFinding().getStart()
|
|
192 && (result2.getFinding().getStart()
|
|
193 + result2.getFinding().getLength() >= result1
|
|
194 .getFinding().getStart()
|
|
195 + result1.getFinding().getLength())) {
|
|
196
|
|
197 if (result1.getPrediction() == 2.0)
|
|
198 return -1;
|
|
199
|
|
200 if (result2.getPrediction() == 2.0)
|
|
201 return 1;
|
|
202
|
|
203 return 1;
|
|
204 }
|
|
205
|
|
206 return 0;
|
|
207 }
|
|
208 }
|