Mercurial > hg > anteater
diff src/de/mpiwg/anteater/results/filter/NestedResultsFilter.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children | 51ed79e28b45 |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/anteater/results/filter/NestedResultsFilter.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,208 @@
package de.mpiwg.anteater.results.filter;

import java.util.ArrayList;
import java.util.List;

import de.mpiwg.anteater.results.ApplicantResult;
import de.mpiwg.anteater.results.IResult;
import de.mpiwg.anteater.results.LocationResult;
import de.mpiwg.anteater.results.SpeciesScientificResult;
import de.mpiwg.anteater.text.TextInformation;

/**
 * Result filter that drops results whose finding is nested inside (or
 * encloses) the finding of another result, keeping only the result that
 * {@link #checkResults(IResult, IResult)} declares correct.
 */
public class NestedResultsFilter implements IResultFilter {

    /**
     * Prediction value that {@link #checkResults(IResult, IResult)} treats as
     * "found by regex".
     * NOTE(review): the link between 2.0 and regex hits is taken from the
     * nesting rules documented on checkResults; confirm against the code that
     * assigns predictions.
     */
    private static final double REGEX_PREDICTION = 2.0;

    /**
     * Removes nested results from the three given lists in place.
     *
     * <p>Every applicant is compared with every other applicant, every
     * species and every location; every species with every other species and
     * every location; every location with every other location. For each
     * nested pair the losing result (as decided by
     * {@link #checkResults(IResult, IResult)}) is removed from its list.
     *
     * @param info             text information; not used by this filter
     * @param applicantResults applicant results, modified in place
     * @param speciesResults   species results, modified in place
     * @param locationResults  location results, modified in place
     */
    @Override
    public void filterElements(TextInformation info,
            List<ApplicantResult> applicantResults,
            List<SpeciesScientificResult> speciesResults,
            List<LocationResult> locationResults) {

        List<ApplicantResult> applicantsToBeRemoved = new ArrayList<ApplicantResult>();
        List<SpeciesScientificResult> speciesToBeRemoved = new ArrayList<SpeciesScientificResult>();
        List<LocationResult> locationsToBeRemoved = new ArrayList<LocationResult>();

        // Losers are only collected here and removed at the very end, so a
        // result that has already lost still takes part in later comparisons.
        markNestedWithin(applicantResults, applicantsToBeRemoved);
        markNestedAcross(applicantResults, applicantsToBeRemoved,
                speciesResults, speciesToBeRemoved);
        markNestedAcross(applicantResults, applicantsToBeRemoved,
                locationResults, locationsToBeRemoved);

        markNestedWithin(speciesResults, speciesToBeRemoved);
        markNestedAcross(speciesResults, speciesToBeRemoved,
                locationResults, locationsToBeRemoved);

        markNestedWithin(locationResults, locationsToBeRemoved);

        applicantResults.removeAll(applicantsToBeRemoved);
        speciesResults.removeAll(speciesToBeRemoved);
        locationResults.removeAll(locationsToBeRemoved);
    }

    /**
     * Compares each unordered pair of results from one list exactly once and
     * records the loser of every nested pair.
     *
     * <p>Each pair is deliberately visited in one order only: two results
     * covering the identical span each "enclose" the other, so evaluating the
     * pair in both orders would mark both of them for removal.
     */
    private <T extends IResult<?, ?>> void markNestedWithin(List<T> results,
            List<T> toBeRemoved) {
        int size = results.size();
        for (int i = 0; i < size; i++) {
            T first = results.get(i);
            for (int j = i + 1; j < size; j++) {
                T second = results.get(j);
                // The same instance listed twice is not a nesting conflict.
                if (first == second) {
                    continue;
                }
                resolvePair(first, toBeRemoved, second, toBeRemoved);
            }
        }
    }

    /**
     * Compares every result of the first list with every result of the second
     * list and records the loser of every nested pair.
     */
    private <A extends IResult<?, ?>, B extends IResult<?, ?>> void markNestedAcross(
            List<A> firstResults, List<A> firstToBeRemoved,
            List<B> secondResults, List<B> secondToBeRemoved) {
        for (A first : firstResults) {
            for (B second : secondResults) {
                resolvePair(first, firstToBeRemoved, second, secondToBeRemoved);
            }
        }
    }

    /**
     * Runs {@link #checkResults(IResult, IResult)} on one pair and adds the
     * losing result to its removal list; does nothing if they are not nested.
     */
    private <A extends IResult<?, ?>, B extends IResult<?, ?>> void resolvePair(
            A first, List<A> firstToBeRemoved,
            B second, List<B> secondToBeRemoved) {
        int verdict = checkResults(first, second);
        if (verdict == -1) {
            // first one is correct -> second one goes
            addOnce(secondToBeRemoved, second);
        } else if (verdict == 1) {
            // second one is correct -> first one goes
            addOnce(firstToBeRemoved, first);
        }
    }

    /** Appends the item unless the list already contains it. */
    private static <T> void addOnce(List<T> list, T item) {
        if (!list.contains(item)) {
            list.add(item);
        }
    }

    /**
     * Method for checking if two results are nested. If yes, then:
     * <ul>
     * <li>If exactly one result was found by Regex, this one is the correct
     * one.</li>
     * <li>Otherwise (none or both found by Regex) the outer one is assumed to
     * be correct.</li>
     * </ul>
     * Two results covering the identical span count as nested, with the first
     * one treated as the outer one.
     *
     * @param result1
     *            first result
     * @param result2
     *            second result
     * @return -1 if first one is correct, 1 if second one is correct, 0 if they
     *         are not nesting.
     */
    protected int checkResults(IResult<?, ?> result1, IResult<?, ?> result2) {
        // if result 1 is outer one
        if (encloses(result1, result2)) {
            if (isRegexResult(result1)) {
                return -1;
            }
            return isRegexResult(result2) ? 1 : -1;
        }

        // if result 2 is outer one
        if (encloses(result2, result1)) {
            // Ask the outer result first so that two regex results resolve to
            // the outer one no matter in which order they were passed in.
            if (isRegexResult(result2)) {
                return 1;
            }
            return isRegexResult(result1) ? -1 : 1;
        }

        return 0;
    }

    /**
     * Tells whether the finding of {@code outer} starts at or before, and ends
     * at or after, the finding of {@code inner} (end = start + length).
     */
    private static boolean encloses(IResult<?, ?> outer, IResult<?, ?> inner) {
        return outer.getFinding().getStart() <= inner.getFinding().getStart()
                && (outer.getFinding().getStart()
                        + outer.getFinding().getLength() >= inner
                        .getFinding().getStart()
                        + inner.getFinding().getLength());
    }

    /** Tells whether the result carries the regex prediction value. */
    private static boolean isRegexResult(IResult<?, ?> result) {
        return result.getPrediction() == REGEX_PREDICTION;
    }
}