Mercurial > hg > anteater
diff src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,53 @@ +package de.mpiwg.anteater.results.filter; + +import java.util.ArrayList; +import java.util.List; + +import de.mpiwg.anteater.results.ApplicantResult; +import de.mpiwg.anteater.results.LocationResult; +import de.mpiwg.anteater.results.SpeciesScientificResult; +import de.mpiwg.anteater.text.Paragraph; +import de.mpiwg.anteater.text.TextInformation; +import de.mpiwg.anteater.text.TextPart; +import de.mpiwg.anteater.text.TextType; + +public class SpeciesInBetweenRegexApplicantFilter implements IResultFilter { + + @Override + public void filterElements(TextInformation info, + List<ApplicantResult> applicantResults, + List<SpeciesScientificResult> speciesResults, + List<LocationResult> locationResults) { + + List<TextPart> summaries = info.getSummaries(); + List<TextPart> suppleInfs = info.getSupplInfos(); + for (ApplicantResult applicant : applicantResults) { + + // if applicant was found with regex there can't be a species in it + if (applicant.getPrediction() == 2.0) { + TextPart part; + + if (applicant.getResult().getType() == TextType.TYPE_SUMMARY) + part = summaries.get(applicant.getResult().getTextIdx()); + else + part = suppleInfs.get(applicant.getResult().getTextIdx()); + + Paragraph paragraph = part.getParagraphOfIndex(applicant.getFinding().getStart()); + + List<SpeciesScientificResult> toBeRemoved = new ArrayList<SpeciesScientificResult>(); + for (SpeciesScientificResult species : speciesResults) { + // if species is from same text type and text index + if (species.getResult().getType() == applicant.getResult().getType() && species.getResult().getTextIdx() == applicant.getResult().getTextIdx()) { + // if species is not in same paragraph as applicant all is good + if (part.getParagraphOfIndex(species.getFinding().getStart()) != paragraph) + continue; + } + toBeRemoved.add(species); + } + + speciesResults.removeAll(toBeRemoved); + } + } + } + +}