Mercurial > hg > anteater
view src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.anteater.results.filter;

import java.util.ArrayList;
import java.util.List;

import de.mpiwg.anteater.results.ApplicantResult;
import de.mpiwg.anteater.results.LocationResult;
import de.mpiwg.anteater.results.SpeciesScientificResult;
import de.mpiwg.anteater.text.Paragraph;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.text.TextType;

/**
 * Result filter that discards species findings which share a paragraph with
 * an applicant finding whose prediction equals {@link #REGEX_PREDICTION}.
 *
 * <p>
 * The list of species results passed to
 * {@link #filterElements(TextInformation, List, List, List)} is modified in
 * place; applicant and location results are only read / left untouched.
 */
public class SpeciesInBetweenRegexApplicantFilter implements IResultFilter {

	/**
	 * Prediction value of the applicants this filter acts on. According to the
	 * original inline comment this value marks applicants that were found with
	 * a regex (NOTE(review): confirm against the code that sets the
	 * prediction).
	 */
	private static final double REGEX_PREDICTION = 2.0;

	/**
	 * Removes from {@code speciesResults} every species that lies in the same
	 * text part and the same paragraph as an applicant with prediction
	 * {@link #REGEX_PREDICTION}.
	 *
	 * <p>
	 * Species that belong to a different text part (different text type or
	 * text index) or to a different paragraph of the same text part are kept.
	 *
	 * @param info
	 *            text information providing the summary and supplementary
	 *            text parts the results refer to
	 * @param applicantResults
	 *            applicant findings; only read
	 * @param speciesResults
	 *            species findings; matching entries are removed from this list
	 * @param locationResults
	 *            location findings; not used by this filter
	 */
	@Override
	public void filterElements(TextInformation info,
			List<ApplicantResult> applicantResults,
			List<SpeciesScientificResult> speciesResults,
			List<LocationResult> locationResults) {

		List<TextPart> summaries = info.getSummaries();
		List<TextPart> suppleInfs = info.getSupplInfos();

		for (ApplicantResult applicant : applicantResults) {
			// if applicant was found with regex there can't be a species in it
			if (applicant.getPrediction() != REGEX_PREDICTION)
				continue;

			TextPart part = findTextPart(applicant, summaries, suppleInfs);
			Paragraph paragraph = part.getParagraphOfIndex(applicant
					.getFinding().getStart());

			// collect first, remove afterwards: the list must not be
			// modified while it is being iterated
			List<SpeciesScientificResult> toBeRemoved = new ArrayList<SpeciesScientificResult>();
			for (SpeciesScientificResult species : speciesResults) {
				// a species from another text part cannot be inside this
				// applicant, so it must be kept
				boolean sameText = species.getResult().getType() == applicant
						.getResult().getType()
						&& species.getResult().getTextIdx() == applicant
								.getResult().getTextIdx();
				if (!sameText)
					continue;

				// if species is not in same paragraph as applicant all is
				// good (paragraphs are compared by reference, as before)
				if (part.getParagraphOfIndex(species.getFinding().getStart()) != paragraph)
					continue;

				toBeRemoved.add(species);
			}
			speciesResults.removeAll(toBeRemoved);
		}
	}

	/**
	 * Looks up the text part an applicant finding refers to: the summary list
	 * is used for results of type {@code TextType.TYPE_SUMMARY}, the
	 * supplementary-information list for every other type.
	 */
	private TextPart findTextPart(ApplicantResult applicant,
			List<TextPart> summaries, List<TextPart> suppleInfs) {
		int textIdx = applicant.getResult().getTextIdx();
		if (applicant.getResult().getType() == TextType.TYPE_SUMMARY)
			return summaries.get(textIdx);
		return suppleInfs.get(textIdx);
	}
}