diff src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,53 @@
+package de.mpiwg.anteater.results.filter;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mpiwg.anteater.results.ApplicantResult;
+import de.mpiwg.anteater.results.LocationResult;
+import de.mpiwg.anteater.results.SpeciesScientificResult;
+import de.mpiwg.anteater.text.Paragraph;
+import de.mpiwg.anteater.text.TextInformation;
+import de.mpiwg.anteater.text.TextPart;
+import de.mpiwg.anteater.text.TextType;
+
+/**
+ * Result filter that discards species findings which share a paragraph with
+ * an applicant that was found by regular expression.
+ *
+ * The reasoning, taken from the original implementation note: if an applicant
+ * was found with a regex, there cannot be a species inside it.
+ */
+public class SpeciesInBetweenRegexApplicantFilter implements IResultFilter {
+
+	/** Prediction value that this filter treats as "applicant was found with a regex". */
+	private static final double REGEX_PREDICTION = 2.0;
+
+	/**
+	 * Removes from {@code speciesResults} every species finding that lies in
+	 * the same text part and the same paragraph as a regex-found applicant.
+	 * Species located in a different text part or a different paragraph are
+	 * kept. Only {@code speciesResults} is modified (in place).
+	 *
+	 * @param info             source of the summary and supplementary text parts
+	 * @param applicantResults applicants to check; only those whose prediction
+	 *                         equals {@link #REGEX_PREDICTION} are considered
+	 * @param speciesResults   species findings; matching entries are removed
+	 * @param locationResults  not used by this filter
+	 */
+	@Override
+	public void filterElements(TextInformation info,
+			List<ApplicantResult> applicantResults,
+			List<SpeciesScientificResult> speciesResults,
+			List<LocationResult> locationResults) {
+
+		List<TextPart> summaries = info.getSummaries();
+		List<TextPart> suppleInfs = info.getSupplInfos();
+
+		for (ApplicantResult applicant : applicantResults) {
+			// only applicants that were found with a regex are relevant here
+			if (applicant.getPrediction() != REGEX_PREDICTION) {
+				continue;
+			}
+
+			// look up the text part the applicant was found in
+			TextPart part;
+			if (applicant.getResult().getType() == TextType.TYPE_SUMMARY) {
+				part = summaries.get(applicant.getResult().getTextIdx());
+			} else {
+				part = suppleInfs.get(applicant.getResult().getTextIdx());
+			}
+
+			Paragraph paragraph = part.getParagraphOfIndex(applicant.getFinding().getStart());
+
+			// collect first, remove afterwards, so the list is not modified while iterating
+			List<SpeciesScientificResult> toBeRemoved = new ArrayList<SpeciesScientificResult>();
+			for (SpeciesScientificResult species : speciesResults) {
+				// a species from another text part can never be inside this
+				// applicant, so it must be kept (it used to be removed by mistake)
+				if (!isInSameText(applicant, species)) {
+					continue;
+				}
+
+				// same text part: remove only if it is in the applicant's paragraph
+				// NOTE(review): paragraphs are compared by reference, as before
+				if (part.getParagraphOfIndex(species.getFinding().getStart()) == paragraph) {
+					toBeRemoved.add(species);
+				}
+			}
+
+			speciesResults.removeAll(toBeRemoved);
+		}
+	}
+
+	/**
+	 * Tells whether a species finding and an applicant finding come from the
+	 * same text part, i.e. have the same text type and the same text index.
+	 */
+	private static boolean isInSameText(ApplicantResult applicant, SpeciesScientificResult species) {
+		return species.getResult().getType() == applicant.getResult().getType()
+				&& species.getResult().getTextIdx() == applicant.getResult().getTextIdx();
+	}
+
+}