diff src/de/mpiwg/anteater/results/filter/NestedResultsFilter.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children 51ed79e28b45
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/results/filter/NestedResultsFilter.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,208 @@
+package de.mpiwg.anteater.results.filter;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mpiwg.anteater.results.ApplicantResult;
+import de.mpiwg.anteater.results.IResult;
+import de.mpiwg.anteater.results.LocationResult;
+import de.mpiwg.anteater.results.SpeciesScientificResult;
+import de.mpiwg.anteater.text.TextInformation;
+
+public class NestedResultsFilter implements IResultFilter {
+
+	@Override
+	public void filterElements(TextInformation info,
+			List<ApplicantResult> applicantResults,
+			List<SpeciesScientificResult> speciesResults,
+			List<LocationResult> locationResults) {
+
+		List<ApplicantResult> applicantsToBeRemoved = new ArrayList<ApplicantResult>();
+		List<SpeciesScientificResult> speciesToBeRemoved = new ArrayList<SpeciesScientificResult>();
+		List<LocationResult> locationsToBeRemoved = new ArrayList<LocationResult>();
+		for (ApplicantResult applicant : applicantResults) {
+
+			// check for nested applicants
+			for (ApplicantResult applicant2 : applicantResults) {
+				if (applicant == applicant2)
+					continue;
+
+				int checked = checkResults(applicant, applicant2);
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!applicantsToBeRemoved.contains(applicant2))
+						applicantsToBeRemoved.add(applicant2);
+					break;
+				}
+				case 1 : {
+					if (!applicantsToBeRemoved.contains(applicant))
+						applicantsToBeRemoved.add(applicant);
+				}
+				}
+			}
+
+			// check for nested species
+			for (SpeciesScientificResult species : speciesResults) {
+				int checked = checkResults(applicant, species);
+				
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!speciesToBeRemoved.contains(species))
+						speciesToBeRemoved.add(species);
+					break;
+				}
+				case 1 : {
+					if (!applicantsToBeRemoved.contains(applicant))
+						applicantsToBeRemoved.add(applicant);
+				}
+				}
+			}
+			
+			// check for nested places
+			for (LocationResult location : locationResults) {
+				int checked = checkResults(applicant, location);
+				
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!locationsToBeRemoved.contains(location))
+						locationsToBeRemoved.add(location);
+					break;
+				}
+				case 1 : {
+					if (!applicantsToBeRemoved.contains(applicant))
+						applicantsToBeRemoved.add(applicant);
+				}
+				}
+			}
+		}
+		
+		// check species
+		for (SpeciesScientificResult species : speciesResults) {
+			
+			// check for nested species
+			for (SpeciesScientificResult species2 : speciesResults) {
+				if (species == species2)
+					continue;
+
+				int checked = checkResults(species, species2);
+					
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!speciesToBeRemoved.contains(species2))
+						speciesToBeRemoved.add(species2);
+					break;
+				}
+				case 1 : {
+					if (!speciesToBeRemoved.contains(species))
+						speciesToBeRemoved.add(species);
+				}
+				}
+			}
+			
+			// check for nested location
+			for (LocationResult location : locationResults) {
+				int checked = checkResults(species, location);
+				
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!locationsToBeRemoved.contains(location))
+						locationsToBeRemoved.add(location);
+					break;
+				}
+				case 1 : {
+					if (!speciesToBeRemoved.contains(species))
+						speciesToBeRemoved.add(species);
+				}
+				}
+			}
+		}
+		
+		// check for locations nested in locations
+		for (LocationResult location : locationResults) {
+			
+			for (LocationResult location2 : locationResults) {
+				if (location == location2)
+					continue;
+
+				
+				int checked = checkResults(location, location2);
+				
+				switch (checked) {
+				case 0:
+					continue;
+				case -1: {
+					if (!locationsToBeRemoved.contains(location2))
+						locationsToBeRemoved.add(location2);
+					break;
+				}
+				case 1 : {
+					if (!locationsToBeRemoved.contains(location))
+						locationsToBeRemoved.add(location);
+				}
+				}
+			}		
+		}
+		
+		applicantResults.removeAll(applicantsToBeRemoved);
+		speciesResults.removeAll(speciesToBeRemoved);
+		locationResults.removeAll(locationsToBeRemoved);
+	}
+
+	/**
+	 * Method for checking if two results are nested. If yes, than:
+	 * <ul>
+	 * <li>If one result was found by Regex, this one is the correct one.</li>
+	 * <li>Otherwise the outer one is assumed to be correct.</li>
+	 * </ul>
+	 * 
+	 * @param result1
+	 * @param result2
+	 * @return -1 if first one is correct, 1 if second one is correct, 0 if they
+	 *         are not nesting.
+	 */
+	protected int checkResults(IResult<?, ?> result1, IResult<?, ?> result2) {
+		// if result 1 is outer one
+		if (result1.getFinding().getStart() <= result2.getFinding().getStart()
+				&& (result1.getFinding().getStart()
+						+ result1.getFinding().getLength() >= result2
+						.getFinding().getStart()
+						+ result2.getFinding().getLength())) {
+
+			if (result1.getPrediction() == 2.0)
+				return -1;
+
+			if (result2.getPrediction() == 2.0)
+				return 1;
+
+			return -1;
+		}
+
+		// if result 2 is outer one
+		if (result2.getFinding().getStart() <= result1.getFinding().getStart()
+				&& (result2.getFinding().getStart()
+						+ result2.getFinding().getLength() >= result1
+						.getFinding().getStart()
+						+ result1.getFinding().getLength())) {
+
+			if (result1.getPrediction() == 2.0)
+				return -1;
+
+			if (result2.getPrediction() == 2.0)
+				return 1;
+
+			return 1;
+		}
+
+		return 0;
+	}
+}