annotate src/de/mpiwg/anteater/results/filter/SpeciesInBetweenRegexApplicantFilter.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.results.filter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import de.mpiwg.anteater.results.ApplicantResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import de.mpiwg.anteater.results.LocationResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.results.SpeciesScientificResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.text.Paragraph;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.text.TextPart;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.text.TextType;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 public class SpeciesInBetweenRegexApplicantFilter implements IResultFilter {
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 public void filterElements(TextInformation info,
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 List<ApplicantResult> applicantResults,
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 List<SpeciesScientificResult> speciesResults,
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 List<LocationResult> locationResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
21
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 List<TextPart> summaries = info.getSummaries();
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 List<TextPart> suppleInfs = info.getSupplInfos();
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 for (ApplicantResult applicant : applicantResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
25
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 // if applicant was found with regex there can't be a species in it
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 if (applicant.getPrediction() == 2.0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 TextPart part;
036535fcd179 anteater
jdamerow
parents:
diff changeset
29
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 if (applicant.getResult().getType() == TextType.TYPE_SUMMARY)
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 part = summaries.get(applicant.getResult().getTextIdx());
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 else
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 part = suppleInfs.get(applicant.getResult().getTextIdx());
036535fcd179 anteater
jdamerow
parents:
diff changeset
34
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 Paragraph paragraph = part.getParagraphOfIndex(applicant.getFinding().getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 List<SpeciesScientificResult> toBeRemoved = new ArrayList<SpeciesScientificResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 for (SpeciesScientificResult species : speciesResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 // if species is from same text type and text index
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 if (species.getResult().getType() == applicant.getResult().getType() && species.getResult().getTextIdx() == applicant.getResult().getTextIdx()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 // if species is not in same paragraph as applicant all is good
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 if (part.getParagraphOfIndex(species.getFinding().getStart()) != paragraph)
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 toBeRemoved.add(species);
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
47
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 speciesResults.removeAll(toBeRemoved);
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 }