Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java @ 9:51ed79e28b45
annotate texts with results and build events with linnaeus
author | jdamerow |
---|---|
date | Mon, 19 Nov 2012 16:36:15 -0700 |
parents | 0c7cf517ff2d |
children |
comparison
equal
deleted
inserted
replaced
8:05b2ad41e8f0 | 9:51ed79e28b45 |
---|---|
7 | 7 |
8 import de.mpiwg.anteater.places.PlaceInformation; | 8 import de.mpiwg.anteater.places.PlaceInformation; |
9 import de.mpiwg.anteater.places.PlacesExtraction; | 9 import de.mpiwg.anteater.places.PlacesExtraction; |
10 import de.mpiwg.anteater.results.IResultFinder; | 10 import de.mpiwg.anteater.results.IResultFinder; |
11 import de.mpiwg.anteater.results.SpeciesScientificResult; | 11 import de.mpiwg.anteater.results.SpeciesScientificResult; |
12 import de.mpiwg.anteater.species.NameFinder; | |
12 import de.mpiwg.anteater.species.common.CommonNameFindController; | 13 import de.mpiwg.anteater.species.common.CommonNameFindController; |
14 import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder; | |
13 import de.mpiwg.anteater.species.scientific.ScientificName; | 15 import de.mpiwg.anteater.species.scientific.ScientificName; |
14 import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; | 16 import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; |
15 import de.mpiwg.anteater.text.TextInformation; | 17 import de.mpiwg.anteater.text.TextInformation; |
16 | 18 |
17 public class ScientificNameResultFinder implements | 19 public class ScientificNameResultFinder implements |
19 | 21 |
20 @Override | 22 @Override |
21 public List<SpeciesScientificResult> getResults(List<TextInformation> infos) { | 23 public List<SpeciesScientificResult> getResults(List<TextInformation> infos) { |
22 List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>(); | 24 List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>(); |
23 | 25 |
26 // for each text | |
24 for (TextInformation info : infos) { | 27 for (TextInformation info : infos) { |
25 | 28 |
29 // get species and places | |
26 List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions(); | 30 List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions(); |
27 List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions(); | 31 List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions(); |
28 | 32 |
29 | 33 // get species sorted by type of text (summary or supplementary information) |
30 Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>(); | 34 Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>(); |
31 for (ScientificNamesExtraction ex : resultsExtractions) { | 35 for (ScientificNamesExtraction ex : resultsExtractions) { |
32 Map<Integer, List<ScientificNamesExtraction>> extractionsMap = nameExtractionsByType.get(ex.getType()); | 36 Map<Integer, List<ScientificNamesExtraction>> extractionsMap = nameExtractionsByType.get(ex.getType()); |
33 if (extractionsMap == null) | 37 if (extractionsMap == null) |
34 { | 38 { |
43 } | 47 } |
44 | 48 |
45 namesList.add(ex); | 49 namesList.add(ex); |
46 } | 50 } |
47 | 51 |
52 // get places sorted by type of text | |
48 Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>(); | 53 Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>(); |
49 for (PlacesExtraction ex : placesExtractionFromText) { | 54 for (PlacesExtraction ex : placesExtractionFromText) { |
50 Map<Integer, List<PlacesExtraction>> extractionsMap = placeExtractionsByType.get(ex.getType()); | 55 Map<Integer, List<PlacesExtraction>> extractionsMap = placeExtractionsByType.get(ex.getType()); |
51 if (extractionsMap == null) | 56 if (extractionsMap == null) |
52 { | 57 { |
61 } | 66 } |
62 | 67 |
63 placeList.add(ex); | 68 placeList.add(ex); |
64 } | 69 } |
65 | 70 |
71 // for each type | |
66 for (Integer type : nameExtractionsByType.keySet()) { | 72 for (Integer type : nameExtractionsByType.keySet()) { |
73 // for each text of this type | |
74 // if there is more than one text with this type | |
67 for (Integer textId : nameExtractionsByType.get(type).keySet()) { | 75 for (Integer textId : nameExtractionsByType.get(type).keySet()) { |
76 // get names and places | |
68 List<ScientificNamesExtraction> nameExtractions = nameExtractionsByType.get(type).get(textId); | 77 List<ScientificNamesExtraction> nameExtractions = nameExtractionsByType.get(type).get(textId); |
69 List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>(); | 78 List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type) != null && placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>(); |
70 | 79 |
80 // for each place calculate which indices it takes | |
71 List<Integer> placeIndices = new ArrayList<Integer>(); | 81 List<Integer> placeIndices = new ArrayList<Integer>(); |
72 | 82 |
73 for (PlacesExtraction placeExtraction : placesExtractions) { | 83 for (PlacesExtraction placeExtraction : placesExtractions) { |
74 List<PlaceInformation> places = placeExtraction.getPlaceInformation(); | 84 List<PlaceInformation> places = placeExtraction.getPlaceInformation(); |
75 | 85 |
82 placeIndices.add(i); | 92 placeIndices.add(i); |
83 } | 93 } |
84 } | 94 } |
85 } | 95 } |
86 | 96 |
97 // for each species calculate its range | |
87 List<Range> distinct = new ArrayList<Range>(); | 98 List<Range> distinct = new ArrayList<Range>(); |
88 | 99 |
89 for (ScientificNamesExtraction nameExtraction : nameExtractions) { | 100 for (ScientificNamesExtraction nameExtraction : nameExtractions) { |
90 List<ScientificName> names = nameExtraction.getNames(); | 101 List<ScientificName> names = nameExtraction.getNames(); |
91 | 102 |
94 // go through all distinct names | 105 // go through all distinct names |
95 for (Range r : distinct) { | 106 for (Range r : distinct) { |
96 // if there is already one that occupies range of current name | 107 // if there is already one that occupies range of current name |
97 if (r.start <= name.getStart() && r.end >= name.getStart() + name.getLength()) { | 108 if (r.start <= name.getStart() && r.end >= name.getStart() + name.getLength()) { |
98 // take common species results | 109 // take common species results |
99 if (nameExtraction.getFoundBy() == CommonNameFindController.class) { | 110 if (nameExtraction.getFoundBy() == NameFinder.LINNAEUS) { |
100 r.start = name.getStart(); | 111 r.start = name.getStart(); |
101 r.end = name.getStart() + name.getLength(); | 112 r.end = name.getStart() + name.getLength(); |
102 r.name = name; | 113 r.name = name; |
103 } | 114 } |
104 found = name; | 115 found = name; |
111 distinct.add(new Range(name.getStart(), name.getStart() + name.getLength(), name, nameExtraction)); | 122 distinct.add(new Range(name.getStart(), name.getStart() + name.getLength(), name, nameExtraction)); |
112 } | 123 } |
113 } | 124 } |
114 } | 125 } |
115 | 126 |
127 // check whether each species range collides with a place; if it does, don't keep it | |
116 for (Range range : distinct) { | 128 for (Range range : distinct) { |
117 ScientificName name = range.name; | 129 ScientificName name = range.name; |
118 if (!placeIndices.contains(name.getStart())) | 130 if (!placeIndices.contains(name.getStart())) |
119 { | 131 { |
120 SpeciesScientificResult finalResult = new SpeciesScientificResult(); | 132 SpeciesScientificResult finalResult = new SpeciesScientificResult(); |