comparison src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java @ 9:51ed79e28b45

annotate texts with results and build events with linnaeus
author jdamerow
date Mon, 19 Nov 2012 16:36:15 -0700
parents 0c7cf517ff2d
children
comparison
equal deleted inserted replaced
8:05b2ad41e8f0 9:51ed79e28b45
7 7
8 import de.mpiwg.anteater.places.PlaceInformation; 8 import de.mpiwg.anteater.places.PlaceInformation;
9 import de.mpiwg.anteater.places.PlacesExtraction; 9 import de.mpiwg.anteater.places.PlacesExtraction;
10 import de.mpiwg.anteater.results.IResultFinder; 10 import de.mpiwg.anteater.results.IResultFinder;
11 import de.mpiwg.anteater.results.SpeciesScientificResult; 11 import de.mpiwg.anteater.results.SpeciesScientificResult;
12 import de.mpiwg.anteater.species.NameFinder;
12 import de.mpiwg.anteater.species.common.CommonNameFindController; 13 import de.mpiwg.anteater.species.common.CommonNameFindController;
14 import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
13 import de.mpiwg.anteater.species.scientific.ScientificName; 15 import de.mpiwg.anteater.species.scientific.ScientificName;
14 import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; 16 import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
15 import de.mpiwg.anteater.text.TextInformation; 17 import de.mpiwg.anteater.text.TextInformation;
16 18
17 public class ScientificNameResultFinder implements 19 public class ScientificNameResultFinder implements
19 21
20 @Override 22 @Override
21 public List<SpeciesScientificResult> getResults(List<TextInformation> infos) { 23 public List<SpeciesScientificResult> getResults(List<TextInformation> infos) {
22 List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>(); 24 List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>();
23 25
26 // for each text
24 for (TextInformation info : infos) { 27 for (TextInformation info : infos) {
25 28
29 // get species and places
26 List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions(); 30 List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions();
27 List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions(); 31 List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions();
28 32
29 33 // get species sorted by type of text (summary or supplementary information)
30 Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>(); 34 Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>();
31 for (ScientificNamesExtraction ex : resultsExtractions) { 35 for (ScientificNamesExtraction ex : resultsExtractions) {
32 Map<Integer, List<ScientificNamesExtraction>> extractionsMap = nameExtractionsByType.get(ex.getType()); 36 Map<Integer, List<ScientificNamesExtraction>> extractionsMap = nameExtractionsByType.get(ex.getType());
33 if (extractionsMap == null) 37 if (extractionsMap == null)
34 { 38 {
43 } 47 }
44 48
45 namesList.add(ex); 49 namesList.add(ex);
46 } 50 }
47 51
52 // get places sorted by type of text
48 Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>(); 53 Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>();
49 for (PlacesExtraction ex : placesExtractionFromText) { 54 for (PlacesExtraction ex : placesExtractionFromText) {
50 Map<Integer, List<PlacesExtraction>> extractionsMap = placeExtractionsByType.get(ex.getType()); 55 Map<Integer, List<PlacesExtraction>> extractionsMap = placeExtractionsByType.get(ex.getType());
51 if (extractionsMap == null) 56 if (extractionsMap == null)
52 { 57 {
61 } 66 }
62 67
63 placeList.add(ex); 68 placeList.add(ex);
64 } 69 }
65 70
71 // for each type
66 for (Integer type : nameExtractionsByType.keySet()) { 72 for (Integer type : nameExtractionsByType.keySet()) {
73 // for each text of this type
74 // if there is more than one text with this type
67 for (Integer textId : nameExtractionsByType.get(type).keySet()) { 75 for (Integer textId : nameExtractionsByType.get(type).keySet()) {
76 // get names and places
68 List<ScientificNamesExtraction> nameExtractions = nameExtractionsByType.get(type).get(textId); 77 List<ScientificNamesExtraction> nameExtractions = nameExtractionsByType.get(type).get(textId);
69 List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>(); 78 List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type) != null && placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>();
70 79
80 // for each place calculate which indices it takes
71 List<Integer> placeIndices = new ArrayList<Integer>(); 81 List<Integer> placeIndices = new ArrayList<Integer>();
72 82
73 for (PlacesExtraction placeExtraction : placesExtractions) { 83 for (PlacesExtraction placeExtraction : placesExtractions) {
74 List<PlaceInformation> places = placeExtraction.getPlaceInformation(); 84 List<PlaceInformation> places = placeExtraction.getPlaceInformation();
75 85
82 placeIndices.add(i); 92 placeIndices.add(i);
83 } 93 }
84 } 94 }
85 } 95 }
86 96
97 // for each species calculate its range
87 List<Range> distinct = new ArrayList<Range>(); 98 List<Range> distinct = new ArrayList<Range>();
88 99
89 for (ScientificNamesExtraction nameExtraction : nameExtractions) { 100 for (ScientificNamesExtraction nameExtraction : nameExtractions) {
90 List<ScientificName> names = nameExtraction.getNames(); 101 List<ScientificName> names = nameExtraction.getNames();
91 102
94 // go through all distinct names 105 // go through all distinct names
95 for (Range r : distinct) { 106 for (Range r : distinct) {
96 // if there is already one that occupies range of current name 107 // if there is already one that occupies range of current name
97 if (r.start <= name.getStart() && r.end >= name.getStart() + name.getLength()) { 108 if (r.start <= name.getStart() && r.end >= name.getStart() + name.getLength()) {
98 // take common species results 109 // take common species results
99 if (nameExtraction.getFoundBy() == CommonNameFindController.class) { 110 if (nameExtraction.getFoundBy() == NameFinder.LINNAEUS) {
100 r.start = name.getStart(); 111 r.start = name.getStart();
101 r.end = name.getStart() + name.getLength(); 112 r.end = name.getStart() + name.getLength();
102 r.name = name; 113 r.name = name;
103 } 114 }
104 found = name; 115 found = name;
111 distinct.add(new Range(name.getStart(), name.getStart() + name.getLength(), name, nameExtraction)); 122 distinct.add(new Range(name.getStart(), name.getStart() + name.getLength(), name, nameExtraction));
112 } 123 }
113 } 124 }
114 } 125 }
115 126
127 // check whether each species range collides with a place; if it does, don't keep it
116 for (Range range : distinct) { 128 for (Range range : distinct) {
117 ScientificName name = range.name; 129 ScientificName name = range.name;
118 if (!placeIndices.contains(name.getStart())) 130 if (!placeIndices.contains(name.getStart()))
119 { 131 {
120 SpeciesScientificResult finalResult = new SpeciesScientificResult(); 132 SpeciesScientificResult finalResult = new SpeciesScientificResult();