Mercurial > hg > anteater
diff src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java @ 9:51ed79e28b45
annotate texts with results and build events with linnaeus
author | jdamerow |
---|---|
date | Mon, 19 Nov 2012 16:36:15 -0700 |
parents | 0c7cf517ff2d |
children |
line wrap: on
line diff
--- a/src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java Fri Nov 09 16:12:01 2012 -0700 +++ b/src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java Mon Nov 19 16:36:15 2012 -0700 @@ -9,7 +9,9 @@ import de.mpiwg.anteater.places.PlacesExtraction; import de.mpiwg.anteater.results.IResultFinder; import de.mpiwg.anteater.results.SpeciesScientificResult; +import de.mpiwg.anteater.species.NameFinder; import de.mpiwg.anteater.species.common.CommonNameFindController; +import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder; import de.mpiwg.anteater.species.scientific.ScientificName; import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; import de.mpiwg.anteater.text.TextInformation; @@ -21,12 +23,14 @@ public List<SpeciesScientificResult> getResults(List<TextInformation> infos) { List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>(); + // for each text for (TextInformation info : infos) { + // get species and places List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions(); List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions(); - + // get species sorted by type of text (summary or supplementary information) Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>(); for (ScientificNamesExtraction ex : resultsExtractions) { Map<Integer, List<ScientificNamesExtraction>> extractionsMap = nameExtractionsByType.get(ex.getType()); @@ -45,6 +49,7 @@ namesList.add(ex); } + // get places sorted by type of text Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>(); for (PlacesExtraction ex : placesExtractionFromText) { Map<Integer, List<PlacesExtraction>> extractionsMap = placeExtractionsByType.get(ex.getType()); @@ -63,11 +68,16 @@ placeList.add(ex); } + // for each type for (Integer type : nameExtractionsByType.keySet()) { + // for each text of this type + // if there are more than one text with this type for (Integer textId : nameExtractionsByType.get(type).keySet()) { + // get names and places List<ScientificNamesExtraction> nameExtractions = nameExtractionsByType.get(type).get(textId); - List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>(); + List<PlacesExtraction> placesExtractions = placeExtractionsByType.get(type) != null && placeExtractionsByType.get(type).get(textId) != null ? placeExtractionsByType.get(type).get(textId) : new ArrayList<PlacesExtraction>(); + // for each place calculate which indices it takes List<Integer> placeIndices = new ArrayList<Integer>(); for (PlacesExtraction placeExtraction : placesExtractions) { @@ -84,6 +94,7 @@ } } + // for each species calculate its range List<Range> distinct = new ArrayList<Range>(); for (ScientificNamesExtraction nameExtraction : nameExtractions) { @@ -96,7 +107,7 @@ // if there is already one that occupies range of current name if (r.start <= name.getStart() && r.end >= name.getStart() + name.getLength()) { // take common species results - if (nameExtraction.getFoundBy() == CommonNameFindController.class) { + if (nameExtraction.getFoundBy() == NameFinder.LINNAEUS) { r.start = name.getStart(); r.end = name.getStart() + name.getLength(); r.name = name; @@ -113,6 +124,7 @@ } } + // check each species range if it is colliding with a place if yes don't keep it for (Range range : distinct) { ScientificName name = range.name; if (!placeIndices.contains(name.getStart()))