Mercurial > hg > anteater
view src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java @ 9:51ed79e28b45
annotate texts with results and build events with linnaeus
author | jdamerow |
---|---|
date | Mon, 19 Nov 2012 16:36:15 -0700 |
parents | 0c7cf517ff2d |
children |
line wrap: on
line source
package de.mpiwg.anteater.results.impl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import de.mpiwg.anteater.places.PlaceInformation;
import de.mpiwg.anteater.places.PlacesExtraction;
import de.mpiwg.anteater.results.IResultFinder;
import de.mpiwg.anteater.results.SpeciesScientificResult;
import de.mpiwg.anteater.species.NameFinder;
import de.mpiwg.anteater.species.common.CommonNameFindController;
import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
import de.mpiwg.anteater.species.scientific.ScientificName;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
import de.mpiwg.anteater.text.TextInformation;

/**
 * Builds {@link SpeciesScientificResult}s from the scientific-name extractions
 * attached to a list of texts.
 *
 * <p>For every text, names and places are grouped by extraction type and text
 * index. Within each group, names whose character range lies inside an already
 * collected range are merged into it, and a name is dropped when its start
 * offset falls on a character that belongs to an extracted place.
 */
public class ScientificNameResultFinder implements IResultFinder<SpeciesScientificResult> {

    /**
     * Collects one result per distinct scientific name that does not start
     * inside an extracted place.
     *
     * @param infos the texts to inspect; each one supplies its own name and
     *              place extractions
     * @return the results for all texts, each with a prediction of 1.0
     */
    @Override
    public List<SpeciesScientificResult> getResults(List<TextInformation> infos) {
        List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>();

        for (TextInformation info : infos) {
            List<ScientificNamesExtraction> resultsExtractions = info.getScientificNamesExtractions();
            List<PlacesExtraction> placesExtractionFromText = info.getPlacesExtractions();

            // Per the original author's note, the "type" distinguishes kinds of
            // text such as summary vs. supplementary information.
            Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType =
                    groupNamesByTypeAndText(resultsExtractions);
            Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType =
                    groupPlacesByTypeAndText(placesExtractionFromText);

            for (Map.Entry<Integer, Map<Integer, List<ScientificNamesExtraction>>> typeEntry
                    : nameExtractionsByType.entrySet()) {
                // May be null when no place was extracted for this type.
                Map<Integer, List<PlacesExtraction>> placesOfType =
                        placeExtractionsByType.get(typeEntry.getKey());

                // There can be more than one text with the same type.
                for (Map.Entry<Integer, List<ScientificNamesExtraction>> textEntry
                        : typeEntry.getValue().entrySet()) {
                    List<PlacesExtraction> placesExtractions =
                            placesOfType == null ? null : placesOfType.get(textEntry.getKey());

                    Set<Integer> placeIndices = collectPlaceIndices(placesExtractions);
                    List<Range> distinct = collectDistinctRanges(textEntry.getValue());

                    // Keep a name only if it does not start on a character that
                    // is part of a place. Only the start offset is tested.
                    for (Range range : distinct) {
                        ScientificName name = range.name;
                        if (!placeIndices.contains(name.getStart())) {
                            SpeciesScientificResult finalResult = new SpeciesScientificResult();
                            finalResult.setFinding(name);
                            finalResult.setPrediction(1.0);
                            finalResult.setResult(range.extraction);
                            finalResult.setTextInfo(info);
                            finalResults.add(finalResult);
                        }
                    }
                }
            }
        }

        return finalResults;
    }

    /** Groups name extractions first by their type, then by their text index. */
    private Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> groupNamesByTypeAndText(
            List<ScientificNamesExtraction> extractions) {
        Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> byType =
                new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>();
        for (ScientificNamesExtraction ex : extractions) {
            Map<Integer, List<ScientificNamesExtraction>> byText = byType.get(ex.getType());
            if (byText == null) {
                byText = new HashMap<Integer, List<ScientificNamesExtraction>>();
                byType.put(ex.getType(), byText);
            }
            List<ScientificNamesExtraction> namesList = byText.get(ex.getTextIdx());
            if (namesList == null) {
                namesList = new ArrayList<ScientificNamesExtraction>();
                byText.put(ex.getTextIdx(), namesList);
            }
            namesList.add(ex);
        }
        return byType;
    }

    /** Groups place extractions first by their type, then by their text index. */
    private Map<Integer, Map<Integer, List<PlacesExtraction>>> groupPlacesByTypeAndText(
            List<PlacesExtraction> extractions) {
        Map<Integer, Map<Integer, List<PlacesExtraction>>> byType =
                new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>();
        for (PlacesExtraction ex : extractions) {
            Map<Integer, List<PlacesExtraction>> byText = byType.get(ex.getType());
            if (byText == null) {
                byText = new HashMap<Integer, List<PlacesExtraction>>();
                byType.put(ex.getType(), byText);
            }
            List<PlacesExtraction> placeList = byText.get(ex.getTextIdx());
            if (placeList == null) {
                placeList = new ArrayList<PlacesExtraction>();
                byText.put(ex.getTextIdx(), placeList);
            }
            placeList.add(ex);
        }
        return byType;
    }

    /**
     * Returns every character offset covered by a place, i.e. all offsets in
     * {@code [start, start + length)} of each place. A {@code null} argument
     * (no places for this text) yields an empty set.
     */
    private Set<Integer> collectPlaceIndices(List<PlacesExtraction> placesExtractions) {
        // A set is used because the offsets are only ever tested for
        // membership, once per candidate name.
        Set<Integer> placeIndices = new HashSet<Integer>();
        if (placesExtractions == null) {
            return placeIndices;
        }
        for (PlacesExtraction placeExtraction : placesExtractions) {
            for (PlaceInformation place : placeExtraction.getPlaceInformation()) {
                int start = place.getStart();
                int end = place.getStart() + place.getLength();
                for (int i = start; i < end; i++) {
                    placeIndices.add(i);
                }
            }
        }
        return placeIndices;
    }

    /**
     * Reduces all names of the given extractions to a list of distinct ranges.
     *
     * <p>A name that lies completely inside an already collected range is not
     * added again. If that name was found by {@code NameFinder.LINNAEUS}, it
     * takes over the existing range instead.
     */
    private List<Range> collectDistinctRanges(List<ScientificNamesExtraction> nameExtractions) {
        List<Range> distinct = new ArrayList<Range>();
        for (ScientificNamesExtraction nameExtraction : nameExtractions) {
            for (ScientificName name : nameExtraction.getNames()) {
                int nameStart = name.getStart();
                int nameEnd = name.getStart() + name.getLength();

                boolean covered = false;
                for (Range r : distinct) {
                    // Is there already a range that fully contains this name?
                    if (r.start <= nameStart && r.end >= nameEnd) {
                        if (nameExtraction.getFoundBy() == NameFinder.LINNAEUS) {
                            r.start = nameStart;
                            r.end = nameEnd;
                            r.name = name;
                            // Keep the extraction in step with the name so the
                            // result does not pair a Linnaeus finding with the
                            // extraction of the finder it replaced.
                            r.extraction = nameExtraction;
                        }
                        covered = true;
                        break;
                    }
                }

                if (!covered) {
                    distinct.add(new Range(nameStart, nameEnd, name, nameExtraction));
                }
            }
        }
        return distinct;
    }

    /**
     * Character range {@code [start, end)} occupied by a name, together with
     * the name itself and the extraction it came from.
     */
    class Range {
        public int start;
        public int end;
        public ScientificName name;
        public ScientificNamesExtraction extraction;

        public Range(int start, int end, ScientificName name, ScientificNamesExtraction extraction) {
            this.start = start;
            this.end = end;
            this.name = name;
            this.extraction = extraction;
        }
    }
}