view src/de/mpiwg/anteater/results/impl/ScientificNameResultFinder.java @ 9:51ed79e28b45

annotate texts with results and build events with linnaeus
author jdamerow
date Mon, 19 Nov 2012 16:36:15 -0700
parents 0c7cf517ff2d
children
line wrap: on
line source

package de.mpiwg.anteater.results.impl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import de.mpiwg.anteater.places.PlaceInformation;
import de.mpiwg.anteater.places.PlacesExtraction;
import de.mpiwg.anteater.results.IResultFinder;
import de.mpiwg.anteater.results.SpeciesScientificResult;
import de.mpiwg.anteater.species.NameFinder;
import de.mpiwg.anteater.species.common.CommonNameFindController;
import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
import de.mpiwg.anteater.species.scientific.ScientificName;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
import de.mpiwg.anteater.text.TextInformation;

/**
 * Builds {@link SpeciesScientificResult}s from the scientific name
 * extractions of a list of texts.
 * <p>
 * For every text, name and place extractions are grouped by their type and
 * text index. Within each group, names whose character span is already covered
 * by a previously seen name are merged into one entry, and names that start
 * at a character offset occupied by an extracted place are dropped.
 */
public class ScientificNameResultFinder implements
		IResultFinder<SpeciesScientificResult> {

	/**
	 * Collects the scientific name results for all given texts.
	 *
	 * @param infos the texts to evaluate; {@code null} is treated like an
	 *            empty list
	 * @return one result per distinct scientific name that does not start
	 *         inside a place; never {@code null}
	 */
	@Override
	public List<SpeciesScientificResult> getResults(List<TextInformation> infos) {
		List<SpeciesScientificResult> finalResults = new ArrayList<SpeciesScientificResult>();
		if (infos == null) {
			return finalResults;
		}

		// for each text
		for (TextInformation info : infos) {
			// species and places, sorted by type of text and text index
			Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> nameExtractionsByType = groupNameExtractions(info.getScientificNamesExtractions());
			Map<Integer, Map<Integer, List<PlacesExtraction>>> placeExtractionsByType = groupPlaceExtractions(info.getPlacesExtractions());

			// for each type
			for (Map.Entry<Integer, Map<Integer, List<ScientificNamesExtraction>>> typeEntry : nameExtractionsByType.entrySet()) {
				Map<Integer, List<PlacesExtraction>> placesOfType = placeExtractionsByType.get(typeEntry.getKey());

				// for each text of this type (there may be more than one)
				for (Map.Entry<Integer, List<ScientificNamesExtraction>> textEntry : typeEntry.getValue().entrySet()) {
					// places are optional: a missing type or text index means "no places"
					List<PlacesExtraction> placesExtractions = placesOfType != null ? placesOfType.get(textEntry.getKey()) : null;

					Set<Integer> placeIndices = collectPlaceIndices(placesExtractions);
					List<Range> distinct = collectDistinctRanges(textEntry.getValue());

					// keep only species that do not start at a position taken by a place
					for (Range range : distinct) {
						ScientificName name = range.name;
						if (!placeIndices.contains(name.getStart())) {
							SpeciesScientificResult finalResult = new SpeciesScientificResult();
							finalResult.setFinding(name);
							finalResult.setPrediction(1.0);
							finalResult.setResult(range.extraction);
							finalResult.setTextInfo(info);
							finalResults.add(finalResult);
						}
					}
				}
			}
		}
		return finalResults;
	}

	/**
	 * Groups name extractions first by their type and then by their text index.
	 */
	private Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> groupNameExtractions(List<ScientificNamesExtraction> extractions) {
		Map<Integer, Map<Integer, List<ScientificNamesExtraction>>> byType = new HashMap<Integer, Map<Integer, List<ScientificNamesExtraction>>>();
		for (ScientificNamesExtraction ex : extractions) {
			Map<Integer, List<ScientificNamesExtraction>> byText = byType.get(ex.getType());
			if (byText == null) {
				byText = new HashMap<Integer, List<ScientificNamesExtraction>>();
				byType.put(ex.getType(), byText);
			}

			List<ScientificNamesExtraction> namesList = byText.get(ex.getTextIdx());
			if (namesList == null) {
				namesList = new ArrayList<ScientificNamesExtraction>();
				byText.put(ex.getTextIdx(), namesList);
			}

			namesList.add(ex);
		}
		return byType;
	}

	/**
	 * Groups place extractions first by their type and then by their text index.
	 */
	private Map<Integer, Map<Integer, List<PlacesExtraction>>> groupPlaceExtractions(List<PlacesExtraction> extractions) {
		Map<Integer, Map<Integer, List<PlacesExtraction>>> byType = new HashMap<Integer, Map<Integer, List<PlacesExtraction>>>();
		for (PlacesExtraction ex : extractions) {
			Map<Integer, List<PlacesExtraction>> byText = byType.get(ex.getType());
			if (byText == null) {
				byText = new HashMap<Integer, List<PlacesExtraction>>();
				byType.put(ex.getType(), byText);
			}

			List<PlacesExtraction> placeList = byText.get(ex.getTextIdx());
			if (placeList == null) {
				placeList = new ArrayList<PlacesExtraction>();
				byText.put(ex.getTextIdx(), placeList);
			}

			placeList.add(ex);
		}
		return byType;
	}

	/**
	 * Returns every character index covered by a place in the given
	 * extractions, i.e. start (inclusive) to start + length (exclusive) of
	 * each place. A {@code null} list yields an empty set.
	 */
	private Set<Integer> collectPlaceIndices(List<PlacesExtraction> placesExtractions) {
		// a set, because the indices are only used for membership tests
		Set<Integer> placeIndices = new HashSet<Integer>();
		if (placesExtractions == null) {
			return placeIndices;
		}

		for (PlacesExtraction placeExtraction : placesExtractions) {
			for (PlaceInformation place : placeExtraction.getPlaceInformation()) {
				int start = place.getStart();
				int end = start + place.getLength();
				for (int i = start; i < end; i++) {
					placeIndices.add(i);
				}
			}
		}
		return placeIndices;
	}

	/**
	 * Reduces the names of the given extractions to one range per distinct
	 * text span. A name that lies completely inside an already collected range
	 * is not added again; if that name was found by
	 * {@link NameFinder#LINNAEUS} it replaces the collected entry instead.
	 */
	private List<Range> collectDistinctRanges(List<ScientificNamesExtraction> nameExtractions) {
		List<Range> distinct = new ArrayList<Range>();

		for (ScientificNamesExtraction nameExtraction : nameExtractions) {
			for (ScientificName name : nameExtraction.getNames()) {
				int nameStart = name.getStart();
				int nameEnd = nameStart + name.getLength();

				boolean covered = false;
				for (Range r : distinct) {
					// is there already a range that fully contains the current name?
					if (r.start <= nameStart && r.end >= nameEnd) {
						// Linnaeus results take precedence over the stored one
						if (nameExtraction.getFoundBy() == NameFinder.LINNAEUS) {
							r.start = nameStart;
							r.end = nameEnd;
							r.name = name;
							// keep the extraction in sync with the name, otherwise the
							// result would pair this name with another finder's extraction
							r.extraction = nameExtraction;
						}
						covered = true;
						break;
					}
				}

				// no collected range covers this name, so it is a new distinct one
				if (!covered) {
					distinct.add(new Range(nameStart, nameEnd, name, nameExtraction));
				}
			}
		}
		return distinct;
	}

	/**
	 * Text span occupied by a scientific name, together with the name itself
	 * and the extraction the name was taken from.
	 */
	class Range {
		/** Start index of the name (inclusive). */
		public int start;
		/** Start index plus length of the name (exclusive end). */
		public int end;
		/** The name occupying this range. */
		public ScientificName name;
		/** The extraction that {@link #name} belongs to. */
		public ScientificNamesExtraction extraction;

		public Range(int start, int end, ScientificName name, ScientificNamesExtraction extraction) {
			super();
			this.start = start;
			this.end = end;
			this.name = name;
			this.extraction = extraction;
		}
	}
}