view src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java @ 4:dcc35f89dce3

include linneaus findings
author jdamerow
date Thu, 25 Oct 2012 15:25:08 -0700
parents 036535fcd179
children 51ed79e28b45
line wrap: on
line source

package de.mpiwg.anteater.species.scientific;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.text.TextType;
import de.mpiwg.anteater.xml.INameFinderParser;
import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
import de.mpiwg.anteater.xml.impl.GNRDXMLParser;

/**
 * Collects the scientific names found in the summaries and the supplementary
 * information of a text.
 *
 * Raw name-finder results are read from an analysis file when an analysis path
 * is configured; otherwise, or when the analysis file holds no results, they are
 * requested from a {@link GNRDNameFinder}. The raw results are then parsed with a
 * {@link GNRDXMLParser}.
 */
public class ScientificNameFindController {
	/** Component name used as the prefix of the log messages of this class. */
	public final static String COMPONENT_NAME = ScientificNameFindController.class.getSimpleName();
	
	/** Supplies the analysis path and the logger; assigned once in the constructor. */
	private final AnteaterConfiguration configuration;

	/**
	 * Creates a controller that works with the given configuration.
	 * @param configuration configuration that provides the analysis path and the logger
	 */
	public ScientificNameFindController(AnteaterConfiguration configuration) {
		this.configuration = configuration;
	}

	/**
	 * Retrieves the scientific names contained in a text. All summaries and all
	 * supplementary information of the text are searched for names.
	 *
	 * If an analysis path is configured, previously stored results are read from the
	 * analysis file that has the same file name as the text. For each of the two
	 * groups (summaries, supplementary information) that has no stored results, the
	 * name finder is asked instead and every non-null answer is added to the analysis
	 * file (if there is one).
	 *
	 * @param info the text whose summaries and supplementary information are searched
	 * @return one extraction per raw result, first those for the summaries, then those
	 *         for the supplementary information; empty if nothing was found
	 */
	public List<ScientificNamesExtraction> findScientificNamesInXML(TextInformation info) {
		
		List<String> summaryAnalysisResults = new ArrayList<String>();
		List<String> supplinfAnalysisResults = new ArrayList<String>();
		
		// check if there are already stored results
		AnalysisXMLManager analysisManager = null;
		if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
			File file = new File(info.getFilepath());
			
			// the analysis file lives in the analysis folder and is named like the text file
			analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator +  file.getName());
			
			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in summaries...");
			summaryAnalysisResults = analysisManager.getSummaryNamesResults();
			configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
		
			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in supplementary information...");
			supplinfAnalysisResults = analysisManager.getSupplementaryInfoNamesResults();
			configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
		}
		
		IScientificNamesFinder nameFinder = new GNRDNameFinder(configuration.getLogger());
		
		// if there are no results for summaries, ask GNRD name finding service.
		if (summaryAnalysisResults.isEmpty()) {
			configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask GNRDNameFinder.");
			
			for (TextPart sum : info.getSummaries()) {
				String sumResult = nameFinder.findScientificNames(sum.getText());
				if (sumResult != null) {
					summaryAnalysisResults.add(sumResult);
					
					// if there is an analysis folder, add result to analysis file
					if (analysisManager != null) {
						analysisManager.addSummaryNamesResult(sumResult);
					}
				}
			}
		}
		
		// if there are no results for supplementary information, ask GNRD name finding service
		if (supplinfAnalysisResults.isEmpty()) {
			configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask GNRDNameFinder.");
			
			for (TextPart sInf : info.getSupplInfos()) {
				String supinfResult = nameFinder.findScientificNames(sInf.getText());
				if (supinfResult != null) {
					supplinfAnalysisResults.add(supinfResult);
					
					// if there is an analysis folder, add result to analysis file
					if (analysisManager != null) {
						analysisManager.addSupplInfNamesResult(supinfResult);
					}
				}
			}
		}
		
		configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
		List<ScientificNamesExtraction> results = new ArrayList<ScientificNamesExtraction>();
		results.addAll(createExtractions(summaryAnalysisResults, true));
		results.addAll(createExtractions(supplinfAnalysisResults, false));
		
		return results;
	}

	/**
	 * Parses raw name-finder results into extractions, one per raw result.
	 *
	 * NOTE(review): the text index of an extraction is its position in
	 * {@code rawResults}. Text parts for which the name finder returned null are
	 * not stored, so for later parts this index presumably no longer matches the
	 * position of the part in the text - confirm whether callers rely on it.
	 *
	 * @param rawResults raw results as returned by the name finder or the analysis file
	 * @param fromSummaries true to mark the extractions with {@code TextType.TYPE_SUMMARY},
	 *        false to mark them with {@code TextType.TYPE_SUPLINF}
	 * @return the extractions in the order of {@code rawResults}
	 */
	private List<ScientificNamesExtraction> createExtractions(List<String> rawResults, boolean fromSummaries) {
		List<ScientificNamesExtraction> extractions = new ArrayList<ScientificNamesExtraction>();
		
		int idx = 0;
		for (String rawResult : rawResults) {
			INameFinderParser nameParser = new GNRDXMLParser(rawResult);
			List<ScientificName> scientificNames = nameParser.parseScientificNames();
			
			ScientificNamesExtraction extraction = new ScientificNamesExtraction();
			if (fromSummaries) {
				extraction.setType(TextType.TYPE_SUMMARY);
			} else {
				extraction.setType(TextType.TYPE_SUPLINF);
			}
			extraction.setNames(scientificNames);
			extraction.setTextIdx(idx);
			// getClass() instead of a class literal so that subclasses report themselves
			extraction.setFoundBy(this.getClass());
			
			extractions.add(extraction);
			idx++;
		}
		
		return extractions;
	}
}