diff src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children dcc35f89dce3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,118 @@
+package de.mpiwg.anteater.species.scientific;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mpiwg.anteater.AnteaterConfiguration;
+import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder;
+import de.mpiwg.anteater.text.TextInformation;
+import de.mpiwg.anteater.text.TextPart;
+import de.mpiwg.anteater.text.TextType;
+import de.mpiwg.anteater.xml.INameFinderParser;
+import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
+import de.mpiwg.anteater.xml.impl.GNRDXMLParser;
+
+public class ScientificNameFindController {
+	public final static String COMPONENT_NAME = ScientificNameFindController.class.getSimpleName();
+
+	private AnteaterConfiguration configuration;
+
+	public ScientificNameFindController(AnteaterConfiguration configuration) {
+		this.configuration = configuration;
+	}
+
+	/**
+	 * Finds scientific names in the summaries and the supplementary
+	 * information of a text.
+	 * <p>
+	 * When an analysis path is configured, raw results already stored in the
+	 * analysis file for this text are reused. For each of the two text types
+	 * that has no stored results, every text part is sent to a
+	 * {@link GNRDNameFinder}; each non-null answer is kept and, if an
+	 * analysis file is in use, added to it.
+	 *
+	 * @param info the text whose summaries and supplementary information are searched
+	 * @return one extraction per raw result, summaries first, each carrying
+	 *         its text type and its position among the results of that type
+	 */
+	public List<ScientificNamesExtraction> findScientificNamesInXML(TextInformation info) {
+		List<String> rawSummaryResults = new ArrayList<String>();
+		List<String> rawSupplInfResults = new ArrayList<String>();
+
+		// Reuse results from an earlier run if an analysis folder is configured.
+		AnalysisXMLManager storedAnalysis = null;
+		if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
+			String analysisFileName = new File(info.getFilepath()).getName();
+			storedAnalysis = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + analysisFileName);
+
+			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in summaries...");
+			rawSummaryResults = storedAnalysis.getSummaryNamesResults();
+			configuration.getLogger().logMessage("found " + rawSummaryResults.size() + " result(s).");
+
+			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in supplementary information...");
+			rawSupplInfResults = storedAnalysis.getSupplementaryInfoNamesResults();
+			configuration.getLogger().logMessage("found " + rawSupplInfResults.size() + " result(s).");
+		}
+
+		IScientificNamesFinder gnrdFinder = new GNRDNameFinder(configuration.getLogger());
+
+		// Nothing stored for the summaries: ask the GNRD name finding service.
+		if (rawSummaryResults.isEmpty()) {
+			configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask GNRDNameFinder.");
+
+			for (TextPart summary : info.getSummaries()) {
+				String found = gnrdFinder.findScientificNames(summary.getText());
+				if (found == null) {
+					continue;
+				}
+				rawSummaryResults.add(found);
+				// Persist the new result when an analysis file is in use.
+				if (storedAnalysis != null) {
+					storedAnalysis.addSummaryNamesResult(found);
+				}
+			}
+		}
+
+		// Nothing stored for the supplementary information: ask the service as well.
+		if (rawSupplInfResults.isEmpty()) {
+			configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask GNRDNameFinder.");
+
+			for (TextPart supplInfo : info.getSupplInfos()) {
+				String found = gnrdFinder.findScientificNames(supplInfo.getText());
+				if (found == null) {
+					continue;
+				}
+				rawSupplInfResults.add(found);
+				// Persist the new result when an analysis file is in use.
+				if (storedAnalysis != null) {
+					storedAnalysis.addSupplInfNamesResult(found);
+				}
+			}
+		}
+
+		configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
+		List<ScientificNamesExtraction> extractions = new ArrayList<ScientificNamesExtraction>();
+		for (int pos = 0; pos < rawSummaryResults.size(); pos++) {
+			INameFinderParser parser = new GNRDXMLParser(rawSummaryResults.get(pos));
+			List<ScientificName> names = parser.parseScientificNames();
+			ScientificNamesExtraction extraction = new ScientificNamesExtraction();
+			extraction.setType(TextType.TYPE_SUMMARY);
+			extraction.setNames(names);
+			extraction.setTextIdx(pos);
+			extractions.add(extraction);
+		}
+
+		for (int pos = 0; pos < rawSupplInfResults.size(); pos++) {
+			INameFinderParser parser = new GNRDXMLParser(rawSupplInfResults.get(pos));
+			List<ScientificName> names = parser.parseScientificNames();
+			ScientificNamesExtraction extraction = new ScientificNamesExtraction();
+			extraction.setType(TextType.TYPE_SUPLINF);
+			extraction.setNames(names);
+			extraction.setTextIdx(pos);
+			extractions.add(extraction);
+		}
+
+		return extractions;
+	}
+}