Mercurial > hg > anteater
view src/de/mpiwg/anteater/species/common/CommonNameFindController.java @ 4:dcc35f89dce3
Include Linnaeus findings
author | jdamerow |
---|---|
date | Thu, 25 Oct 2012 15:25:08 -0700 |
parents | ae96e4bc7fb2 |
children | 51ed79e28b45 |
line wrap: on
line source
package de.mpiwg.anteater.species.common;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
import de.mpiwg.anteater.species.scientific.ScientificName;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.text.TextType;
import de.mpiwg.anteater.xml.ICommonNameFinderParser;
import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
import de.mpiwg.anteater.xml.impl.LinnaeusParser;

/**
 * Finds common species names in the summaries and supplementary information
 * of a text by running a {@link LinnaeusNameFinder} over each text part.
 * When an analysis path is configured, results already stored in the
 * analysis file are reused and newly found results are added to that file.
 */
public class CommonNameFindController {

    /** Component name passed to the logger; the simple name of this class. */
    public static final String COMPONENT_NAME = CommonNameFindController.class.getSimpleName();

    private AnteaterConfiguration configuration;

    /**
     * @param configuration supplies the logger and the (optional) analysis path
     */
    public CommonNameFindController(AnteaterConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * Collects the common-name findings for all summaries and supplementary
     * information of the given text.
     * <p>
     * For each of the two text kinds, stored results from the analysis file
     * are used if there are any; otherwise the Linnaeus name finder is asked
     * for every text part and each non-null answer is kept (and added to the
     * analysis file when one is in use).
     * <p>
     * NOTE(review): the text index set on each extraction is the position of
     * the raw result in its result list. If the finder returns {@code null}
     * for an earlier text part, later positions no longer line up with the
     * positions in {@code info.getSummaries()} / {@code info.getSupplInfos()}
     * -- confirm whether consumers rely on that alignment.
     *
     * @param info the text whose summaries and supplementary information are searched
     * @return one extraction per raw result, summaries first, then supplementary information
     */
    public List<ScientificNamesExtraction> findCommonNamesInXML(TextInformation info) {
        List<String> summaryRaw;
        List<String> supplInfRaw;
        AnalysisXMLManager analysisManager = null;

        // Look for previously stored results, but only if an analysis folder is configured.
        boolean analysisFolderConfigured = configuration.getAnalysisPath() != null
                && !configuration.getAnalysisPath().isEmpty();
        if (analysisFolderConfigured) {
            // The analysis file carries the same file name as the text file.
            File textFile = new File(info.getFilepath());
            String analysisFile = configuration.getAnalysisPath() + File.separator + textFile.getName();
            analysisManager = new AnalysisXMLManager(analysisFile);

            configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME,
                    "Check analysis file for species names in summaries...");
            summaryRaw = analysisManager.getSummaryCommonNamesResults();
            configuration.getLogger().logMessage("found " + summaryRaw.size() + " result(s).");

            configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME,
                    "Check analysis file for species names in supplementary information...");
            supplInfRaw = analysisManager.getSupplementaryInfoCommonNamesResults();
            configuration.getLogger().logMessage("found " + supplInfRaw.size() + " result(s).");
        } else {
            summaryRaw = new ArrayList<String>();
            supplInfRaw = new ArrayList<String>();
        }

        ICommonNameFinder nameFinder = new LinnaeusNameFinder(configuration.getLogger());

        // Nothing stored for the summaries: run the Linnaeus name finder on each of them.
        if (summaryRaw.isEmpty()) {
            configuration.getLogger().logMessage(COMPONENT_NAME,
                    "No results found for summaries, so will ask LinnaeusNameFinder.");
            for (TextPart summary : info.getSummaries()) {
                String found = nameFinder.findCommonNames(summary.getText());
                if (found == null) {
                    continue;
                }
                summaryRaw.add(found);
                // With an analysis folder in use, also record the result in the analysis file.
                if (analysisManager != null) {
                    analysisManager.ensureCommonNamesStrucuture();
                    analysisManager.addSummaryCommonNamesResult(found);
                }
            }
        }

        // Nothing stored for the supplementary information: run the Linnaeus name finder there too.
        if (supplInfRaw.isEmpty()) {
            configuration.getLogger().logMessage(COMPONENT_NAME,
                    "No results found for supplementary information, so will ask LinnaeusNameFinder.");
            for (TextPart supplInfo : info.getSupplInfos()) {
                String found = nameFinder.findCommonNames(supplInfo.getText());
                if (found == null) {
                    continue;
                }
                supplInfRaw.add(found);
                // With an analysis folder in use, also record the result in the analysis file.
                if (analysisManager != null) {
                    analysisManager.ensureCommonNamesStrucuture();
                    analysisManager.addSupplInfCommonNamesResult(found);
                }
            }
        }

        configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");

        List<ScientificNamesExtraction> extractions = new ArrayList<ScientificNamesExtraction>();

        // Summary results first; the text index restarts at 0 for each text kind.
        for (int pos = 0; pos < summaryRaw.size(); pos++) {
            List<ScientificName> names = parseNames(summaryRaw.get(pos));
            ScientificNamesExtraction extraction = new ScientificNamesExtraction();
            extraction.setType(TextType.TYPE_SUMMARY);
            extraction.setNames(names);
            extraction.setTextIdx(pos);
            extraction.setFoundBy(getClass());
            extractions.add(extraction);
        }

        for (int pos = 0; pos < supplInfRaw.size(); pos++) {
            List<ScientificName> names = parseNames(supplInfRaw.get(pos));
            ScientificNamesExtraction extraction = new ScientificNamesExtraction();
            extraction.setType(TextType.TYPE_SUPLINF);
            extraction.setNames(names);
            extraction.setTextIdx(pos);
            extraction.setFoundBy(getClass());
            extractions.add(extraction);
        }

        return extractions;
    }

    /** Parses one raw name-finder result into the species names it contains. */
    private List<ScientificName> parseNames(String rawResult) {
        ICommonNameFinderParser parser = new LinnaeusParser(rawResult);
        return parser.parseSpeciesNames();
    }
}