Mercurial > hg > anteater
diff src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children | dcc35f89dce3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,118 @@ +package de.mpiwg.anteater.species.scientific; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import de.mpiwg.anteater.AnteaterConfiguration; +import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder; +import de.mpiwg.anteater.text.TextInformation; +import de.mpiwg.anteater.text.TextPart; +import de.mpiwg.anteater.text.TextType; +import de.mpiwg.anteater.xml.INameFinderParser; +import de.mpiwg.anteater.xml.impl.AnalysisXMLManager; +import de.mpiwg.anteater.xml.impl.GNRDXMLParser; + +public class ScientificNameFindController { + public final static String COMPONENT_NAME = ScientificNameFindController.class.getSimpleName(); + + private AnteaterConfiguration configuration; + + public ScientificNameFindController(AnteaterConfiguration configuration) { + this.configuration = configuration; + } + + /** + * Method for retrieving scientific names in a XML document. It tooks all summaries and all + * suplementary informations and looks for names in them. + * @param file + * @return + */ + public List<ScientificNamesExtraction> findScientificNamesInXML(TextInformation info) { + + List<ScientificNamesExtraction> results = new ArrayList<ScientificNamesExtraction>(); + List<String> summaryAnalysisResults = new ArrayList<String>(); + List<String> supplinfAnalysisResults = new ArrayList<String>(); + + // check if there are already stored results + AnalysisXMLManager analysisManager = null; + if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) { + File file = new File(info.getFilepath()); + + analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName()); + + configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in summaries..."); + summaryAnalysisResults = analysisManager.getSummaryNamesResults(); + configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s)."); + + configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in supplementary information..."); + supplinfAnalysisResults = analysisManager.getSupplementaryInfoNamesResults(); + configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s)."); + } + + IScientificNamesFinder nameFinder = new GNRDNameFinder(configuration.getLogger()); + + // if there are no results for summaries, ask GNRD name finding service. + if (summaryAnalysisResults.size() == 0) { + configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask GNRDNameFinder."); + + for (TextPart sum : info.getSummaries()) { + String sumResult = nameFinder.findScientificNames(sum.getText()); + if (sumResult != null) { + summaryAnalysisResults.add(sumResult); + + // if there is an analysis folder, add result to analysis file + if (analysisManager != null) + analysisManager.addSummaryNamesResult(sumResult); + } + } + } + + // if there are no results for supplementary information, ask GNRD name fining service + if (supplinfAnalysisResults.size() == 0) { + configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask GNRDNameFinder."); + + for (TextPart sInf : info.getSupplInfos()) { + String supinfResult = nameFinder.findScientificNames(sInf.getText()); + if (supinfResult != null) { + supplinfAnalysisResults.add(supinfResult); + + // if there is an analysis folder, add result to analysis file + if (analysisManager != null) + analysisManager.addSupplInfNamesResult(supinfResult); + } + } + } + + configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results..."); + int idx = 0; + for (String summaryResult : summaryAnalysisResults) { + INameFinderParser nameParser = new GNRDXMLParser(summaryResult); + + List<ScientificName> scientificNames = nameParser.parseScientificNames(); + ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction(); + scientificNameResult.setType(TextType.TYPE_SUMMARY); + scientificNameResult.setNames(scientificNames); + scientificNameResult.setTextIdx(idx); + + results.add(scientificNameResult); + idx++; + } + + idx = 0; + for (String suplinfResult : supplinfAnalysisResults) { + INameFinderParser nameParser = new GNRDXMLParser(suplinfResult); + + List<ScientificName> scientificNames = nameParser.parseScientificNames(); + ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction(); + scientificNameResult.setType(TextType.TYPE_SUPLINF); + scientificNameResult.setNames(scientificNames); + scientificNameResult.setTextIdx(idx); + + results.add(scientificNameResult); + idx++; + } + + return results; + } +}