Mercurial > hg > anteater
diff src/de/mpiwg/anteater/persons/PersonFinderController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000
// +++ b/src/de/mpiwg/anteater/persons/PersonFinderController.java Fri Sep 14 10:30:43 2012 +0200
package de.mpiwg.anteater.persons;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.persons.impl.StanfordNLPPersonFinder;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.text.TextType;
import de.mpiwg.anteater.xml.IPersonFinderParser;
import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
import de.mpiwg.anteater.xml.impl.StanfordNERXMLParser;

/**
 * Produces {@link PersonsExtraction}s for the summaries and the supplementary
 * information of a text.
 *
 * Raw person-finder results are taken from the analysis file when an analysis
 * path is configured and the file already holds results; otherwise they are
 * requested from a {@link StanfordNLPPersonFinder} and, if an analysis file is
 * in use, appended to it.
 */
public class PersonFinderController {

    /** Component name passed to the logger for messages emitted by this class. */
    public static final String COMPONENT_NAME = PersonFinderController.class.getSimpleName();

    private final AnteaterConfiguration configuration;

    /**
     * @param configuration supplies the analysis path and the logger used by this controller
     */
    public PersonFinderController(AnteaterConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * Finds persons in the summaries and supplementary information of the given text.
     *
     * @param info the text whose file path, summaries and supplementary
     *             information are read
     * @return one extraction per raw result: all summary extractions first,
     *         followed by all supplementary-information extractions; the text
     *         index restarts at 0 for each of the two groups
     */
    public List<PersonsExtraction> findPersonsInXML(TextInformation info) {
        List<PersonsExtraction> results = new ArrayList<PersonsExtraction>();
        List<String> summaryAnalysisResults = new ArrayList<String>();
        List<String> supplinfAnalysisResults = new ArrayList<String>();

        // Check whether results were already stored by an earlier run.
        AnalysisXMLManager analysisManager = null;
        String analysisPath = configuration.getAnalysisPath();
        if (analysisPath != null && !analysisPath.isEmpty()) {
            // The analysis file carries the same file name as the analysed text.
            File file = new File(info.getFilepath());
            analysisManager = new AnalysisXMLManager(analysisPath + File.separator + file.getName());

            configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in summaries...");
            summaryAnalysisResults = analysisManager.getSummaryPersonsResult();
            configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");

            configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in supplementary information...");
            supplinfAnalysisResults = analysisManager.getSupplementaryInfoPersonsResult();
            configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
        }

        IPersonFinder personFinder = new StanfordNLPPersonFinder(configuration.getLogger());

        // No stored results for summaries: ask the Stanford NLP person finder.
        if (summaryAnalysisResults.isEmpty()) {
            configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask Stanford NLP NER Parser.");

            for (TextPart sum : info.getSummaries()) {
                String sumResult = personFinder.findPersons(sum.getText());
                if (sumResult != null) {
                    summaryAnalysisResults.add(sumResult);

                    // If there is an analysis file, remember the result there.
                    if (analysisManager != null) {
                        analysisManager.addSummaryPersonsResult(sumResult);
                    }
                }
            }
        }

        // No stored results for supplementary information: ask the Stanford NLP person finder.
        if (supplinfAnalysisResults.isEmpty()) {
            configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask Stanford NLP NER Parser.");

            for (TextPart sInf : info.getSupplInfos()) {
                String supinfResult = personFinder.findPersons(sInf.getText());
                if (supinfResult != null) {
                    supplinfAnalysisResults.add(supinfResult);

                    // If there is an analysis file, remember the result there.
                    if (analysisManager != null) {
                        analysisManager.addSupplInfPersonsResult(supinfResult);
                    }
                }
            }
        }

        // Turn the raw results into extraction objects.
        // NOTE(review): the text index is the position within the raw result
        // list. Text parts for which findPersons returned null are skipped
        // above, so this index can differ from the position of the text part
        // in info.getSummaries() / info.getSupplInfos() - confirm that
        // consumers expect this.
        configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
        int idx = 0;
        for (String summaryResult : summaryAnalysisResults) {
            PersonsExtraction extraction = parseExtraction(summaryResult, idx);
            extraction.setType(TextType.TYPE_SUMMARY);
            results.add(extraction);
            idx++;
        }

        idx = 0;
        for (String suplinfResult : supplinfAnalysisResults) {
            PersonsExtraction extraction = parseExtraction(suplinfResult, idx);
            extraction.setType(TextType.TYPE_SUPLINF);
            results.add(extraction);
            idx++;
        }

        return results;
    }

    /**
     * Parses one raw person-finder result into an extraction carrying the
     * parsed persons and the given text index; the caller sets the text type.
     */
    private PersonsExtraction parseExtraction(String rawResult, int textIdx) {
        IPersonFinderParser parser = new StanfordNERXMLParser(rawResult);
        List<APerson> persons = parser.parsePersons();

        PersonsExtraction extraction = new PersonsExtraction();
        extraction.setPerson(persons);
        extraction.setTextIdx(textIdx);
        return extraction;
    }
}