diff src/de/mpiwg/anteater/persons/PersonFinderController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/persons/PersonFinderController.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,113 @@
+package de.mpiwg.anteater.persons;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mpiwg.anteater.AnteaterConfiguration;
+import de.mpiwg.anteater.persons.impl.StanfordNLPPersonFinder;
+import de.mpiwg.anteater.text.TextInformation;
+import de.mpiwg.anteater.text.TextPart;
+import de.mpiwg.anteater.text.TextType;
+import de.mpiwg.anteater.xml.IPersonFinderParser;
+import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
+import de.mpiwg.anteater.xml.impl.StanfordNERXMLParser;
+
+/**
+ * Finds the persons mentioned in the summaries and supplementary information
+ * of a text.
+ * <p>
+ * If an analysis path is configured, results already stored in the analysis
+ * file belonging to the text are reused. For every group (summaries,
+ * supplementary information) without stored results, the texts are sent to the
+ * {@link StanfordNLPPersonFinder} and the raw results are written back to the
+ * analysis file.
+ */
+public class PersonFinderController {
+
+	public final static String COMPONENT_NAME = PersonFinderController.class.getSimpleName();
+
+	private final AnteaterConfiguration configuration;
+
+	/**
+	 * @param configuration
+	 *            supplies the logger and the (optional) analysis path used by
+	 *            this controller
+	 */
+	public PersonFinderController(AnteaterConfiguration configuration) {
+		this.configuration = configuration;
+	}
+
+	/**
+	 * Extracts the persons from all summaries and supplementary information
+	 * texts of the given text.
+	 *
+	 * @param info
+	 *            the text to analyse; the name of its file is used to locate
+	 *            the analysis file inside the configured analysis path
+	 * @return one {@link PersonsExtraction} per raw result, summaries first,
+	 *         then supplementary information; never <code>null</code>
+	 */
+	public List<PersonsExtraction> findPersonsInXML(TextInformation info) {
+		List<String> summaryAnalysisResults = new ArrayList<String>();
+		List<String> supplinfAnalysisResults = new ArrayList<String>();
+
+		// check if there are already stored results
+		AnalysisXMLManager analysisManager = null;
+		String analysisPath = configuration.getAnalysisPath();
+		if (analysisPath != null && !analysisPath.isEmpty()) {
+			File file = new File(info.getFilepath());
+
+			analysisManager = new AnalysisXMLManager(analysisPath + File.separator + file.getName());
+
+			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in summaries...");
+			summaryAnalysisResults = orEmpty(analysisManager.getSummaryPersonsResult());
+			configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
+
+			configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in supplementary information...");
+			supplinfAnalysisResults = orEmpty(analysisManager.getSupplementaryInfoPersonsResult());
+			configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
+		}
+
+		boolean summariesMissing = summaryAnalysisResults.isEmpty();
+		boolean supplInfosMissing = supplinfAnalysisResults.isEmpty();
+
+		// Only set up the person finder if at least one group has to be analysed.
+		IPersonFinder personFinder = null;
+		if (summariesMissing || supplInfosMissing) {
+			personFinder = new StanfordNLPPersonFinder(configuration.getLogger());
+		}
+
+		// if there are no stored results for summaries, ask the person finder
+		if (summariesMissing) {
+			log("No results found for summaries, so will ask Stanford NLP NER Parser.");
+
+			for (TextPart sum : info.getSummaries()) {
+				String sumResult = personFinder.findPersons(sum.getText());
+				if (sumResult != null) {
+					summaryAnalysisResults.add(sumResult);
+
+					// if there is an analysis folder, add result to analysis file
+					if (analysisManager != null) {
+						analysisManager.addSummaryPersonsResult(sumResult);
+					}
+				}
+			}
+		}
+
+		// if there are no stored results for supplementary information, ask the person finder
+		if (supplInfosMissing) {
+			log("No results found for supplementary information, so will ask Stanford NLP NER Parser.");
+
+			for (TextPart sInf : info.getSupplInfos()) {
+				String supinfResult = personFinder.findPersons(sInf.getText());
+				if (supinfResult != null) {
+					supplinfAnalysisResults.add(supinfResult);
+
+					// if there is an analysis folder, add result to analysis file
+					if (analysisManager != null) {
+						analysisManager.addSupplInfPersonsResult(supinfResult);
+					}
+				}
+			}
+		}
+
+		// create objects
+		// NOTE(review): the text index counts raw results, not text parts. Parts
+		// for which the finder returned null are skipped above, so the index
+		// may not match the position of the text part - confirm with consumers.
+		log("Creating analysis results...");
+		List<PersonsExtraction> results = new ArrayList<PersonsExtraction>();
+
+		int idx = 0;
+		for (String summaryResult : summaryAnalysisResults) {
+			PersonsExtraction extraction = parseExtraction(summaryResult, idx);
+			extraction.setType(TextType.TYPE_SUMMARY);
+			results.add(extraction);
+			idx++;
+		}
+
+		idx = 0;
+		for (String suplinfResult : supplinfAnalysisResults) {
+			PersonsExtraction extraction = parseExtraction(suplinfResult, idx);
+			extraction.setType(TextType.TYPE_SUPLINF);
+			results.add(extraction);
+			idx++;
+		}
+
+		return results;
+	}
+
+	/**
+	 * Parses one raw person finder result into an extraction object. The text
+	 * type is left for the caller to set.
+	 */
+	private PersonsExtraction parseExtraction(String rawResult, int textIdx) {
+		IPersonFinderParser parser = new StanfordNERXMLParser(rawResult);
+		List<APerson> persons = parser.parsePersons();
+
+		PersonsExtraction extraction = new PersonsExtraction();
+		extraction.setPerson(persons);
+		extraction.setTextIdx(textIdx);
+		return extraction;
+	}
+
+	/**
+	 * Returns the given list, or a new empty list if it is <code>null</code>,
+	 * so that callers can always query and extend the result.
+	 */
+	private List<String> orEmpty(List<String> storedResults) {
+		if (storedResults == null) {
+			return new ArrayList<String>();
+		}
+		return storedResults;
+	}
+
+	/** Logs a message under this component's name. */
+	private void log(String message) {
+		configuration.getLogger().logMessage(COMPONENT_NAME, message);
+	}
+}