comparison src/de/mpiwg/anteater/persons/PersonFinderController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 package de.mpiwg.anteater.persons;
2
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.List;
6
7 import de.mpiwg.anteater.AnteaterConfiguration;
8 import de.mpiwg.anteater.persons.impl.StanfordNLPPersonFinder;
9 import de.mpiwg.anteater.text.TextInformation;
10 import de.mpiwg.anteater.text.TextPart;
11 import de.mpiwg.anteater.text.TextType;
12 import de.mpiwg.anteater.xml.IPersonFinderParser;
13 import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
14 import de.mpiwg.anteater.xml.impl.StanfordNERXMLParser;
15
16 public class PersonFinderController {
17
18 public final static String COMPONENT_NAME = PersonFinderController.class.getSimpleName();
19
20 private AnteaterConfiguration configuration;
21
22 public PersonFinderController(AnteaterConfiguration configuration) {
23 this.configuration = configuration;
24 }
25
26 public List<PersonsExtraction> findPersonsInXML(TextInformation info) {
27 List<PersonsExtraction> results = new ArrayList<PersonsExtraction>();
28 List<String> summaryAnalysisResults = new ArrayList<String>();
29 List<String> supplinfAnalysisResults = new ArrayList<String>();
30
31 // check if there are already stored results
32 AnalysisXMLManager analysisManager = null;
33 if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
34 File file = new File(info.getFilepath());
35
36 analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName());
37
38 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in summaries...");
39 summaryAnalysisResults = analysisManager.getSummaryPersonsResult();
40 configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
41
42 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in supplementary information...");
43 supplinfAnalysisResults = analysisManager.getSupplementaryInfoPersonsResult();
44 configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
45 }
46
47 IPersonFinder personFinder = new StanfordNLPPersonFinder(configuration.getLogger());
48
49 // if there are no results for summaries, ask place finding service.
50 if (summaryAnalysisResults.size() == 0) {
51 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask Stanford NLP NER Parser.");
52
53 for (TextPart sum : info.getSummaries()) {
54 String sumResult = personFinder.findPersons(sum.getText());
55 if (sumResult != null) {
56 summaryAnalysisResults.add(sumResult);
57
58 // if there is an analysis folder, add result to analysis file
59 if (analysisManager != null)
60 analysisManager.addSummaryPersonsResult(sumResult);
61 }
62 }
63 }
64
65 // if there are no results for supplementary information, ask GNRD name fining service
66 if (supplinfAnalysisResults.size() == 0) {
67 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask Stanford NLP NER Parser.");
68
69 for (TextPart sInf : info.getSupplInfos()) {
70 String supinfResult = personFinder.findPersons(sInf.getText());
71 if (supinfResult != null) {
72 supplinfAnalysisResults.add(supinfResult);
73
74 // if there is an analysis folder, add result to analysis file
75 if (analysisManager != null)
76 analysisManager.addSupplInfPersonsResult(supinfResult);
77 }
78 }
79 }
80
81 // create objects
82 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
83 int idx = 0;
84 for (String summaryResult : summaryAnalysisResults) {
85 IPersonFinderParser parser = new StanfordNERXMLParser(summaryResult);
86
87 List<APerson> persons = parser.parsePersons();
88 PersonsExtraction result = new PersonsExtraction();
89 result.setType(TextType.TYPE_SUMMARY);
90 result.setPerson(persons);
91 result.setTextIdx(idx);
92
93 results.add(result);
94 idx++;
95 }
96
97 idx = 0;
98 for (String suplinfResult : supplinfAnalysisResults) {
99 IPersonFinderParser parser = new StanfordNERXMLParser(suplinfResult);
100
101 List<APerson> persons = parser.parsePersons();
102 PersonsExtraction result = new PersonsExtraction();
103 result.setType(TextType.TYPE_SUPLINF);
104 result.setPerson(persons);
105 result.setTextIdx(idx);
106
107 results.add(result);
108 idx++;
109 }
110
111 return results;
112 }
113 }