annotate src/de/mpiwg/anteater/persons/PersonFinderController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.persons;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import de.mpiwg.anteater.AnteaterConfiguration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.persons.impl.StanfordNLPPersonFinder;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.text.TextPart;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.text.TextType;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.xml.IPersonFinderParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.xml.impl.StanfordNERXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 public class PersonFinderController {
036535fcd179 anteater
jdamerow
parents:
diff changeset
17
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 public final static String COMPONENT_NAME = PersonFinderController.class.getSimpleName();
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 private AnteaterConfiguration configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
21
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 public PersonFinderController(AnteaterConfiguration configuration) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 this.configuration = configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
25
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 public List<PersonsExtraction> findPersonsInXML(TextInformation info) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 List<PersonsExtraction> results = new ArrayList<PersonsExtraction>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 List<String> summaryAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 List<String> supplinfAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 // check if there are already stored results
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 AnalysisXMLManager analysisManager = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 File file = new File(info.getFilepath());
036535fcd179 anteater
jdamerow
parents:
diff changeset
35
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName());
036535fcd179 anteater
jdamerow
parents:
diff changeset
37
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in summaries...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 summaryAnalysisResults = analysisManager.getSummaryPersonsResult();
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
41
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for persons in supplementary information...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 supplinfAnalysisResults = analysisManager.getSupplementaryInfoPersonsResult();
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
46
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 IPersonFinder personFinder = new StanfordNLPPersonFinder(configuration.getLogger());
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 // if there are no results for summaries, ask place finding service.
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 if (summaryAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask Stanford NLP NER Parser.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 for (TextPart sum : info.getSummaries()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 String sumResult = personFinder.findPersons(sum.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 if (sumResult != null) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 summaryAnalysisResults.add(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
57
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 analysisManager.addSummaryPersonsResult(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 // if there are no results for supplementary information, ask GNRD name fining service
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 if (supplinfAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask Stanford NLP NER Parser.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
68
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 for (TextPart sInf : info.getSupplInfos()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 String supinfResult = personFinder.findPersons(sInf.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 if (supinfResult != null) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 supplinfAnalysisResults.add(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
73
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 analysisManager.addSupplInfPersonsResult(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
80
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 // create objects
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 int idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 for (String summaryResult : summaryAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 IPersonFinderParser parser = new StanfordNERXMLParser(summaryResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
86
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 List<APerson> persons = parser.parsePersons();
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 PersonsExtraction result = new PersonsExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 result.setType(TextType.TYPE_SUMMARY);
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 result.setPerson(persons);
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 result.setTextIdx(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
92
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 results.add(result);
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
96
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 for (String suplinfResult : supplinfAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 IPersonFinderParser parser = new StanfordNERXMLParser(suplinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
100
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 List<APerson> persons = parser.parsePersons();
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 PersonsExtraction result = new PersonsExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 result.setType(TextType.TYPE_SUPLINF);
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 result.setPerson(persons);
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 result.setTextIdx(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
106
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 results.add(result);
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
110
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 }