annotate src/de/mpiwg/anteater/species/scientific/ScientificNameFindController.java @ 4:dcc35f89dce3

include linneaus findings
author jdamerow
date Thu, 25 Oct 2012 15:25:08 -0700
parents 036535fcd179
children 51ed79e28b45
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.species.scientific;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import de.mpiwg.anteater.AnteaterConfiguration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.text.TextPart;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.text.TextType;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.xml.INameFinderParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.xml.impl.GNRDXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 public class ScientificNameFindController {
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 public final static String COMPONENT_NAME = ScientificNameFindController.class.getSimpleName();
036535fcd179 anteater
jdamerow
parents:
diff changeset
18
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 private AnteaterConfiguration configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
20
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 public ScientificNameFindController(AnteaterConfiguration configuration) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 this.configuration = configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
24
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 /**
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 * Method for retrieving scientific names in a XML document. It tooks all summaries and all
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 * suplementary informations and looks for names in them.
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 * @param file
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 * @return
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 */
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 public List<ScientificNamesExtraction> findScientificNamesInXML(TextInformation info) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 List<ScientificNamesExtraction> results = new ArrayList<ScientificNamesExtraction>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 List<String> summaryAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 List<String> supplinfAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 // check if there are already stored results
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 AnalysisXMLManager analysisManager = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 File file = new File(info.getFilepath());
036535fcd179 anteater
jdamerow
parents:
diff changeset
41
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName());
036535fcd179 anteater
jdamerow
parents:
diff changeset
43
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in summaries...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 summaryAnalysisResults = analysisManager.getSummaryNamesResults();
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
47
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in supplementary information...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 supplinfAnalysisResults = analysisManager.getSupplementaryInfoNamesResults();
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 IScientificNamesFinder nameFinder = new GNRDNameFinder(configuration.getLogger());
036535fcd179 anteater
jdamerow
parents:
diff changeset
54
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 // if there are no results for summaries, ask GNRD name finding service.
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 if (summaryAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask GNRDNameFinder.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
58
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 for (TextPart sum : info.getSummaries()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 String sumResult = nameFinder.findScientificNames(sum.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 if (sumResult != null) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 summaryAnalysisResults.add(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
63
036535fcd179 anteater
jdamerow
parents:
diff changeset
64 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 analysisManager.addSummaryNamesResult(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
70
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 // if there are no results for supplementary information, ask GNRD name fining service
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 if (supplinfAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask GNRDNameFinder.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
74
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 for (TextPart sInf : info.getSupplInfos()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 String supinfResult = nameFinder.findScientificNames(sInf.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 if (supinfResult != null) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 supplinfAnalysisResults.add(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
79
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 analysisManager.addSupplInfNamesResult(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
86
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 int idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 for (String summaryResult : summaryAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 INameFinderParser nameParser = new GNRDXMLParser(summaryResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
91
036535fcd179 anteater
jdamerow
parents:
diff changeset
92 List<ScientificName> scientificNames = nameParser.parseScientificNames();
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 scientificNameResult.setType(TextType.TYPE_SUMMARY);
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 scientificNameResult.setNames(scientificNames);
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 scientificNameResult.setTextIdx(idx);
4
dcc35f89dce3 include linneaus findings
jdamerow
parents: 0
diff changeset
97 scientificNameResult.setFoundBy(this.getClass());
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
98
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 results.add(scientificNameResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
102
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 for (String suplinfResult : supplinfAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 INameFinderParser nameParser = new GNRDXMLParser(suplinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
106
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 List<ScientificName> scientificNames = nameParser.parseScientificNames();
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 scientificNameResult.setType(TextType.TYPE_SUPLINF);
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 scientificNameResult.setNames(scientificNames);
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 scientificNameResult.setTextIdx(idx);
4
dcc35f89dce3 include linneaus findings
jdamerow
parents: 0
diff changeset
112 scientificNameResult.setFoundBy(this.getClass());
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
113
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 results.add(scientificNameResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
115 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
116 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
117
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
119 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 }