0
|
1 package de.mpiwg.anteater.species.scientific;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.util.ArrayList;
|
|
5 import java.util.List;
|
|
6
|
|
7 import de.mpiwg.anteater.AnteaterConfiguration;
|
|
8 import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder;
|
|
9 import de.mpiwg.anteater.text.TextInformation;
|
|
10 import de.mpiwg.anteater.text.TextPart;
|
|
11 import de.mpiwg.anteater.text.TextType;
|
|
12 import de.mpiwg.anteater.xml.INameFinderParser;
|
|
13 import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
|
|
14 import de.mpiwg.anteater.xml.impl.GNRDXMLParser;
|
|
15
|
|
16 public class ScientificNameFindController {
|
|
17 public final static String COMPONENT_NAME = ScientificNameFindController.class.getSimpleName();
|
|
18
|
|
19 private AnteaterConfiguration configuration;
|
|
20
|
|
21 public ScientificNameFindController(AnteaterConfiguration configuration) {
|
|
22 this.configuration = configuration;
|
|
23 }
|
|
24
|
|
25 /**
|
|
26 * Method for retrieving scientific names in a XML document. It tooks all summaries and all
|
|
27 * suplementary informations and looks for names in them.
|
|
28 * @param file
|
|
29 * @return
|
|
30 */
|
|
31 public List<ScientificNamesExtraction> findScientificNamesInXML(TextInformation info) {
|
|
32
|
|
33 List<ScientificNamesExtraction> results = new ArrayList<ScientificNamesExtraction>();
|
|
34 List<String> summaryAnalysisResults = new ArrayList<String>();
|
|
35 List<String> supplinfAnalysisResults = new ArrayList<String>();
|
|
36
|
|
37 // check if there are already stored results
|
|
38 AnalysisXMLManager analysisManager = null;
|
|
39 if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
|
|
40 File file = new File(info.getFilepath());
|
|
41
|
|
42 analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName());
|
|
43
|
|
44 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in summaries...");
|
|
45 summaryAnalysisResults = analysisManager.getSummaryNamesResults();
|
|
46 configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
|
|
47
|
|
48 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for scientific names in supplementary information...");
|
|
49 supplinfAnalysisResults = analysisManager.getSupplementaryInfoNamesResults();
|
|
50 configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
|
|
51 }
|
|
52
|
|
53 IScientificNamesFinder nameFinder = new GNRDNameFinder(configuration.getLogger());
|
|
54
|
|
55 // if there are no results for summaries, ask GNRD name finding service.
|
|
56 if (summaryAnalysisResults.size() == 0) {
|
|
57 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask GNRDNameFinder.");
|
|
58
|
|
59 for (TextPart sum : info.getSummaries()) {
|
|
60 String sumResult = nameFinder.findScientificNames(sum.getText());
|
|
61 if (sumResult != null) {
|
|
62 summaryAnalysisResults.add(sumResult);
|
|
63
|
|
64 // if there is an analysis folder, add result to analysis file
|
|
65 if (analysisManager != null)
|
|
66 analysisManager.addSummaryNamesResult(sumResult);
|
|
67 }
|
|
68 }
|
|
69 }
|
|
70
|
|
71 // if there are no results for supplementary information, ask GNRD name fining service
|
|
72 if (supplinfAnalysisResults.size() == 0) {
|
|
73 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask GNRDNameFinder.");
|
|
74
|
|
75 for (TextPart sInf : info.getSupplInfos()) {
|
|
76 String supinfResult = nameFinder.findScientificNames(sInf.getText());
|
|
77 if (supinfResult != null) {
|
|
78 supplinfAnalysisResults.add(supinfResult);
|
|
79
|
|
80 // if there is an analysis folder, add result to analysis file
|
|
81 if (analysisManager != null)
|
|
82 analysisManager.addSupplInfNamesResult(supinfResult);
|
|
83 }
|
|
84 }
|
|
85 }
|
|
86
|
|
87 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
|
|
88 int idx = 0;
|
|
89 for (String summaryResult : summaryAnalysisResults) {
|
|
90 INameFinderParser nameParser = new GNRDXMLParser(summaryResult);
|
|
91
|
|
92 List<ScientificName> scientificNames = nameParser.parseScientificNames();
|
|
93 ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction();
|
|
94 scientificNameResult.setType(TextType.TYPE_SUMMARY);
|
|
95 scientificNameResult.setNames(scientificNames);
|
|
96 scientificNameResult.setTextIdx(idx);
|
4
|
97 scientificNameResult.setFoundBy(this.getClass());
|
0
|
98
|
|
99 results.add(scientificNameResult);
|
|
100 idx++;
|
|
101 }
|
|
102
|
|
103 idx = 0;
|
|
104 for (String suplinfResult : supplinfAnalysisResults) {
|
|
105 INameFinderParser nameParser = new GNRDXMLParser(suplinfResult);
|
|
106
|
|
107 List<ScientificName> scientificNames = nameParser.parseScientificNames();
|
|
108 ScientificNamesExtraction scientificNameResult = new ScientificNamesExtraction();
|
|
109 scientificNameResult.setType(TextType.TYPE_SUPLINF);
|
|
110 scientificNameResult.setNames(scientificNames);
|
|
111 scientificNameResult.setTextIdx(idx);
|
4
|
112 scientificNameResult.setFoundBy(this.getClass());
|
0
|
113
|
|
114 results.add(scientificNameResult);
|
|
115 idx++;
|
|
116 }
|
|
117
|
|
118 return results;
|
|
119 }
|
|
120 }
|