annotate src/de/mpiwg/anteater/places/PlaceFinderController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.places;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import de.mpiwg.anteater.AnteaterConfiguration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.places.impl.PlacemakerPlaceFinder;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.text.TextPart;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.text.TextType;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.xml.IPlaceFinderParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.xml.impl.PlacemakerXMLParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 public class PlaceFinderController {
036535fcd179 anteater
jdamerow
parents:
diff changeset
17
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 public final static String COMPONENT_NAME = PlaceFinderController.class.getSimpleName();
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 private AnteaterConfiguration configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
21
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 public PlaceFinderController(AnteaterConfiguration configuration) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 this.configuration = configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
25
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 public List<PlacesExtraction> findPlacesInXML(TextInformation info) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 List<PlacesExtraction> results = new ArrayList<PlacesExtraction>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 List<String> summaryAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 List<String> supplinfAnalysisResults = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 // check if there are already stored results
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 AnalysisXMLManager analysisManager = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 if (configuration.getAnalysisPath() != null && !configuration.getAnalysisPath().isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 File file = new File(info.getFilepath());
036535fcd179 anteater
jdamerow
parents:
diff changeset
35
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 analysisManager = new AnalysisXMLManager(configuration.getAnalysisPath() + File.separator + file.getName());
036535fcd179 anteater
jdamerow
parents:
diff changeset
37
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for places in summaries...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 summaryAnalysisResults = analysisManager.getSummaryPlacesResults();
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 configuration.getLogger().logMessage("found " + summaryAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
41
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 configuration.getLogger().logMessageWithoutNewLine(COMPONENT_NAME, "Check analysis file for places in supplementary information...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 supplinfAnalysisResults = analysisManager.getSupplementaryInfoPlacesResults();
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
46
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 IPlaceFinder placeFinder = new PlacemakerPlaceFinder(configuration.getLogger());
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 // if there are no results for summaries, ask place finding service.
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 if (summaryAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask Placemaker.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 for (TextPart sum : info.getSummaries()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 String sumResult = placeFinder.findPlaces(sum.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 if (sumResult != null && !sumResult.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 summaryAnalysisResults.add(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
57
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 analysisManager.addSummaryPlacesResult(sumResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 // if there are no results for supplementary information, ask GNRD name fining service
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 if (supplinfAnalysisResults.size() == 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask Placemaker.");
036535fcd179 anteater
jdamerow
parents:
diff changeset
68
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 for (TextPart sum : info.getSupplInfos()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 String supinfResult = placeFinder.findPlaces(sum.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 if (supinfResult != null && !supinfResult.isEmpty()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 supplinfAnalysisResults.add(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
73
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 // if there is an analysis folder, add result to analysis file
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 if (analysisManager != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 analysisManager.addSupplInfPlacesResult(supinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
80
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 // create objects
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating analysis results...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 int idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 for (String summaryResult : summaryAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 IPlaceFinderParser parser = new PlacemakerXMLParser(summaryResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
86
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 List<PlaceInformation> places = parser.parsePlaces();
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 PlacesExtraction placeResult = new PlacesExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 placeResult.setType(TextType.TYPE_SUMMARY);
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 placeResult.setPlaceInformation(places);
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 placeResult.setTextIdx(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
92
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 results.add(placeResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
96
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 idx = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 for (String suplinfResult : supplinfAnalysisResults) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 IPlaceFinderParser parser = new PlacemakerXMLParser(suplinfResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
100
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 List<PlaceInformation> scientificNames = parser.parsePlaces();
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 PlacesExtraction placeResult = new PlacesExtraction();
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 placeResult.setType(TextType.TYPE_SUPLINF);
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 placeResult.setPlaceInformation(scientificNames);
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 placeResult.setTextIdx(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
106
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 results.add(placeResult);
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 idx++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
110
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 return results;
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 }