0
|
1 package de.mpiwg.anteater.ml;
|
|
2
|
|
3 import java.util.ArrayList;
|
|
4 import java.util.HashMap;
|
|
5 import java.util.List;
|
|
6 import java.util.Map;
|
|
7
|
|
8 import de.mpiwg.anteater.AnteaterConfiguration;
|
|
9 import de.mpiwg.anteater.ml.impl.StanfordNLPTextParser;
|
|
10 import de.mpiwg.anteater.ml.impl.WekaMLComponent;
|
|
11 import de.mpiwg.anteater.ml.preprocessing.DataCreator;
|
|
12 import de.mpiwg.anteater.persons.APerson;
|
|
13 import de.mpiwg.anteater.persons.PersonsExtraction;
|
|
14 import de.mpiwg.anteater.persons.ml.preprocessing.ApplicantDataCreator;
|
|
15 import de.mpiwg.anteater.places.Place;
|
|
16 import de.mpiwg.anteater.places.PlaceInformation;
|
|
17 import de.mpiwg.anteater.places.PlacesExtraction;
|
|
18 import de.mpiwg.anteater.places.ml.preprocessing.LocationDataCreator;
|
|
19 import de.mpiwg.anteater.results.ApplicantResult;
|
|
20 import de.mpiwg.anteater.results.LocationResult;
|
|
21 import de.mpiwg.anteater.results.SpeciesScientificResult;
|
|
22 import de.mpiwg.anteater.text.TextInformation;
|
|
23
|
|
24 public class MLController {
|
|
25
|
|
26 public final static String COMPONENT_NAME = MLController.class.getSimpleName();
|
|
27
|
|
28 private AnteaterConfiguration configuration;
|
|
29
|
|
30 public MLController(AnteaterConfiguration configuration) {
|
|
31 this.configuration = configuration;
|
|
32 }
|
|
33
|
|
34 public List<ApplicantResult> runApplicantMLComponent(List<TextInformation> infos) {
|
|
35 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component...");
|
|
36
|
|
37 DataCreator dataCreator = new ApplicantDataCreator(configuration);
|
|
38
|
|
39 List<String> arffFiles = new ArrayList<String>();
|
|
40 for (TextInformation info : infos) {
|
|
41 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
|
|
42 if (file != null)
|
|
43 arffFiles.add(file);
|
|
44 }
|
|
45
|
|
46 IMLComponent mlComponent = new WekaMLComponent("Applicant_LADTree.model");
|
|
47
|
|
48 List<ApplicantResult> mlresults = new ArrayList<ApplicantResult>();
|
|
49 for (String arffFile : arffFiles) {
|
|
50 List<Double> predictions = mlComponent.run(arffFile);
|
|
51 int idx = arffFiles.indexOf(arffFile);
|
|
52 TextInformation info = infos.get(idx);
|
|
53
|
|
54 List<PersonsExtraction> results = info.getPersonsExtractions();
|
|
55 Map<APerson, PersonsExtraction> persons = new HashMap<APerson, PersonsExtraction>();
|
|
56
|
|
57 List<APerson> ps = new ArrayList<APerson>();
|
|
58 for (PersonsExtraction r : results) {
|
|
59 ps.addAll(r.getPersons());
|
|
60 for (APerson p : r.getPersons())
|
|
61 persons.put(p, r);
|
|
62 }
|
|
63
|
|
64
|
|
65 for (int i = 0; i < predictions.size(); i++) {
|
|
66
|
|
67 ApplicantResult result = new ApplicantResult();
|
|
68
|
|
69 result.setFinding(ps.get(i));
|
|
70 result.setResult(persons.get(ps.get(i)));
|
|
71 result.setTextInfo(info);
|
|
72 result.setPrediction(predictions.get(i));
|
|
73 mlresults.add(result);
|
|
74 }
|
|
75 }
|
|
76
|
|
77 return mlresults;
|
|
78 }
|
|
79
|
|
80 public List<LocationResult> runLocationMLComponent(List<TextInformation> infos, List<SpeciesScientificResult> predictedSpecies, List<ApplicantResult> predictedApplicants) {
|
|
81 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component for locations...");
|
|
82
|
|
83 DataCreator dataCreator = new LocationDataCreator(configuration, predictedSpecies, predictedApplicants);
|
|
84
|
|
85 List<String> arffFiles = new ArrayList<String>();
|
|
86 for (TextInformation info : infos) {
|
|
87 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
|
|
88 if (file != null)
|
|
89 arffFiles.add(file);
|
|
90 }
|
|
91
|
|
92
|
|
93 IMLComponent mlComponent = new WekaMLComponent("Location_LMT_moreTraining.model");
|
|
94
|
|
95 List<LocationResult> mlresults = new ArrayList<LocationResult>();
|
|
96 for (String arffFile : arffFiles) {
|
|
97 List<Double> predictions = mlComponent.run(arffFile);
|
|
98 int idx = arffFiles.indexOf(arffFile);
|
|
99 TextInformation info = infos.get(idx);
|
|
100
|
|
101 List<PlacesExtraction> results = info.getPlacesExtractions();
|
|
102 List<PlaceResultMapping> mappings = new ArrayList<MLController.PlaceResultMapping>();
|
|
103
|
|
104 for (PlacesExtraction r : results) {
|
|
105 for (PlaceInformation pi : r.getPlaceInformation()) {
|
|
106 for (Place p : pi.getPlaces())
|
|
107 mappings.add(new PlaceResultMapping(pi, p, r));
|
|
108 }
|
|
109 }
|
|
110
|
|
111
|
|
112 for (int i = 0; i < predictions.size(); i++) {
|
|
113
|
|
114 LocationResult result = new LocationResult();
|
|
115 PlaceResultMapping mapping = mappings.get(i);
|
|
116
|
|
117 result.setFinding(mapping.placeInformation);
|
|
118 result.setResult(mapping.placesExtraction);
|
|
119 result.setPlace(mapping.place);
|
|
120 result.setTextInfo(info);
|
|
121 result.setPrediction(predictions.get(i));
|
|
122 mlresults.add(result);
|
|
123 }
|
|
124 }
|
|
125
|
|
126 return mlresults;
|
|
127 }
|
|
128
|
|
129 class PlaceResultMapping {
|
|
130 public PlaceInformation placeInformation;
|
|
131 public Place place;
|
|
132 public PlacesExtraction placesExtraction;
|
|
133
|
|
134 public PlaceResultMapping(PlaceInformation placeInformation,
|
|
135 Place place, PlacesExtraction placesExtraction) {
|
|
136 super();
|
|
137 this.placeInformation = placeInformation;
|
|
138 this.place = place;
|
|
139 this.placesExtraction = placesExtraction;
|
|
140 }
|
|
141
|
|
142
|
|
143 }
|
|
144 }
|