annotate src/de/mpiwg/anteater/ml/MLController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.ml;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.HashMap;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.util.Map;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.AnteaterConfiguration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.ml.impl.StanfordNLPTextParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.ml.impl.WekaMLComponent;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.ml.preprocessing.DataCreator;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.persons.APerson;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.persons.PersonsExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.persons.ml.preprocessing.ApplicantDataCreator;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 import de.mpiwg.anteater.places.Place;
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 import de.mpiwg.anteater.places.PlaceInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 import de.mpiwg.anteater.places.PlacesExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 import de.mpiwg.anteater.places.ml.preprocessing.LocationDataCreator;
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 import de.mpiwg.anteater.results.ApplicantResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 import de.mpiwg.anteater.results.LocationResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 import de.mpiwg.anteater.results.SpeciesScientificResult;
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
23
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 public class MLController {
036535fcd179 anteater
jdamerow
parents:
diff changeset
25
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 public final static String COMPONENT_NAME = MLController.class.getSimpleName();
036535fcd179 anteater
jdamerow
parents:
diff changeset
27
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 private AnteaterConfiguration configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
29
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 public MLController(AnteaterConfiguration configuration) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 this.configuration = configuration;
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
33
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 public List<ApplicantResult> runApplicantMLComponent(List<TextInformation> infos) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 DataCreator dataCreator = new ApplicantDataCreator(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
38
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 List<String> arffFiles = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 for (TextInformation info : infos) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 if (file != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 arffFiles.add(file);
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
45
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 IMLComponent mlComponent = new WekaMLComponent("Applicant_LADTree.model");
036535fcd179 anteater
jdamerow
parents:
diff changeset
47
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 List<ApplicantResult> mlresults = new ArrayList<ApplicantResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 for (String arffFile : arffFiles) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 List<Double> predictions = mlComponent.run(arffFile);
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 int idx = arffFiles.indexOf(arffFile);
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 TextInformation info = infos.get(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
53
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 List<PersonsExtraction> results = info.getPersonsExtractions();
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 Map<APerson, PersonsExtraction> persons = new HashMap<APerson, PersonsExtraction>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
56
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 List<APerson> ps = new ArrayList<APerson>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 for (PersonsExtraction r : results) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 ps.addAll(r.getPersons());
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 for (APerson p : r.getPersons())
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 persons.put(p, r);
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
63
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 for (int i = 0; i < predictions.size(); i++) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
66
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 ApplicantResult result = new ApplicantResult();
036535fcd179 anteater
jdamerow
parents:
diff changeset
68
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 result.setFinding(ps.get(i));
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 result.setResult(persons.get(ps.get(i)));
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 result.setTextInfo(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 result.setPrediction(predictions.get(i));
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 mlresults.add(result);
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
76
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 return mlresults;
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
79
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 public List<LocationResult> runLocationMLComponent(List<TextInformation> infos, List<SpeciesScientificResult> predictedSpecies, List<ApplicantResult> predictedApplicants) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component for locations...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
82
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 DataCreator dataCreator = new LocationDataCreator(configuration, predictedSpecies, predictedApplicants);
036535fcd179 anteater
jdamerow
parents:
diff changeset
84
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 List<String> arffFiles = new ArrayList<String>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 for (TextInformation info : infos) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
036535fcd179 anteater
jdamerow
parents:
diff changeset
88 if (file != null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 arffFiles.add(file);
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
91
036535fcd179 anteater
jdamerow
parents:
diff changeset
92
036535fcd179 anteater
jdamerow
parents:
diff changeset
93 IMLComponent mlComponent = new WekaMLComponent("Location_LMT_moreTraining.model");
036535fcd179 anteater
jdamerow
parents:
diff changeset
94
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 List<LocationResult> mlresults = new ArrayList<LocationResult>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 for (String arffFile : arffFiles) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 List<Double> predictions = mlComponent.run(arffFile);
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 int idx = arffFiles.indexOf(arffFile);
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 TextInformation info = infos.get(idx);
036535fcd179 anteater
jdamerow
parents:
diff changeset
100
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 List<PlacesExtraction> results = info.getPlacesExtractions();
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 List<PlaceResultMapping> mappings = new ArrayList<MLController.PlaceResultMapping>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
103
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 for (PlacesExtraction r : results) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 for (PlaceInformation pi : r.getPlaceInformation()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
106 for (Place p : pi.getPlaces())
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 mappings.add(new PlaceResultMapping(pi, p, r));
036535fcd179 anteater
jdamerow
parents:
diff changeset
108 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
110
036535fcd179 anteater
jdamerow
parents:
diff changeset
111
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 for (int i = 0; i < predictions.size(); i++) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
113
036535fcd179 anteater
jdamerow
parents:
diff changeset
114 LocationResult result = new LocationResult();
036535fcd179 anteater
jdamerow
parents:
diff changeset
115 PlaceResultMapping mapping = mappings.get(i);
036535fcd179 anteater
jdamerow
parents:
diff changeset
116
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 result.setFinding(mapping.placeInformation);
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 result.setResult(mapping.placesExtraction);
036535fcd179 anteater
jdamerow
parents:
diff changeset
119 result.setPlace(mapping.place);
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 result.setTextInfo(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 result.setPrediction(predictions.get(i));
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 mlresults.add(result);
036535fcd179 anteater
jdamerow
parents:
diff changeset
123 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
124 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
125
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 return mlresults;
036535fcd179 anteater
jdamerow
parents:
diff changeset
127 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
128
036535fcd179 anteater
jdamerow
parents:
diff changeset
129 class PlaceResultMapping {
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 public PlaceInformation placeInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
131 public Place place;
036535fcd179 anteater
jdamerow
parents:
diff changeset
132 public PlacesExtraction placesExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
133
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 public PlaceResultMapping(PlaceInformation placeInformation,
036535fcd179 anteater
jdamerow
parents:
diff changeset
135 Place place, PlacesExtraction placesExtraction) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
136 super();
036535fcd179 anteater
jdamerow
parents:
diff changeset
137 this.placeInformation = placeInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 this.place = place;
036535fcd179 anteater
jdamerow
parents:
diff changeset
139 this.placesExtraction = placesExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
140 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
141
036535fcd179 anteater
jdamerow
parents:
diff changeset
142
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
144 }