comparison src/de/mpiwg/anteater/ml/MLController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 package de.mpiwg.anteater.ml;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.Map;
7
8 import de.mpiwg.anteater.AnteaterConfiguration;
9 import de.mpiwg.anteater.ml.impl.StanfordNLPTextParser;
10 import de.mpiwg.anteater.ml.impl.WekaMLComponent;
11 import de.mpiwg.anteater.ml.preprocessing.DataCreator;
12 import de.mpiwg.anteater.persons.APerson;
13 import de.mpiwg.anteater.persons.PersonsExtraction;
14 import de.mpiwg.anteater.persons.ml.preprocessing.ApplicantDataCreator;
15 import de.mpiwg.anteater.places.Place;
16 import de.mpiwg.anteater.places.PlaceInformation;
17 import de.mpiwg.anteater.places.PlacesExtraction;
18 import de.mpiwg.anteater.places.ml.preprocessing.LocationDataCreator;
19 import de.mpiwg.anteater.results.ApplicantResult;
20 import de.mpiwg.anteater.results.LocationResult;
21 import de.mpiwg.anteater.results.SpeciesScientificResult;
22 import de.mpiwg.anteater.text.TextInformation;
23
24 public class MLController {
25
26 public final static String COMPONENT_NAME = MLController.class.getSimpleName();
27
28 private AnteaterConfiguration configuration;
29
30 public MLController(AnteaterConfiguration configuration) {
31 this.configuration = configuration;
32 }
33
34 public List<ApplicantResult> runApplicantMLComponent(List<TextInformation> infos) {
35 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component...");
36
37 DataCreator dataCreator = new ApplicantDataCreator(configuration);
38
39 List<String> arffFiles = new ArrayList<String>();
40 for (TextInformation info : infos) {
41 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
42 if (file != null)
43 arffFiles.add(file);
44 }
45
46 IMLComponent mlComponent = new WekaMLComponent("Applicant_LADTree.model");
47
48 List<ApplicantResult> mlresults = new ArrayList<ApplicantResult>();
49 for (String arffFile : arffFiles) {
50 List<Double> predictions = mlComponent.run(arffFile);
51 int idx = arffFiles.indexOf(arffFile);
52 TextInformation info = infos.get(idx);
53
54 List<PersonsExtraction> results = info.getPersonsExtractions();
55 Map<APerson, PersonsExtraction> persons = new HashMap<APerson, PersonsExtraction>();
56
57 List<APerson> ps = new ArrayList<APerson>();
58 for (PersonsExtraction r : results) {
59 ps.addAll(r.getPersons());
60 for (APerson p : r.getPersons())
61 persons.put(p, r);
62 }
63
64
65 for (int i = 0; i < predictions.size(); i++) {
66
67 ApplicantResult result = new ApplicantResult();
68
69 result.setFinding(ps.get(i));
70 result.setResult(persons.get(ps.get(i)));
71 result.setTextInfo(info);
72 result.setPrediction(predictions.get(i));
73 mlresults.add(result);
74 }
75 }
76
77 return mlresults;
78 }
79
80 public List<LocationResult> runLocationMLComponent(List<TextInformation> infos, List<SpeciesScientificResult> predictedSpecies, List<ApplicantResult> predictedApplicants) {
81 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component for locations...");
82
83 DataCreator dataCreator = new LocationDataCreator(configuration, predictedSpecies, predictedApplicants);
84
85 List<String> arffFiles = new ArrayList<String>();
86 for (TextInformation info : infos) {
87 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser());
88 if (file != null)
89 arffFiles.add(file);
90 }
91
92
93 IMLComponent mlComponent = new WekaMLComponent("Location_LMT_moreTraining.model");
94
95 List<LocationResult> mlresults = new ArrayList<LocationResult>();
96 for (String arffFile : arffFiles) {
97 List<Double> predictions = mlComponent.run(arffFile);
98 int idx = arffFiles.indexOf(arffFile);
99 TextInformation info = infos.get(idx);
100
101 List<PlacesExtraction> results = info.getPlacesExtractions();
102 List<PlaceResultMapping> mappings = new ArrayList<MLController.PlaceResultMapping>();
103
104 for (PlacesExtraction r : results) {
105 for (PlaceInformation pi : r.getPlaceInformation()) {
106 for (Place p : pi.getPlaces())
107 mappings.add(new PlaceResultMapping(pi, p, r));
108 }
109 }
110
111
112 for (int i = 0; i < predictions.size(); i++) {
113
114 LocationResult result = new LocationResult();
115 PlaceResultMapping mapping = mappings.get(i);
116
117 result.setFinding(mapping.placeInformation);
118 result.setResult(mapping.placesExtraction);
119 result.setPlace(mapping.place);
120 result.setTextInfo(info);
121 result.setPrediction(predictions.get(i));
122 mlresults.add(result);
123 }
124 }
125
126 return mlresults;
127 }
128
129 class PlaceResultMapping {
130 public PlaceInformation placeInformation;
131 public Place place;
132 public PlacesExtraction placesExtraction;
133
134 public PlaceResultMapping(PlaceInformation placeInformation,
135 Place place, PlacesExtraction placesExtraction) {
136 super();
137 this.placeInformation = placeInformation;
138 this.place = place;
139 this.placesExtraction = placesExtraction;
140 }
141
142
143 }
144 }