Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/ml/MLController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:036535fcd179 |
---|---|
1 package de.mpiwg.anteater.ml; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.HashMap; | |
5 import java.util.List; | |
6 import java.util.Map; | |
7 | |
8 import de.mpiwg.anteater.AnteaterConfiguration; | |
9 import de.mpiwg.anteater.ml.impl.StanfordNLPTextParser; | |
10 import de.mpiwg.anteater.ml.impl.WekaMLComponent; | |
11 import de.mpiwg.anteater.ml.preprocessing.DataCreator; | |
12 import de.mpiwg.anteater.persons.APerson; | |
13 import de.mpiwg.anteater.persons.PersonsExtraction; | |
14 import de.mpiwg.anteater.persons.ml.preprocessing.ApplicantDataCreator; | |
15 import de.mpiwg.anteater.places.Place; | |
16 import de.mpiwg.anteater.places.PlaceInformation; | |
17 import de.mpiwg.anteater.places.PlacesExtraction; | |
18 import de.mpiwg.anteater.places.ml.preprocessing.LocationDataCreator; | |
19 import de.mpiwg.anteater.results.ApplicantResult; | |
20 import de.mpiwg.anteater.results.LocationResult; | |
21 import de.mpiwg.anteater.results.SpeciesScientificResult; | |
22 import de.mpiwg.anteater.text.TextInformation; | |
23 | |
24 public class MLController { | |
25 | |
26 public final static String COMPONENT_NAME = MLController.class.getSimpleName(); | |
27 | |
28 private AnteaterConfiguration configuration; | |
29 | |
30 public MLController(AnteaterConfiguration configuration) { | |
31 this.configuration = configuration; | |
32 } | |
33 | |
34 public List<ApplicantResult> runApplicantMLComponent(List<TextInformation> infos) { | |
35 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component..."); | |
36 | |
37 DataCreator dataCreator = new ApplicantDataCreator(configuration); | |
38 | |
39 List<String> arffFiles = new ArrayList<String>(); | |
40 for (TextInformation info : infos) { | |
41 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser()); | |
42 if (file != null) | |
43 arffFiles.add(file); | |
44 } | |
45 | |
46 IMLComponent mlComponent = new WekaMLComponent("Applicant_LADTree.model"); | |
47 | |
48 List<ApplicantResult> mlresults = new ArrayList<ApplicantResult>(); | |
49 for (String arffFile : arffFiles) { | |
50 List<Double> predictions = mlComponent.run(arffFile); | |
51 int idx = arffFiles.indexOf(arffFile); | |
52 TextInformation info = infos.get(idx); | |
53 | |
54 List<PersonsExtraction> results = info.getPersonsExtractions(); | |
55 Map<APerson, PersonsExtraction> persons = new HashMap<APerson, PersonsExtraction>(); | |
56 | |
57 List<APerson> ps = new ArrayList<APerson>(); | |
58 for (PersonsExtraction r : results) { | |
59 ps.addAll(r.getPersons()); | |
60 for (APerson p : r.getPersons()) | |
61 persons.put(p, r); | |
62 } | |
63 | |
64 | |
65 for (int i = 0; i < predictions.size(); i++) { | |
66 | |
67 ApplicantResult result = new ApplicantResult(); | |
68 | |
69 result.setFinding(ps.get(i)); | |
70 result.setResult(persons.get(ps.get(i))); | |
71 result.setTextInfo(info); | |
72 result.setPrediction(predictions.get(i)); | |
73 mlresults.add(result); | |
74 } | |
75 } | |
76 | |
77 return mlresults; | |
78 } | |
79 | |
80 public List<LocationResult> runLocationMLComponent(List<TextInformation> infos, List<SpeciesScientificResult> predictedSpecies, List<ApplicantResult> predictedApplicants) { | |
81 configuration.getLogger().logMessage(COMPONENT_NAME, "Run Machine Learning component for locations..."); | |
82 | |
83 DataCreator dataCreator = new LocationDataCreator(configuration, predictedSpecies, predictedApplicants); | |
84 | |
85 List<String> arffFiles = new ArrayList<String>(); | |
86 for (TextInformation info : infos) { | |
87 String file = dataCreator.createARFFFile(info, new StanfordNLPTextParser()); | |
88 if (file != null) | |
89 arffFiles.add(file); | |
90 } | |
91 | |
92 | |
93 IMLComponent mlComponent = new WekaMLComponent("Location_LMT_moreTraining.model"); | |
94 | |
95 List<LocationResult> mlresults = new ArrayList<LocationResult>(); | |
96 for (String arffFile : arffFiles) { | |
97 List<Double> predictions = mlComponent.run(arffFile); | |
98 int idx = arffFiles.indexOf(arffFile); | |
99 TextInformation info = infos.get(idx); | |
100 | |
101 List<PlacesExtraction> results = info.getPlacesExtractions(); | |
102 List<PlaceResultMapping> mappings = new ArrayList<MLController.PlaceResultMapping>(); | |
103 | |
104 for (PlacesExtraction r : results) { | |
105 for (PlaceInformation pi : r.getPlaceInformation()) { | |
106 for (Place p : pi.getPlaces()) | |
107 mappings.add(new PlaceResultMapping(pi, p, r)); | |
108 } | |
109 } | |
110 | |
111 | |
112 for (int i = 0; i < predictions.size(); i++) { | |
113 | |
114 LocationResult result = new LocationResult(); | |
115 PlaceResultMapping mapping = mappings.get(i); | |
116 | |
117 result.setFinding(mapping.placeInformation); | |
118 result.setResult(mapping.placesExtraction); | |
119 result.setPlace(mapping.place); | |
120 result.setTextInfo(info); | |
121 result.setPrediction(predictions.get(i)); | |
122 mlresults.add(result); | |
123 } | |
124 } | |
125 | |
126 return mlresults; | |
127 } | |
128 | |
129 class PlaceResultMapping { | |
130 public PlaceInformation placeInformation; | |
131 public Place place; | |
132 public PlacesExtraction placesExtraction; | |
133 | |
134 public PlaceResultMapping(PlaceInformation placeInformation, | |
135 Place place, PlacesExtraction placesExtraction) { | |
136 super(); | |
137 this.placeInformation = placeInformation; | |
138 this.place = place; | |
139 this.placesExtraction = placesExtraction; | |
140 } | |
141 | |
142 | |
143 } | |
144 } |