annotate src/de/mpiwg/anteater/AnteaterController.java @ 4:dcc35f89dce3

include linneaus findings
author jdamerow
date Thu, 25 Oct 2012 15:25:08 -0700
parents ae96e4bc7fb2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.File;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.io.FilenameFilter;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import de.mpiwg.anteater.events.EventController;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.persons.PersonFinderController;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.persons.PersonsExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.places.PlaceFinderController;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.places.PlacesExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.results.ResultController;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.results.ResultsCarrier;
3
ae96e4bc7fb2 save found species to analysis files
jdamerow
parents: 0
diff changeset
15 import de.mpiwg.anteater.species.common.CommonNameFindController;
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 import de.mpiwg.anteater.species.scientific.ScientificNameFindController;
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 import de.mpiwg.anteater.text.TextInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 import de.mpiwg.anteater.text.TextManager;
036535fcd179 anteater
jdamerow
parents:
diff changeset
20
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 public class AnteaterController {
036535fcd179 anteater
jdamerow
parents:
diff changeset
22
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 public final static String COMPONENT_NAME = AnteaterController.class.getSimpleName();
036535fcd179 anteater
jdamerow
parents:
diff changeset
24
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 public void runAnteater(AnteaterConfiguration configuration) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 configuration.getLogger().logMessage(COMPONENT_NAME, "Retrieving files...");
036535fcd179 anteater
jdamerow
parents:
diff changeset
27
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 File folder = new File(configuration.getPathToTexts());
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 File[] files = folder.listFiles(new FilenameFilter() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 public boolean accept(File arg0, String arg1) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 File child = new File(arg0.getAbsolutePath() + File.separator + arg1);
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 if (child.isFile() && arg1.endsWith(".xml"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 return true;
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 return false;
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 });
036535fcd179 anteater
jdamerow
parents:
diff changeset
39
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 ScientificNameFindController scienceNameFindController = new ScientificNameFindController(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 PlaceFinderController placesController = new PlaceFinderController(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 PersonFinderController personsController = new PersonFinderController(configuration);
3
ae96e4bc7fb2 save found species to analysis files
jdamerow
parents: 0
diff changeset
43 CommonNameFindController commonNameFindController = new CommonNameFindController(configuration);
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
44
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 List<TextInformation> textInformations = new ArrayList<TextInformation>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 TextManager textManager = new TextManager(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
47
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 for (File f : files) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 TextInformation info = textManager.createTextInformations(f);
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 textInformations.add(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 for (TextInformation info : textInformations) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 configuration.getLogger().logMessage(COMPONENT_NAME, "Working on file: " + info.getFilepath());
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 // get scientific names
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 List<ScientificNamesExtraction> scienNameResults = scienceNameFindController.findScientificNamesInXML(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 info.setScientificNamesExtractions(scienNameResults);
036535fcd179 anteater
jdamerow
parents:
diff changeset
59
3
ae96e4bc7fb2 save found species to analysis files
jdamerow
parents: 0
diff changeset
60 //get common names and scientiric
4
dcc35f89dce3 include linneaus findings
jdamerow
parents: 3
diff changeset
61 List<ScientificNamesExtraction> commonNamesResults = commonNameFindController.findCommonNamesInXML(info);
dcc35f89dce3 include linneaus findings
jdamerow
parents: 3
diff changeset
62 info.getScientificNamesExtractions().addAll(commonNamesResults);
3
ae96e4bc7fb2 save found species to analysis files
jdamerow
parents: 0
diff changeset
63
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
64 // get places
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 List<PlacesExtraction> placesResults = placesController.findPlacesInXML(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 info.setPlacesExtractions(placesResults);
036535fcd179 anteater
jdamerow
parents:
diff changeset
67
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 // get persons
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 List<PersonsExtraction> personsResults = personsController.findPersonsInXML(info);
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 info.setPersonsExtractions(personsResults);
036535fcd179 anteater
jdamerow
parents:
diff changeset
71
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
73
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 // retrieve and save results
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 ResultController resultController = new ResultController(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 List<ResultsCarrier> resultCarriers = resultController.saveResults(textInformations);
036535fcd179 anteater
jdamerow
parents:
diff changeset
77
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 EventController eventController = new EventController(configuration);
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 eventController.createEvents(resultCarriers);
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
81
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 }