# HG changeset patch
# User jdamerow
# Date 1350940874 25200
# Node ID ae96e4bc7fb2dd289ff5026ff6892f376441fe3e
# Parent 1c2b4f5e2c0592a298847d8faeb2efa80e02478a
save found species to analysis files
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/00-18565.xml
--- a/analysis/00-18565.xml Mon Oct 22 13:46:54 2012 -0700
+++ b/analysis/00-18565.xml Mon Oct 22 14:21:14 2012 -0700
@@ -85,6 +85,8 @@
+
+
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/01-14522.xml
--- a/analysis/01-14522.xml Mon Oct 22 13:46:54 2012 -0700
+++ b/analysis/01-14522.xml Mon Oct 22 14:21:14 2012 -0700
@@ -81,6 +81,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/01-19062.xml
--- a/analysis/01-19062.xml Mon Oct 22 13:46:54 2012 -0700
+++ b/analysis/01-19062.xml Mon Oct 22 14:21:14 2012 -0700
@@ -89,6 +89,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/2010-23822.xml
--- a/analysis/2010-23822.xml Mon Oct 22 13:46:54 2012 -0700
+++ b/analysis/2010-23822.xml Mon Oct 22 14:21:14 2012 -0700
@@ -132,6 +132,44 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/AnteaterController.java
--- a/src/de/mpiwg/anteater/AnteaterController.java Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/AnteaterController.java Mon Oct 22 14:21:14 2012 -0700
@@ -12,6 +12,8 @@
import de.mpiwg.anteater.places.PlacesExtraction;
import de.mpiwg.anteater.results.ResultController;
import de.mpiwg.anteater.results.ResultsCarrier;
+import de.mpiwg.anteater.species.common.CommonNameFindController;
+import de.mpiwg.anteater.species.common.CommonNamesExtraction;
import de.mpiwg.anteater.species.scientific.ScientificNameFindController;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
import de.mpiwg.anteater.text.TextInformation;
@@ -39,6 +41,7 @@
ScientificNameFindController scienceNameFindController = new ScientificNameFindController(configuration);
PlaceFinderController placesController = new PlaceFinderController(configuration);
PersonFinderController personsController = new PersonFinderController(configuration);
+ CommonNameFindController commonNameFindController = new CommonNameFindController(configuration);
List textInformations = new ArrayList();
TextManager textManager = new TextManager(configuration);
@@ -55,6 +58,10 @@
List scienNameResults = scienceNameFindController.findScientificNamesInXML(info);
info.setScientificNamesExtractions(scienNameResults);
+ // get common names and scientific names
+ List commonNamesResults = commonNameFindController.findCommonNamesInXML(info);
+ info.setCommonNamesExtractions(commonNamesResults);
+
// get places
List placesResults = placesController.findPlacesInXML(info);
info.setPlacesExtractions(placesResults);
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/species/common/CommonNameFindController.java
--- a/src/de/mpiwg/anteater/species/common/CommonNameFindController.java Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/species/common/CommonNameFindController.java Mon Oct 22 14:21:14 2012 -0700
@@ -5,8 +5,12 @@
import java.util.List;
import de.mpiwg.anteater.AnteaterConfiguration;
+import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
+import de.mpiwg.anteater.species.scientific.IScientificNamesFinder;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
+import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder;
import de.mpiwg.anteater.text.TextInformation;
+import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.xml.impl.AnalysisXMLManager;
public class CommonNameFindController {
@@ -19,7 +23,7 @@
this.configuration = configuration;
}
- public List findCommonNamesInXML(TextInformation info) {
+ public List findCommonNamesInXML(TextInformation info) {
List results = new ArrayList();
List summaryAnalysisResults = new ArrayList();
List supplinfAnalysisResults = new ArrayList();
@@ -40,6 +44,40 @@
configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s).");
}
+ ICommonNameFinder nameFinder = new LinnaeusNameFinder(configuration.getLogger());
+
+ // if there are no results for summaries, ask the Linnaeus name finder.
+ if (summaryAnalysisResults.size() == 0) {
+ configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask LinnaeusNameFinder.");
+
+ for (TextPart sum : info.getSummaries()) {
+ String sumResult = nameFinder.findCommonNames(sum.getText());
+ if (sumResult != null) {
+ summaryAnalysisResults.add(sumResult);
+
+ // if there is an analysis folder, add result to analysis file
+ if (analysisManager != null)
+ analysisManager.addSummaryCommonNamesResult(sumResult);
+ }
+ }
+ }
+
+ // if there are no results for supplementary information, ask the Linnaeus name finder
+ if (supplinfAnalysisResults.size() == 0) {
+ configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask LinnaeusNameFinder.");
+
+ for (TextPart sInf : info.getSupplInfos()) {
+ String supinfResult = nameFinder.findCommonNames(sInf.getText());
+ if (supinfResult != null) {
+ supplinfAnalysisResults.add(supinfResult);
+
+ // if there is an analysis folder, add result to analysis file
+ if (analysisManager != null)
+ analysisManager.addSupplInfCommonNamesResult(supinfResult);
+ }
+ }
+ }
+
return null;
}
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java
--- a/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Mon Oct 22 14:21:14 2012 -0700
@@ -2,12 +2,11 @@
import java.io.InputStream;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.logging.Logger;
-import martin.common.ArgParser;
+import org.apache.commons.lang3.StringEscapeUtils;
+
import uk.ac.man.documentparser.dataholders.Document;
import uk.ac.man.entitytagger.Mention;
import uk.ac.man.entitytagger.doc.TaggedDocument;
@@ -48,9 +47,20 @@
Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null);
TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc);
List species = tagged.getAllMatches();
- for (Mention s : species)
- System.out.println("found " + s.getMostProbableID() + ": " + s.getText() + " at " + s.getStart());
- return null;
+ StringBuffer sb = new StringBuffer();
+ sb.append("");
+
+ for (Mention s : species) {
+ sb.append("");
+ }
+
+ sb.append("");
+ System.out.println(sb.toString());
+ return sb.toString();
}
public Postprocessor getPostprocessor(Map comments,
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/text/TextInformation.java
--- a/src/de/mpiwg/anteater/text/TextInformation.java Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/text/TextInformation.java Mon Oct 22 14:21:14 2012 -0700
@@ -4,6 +4,7 @@
import de.mpiwg.anteater.persons.PersonsExtraction;
import de.mpiwg.anteater.places.PlacesExtraction;
+import de.mpiwg.anteater.species.common.CommonNamesExtraction;
import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction;
public class TextInformation {
@@ -13,6 +14,7 @@
private List scientificNamesExtractions;
private List placesExtractions;
private List personsExtractions;
+ private List commonNamesExtractions;
private String filepath;
public void setSummaries(List summaries) {
@@ -51,4 +53,10 @@
public List getPersonsExtractions() {
return personsExtractions;
}
+ public void setCommonNamesExtractions(List commonNamesExtractions) {
+ this.commonNamesExtractions = commonNamesExtractions;
+ }
+ public List getCommonNamesExtractions() {
+ return commonNamesExtractions;
+ }
}
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java
--- a/src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java Mon Oct 22 14:21:14 2012 -0700
@@ -124,6 +124,14 @@
public void addSupplInfNamesResult(String result) {
addElementToDoc(result, "/analysis/scientificNames/supplementary_information");
}
+
+ public void addSummaryCommonNamesResult(String result) {
+ addElementToDoc(result, "/analysis/commonNames/summaries");
+ }
+
+ public void addSupplInfCommonNamesResult(String result) {
+ addElementToDoc(result, "/analysis/commonNames/supplementary_information");
+ }
private void addElementToDoc(String result, String xpath) {
IXMLParser parser = new JDOMParser(result, false);
diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml
--- a/src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml Mon Oct 22 13:46:54 2012 -0700
+++ b/src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml Mon Oct 22 14:21:14 2012 -0700
@@ -15,6 +15,14 @@
+
+
+
+
+
+
+
+