# HG changeset patch # User jdamerow # Date 1350940874 25200 # Node ID ae96e4bc7fb2dd289ff5026ff6892f376441fe3e # Parent 1c2b4f5e2c0592a298847d8faeb2efa80e02478a save found species to analysis files diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/00-18565.xml --- a/analysis/00-18565.xml Mon Oct 22 13:46:54 2012 -0700 +++ b/analysis/00-18565.xml Mon Oct 22 14:21:14 2012 -0700 @@ -85,6 +85,8 @@ + + diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/01-14522.xml --- a/analysis/01-14522.xml Mon Oct 22 13:46:54 2012 -0700 +++ b/analysis/01-14522.xml Mon Oct 22 14:21:14 2012 -0700 @@ -81,6 +81,22 @@ + + + + + + + + + + + + + + + + diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/01-19062.xml --- a/analysis/01-19062.xml Mon Oct 22 13:46:54 2012 -0700 +++ b/analysis/01-19062.xml Mon Oct 22 14:21:14 2012 -0700 @@ -89,6 +89,29 @@ + + + + + + + + + + + + + + + + + + + + + + + diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 analysis/2010-23822.xml --- a/analysis/2010-23822.xml Mon Oct 22 13:46:54 2012 -0700 +++ b/analysis/2010-23822.xml Mon Oct 22 14:21:14 2012 -0700 @@ -132,6 +132,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/AnteaterController.java --- a/src/de/mpiwg/anteater/AnteaterController.java Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/AnteaterController.java Mon Oct 22 14:21:14 2012 -0700 @@ -12,6 +12,8 @@ import de.mpiwg.anteater.places.PlacesExtraction; import de.mpiwg.anteater.results.ResultController; import de.mpiwg.anteater.results.ResultsCarrier; +import de.mpiwg.anteater.species.common.CommonNameFindController; +import de.mpiwg.anteater.species.common.CommonNamesExtraction; import de.mpiwg.anteater.species.scientific.ScientificNameFindController; import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; import de.mpiwg.anteater.text.TextInformation; @@ -39,6 +41,7 @@ ScientificNameFindController scienceNameFindController = new 
ScientificNameFindController(configuration); PlaceFinderController placesController = new PlaceFinderController(configuration); PersonFinderController personsController = new PersonFinderController(configuration); + CommonNameFindController commonNameFindController = new CommonNameFindController(configuration); List textInformations = new ArrayList(); TextManager textManager = new TextManager(configuration); @@ -55,6 +58,10 @@ List scienNameResults = scienceNameFindController.findScientificNamesInXML(info); info.setScientificNamesExtractions(scienNameResults); + //get common names and scientific + List commonNamesResults = commonNameFindController.findCommonNamesInXML(info); + info.setCommonNamesExtractions(commonNamesResults); + // get places List placesResults = placesController.findPlacesInXML(info); info.setPlacesExtractions(placesResults); diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/species/common/CommonNameFindController.java --- a/src/de/mpiwg/anteater/species/common/CommonNameFindController.java Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/species/common/CommonNameFindController.java Mon Oct 22 14:21:14 2012 -0700 @@ -5,8 +5,12 @@ import java.util.List; import de.mpiwg.anteater.AnteaterConfiguration; +import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder; +import de.mpiwg.anteater.species.scientific.IScientificNamesFinder; import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; +import de.mpiwg.anteater.species.scientific.impl.GNRDNameFinder; import de.mpiwg.anteater.text.TextInformation; +import de.mpiwg.anteater.text.TextPart; import de.mpiwg.anteater.xml.impl.AnalysisXMLManager; public class CommonNameFindController { @@ -19,7 +23,7 @@ this.configuration = configuration; } - public List findCommonNamesInXML(TextInformation info) { + public List findCommonNamesInXML(TextInformation info) { List results = new ArrayList(); List summaryAnalysisResults = new ArrayList(); List supplinfAnalysisResults = new 
ArrayList(); @@ -40,6 +44,40 @@ configuration.getLogger().logMessage("found " + supplinfAnalysisResults.size() + " result(s)."); } + ICommonNameFinder nameFinder = new LinnaeusNameFinder(configuration.getLogger()); + + // if there are no results for summaries, ask the LinnaeusNameFinder. + if (summaryAnalysisResults.size() == 0) { + configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for summaries, so will ask LinnaeusNameFinder."); + + for (TextPart sum : info.getSummaries()) { + String sumResult = nameFinder.findCommonNames(sum.getText()); + if (sumResult != null) { + summaryAnalysisResults.add(sumResult); + + // if there is an analysis folder, add result to analysis file + if (analysisManager != null) + analysisManager.addSummaryCommonNamesResult(sumResult); + } + } + } + + // if there are no results for supplementary information, ask the LinnaeusNameFinder + if (supplinfAnalysisResults.size() == 0) { + configuration.getLogger().logMessage(COMPONENT_NAME, "No results found for supplementary information, so will ask LinnaeusNameFinder."); + + for (TextPart sInf : info.getSupplInfos()) { + String supinfResult = nameFinder.findCommonNames(sInf.getText()); + if (supinfResult != null) { + supplinfAnalysisResults.add(supinfResult); + + // if there is an analysis folder, add result to analysis file + if (analysisManager != null) + analysisManager.addSupplInfCommonNamesResult(supinfResult); + } + } + } + return null; } diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java --- a/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Mon Oct 22 14:21:14 2012 -0700 @@ -2,12 +2,11 @@ import java.io.InputStream; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.logging.Logger; -import martin.common.ArgParser; +import 
org.apache.commons.lang3.StringEscapeUtils; + import uk.ac.man.documentparser.dataholders.Document; import uk.ac.man.entitytagger.Mention; import uk.ac.man.entitytagger.doc.TaggedDocument; @@ -48,9 +47,20 @@ Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null); TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); List species = tagged.getAllMatches(); - for (Mention s : species) - System.out.println("found " + s.getMostProbableID() + ": " + s.getText() + " at " + s.getStart()); - return null; + StringBuffer sb = new StringBuffer(); + sb.append(""); + + for (Mention s : species) { + sb.append(""); + } + + sb.append(""); + System.out.println(sb.toString()); + return sb.toString(); } public Postprocessor getPostprocessor(Map comments, diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/text/TextInformation.java --- a/src/de/mpiwg/anteater/text/TextInformation.java Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/text/TextInformation.java Mon Oct 22 14:21:14 2012 -0700 @@ -4,6 +4,7 @@ import de.mpiwg.anteater.persons.PersonsExtraction; import de.mpiwg.anteater.places.PlacesExtraction; +import de.mpiwg.anteater.species.common.CommonNamesExtraction; import de.mpiwg.anteater.species.scientific.ScientificNamesExtraction; public class TextInformation { @@ -13,6 +14,7 @@ private List scientificNamesExtractions; private List placesExtractions; private List personsExtractions; + private List commonNamesExtractions; private String filepath; public void setSummaries(List summaries) { @@ -51,4 +53,10 @@ public List getPersonsExtractions() { return personsExtractions; } + public void setCommonNamesExtractions(List commonNamesExtractions) { + this.commonNamesExtractions = commonNamesExtractions; + } + public List getCommonNamesExtractions() { + return commonNamesExtractions; + } } diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 
src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java --- a/src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/xml/impl/AnalysisXMLManager.java Mon Oct 22 14:21:14 2012 -0700 @@ -124,6 +124,14 @@ public void addSupplInfNamesResult(String result) { addElementToDoc(result, "/analysis/scientificNames/supplementary_information"); } + + public void addSummaryCommonNamesResult(String result) { + addElementToDoc(result, "/analysis/commonNames/summaries"); + } + + public void addSupplInfCommonNamesResult(String result) { + addElementToDoc(result, "/analysis/commonNames/supplementary_information"); + } private void addElementToDoc(String result, String xpath) { IXMLParser parser = new JDOMParser(result, false); diff -r 1c2b4f5e2c05 -r ae96e4bc7fb2 src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml --- a/src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml Mon Oct 22 13:46:54 2012 -0700 +++ b/src/de/mpiwg/anteater/xml/impl/templates/analysisFile.xml Mon Oct 22 14:21:14 2012 -0700 @@ -15,6 +15,14 @@ + + + + + + + +