Mercurial > hg > anteater
diff src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/anteater/results/ResultController.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,213 @@
package de.mpiwg.anteater.results;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.commons.lang3.StringEscapeUtils;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.results.filter.FilterController;
import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
import de.mpiwg.anteater.results.impl.ApplicantResultManager;
import de.mpiwg.anteater.results.impl.LocationResultFinder;
import de.mpiwg.anteater.results.impl.LocationResultManager;
import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
import de.mpiwg.anteater.text.Paragraph;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.xml.IResultFileManager;
import de.mpiwg.anteater.xml.impl.ResultXMLManager;

/**
 * Runs the applicant, scientific-name and location finders over a list of
 * texts, filters the predicted results and hands the annotated summaries and
 * supplementary-information parts of every text to an
 * {@link IResultFileManager}.
 */
public class ResultController {

    public final static String SUMMARY_TAG = "<summary>";
    public final static String SUMMARY_TAG_CLOSE = "</summary>";
    public final static String SUPPLINFO_TAG = "<supplInfo>";
    public final static String SUPPLINFO_TAG_CLOSE = "</supplInfo>";

    public final static String COMPONENT_NAME = ResultController.class.getSimpleName();

    /**
     * Order in which {@link #tagText(StringBuffer, List)} processes tags:
     * largest closing position first; among tags with the same closing
     * position, the smallest opening position first. Written with explicit
     * comparisons instead of subtraction so that the result cannot overflow.
     */
    private static final Comparator<AnnotationTag> BACK_TO_FRONT_ORDER = new Comparator<AnnotationTag>() {

        @Override
        public int compare(AnnotationTag o1, AnnotationTag o2) {
            int byClosing = compareInts(o2.getClosingPosition(), o1.getClosingPosition());
            if (byClosing != 0) {
                return byClosing;
            }
            return compareInts(o1.getPosition(), o2.getPosition());
        }
    };

    private final AnteaterConfiguration configuration;

    /**
     * @param configuration
     *            configuration that supplies the result path and the logger,
     *            and that is passed on to the result managers and finders
     */
    public ResultController(AnteaterConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * Finds applicants, scientific names and locations in the given texts,
     * runs the filters on the predicted results of every text and passes the
     * annotated summaries and supplementary-information parts to a
     * {@link ResultXMLManager} created for
     * {@code <result path>/<name of the text file>}.
     *
     * @param infos
     *            the texts to process
     * @return one {@link ResultsCarrier} per text, in the order of
     *         {@code infos}
     */
    public List<ResultsCarrier> saveResults(List<TextInformation> infos) {

        String resultFolder = configuration.getResultPath();
        List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();

        ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
        ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
        LocationResultManager locationManager = new LocationResultManager(configuration);

        // find applicants
        IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
        List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
        Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants);

        // find species
        IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
        List<SpeciesScientificResult> names = namesFinder.getResults(infos);
        Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names);

        // find locations (the finder is given the names and applicants found above)
        IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants);
        List<LocationResult> locations = locationFinder.getResults(infos);
        Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations);

        FilterController filterController = new FilterController();

        for (TextInformation info : infos) {
            File textfile = new File(info.getFilepath());
            String resultFile = resultFolder + File.separator + textfile.getName();
            IResultFileManager resultManager = new ResultXMLManager(resultFile);

            List<TextPart> summaries = info.getSummaries();
            List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info));
            List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info));
            List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));

            filterController.runFilters(info, applicantResults, namesResults, locResults);

            // create carrier
            resultCarriers.add(new ResultsCarrier(info, applicantResults, namesResults, locResults));

            // annotate summaries
            for (TextPart sum : summaries) {
                int textIndex = sum.getTextIdx();
                List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
                tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
                tags.addAll(locationManager.getSummaryTags(locResults, textIndex));

                resultManager.addSummary(SUMMARY_TAG + annotate(sum, tags) + SUMMARY_TAG_CLOSE);
            }

            // annotate supplementary information
            for (TextPart suppleInf : info.getSupplInfos()) {
                int textIndex = suppleInf.getTextIdx();
                List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
                tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
                tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));

                resultManager.addSupplInf(SUPPLINFO_TAG + annotate(suppleInf, tags) + SUPPLINFO_TAG_CLOSE);
            }
        }

        return resultCarriers;
    }

    /**
     * Adds the paragraph tags of {@code part} to {@code tags} and returns the
     * text of the part with all tags inserted.
     */
    private String annotate(TextPart part, List<AnnotationTag> tags) {
        tags.addAll(getParagraphTags(part));
        return tagText(new StringBuffer(part.getText()), tags).toString();
    }

    /**
     * Walks through {@code tags} and resolves every tag that conflicts with
     * the current reference tag. The first tag is the initial reference.
     * <p>
     * A tag becomes the new reference when it closes before the reference
     * opens, or when it lies completely inside the reference. Any other tag
     * is a conflict and the reference is kept:
     * <ul>
     * <li>if the conflicting tag starts with {@code <p}, the reference tag is
     * removed from {@code tags} once the walk is finished;</li>
     * <li>otherwise the reference tag is turned into an
     * {@code <error type="...">} element and its opening position is moved to
     * the closing position of the conflicting tag.</li>
     * </ul>
     * {@link #tagText(StringBuffer, List)} calls this method right after
     * sorting the tags.
     *
     * @param tags
     *            the tags to check; modified in place
     */
    protected void checkTags(List<AnnotationTag> tags) {
        AnnotationTag lastTag = null;

        List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();
        for (AnnotationTag tag : tags) {
            if (lastTag == null) {
                lastTag = tag;
                continue;
            }
            // NOTE(review): a tag that closes exactly where lastTag opens is
            // treated as a conflict here (strict '<'), whereas tagText treats
            // that case as disjoint ('>='). Confirm whether '<=' is intended.
            boolean closesBeforeLast = tag.getClosingPosition() < lastTag.getPosition();
            boolean insideLast = tag.getPosition() >= lastTag.getPosition()
                    && tag.getClosingPosition() <= lastTag.getClosingPosition();
            if (closesBeforeLast || insideLast) {
                lastTag = tag;
                continue;
            }
            if (tag.getTag().startsWith("<p")) {
                toBeRemoved.add(lastTag);
            } else {
                lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
                lastTag.setClosingTag("</error>");
                lastTag.setPosition(tag.getClosingPosition());
            }
        }

        tags.removeAll(toBeRemoved);
    }

    /**
     * Inserts the given tags into {@code text} and XML-escapes all text
     * between them.
     * <p>
     * The tags are sorted (largest closing position first), checked with
     * {@link #checkTags(List)} and then processed from the end of the text
     * towards its beginning; the result is built by prepending. A stack holds
     * the tags whose closing tag has been written but whose opening tag has
     * not.
     * <p>
     * A tag whose opening or closing position lies beyond the end of the text
     * cannot be inserted; it is logged and skipped.
     *
     * @param text
     *            the raw text; not modified
     * @param tags
     *            the tags to insert; sorted and possibly modified in place
     * @return a new buffer with the escaped text and the inserted tags
     */
    protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
        Collections.sort(tags, BACK_TO_FRONT_ORDER);

        checkTags(tags);

        Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
        StringBuffer finalText = new StringBuffer();

        // start of the part of the text that has already been written
        int end = text.length();
        for (AnnotationTag tag : tags) {
            // A closing position beyond the text would move 'end' past the
            // text length and make a later substring call throw.
            if (tag.getPosition() > text.length() || tag.getClosingPosition() > text.length()) {
                configuration.getLogger().logMessage(COMPONENT_NAME,
                        "Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
                continue;
            }

            // tags on the stack that open at or after the closing position of
            // this tag lie completely behind it: write their opening tags
            while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
                end = prependOpeningTag(finalText, text, tagStack.pop(), end);
            }

            if (tag.getClosingPosition() < end) {
                finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end)));
            }
            finalText.insert(0, tag.getClosingTag());
            end = tag.getClosingPosition();
            tagStack.push(tag);
        }

        while (!tagStack.isEmpty()) {
            end = prependOpeningTag(finalText, text, tagStack.pop(), end);
        }

        // text in front of the first tag; escaped like every other segment
        finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(0, end)));
        return finalText;
    }

    /**
     * Prepends the escaped text between the opening position of {@code tag}
     * and {@code end} to {@code out}, followed by the opening tag itself.
     *
     * @return the opening position of {@code tag}, i.e. the new start of the
     *         part of the text that has already been written
     */
    private static int prependOpeningTag(StringBuffer out, StringBuffer text, AnnotationTag tag, int end) {
        if (tag.getPosition() < end) {
            out.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getPosition(), end)));
        }
        out.insert(0, tag.getTag());
        return tag.getPosition();
    }

    /**
     * Creates one {@code <p type="...">} / {@code </p>} tag per paragraph
     * returned by {@link TextPart#getParagraphsOfInterest()}. The first
     * paragraph is placed at offset 0; every following paragraph starts one
     * character after the end of the previous one.
     *
     * @param part
     *            the text part whose paragraphs are tagged
     * @return the paragraph tags in paragraph order
     */
    protected List<AnnotationTag> getParagraphTags(TextPart part) {
        int counter = 0;
        List<AnnotationTag> tags = new ArrayList<AnnotationTag>();

        for (Paragraph para : part.getParagraphsOfInterest()) {
            int length = para.getParagraphText().length();
            tags.add(new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", counter, "</p>",
                    counter + length, "paragraph"));
            // add 1 for "\n"
            counter += length + 1;
        }

        return tags;
    }

    /** Overflow-safe comparison of two ints; returns -1, 0 or 1. */
    private static int compareInts(int a, int b) {
        return a < b ? -1 : (a == b ? 0 : 1);
    }

}