Mercurial > hg > anteater
view src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.anteater.results;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.commons.lang3.StringEscapeUtils;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.results.filter.FilterController;
import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
import de.mpiwg.anteater.results.impl.ApplicantResultManager;
import de.mpiwg.anteater.results.impl.LocationResultFinder;
import de.mpiwg.anteater.results.impl.LocationResultManager;
import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
import de.mpiwg.anteater.text.Paragraph;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.xml.IResultFileManager;
import de.mpiwg.anteater.xml.impl.ResultXMLManager;

/**
 * Runs the applicant, scientific-name and location finders over a list of
 * texts, filters the predictions, and hands the summaries and supplementary
 * information of every text, annotated with inline tags, to an
 * {@link IResultFileManager}.
 */
public class ResultController {

    public final static String SUMMARY_TAG = "<summary>";
    public final static String SUMMARY_TAG_CLOSE = "</summary>";
    public final static String SUPPLINFO_TAG = "<supplInfo>";
    public final static String SUPPLINFO_TAG_CLOSE = "</supplInfo>";
    public final static String COMPONENT_NAME = ResultController.class.getSimpleName();

    /**
     * Order in which {@link #tagText(StringBuffer, List)} consumes tags:
     * closing position descending, ties broken by opening position ascending.
     * Explicit comparisons are used instead of subtraction so the result can
     * never be distorted by integer overflow.
     */
    private static final Comparator<AnnotationTag> BACK_TO_FRONT = new Comparator<AnnotationTag>() {
        @Override
        public int compare(AnnotationTag o1, AnnotationTag o2) {
            int close1 = o1.getClosingPosition();
            int close2 = o2.getClosingPosition();
            if (close1 != close2) {
                return close2 > close1 ? 1 : -1;
            }
            int open1 = o1.getPosition();
            int open2 = o2.getPosition();
            if (open1 == open2) {
                return 0;
            }
            return open1 < open2 ? -1 : 1;
        }
    };

    private final AnteaterConfiguration configuration;

    /**
     * @param configuration
     *            supplies the result path and the logger, and is passed on to
     *            the finders and result managers created by this controller
     */
    public ResultController(AnteaterConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * Finds applicants, scientific names and locations in the given texts,
     * filters the predicted results per text and passes the annotated
     * summaries and supplementary information to a {@link ResultXMLManager}
     * created for {@code <result path>/<name of the text file>}.
     *
     * @param infos
     *            the texts to process
     * @return one {@link ResultsCarrier} per text, in the order of
     *         {@code infos}, holding the text and its filtered results
     */
    public List<ResultsCarrier> saveResults(List<TextInformation> infos) {
        String resultFolder = configuration.getResultPath();
        List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();

        ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
        ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
        LocationResultManager locationManager = new LocationResultManager(configuration);

        // find applicants
        IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
        List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
        Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager
                .sortResultsByText(applicants);

        // find species
        IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
        List<SpeciesScientificResult> names = namesFinder.getResults(infos);
        Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager
                .sortResultsByText(names);

        // find locations; the location finder is given the names and
        // applicants found above
        IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names,
                applicants);
        List<LocationResult> locations = locationFinder.getResults(infos);
        Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager
                .sortResultsByText(locations);

        FilterController filterController = new FilterController();

        for (TextInformation info : infos) {
            File textfile = new File(info.getFilepath());
            String resultFile = resultFolder + File.separator + textfile.getName();
            IResultFileManager resultManager = new ResultXMLManager(resultFile);

            List<ApplicantResult> applicantResults = applicantManager
                    .getPredictedResults(sortedApplicants.get(info));
            List<SpeciesScientificResult> namesResults = speciesManager
                    .getPredictedResults(sortedNames.get(info));
            List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));

            filterController.runFilters(info, applicantResults, namesResults, locResults);

            // create carrier
            resultCarriers.add(new ResultsCarrier(info, applicantResults, namesResults, locResults));

            // annotate summaries
            for (TextPart sum : info.getSummaries()) {
                int textIndex = sum.getTextIdx();
                List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
                tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
                tags.addAll(locationManager.getSummaryTags(locResults, textIndex));
                tags.addAll(getParagraphTags(sum));

                StringBuffer annotatedSummary = tagText(new StringBuffer(sum.getText()), tags);
                resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE);
            }

            // annotate supplementary information
            for (TextPart suppleInf : info.getSupplInfos()) {
                int textIndex = suppleInf.getTextIdx();
                List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
                tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
                tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));
                tags.addAll(getParagraphTags(suppleInf));

                StringBuffer annotatedSuppleInf = tagText(new StringBuffer(suppleInf.getText()), tags);
                resultManager
                        .addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE);
            }
        }

        return resultCarriers;
    }

    /**
     * Resolves conflicts between consecutive tags of an already sorted list
     * (see {@link #tagText(StringBuffer, List)} for the order).
     * <p>
     * Each tag is compared with a reference tag, initially the first tag of
     * the list. If the tag ends at or before the start of the reference tag,
     * or lies completely inside it, there is no conflict and the tag becomes
     * the new reference. Otherwise the two overlap: if the current tag is a
     * paragraph tag (its opening tag starts with {@code <p}) the reference tag
     * is removed from the list; if not, the reference tag is rewritten into an
     * {@code <error type="...">} tag that starts where the current tag closes.
     * In the conflict case the reference tag is not advanced.
     *
     * @param tags
     *            sorted tags; modified in place
     */
    protected void checkTags(List<AnnotationTag> tags) {
        AnnotationTag lastTag = null;
        List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();

        for (AnnotationTag tag : tags) {
            if (lastTag == null) {
                lastTag = tag;
                continue;
            }

            // "<=": a tag closing exactly where the reference tag opens is
            // adjacent, not overlapping. tagText treats that case as disjoint
            // as well, and the error handling below produces such a pair.
            boolean liesBefore = tag.getClosingPosition() <= lastTag.getPosition();
            boolean liesInside = tag.getPosition() >= lastTag.getPosition()
                    && tag.getClosingPosition() <= lastTag.getClosingPosition();
            if (liesBefore || liesInside) {
                lastTag = tag;
                continue;
            }

            if (tag.getTag().startsWith("<p")) {
                toBeRemoved.add(lastTag);
            } else {
                lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
                lastTag.setClosingTag("</error>");
                lastTag.setPosition(tag.getClosingPosition());
            }
        }

        tags.removeAll(toBeRemoved);
    }

    /**
     * Inserts the opening and closing strings of the given tags into the text
     * and XML-escapes every piece of the text itself.
     * <p>
     * The tags are sorted by closing position (descending, ties by opening
     * position ascending), checked with {@link #checkTags(List)} and then
     * consumed from the end of the text towards its start. Tags whose closing
     * string has been written but whose opening string is still pending are
     * kept on a stack. A tag whose opening or closing position lies beyond the
     * end of the text is logged and skipped.
     *
     * @param text
     *            the raw, unescaped text; not modified
     * @param tags
     *            the tags to insert; sorted and possibly modified in place
     * @return a new buffer with the escaped text and the inserted tags
     */
    protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
        Collections.sort(tags, BACK_TO_FRONT);
        checkTags(tags);

        Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
        StringBuffer finalText = new StringBuffer();
        // start index of the part of the text already copied to finalText
        int end = text.length();

        for (AnnotationTag tag : tags) {
            // A tag closing beyond the text would move 'end' past the text
            // length and make the next substring call throw, so both
            // positions have to fit.
            boolean fitsIntoText = tag.getPosition() <= text.length()
                    && tag.getClosingPosition() <= text.length();
            if (!fitsIntoText) {
                configuration.getLogger().logMessage(COMPONENT_NAME,
                        "Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
                continue;
            }

            // write the opening strings of all pending tags that start at or
            // after the point where this tag closes
            while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
                end = prependOpeningTag(tagStack.pop(), text, finalText, end);
            }

            if (tag.getClosingPosition() < end) {
                finalText.insert(0,
                        StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end)));
            }
            finalText.insert(0, tag.getClosingTag());
            end = tag.getClosingPosition();
            tagStack.push(tag);
        }

        while (!tagStack.isEmpty()) {
            end = prependOpeningTag(tagStack.pop(), text, finalText, end);
        }

        // The text in front of the first tag has to be escaped like every
        // other segment; this is also the whole text when there are no tags.
        finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(0, end)));
        return finalText;
    }

    /**
     * Prepends the escaped text between the tag's opening position and
     * {@code end}, followed by the tag's opening string, to {@code out}.
     *
     * @return the tag's opening position, i.e. the new value for {@code end}
     */
    private int prependOpeningTag(AnnotationTag tag, StringBuffer text, StringBuffer out, int end) {
        if (tag.getPosition() < end) {
            out.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getPosition(), end)));
        }
        out.insert(0, tag.getTag());
        return tag.getPosition();
    }

    /**
     * Creates one {@code <p type="...">} tag per paragraph of interest of the
     * given text part. Offsets are computed by adding up the lengths of the
     * paragraph texts plus one character per paragraph for the separating
     * line break.
     *
     * @param part
     *            the text part whose paragraphs of interest are tagged
     * @return the paragraph tags in paragraph order
     */
    protected List<AnnotationTag> getParagraphTags(TextPart part) {
        List<AnnotationTag> tags = new ArrayList<AnnotationTag>();
        int offset = 0;

        for (Paragraph para : part.getParagraphsOfInterest()) {
            int length = para.getParagraphText().length();
            tags.add(new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", offset, "</p>",
                    offset + length, "paragraph"));
            // add 1 for "\n"
            offset += length + 1;
        }

        return tags;
    }
}