view src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line source

package de.mpiwg.anteater.results;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.commons.lang3.StringEscapeUtils;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.results.filter.FilterController;
import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
import de.mpiwg.anteater.results.impl.ApplicantResultManager;
import de.mpiwg.anteater.results.impl.LocationResultFinder;
import de.mpiwg.anteater.results.impl.LocationResultManager;
import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
import de.mpiwg.anteater.text.Paragraph;
import de.mpiwg.anteater.text.TextInformation;
import de.mpiwg.anteater.text.TextPart;
import de.mpiwg.anteater.xml.IResultFileManager;
import de.mpiwg.anteater.xml.impl.ResultXMLManager;

/**
 * Runs the applicant, scientific-name and location finders over a list of
 * texts, annotates each text's summaries and supplementary information with
 * inline tags, and hands the annotated strings to a per-text result file
 * manager.
 */
public class ResultController {
	
	/** Opening element wrapped around every annotated summary. */
	public static final String SUMMARY_TAG = "<summary>";
	/** Closing element wrapped around every annotated summary. */
	public static final String SUMMARY_TAG_CLOSE = "</summary>";
	/** Opening element wrapped around every annotated supplementary information part. */
	public static final String SUPPLINFO_TAG = "<supplInfo>";
	/** Closing element wrapped around every annotated supplementary information part. */
	public static final String SUPPLINFO_TAG_CLOSE = "</supplInfo>";
	
	/** Component name passed to the logger for messages emitted by this class. */
	public static final String COMPONENT_NAME = ResultController.class.getSimpleName();
	
	private AnteaterConfiguration configuration;
	
	/**
	 * Creates a controller.
	 *
	 * @param configuration supplies the result path and the logger, and is
	 *                      passed on to the result managers and finders
	 */
	public ResultController(AnteaterConfiguration configuration) {
		this.configuration = configuration;
	}

	/**
	 * Finds applicants, scientific names and locations in the given texts,
	 * filters them, and adds the annotated summaries and supplementary
	 * information of every text to a result file manager created for
	 * {@code <result path>/<name of the text file>}.
	 *
	 * @param infos the texts to process
	 * @return one {@link ResultsCarrier} per text, in the iteration order of
	 *         {@code infos}, holding the (filtered) results of that text
	 */
	public List<ResultsCarrier> saveResults(List<TextInformation> infos) {
		
		String resultFolder = configuration.getResultPath();
		List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();
		
		ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
		ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
		LocationResultManager locationManager = new LocationResultManager(configuration);
		
		// find applicants
		IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
		List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
		Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants);
		
		// find species
		IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
		List<SpeciesScientificResult> names = namesFinder.getResults(infos);
		Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names);
		
		// find locations; the location finder is given the names and applicants found above
		IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants);
		List<LocationResult> locations = locationFinder.getResults(infos);
		Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations);
		
		FilterController filterController = new FilterController();
		
		for (TextInformation info : infos) {
			// the result file gets the same file name as the text file, inside the result folder
			File textfile = new File(info.getFilepath());
			String resultFile = resultFolder  + File.separator + textfile.getName();
			IResultFileManager resultManager = new ResultXMLManager(resultFile);
			
			// annotate summaries
			List<TextPart> summaries = info.getSummaries();
			// NOTE(review): the map lookups may return null for a text without
			// results; presumably getPredictedResults copes with that - confirm.
			List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info));
			List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info));
			List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));
			
			filterController.runFilters(info, applicantResults, namesResults, locResults);
			
			// create carrier
			ResultsCarrier carrier = new ResultsCarrier(info, applicantResults, namesResults, locResults);
			resultCarriers.add(carrier);
			
			for (TextPart sum : summaries) {
				
				int textIndex = sum.getTextIdx();
				List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
				tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
				tags.addAll(locationManager.getSummaryTags(locResults, textIndex));
				tags.addAll(getParagraphTags(sum));
				
				StringBuffer annotatedSummary = new StringBuffer(sum.getText());
				annotatedSummary = tagText(annotatedSummary, tags);
				
				resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE);
			}
			
			// annotate supplementary information
			List<TextPart> suppleInfs = info.getSupplInfos();
			
			for (TextPart suppleInf : suppleInfs) {
				int textIndex = suppleInf.getTextIdx();
				List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
				tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
				tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));
				tags.addAll(getParagraphTags(suppleInf));
				
				StringBuffer annotatedSuppleInf = new StringBuffer(suppleInf.getText());
				annotatedSuppleInf = tagText(annotatedSuppleInf, tags);
				
				resultManager.addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE);
			}
		}
		
		return resultCarriers;
	}
	
	
	/**
	 * Resolves tags that partially overlap their predecessor so that the tags
	 * can be written as properly nested elements.
	 * <p>
	 * The list is walked in its given order ({@link #tagText} sorts it by
	 * closing position, descending, before calling this method). Each tag is
	 * compared with the last accepted tag:
	 * <ul>
	 * <li>if it ends at or before the start of the last tag, or lies completely
	 * inside it, it is accepted and becomes the new last tag;</li>
	 * <li>otherwise, if it is a paragraph tag (its opening tag starts with
	 * {@code <p}), the last tag is removed from the list;</li>
	 * <li>otherwise the last tag is turned into an {@code <error>} element that
	 * starts where the current tag closes.</li>
	 * </ul>
	 *
	 * @param tags the tags to check; modified in place
	 */
	protected void checkTags(List<AnnotationTag> tags) {
		AnnotationTag lastTag = null;
		
		List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();
		for (AnnotationTag tag : tags) {
			if (lastTag == null) {
				lastTag = tag;
				continue;
			}
			// Closing positions are exclusive, so a tag that closes exactly where
			// the last tag opens does not overlap it (hence "<=", matching the
			// ">=" pop condition in tagText).
			if ((tag.getClosingPosition() <= lastTag.getPosition()) || (tag.getPosition() >= lastTag.getPosition() && tag.getClosingPosition() <= lastTag.getClosingPosition())) {
				lastTag = tag;
				continue;
			}
			// partial overlap: lastTag deliberately stays the reference for the next tag
			if (tag.getTag().startsWith("<p")) {
				toBeRemoved.add(lastTag);
			}
			else {
				lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
				lastTag.setClosingTag("</error>");
				lastTag.setPosition(tag.getClosingPosition());
			}
		}
		
		tags.removeAll(toBeRemoved);
	}
	
	/**
	 * Inserts the given tags into the text and XML-escapes all text segments.
	 * <p>
	 * The tags are sorted by closing position (descending), then by opening
	 * position (ascending), and passed through {@link #checkTags}. The result
	 * is then assembled from the end of the text towards its start; a stack
	 * holds the tags whose closing tag has been written but whose opening tag
	 * has not.
	 * <p>
	 * Tags whose opening or closing position lies beyond the end of the text
	 * are logged and skipped.
	 *
	 * @param text the plain text to annotate; not modified
	 * @param tags the tags to insert; sorted and possibly modified in place
	 * @return a new buffer with the escaped text and the inserted tags
	 */
	protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
		Collections.sort(tags, new Comparator<AnnotationTag>() {

			@Override
			public int compare(AnnotationTag o1, AnnotationTag o2) {
				int byClosing = compareInts(o2.getClosingPosition(), o1.getClosingPosition());
				if (byClosing != 0) {
					return byClosing;
				}
				return compareInts(o1.getPosition(), o2.getPosition());
			}
		});
		
		checkTags(tags);
		
		Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
		StringBuffer finalText = new StringBuffer();
		
		// "end" is the start of the part of the text that has already been written
		int end = text.length();
		for (AnnotationTag tag : tags) {
			// The closing position has to be checked as well: a closing position
			// beyond the text would move "end" past the text length and make the
			// next substring call throw.
			if (tag.getPosition() <= text.length() && tag.getClosingPosition() <= text.length()) {
				// write the opening tags of all tags that start at or after the point where this tag closes
				while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
					AnnotationTag topTag = tagStack.pop();
					if (topTag.getPosition() < end) {
						finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
					}
					finalText.insert(0, topTag.getTag());
					end = topTag.getPosition();
				}
				if (tag.getClosingPosition() < end) {
					finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(tag.getClosingPosition(), end)));
				}
				finalText.insert(0, tag.getClosingTag());
				end = tag.getClosingPosition();
				tagStack.push(tag);
			}
			else {
				configuration.getLogger().logMessage(COMPONENT_NAME,"Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
			}
		}
		// write the opening tags that are still pending
		while (!tagStack.isEmpty()) {
			AnnotationTag topTag = tagStack.pop();
			if (topTag.getPosition() < end) {
				finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(topTag.getPosition(), end)));
			}
			finalText.insert(0, topTag.getTag());
			end = topTag.getPosition();
		}
		// The text in front of the first tag has to be escaped like every other segment.
		finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(0, end)));
		return finalText;
	}
	
	/**
	 * Compares two ints without the overflow risk of subtracting them.
	 *
	 * @return a negative value, zero or a positive value if {@code a} is
	 *         less than, equal to or greater than {@code b}
	 */
	private static int compareInts(int a, int b) {
		if (a < b) {
			return -1;
		}
		return a > b ? 1 : 0;
	}
	
	
	/**
	 * Creates one {@code <p type="...">} tag for every paragraph of interest
	 * of the given text part.
	 * <p>
	 * Positions are computed by adding up the paragraph lengths plus one
	 * character per paragraph for the separating line break.
	 * NOTE(review): this assumes the paragraphs of interest appear in the
	 * part's text back to back, starting at index 0 - confirm.
	 *
	 * @param part the text part whose paragraphs are tagged
	 * @return the paragraph tags, in the order of the paragraphs
	 */
	protected List<AnnotationTag> getParagraphTags(TextPart part) {
		int counter = 0;
		List<AnnotationTag> tags = new ArrayList<AnnotationTag>();
		
		for (Paragraph para : part.getParagraphsOfInterest()) {
			AnnotationTag opentag = new AnnotationTag("<p type=\"" + para.getParagraphType() + "\">", counter, "</p>", counter + para.getParagraphText().length(), "paragraph");
			tags.add(opentag);
			// add 1 for "\n"
			counter += para.getParagraphText().length() + 1;
		}
		
		return tags;
	}

}