diff src/de/mpiwg/anteater/results/ResultController.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/results/ResultController.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,213 @@
+package de.mpiwg.anteater.results;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+
+import de.mpiwg.anteater.AnteaterConfiguration;
+import de.mpiwg.anteater.results.filter.FilterController;
+import de.mpiwg.anteater.results.impl.ApplicantResultFinder;
+import de.mpiwg.anteater.results.impl.ApplicantResultManager;
+import de.mpiwg.anteater.results.impl.LocationResultFinder;
+import de.mpiwg.anteater.results.impl.LocationResultManager;
+import de.mpiwg.anteater.results.impl.ScientificNameResultFinder;
+import de.mpiwg.anteater.results.impl.ScientificNameResultManager;
+import de.mpiwg.anteater.text.Paragraph;
+import de.mpiwg.anteater.text.TextInformation;
+import de.mpiwg.anteater.text.TextPart;
+import de.mpiwg.anteater.xml.IResultFileManager;
+import de.mpiwg.anteater.xml.impl.ResultXMLManager;
+
+/**
+ * Runs the applicant, scientific-name and location result finders over a set of
+ * texts and hands annotated copies of each text's summaries and supplementary
+ * information to a result file manager.
+ * <p>
+ * Annotations are described by {@link AnnotationTag}s that carry character
+ * offsets into the plain text. {@link #tagText(StringBuffer, List)} turns them
+ * into inline markup and XML-escapes the text in between.
+ */
+public class ResultController {
+
+	/** Opening markup put in front of every annotated summary. */
+	public static final String SUMMARY_TAG = "<summary>";
+	/** Closing markup appended to every annotated summary. */
+	public static final String SUMMARY_TAG_CLOSE = "</summary>";
+	/** Opening markup put in front of every annotated supplementary information part. */
+	public static final String SUPPLINFO_TAG = "<supplInfo>";
+	/** Closing markup appended to every annotated supplementary information part. */
+	public static final String SUPPLINFO_TAG_CLOSE = "</supplInfo>";
+
+	/** Component name passed to the logger; the simple name of this class. */
+	public static final String COMPONENT_NAME = ResultController.class.getSimpleName();
+
+	/** Supplies the result path and the logger; handed on to the result finders and managers. */
+	private final AnteaterConfiguration configuration;
+
+	/**
+	 * Creates a controller that works with the given configuration.
+	 *
+	 * @param configuration configuration used for the result path, logging and
+	 *                      the creation of the result finders and managers
+	 */
+	public ResultController(AnteaterConfiguration configuration) {
+		this.configuration = configuration;
+	}
+
+	/**
+	 * Finds applicants, scientific names and locations in the given texts, runs
+	 * the filters on the predicted results of every text and passes the annotated
+	 * summaries and supplementary information of every text to a
+	 * {@link ResultXMLManager} created for that text's result file. The result
+	 * file is named like the text's own file and lies in the configured result
+	 * path.
+	 *
+	 * @param infos the texts to analyse
+	 * @return one {@link ResultsCarrier} per text, in the order of {@code infos},
+	 *         built from the text and its predicted applicant, scientific-name
+	 *         and location results after the filter run
+	 */
+	public List<ResultsCarrier> saveResults(List<TextInformation> infos) {
+
+		String resultFolder = configuration.getResultPath();
+		List<ResultsCarrier> resultCarriers = new ArrayList<ResultsCarrier>();
+
+		ApplicantResultManager applicantManager = new ApplicantResultManager(configuration);
+		ScientificNameResultManager speciesManager = new ScientificNameResultManager(configuration);
+		LocationResultManager locationManager = new LocationResultManager(configuration);
+
+		// find applicants
+		IResultFinder<ApplicantResult> applicantsFinder = new ApplicantResultFinder(configuration);
+		List<ApplicantResult> applicants = applicantsFinder.getResults(infos);
+		Map<TextInformation, List<ApplicantResult>> sortedApplicants = applicantManager.sortResultsByText(applicants);
+
+		// find species
+		IResultFinder<SpeciesScientificResult> namesFinder = new ScientificNameResultFinder();
+		List<SpeciesScientificResult> names = namesFinder.getResults(infos);
+		Map<TextInformation, List<SpeciesScientificResult>> sortedNames = speciesManager.sortResultsByText(names);
+
+		// find locations; the location finder is given the names and applicants found above
+		IResultFinder<LocationResult> locationFinder = new LocationResultFinder(configuration, names, applicants);
+		List<LocationResult> locations = locationFinder.getResults(infos);
+		Map<TextInformation, List<LocationResult>> sortedPlaces = locationManager.sortResultsByText(locations);
+
+		FilterController filterController = new FilterController();
+
+		for (TextInformation info : infos) {
+			File textfile = new File(info.getFilepath());
+			String resultFile = resultFolder  + File.separator + textfile.getName();
+			// NOTE(review): no explicit save/close is called on the manager in this method;
+			// presumably addSummary/addSupplInf persist on their own - confirm.
+			IResultFileManager resultManager = new ResultXMLManager(resultFile);
+
+			// annotate summaries
+			List<TextPart> summaries = info.getSummaries();
+			// NOTE(review): Map.get yields null for a text without results of a kind;
+			// assumes getPredictedResults copes with null - confirm.
+			List<ApplicantResult> applicantResults = applicantManager.getPredictedResults(sortedApplicants.get(info));
+			List<SpeciesScientificResult> namesResults = speciesManager.getPredictedResults(sortedNames.get(info));
+			List<LocationResult> locResults = locationManager.getPredictedResults(sortedPlaces.get(info));
+
+			filterController.runFilters(info, applicantResults, namesResults, locResults);
+
+			// create carrier
+			ResultsCarrier carrier = new ResultsCarrier(info, applicantResults, namesResults, locResults);
+			resultCarriers.add(carrier);
+
+			for (TextPart sum : summaries) {
+
+				int textIndex = sum.getTextIdx();
+				List<AnnotationTag> tags = applicantManager.getSummaryTags(applicantResults, textIndex);
+				tags.addAll(speciesManager.getSummaryTags(namesResults, textIndex));
+				tags.addAll(locationManager.getSummaryTags(locResults, textIndex));
+				tags.addAll(getParagraphTags(sum));
+
+				StringBuffer annotatedSummary = new StringBuffer(sum.getText());
+				annotatedSummary = tagText(annotatedSummary, tags);
+
+				resultManager.addSummary(SUMMARY_TAG + annotatedSummary.toString() + SUMMARY_TAG_CLOSE);
+			}
+
+			// annotate supplementary information
+			List<TextPart> suppleInfs = info.getSupplInfos();
+
+			for (TextPart suppleInf : suppleInfs) {
+				int textIndex = suppleInf.getTextIdx();
+				List<AnnotationTag> tags = applicantManager.getSuppleInfTags(applicantResults, textIndex);
+				tags.addAll(speciesManager.getSuppleInfTags(namesResults, textIndex));
+				tags.addAll(locationManager.getSuppleInfTags(locResults, textIndex));
+				tags.addAll(getParagraphTags(suppleInf));
+
+				StringBuffer annotatedSuppleInf = new StringBuffer(suppleInf.getText());
+				annotatedSuppleInf = tagText(annotatedSuppleInf, tags);
+
+				resultManager.addSupplInf(SUPPLINFO_TAG + annotatedSuppleInf.toString() + SUPPLINFO_TAG_CLOSE);
+			}
+		}
+
+		return resultCarriers;
+	}
+
+
+	/**
+	 * Resolves tags that would produce badly nested markup. The list is walked in
+	 * its given order ({@link #tagText(StringBuffer, List)} sorts it by closing
+	 * position descending, then opening position ascending, before calling this
+	 * method) and every tag is compared with a reference tag, which starts out as
+	 * the first tag:
+	 * <ul>
+	 * <li>a tag that ends at or before the start of the reference tag, or that
+	 * lies completely inside it, is fine and becomes the new reference tag;</li>
+	 * <li>otherwise the two tags overlap. If the current tag's opening markup
+	 * starts with {@code <p}, the reference tag is removed from the list. If not,
+	 * the reference tag is rewritten into an {@code <error type="...">} tag (the
+	 * type being the reference tag's type) that starts where the current tag
+	 * ends. The reference tag is not advanced in this case.</li>
+	 * </ul>
+	 *
+	 * @param tags the tags to check; the list and the tags in it are modified in place
+	 */
+	protected void checkTags(List<AnnotationTag> tags) {
+		AnnotationTag lastTag = null;
+
+		List<AnnotationTag> toBeRemoved = new ArrayList<AnnotationTag>();
+		for (AnnotationTag tag : tags) {
+			if (lastTag == null)
+			{
+				lastTag = tag;
+				continue;
+			}
+			// Closing positions are exclusive (see getParagraphTags and the ">=" test in
+			// tagText), so a tag ending exactly where the reference tag starts merely
+			// touches it and must not be reported as an overlap.
+			boolean endsBeforeLast = tag.getClosingPosition() <= lastTag.getPosition();
+			boolean nestedInLast = tag.getPosition() >= lastTag.getPosition() && tag.getClosingPosition() <= lastTag.getClosingPosition();
+			if (endsBeforeLast || nestedInLast) {
+				lastTag = tag;
+				continue;
+			}
+			if (tag.getTag().startsWith("<p"))
+				toBeRemoved.add(lastTag);
+			else {
+				lastTag.setTag("<error type=\"" + lastTag.getType() + "\">");
+				lastTag.setClosingTag("</error>");
+				lastTag.setPosition(tag.getClosingPosition());
+			}
+		}
+
+		tags.removeAll(toBeRemoved);
+	}
+
+	/**
+	 * Builds a copy of {@code text} with the markup of all tags inserted at their
+	 * positions and all text in between XML-escaped, including the text in front
+	 * of the first tag.
+	 * <p>
+	 * The tags are sorted by closing position (descending) and then by opening
+	 * position (ascending), passed through {@link #checkTags(List)} and then
+	 * processed from the end of the text towards its start. A stack holds the
+	 * tags whose closing markup has been written but whose opening markup has
+	 * not. Tags whose opening or closing position lies outside the text are
+	 * logged and skipped.
+	 *
+	 * @param text the plain text to annotate; not modified
+	 * @param tags the tags to insert; the list is sorted and may be modified by
+	 *             {@link #checkTags(List)}
+	 * @return a new buffer holding the escaped text with the markup inserted
+	 */
+	protected StringBuffer tagText(StringBuffer text, List<AnnotationTag> tags) {
+		Collections.sort(tags, new Comparator<AnnotationTag>() {
+
+			@Override
+			public int compare(AnnotationTag o1, AnnotationTag o2) {
+				// later closing position first; on a tie the earlier opening position first
+				int byClosing = compareInts(o2.getClosingPosition(), o1.getClosingPosition());
+				if (byClosing != 0)
+					return byClosing;
+				return compareInts(o1.getPosition(), o2.getPosition());
+			}
+		});
+
+		checkTags(tags);
+
+		Stack<AnnotationTag> tagStack = new Stack<AnnotationTag>();
+		StringBuffer finalText = new StringBuffer();
+
+		// start of the part of the text that has already been copied into finalText
+		int end = text.length();
+		for (AnnotationTag tag : tags) {
+			if (!fitsInto(text, tag)) {
+				configuration.getLogger().logMessage(COMPONENT_NAME,"Couldn't insert into summary: " + tag.getTag() + " at " + tag.getPosition());
+				continue;
+			}
+			// write the opening markup of every stacked tag that starts at or after the end of this tag
+			while (!tagStack.isEmpty() && tagStack.peek().getPosition() >= tag.getClosingPosition()) {
+				AnnotationTag topTag = tagStack.pop();
+				end = prependSegment(finalText, text, topTag.getPosition(), end, topTag.getTag());
+			}
+			end = prependSegment(finalText, text, tag.getClosingPosition(), end, tag.getClosingTag());
+			tagStack.push(tag);
+		}
+		while (!tagStack.isEmpty()) {
+			AnnotationTag topTag = tagStack.pop();
+			end = prependSegment(finalText, text, topTag.getPosition(), end, topTag.getTag());
+		}
+		// the text in front of the first tag needs escaping just like every other segment
+		finalText.insert(0, StringEscapeUtils.escapeXml(text.substring(0, end)));
+		return finalText;
+	}
+
+	/**
+	 * Prepends the XML-escaped text between {@code from} and {@code end} (if
+	 * {@code from} lies before {@code end}) and then the given markup to
+	 * {@code target}.
+	 *
+	 * @return {@code from}, the new start of the part of the text already copied
+	 */
+	private static int prependSegment(StringBuffer target, StringBuffer text, int from, int end, String markup) {
+		if (from < end) {
+			target.insert(0, StringEscapeUtils.escapeXml(text.substring(from, end)));
+		}
+		target.insert(0, markup);
+		return from;
+	}
+
+	/** Tells whether both the opening and the closing position of the tag lie within the text. */
+	private static boolean fitsInto(StringBuffer text, AnnotationTag tag) {
+		int length = text.length();
+		return tag.getPosition() >= 0 && tag.getPosition() <= length
+				&& tag.getClosingPosition() >= 0 && tag.getClosingPosition() <= length;
+	}
+
+	/** Compares two ints without the overflow risk of a subtraction. */
+	private static int compareInts(int a, int b) {
+		return (a < b) ? -1 : ((a == b) ? 0 : 1);
+	}
+
+
+	/**
+	 * Creates one {@code <p type="...">} / {@code </p>} tag pair for every
+	 * paragraph of interest of the given text part. The first paragraph is placed
+	 * at offset 0; every following paragraph starts one character after the end
+	 * of the previous one.
+	 *
+	 * @param part the text part whose paragraphs are to be tagged
+	 * @return the paragraph tags, in the order of the paragraphs
+	 */
+	protected List<AnnotationTag> getParagraphTags(TextPart part) {
+		List<AnnotationTag> tags = new ArrayList<AnnotationTag>();
+		int offset = 0;
+
+		// NOTE(review): the offsets assume that the paragraphs of interest make up the
+		// whole text of the part, in order and joined by a single "\n" - confirm.
+		for (Paragraph para : part.getParagraphsOfInterest()) {
+			String openingMarkup = "<p type=\"" + para.getParagraphType() + "\">";
+			int length = para.getParagraphText().length();
+			tags.add(new AnnotationTag(openingMarkup, offset, "</p>", offset + length, "paragraph"));
+			// add 1 for "\n"
+			offset += length + 1;
+		}
+
+		return tags;
+	}
+
+}