view src/de/mpiwg/anteater/ml/impl/ICUTextParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line source

package de.mpiwg.anteater.ml.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.ibm.icu.text.BreakIterator;

import de.mpiwg.anteater.ml.ITextParser;

/**
 * {@link ITextParser} implementation that splits text into sentences using
 * the ICU4J {@link BreakIterator}.
 *
 * <p>Only sentence splitting is implemented. {@link #getSubjects(String)} and
 * {@link #getAbbreviations(String)} are placeholders that currently report
 * no results.</p>
 */
public class ICUTextParser implements ITextParser {

	/** Locale whose sentence-break rules are used by {@link #getSentences(String)}. */
	private final Locale locale;

	/**
	 * Creates a parser that applies the {@link Locale#US} sentence-break rules.
	 */
	public ICUTextParser() {
		this(Locale.US);
	}

	/**
	 * Creates a parser that applies the sentence-break rules of the given locale.
	 *
	 * @param locale locale handed to {@link BreakIterator#getSentenceInstance(Locale)};
	 *               must not be {@code null}
	 * @throws IllegalArgumentException if {@code locale} is {@code null}
	 */
	public ICUTextParser(Locale locale) {
		if (locale == null) {
			throw new IllegalArgumentException("locale must not be null");
		}
		this.locale = locale;
	}

	/**
	 * Splits the given text into sentences.
	 *
	 * <p>Each returned element is the exact substring between two consecutive
	 * boundaries reported by the break iterator; nothing is trimmed or
	 * otherwise normalized.</p>
	 *
	 * @param text the text to split; may be {@code null} or empty
	 * @return the sentences in document order; an empty list (never
	 *         {@code null}) if {@code text} is {@code null} or empty
	 */
	@Override
	public List<String> getSentences(String text) {
		List<String> sentences = new ArrayList<String>();

		// Guard against null: BreakIterator.setText(String) cannot handle it.
		if (text == null || text.length() == 0) {
			return sentences;
		}

		// A fresh iterator per call keeps this method free of shared mutable state.
		BreakIterator iterator = BreakIterator.getSentenceInstance(locale);
		iterator.setText(text);

		// Walk consecutive boundary pairs [start, end) until the iterator is exhausted.
		int start = iterator.first();
		for (int end = iterator.next();
		    end != BreakIterator.DONE;
		    start = end, end = iterator.next()) {
			sentences.add(text.substring(start, end));
		}

		return sentences;
	}

	/**
	 * Not implemented yet.
	 *
	 * @param sentence the sentence to analyze (currently ignored)
	 * @return an empty list, so callers can iterate the result safely
	 */
	@Override
	public List<Word> getSubjects(String sentence) {
		// TODO implement subject extraction; until then report "nothing found"
		// instead of null so that callers do not have to null-check.
		return new ArrayList<Word>();
	}

	/**
	 * Not implemented yet.
	 *
	 * @param sentence the sentence to analyze (currently ignored)
	 * @return an empty list, so callers can iterate the result safely
	 */
	@Override
	public List<Word> getAbbreviations(String sentence) {
		// TODO implement abbreviation extraction; until then report "nothing found"
		// instead of null so that callers do not have to null-check.
		return new ArrayList<Word>();
	}

}