Mercurial > hg > anteater
view src/de/mpiwg/anteater/ml/impl/ICUTextParser.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.anteater.ml.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.ibm.icu.text.BreakIterator;

import de.mpiwg.anteater.ml.ITextParser;

/**
 * {@link ITextParser} implementation that uses ICU's {@link BreakIterator}
 * to split text into sentences.
 *
 * <p>Only {@link #getSentences(String)} is implemented. Subject and
 * abbreviation extraction are still stubs and always yield empty lists.
 */
public class ICUTextParser implements ITextParser {

    /**
     * Splits the given text into sentences using an ICU sentence
     * {@link BreakIterator} configured for {@link Locale#US}.
     *
     * <p>Each returned element is the substring between two consecutive
     * boundaries reported by the iterator, taken verbatim (no trimming).
     *
     * @param text the text to split; may be {@code null}
     * @return the sentences in order of appearance; an empty list if
     *         {@code text} is {@code null} or the iterator reports no
     *         boundary after the first one; never {@code null}
     */
    @Override
    public List<String> getSentences(String text) {
        List<String> sentences = new ArrayList<String>();
        // Guard against null: handing it to the iterator would fail with a
        // NullPointerException deep inside ICU instead of a usable result.
        if (text == null) {
            return sentences;
        }

        BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
        iterator.setText(text);

        // Walk consecutive boundary pairs; each [start, end) span is one sentence.
        int start = iterator.first();
        for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
            sentences.add(text.substring(start, end));
        }
        return sentences;
    }

    /**
     * Not implemented yet.
     *
     * @param sentence the sentence to analyse (currently ignored)
     * @return always an empty list, so callers can iterate the result safely
     */
    @Override
    public List<Word> getSubjects(String sentence) {
        // TODO implement subject extraction
        return new ArrayList<Word>();
    }

    /**
     * Not implemented yet.
     *
     * @param sentence the sentence to analyse (currently ignored)
     * @return always an empty list, so callers can iterate the result safely
     */
    @Override
    public List<Word> getAbbreviations(String sentence) {
        // TODO implement abbreviation extraction
        return new ArrayList<Word>();
    }
}