comparison src/de/mpiwg/anteater/ml/impl/ICUTextParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 package de.mpiwg.anteater.ml.impl;
2
3 import java.util.ArrayList;
4 import java.util.List;
5 import java.util.Locale;
6
7 import com.ibm.icu.text.BreakIterator;
8
9 import de.mpiwg.anteater.ml.ITextParser;
10
11 public class ICUTextParser implements ITextParser {
12
13 @Override
14 public List<String> getSentences(String text) {
15 BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
16 iterator.setText(text);
17
18 List<String> sentences = new ArrayList<String>();
19
20 int start = iterator.first();
21 for (int end = iterator.next();
22 end != BreakIterator.DONE;
23 start = end, end = iterator.next()) {
24 sentences.add(text.substring(start,end));
25 }
26
27 return sentences;
28 }
29
30 @Override
31 public List<Word> getSubjects(String sentence) {
32 // TODO Auto-generated method stub
33 return null;
34 }
35
36 @Override
37 public List<Word> getAbbreviations(String sentence) {
38 // TODO Auto-generated method stub
39 return null;
40 }
41
42 }