Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/ml/impl/ICUTextParser.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:036535fcd179 |
---|---|
1 package de.mpiwg.anteater.ml.impl; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.List; | |
5 import java.util.Locale; | |
6 | |
7 import com.ibm.icu.text.BreakIterator; | |
8 | |
9 import de.mpiwg.anteater.ml.ITextParser; | |
10 | |
11 public class ICUTextParser implements ITextParser { | |
12 | |
13 @Override | |
14 public List<String> getSentences(String text) { | |
15 BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); | |
16 iterator.setText(text); | |
17 | |
18 List<String> sentences = new ArrayList<String>(); | |
19 | |
20 int start = iterator.first(); | |
21 for (int end = iterator.next(); | |
22 end != BreakIterator.DONE; | |
23 start = end, end = iterator.next()) { | |
24 sentences.add(text.substring(start,end)); | |
25 } | |
26 | |
27 return sentences; | |
28 } | |
29 | |
30 @Override | |
31 public List<Word> getSubjects(String sentence) { | |
32 // TODO Auto-generated method stub | |
33 return null; | |
34 } | |
35 | |
36 @Override | |
37 public List<Word> getAbbreviations(String sentence) { | |
38 // TODO Auto-generated method stub | |
39 return null; | |
40 } | |
41 | |
42 } |