comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.donatus.analysis;
2
3 import org.apache.lucene.analysis.Token;
4 import org.apache.lucene.analysis.TokenFilter;
5 import org.apache.lucene.analysis.TokenStream;
6
7 import java.io.IOException;
8 import java.util.Set;
9
10 public final class DonatusStemFilter extends TokenFilter {
11 private DonatusAnalyzer analyzer;
12 private Token token = null;
13 private DonatusStemmer stemmer = null;
14 private Set exclusionSet = null;
15
16 public DonatusStemFilter(TokenStream in) {
17 super(in);
18 stemmer = new DonatusStemmer();
19 }
20
21 public DonatusStemFilter(DonatusAnalyzer analyzer, TokenStream in, Set exclusionSet) {
22 this(in);
23 this.analyzer = analyzer;
24 this.exclusionSet = exclusionSet;
25 this.stemmer.setLanguage(analyzer.getLanguage());
26 }
27
28 public final Token next() throws IOException {
29 if (( token = input.next()) == null) {
30 return null;
31 } else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
32 return token;
33 } else {
34 String s = stemmer.stem(token.termText());
35 // If not stemmed, dont waste the time creating a new token
36 if ( !s.equals( token.termText() ) ) {
37 return new Token( s, token.startOffset(),
38 token.endOffset(), token.type() );
39 }
40 return token;
41 }
42 }
43
44 public void setStemmer(DonatusStemmer stemmer) {
45 if ( stemmer != null ) {
46 this.stemmer = stemmer;
47 }
48 }
49
50 public void setExclusionSet(Set exclusionSet) {
51 this.exclusionSet = exclusionSet;
52 }
53 }