Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.donatus.analysis; | |
2 | |
3 import org.apache.lucene.analysis.Token; | |
4 import org.apache.lucene.analysis.TokenFilter; | |
5 import org.apache.lucene.analysis.TokenStream; | |
6 | |
7 import java.io.IOException; | |
8 import java.util.Set; | |
9 | |
10 public final class DonatusStemFilter extends TokenFilter { | |
11 private DonatusAnalyzer analyzer; | |
12 private Token token = null; | |
13 private DonatusStemmer stemmer = null; | |
14 private Set exclusionSet = null; | |
15 | |
16 public DonatusStemFilter(TokenStream in) { | |
17 super(in); | |
18 stemmer = new DonatusStemmer(); | |
19 } | |
20 | |
21 public DonatusStemFilter(DonatusAnalyzer analyzer, TokenStream in, Set exclusionSet) { | |
22 this(in); | |
23 this.analyzer = analyzer; | |
24 this.exclusionSet = exclusionSet; | |
25 this.stemmer.setLanguage(analyzer.getLanguage()); | |
26 } | |
27 | |
28 public final Token next() throws IOException { | |
29 if (( token = input.next()) == null) { | |
30 return null; | |
31 } else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) { | |
32 return token; | |
33 } else { | |
34 String s = stemmer.stem(token.termText()); | |
35 // If not stemmed, dont waste the time creating a new token | |
36 if ( !s.equals( token.termText() ) ) { | |
37 return new Token( s, token.startOffset(), | |
38 token.endOffset(), token.type() ); | |
39 } | |
40 return token; | |
41 } | |
42 } | |
43 | |
44 public void setStemmer(DonatusStemmer stemmer) { | |
45 if ( stemmer != null ) { | |
46 this.stemmer = stemmer; | |
47 } | |
48 } | |
49 | |
50 public void setExclusionSet(Set exclusionSet) { | |
51 this.exclusionSet = exclusionSet; | |
52 } | |
53 } |