Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.donatus.analysis; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import java.io.IOException; import java.util.Set; public final class DonatusStemFilter extends TokenFilter { private DonatusAnalyzer analyzer; private Token token = null; private DonatusStemmer stemmer = null; private Set exclusionSet = null; public DonatusStemFilter(TokenStream in) { super(in); stemmer = new DonatusStemmer(); } public DonatusStemFilter(DonatusAnalyzer analyzer, TokenStream in, Set exclusionSet) { this(in); this.analyzer = analyzer; this.exclusionSet = exclusionSet; this.stemmer.setLanguage(analyzer.getLanguage()); } public final Token next() throws IOException { if (( token = input.next()) == null) { return null; } else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) { return token; } else { String s = stemmer.stem(token.termText()); // If not stemmed, dont waste the time creating a new token if ( !s.equals( token.termText() ) ) { return new Token( s, token.startOffset(), token.endOffset(), token.type() ); } return token; } } public void setStemmer(DonatusStemmer stemmer) { if ( stemmer != null ) { this.stemmer = stemmer; } } public void setExclusionSet(Set exclusionSet) { this.exclusionSet = exclusionSet; } }