Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemFilter.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

import java.io.IOException;
import java.util.Set;

/**
 * Token filter that replaces the term text of each incoming token with the
 * result of {@link MpdlStemmer#stem}, unless the term is contained in the
 * configured exclusion set.
 *
 * <p>Tokens whose term is excluded, or whose stem equals the original term,
 * are passed through unchanged.
 */
public final class MpdlStemFilter extends TokenFilter {

  /** Analyzer handed to the three-argument constructor; only stored here. */
  private MpdlMorphAnalyzer analyzer;

  /** Stemmer applied to every non-excluded term; never null after construction. */
  private MpdlStemmer stemmer = null;

  /** Terms that must not be stemmed; {@code null} means nothing is excluded. */
  private Set exclusionSet = null;

  /**
   * Creates a filter over {@code in} with a freshly constructed
   * {@link MpdlStemmer} and no exclusion set.
   *
   * @param in the token stream to read from
   */
  public MpdlStemFilter(TokenStream in) {
    super(in);
    stemmer = new MpdlStemmer();
  }

  /**
   * Creates a filter over {@code in} whose stemmer is configured with the
   * language reported by {@code analyzer}.
   *
   * @param analyzer     analyzer whose {@code getLanguage()} value is passed to
   *                     the stemmer; must not be null
   * @param in           the token stream to read from
   * @param exclusionSet terms to pass through unstemmed, or {@code null} for none
   */
  public MpdlStemFilter(MpdlMorphAnalyzer analyzer, TokenStream in, Set exclusionSet) {
    this(in);
    this.analyzer = analyzer;
    this.exclusionSet = exclusionSet;
    this.stemmer.setLanguage(analyzer.getLanguage());
  }

  /**
   * Returns the next token of the underlying stream, stemmed if applicable.
   *
   * @return the next token, or {@code null} when the underlying stream is exhausted
   * @throws IOException if reading from the underlying stream fails
   */
  public final Token next() throws IOException {
    Token current = input.next();
    if (current == null) {
      return null;
    }

    String term = current.termText();
    if (exclusionSet != null && exclusionSet.contains(term)) {
      return current;
    }

    String stem = stemmer.stem(term);
    if (stem.equals(term)) {
      // Stemming changed nothing, so avoid allocating a replacement token.
      return current;
    }

    Token stemmed = new Token(stem, current.startOffset(), current.endOffset(), current.type());
    // Carry over the position increment; a fresh Token would otherwise fall
    // back to the default and lose gaps left by upstream filters.
    stemmed.setPositionIncrement(current.getPositionIncrement());
    return stemmed;
  }

  /**
   * Replaces the stemmer used by this filter. A {@code null} argument is
   * ignored and the current stemmer is kept.
   *
   * @param stemmer the new stemmer, or {@code null} to leave it unchanged
   */
  public void setStemmer(MpdlStemmer stemmer) {
    if (stemmer != null) {
      this.stemmer = stemmer;
    }
  }

  /**
   * Replaces the set of terms that are passed through without stemming.
   *
   * @param exclusionSet the new exclusion set, or {@code null} to exclude nothing
   */
  public void setExclusionSet(Set exclusionSet) {
    this.exclusionSet = exclusionSet;
  }
}