view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.donatus.analysis;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

import java.io.IOException;
import java.util.Set;

/**
 * Token filter that replaces the term text of each incoming token with the
 * stem computed by a {@link DonatusStemmer}.
 *
 * <p>Tokens whose term text is contained in the optional exclusion set are
 * passed through untouched, as are tokens whose stem equals their term text.
 */
public final class DonatusStemFilter extends TokenFilter {
  // Analyzer this filter was created for; only stored here, not read by this class.
  private DonatusAnalyzer analyzer;
  // Stemmer applied to every token that is not excluded; never null after construction.
  private DonatusStemmer stemmer = null;
  // Terms that must not be stemmed; null means "no exclusions".
  private Set exclusionSet = null;

  /**
   * Creates a filter over the given stream using a freshly created
   * {@link DonatusStemmer} and no exclusion set.
   *
   * @param in the token stream to read tokens from
   */
  public DonatusStemFilter(TokenStream in) {
    super(in);
    stemmer = new DonatusStemmer();
  }

  /**
   * Creates a filter over the given stream whose stemmer is configured with
   * the language reported by the analyzer.
   *
   * @param analyzer     the analyzer supplying the language; must not be null
   * @param in           the token stream to read tokens from
   * @param exclusionSet terms to pass through unstemmed, or null for none
   */
  public DonatusStemFilter(DonatusAnalyzer analyzer, TokenStream in, Set exclusionSet) {
    this(in);
    this.analyzer = analyzer;
    this.exclusionSet = exclusionSet;
    this.stemmer.setLanguage(analyzer.getLanguage());
  }

  /**
   * Returns the next token of the underlying stream, stemmed where applicable.
   *
   * @return the original token if it is excluded or its stem does not differ
   *         from its term text; otherwise a new token carrying the stem with
   *         the original offsets, type and position increment; null when the
   *         underlying stream is exhausted
   * @throws IOException if the underlying stream fails
   */
  public final Token next() throws IOException {
    Token current = input.next();
    if (current == null) {
      return null;
    }

    // Read the term text once and reuse it for the lookup, the stemmer and the comparison.
    String term = current.termText();
    if (exclusionSet != null && exclusionSet.contains(term)) {
      return current;
    }

    String stem = stemmer.stem(term);
    // If nothing was stemmed, don't waste time creating a new token.
    // A null stem is treated as "not stemmed" instead of failing with a NullPointerException.
    if (stem == null || stem.equals(term)) {
      return current;
    }

    Token stemmed = new Token(stem, current.startOffset(), current.endOffset(), current.type());
    // The Token constructor does not take a position increment, so copy it explicitly;
    // otherwise position gaps recorded on the original token would be lost.
    stemmed.setPositionIncrement(current.getPositionIncrement());
    // NOTE(review): any other per-token data (e.g. payloads, if the Lucene version in
    // use supports them) is not copied - confirm whether that is needed.
    return stemmed;
  }

  /**
   * Replaces the stemmer used by this filter. A null argument is ignored and
   * the current stemmer is kept.
   *
   * @param stemmer the new stemmer, or null to leave the filter unchanged
   */
  public void setStemmer(DonatusStemmer stemmer) {
    if (stemmer != null) {
      this.stemmer = stemmer;
    }
  }

  /**
   * Sets the terms that are passed through without stemming.
   *
   * @param exclusionSet the terms to exclude, or null to disable exclusions
   */
  public void setExclusionSet(Set exclusionSet) {
    this.exclusionSet = exclusionSet;
  }
}