Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizerAnalyzer.java @ 16:257f67be5c00
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Sep 2011 16:40:57 +0200 |
parents | 2396a569e446 |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;

/**
 * Lucene analyzer that tokenizes text with an {@link MpdlTokenizer} and then
 * passes the tokens through an {@link MpdlFilter} and a {@link LowerCaseFilter}.
 *
 * <p>The tokenizer is configured with the language and the normalizer held by
 * this analyzer.
 */
public class MpdlTokenizerAnalyzer extends Analyzer {
  /** Language handed to every tokenizer created by this analyzer. */
  protected String language = MpdlConstants.DEFAULT_LANGUAGE;

  /** Normalizer handed to every tokenizer created by this analyzer. */
  protected MpdlNormalizer normalizer = null;

  /**
   * Creates an analyzer for the given language using a default normalizer
   * built for that language.
   *
   * @param language language passed to the tokenizer and to the default normalizer
   */
  public MpdlTokenizerAnalyzer(String language) {
    this.language = language;
    this.normalizer = new MpdlNormalizer(language); // default normalizer
  }

  /**
   * Creates an analyzer for the given language using a caller-supplied normalizer.
   *
   * @param normalizer normalizer passed to the tokenizer
   * @param language   language passed to the tokenizer
   */
  public MpdlTokenizerAnalyzer(MpdlNormalizer normalizer, String language) {
    this.language = language;
    this.normalizer = normalizer;
  }

  /**
   * Builds the analysis chain for the given reader.
   *
   * @param fieldName name of the field being analyzed; not used by this analyzer
   * @param reader    source of the text to tokenize
   * @return the token stream: tokenizer, then {@link MpdlFilter}, then {@link LowerCaseFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return buildTokenStream(reader);
  }

  /**
   * Tokenizes a string with the same analysis chain as {@link #tokenStream(String, Reader)}
   * and collects all resulting tokens.
   *
   * @param inputString text to tokenize; {@code null} yields an empty list
   * @return the tokens in the order the stream produced them (never {@code null})
   * @throws ApplicationException if reading from the token stream fails with an {@link IOException}
   */
  public ArrayList<Token> getToken(String inputString) throws ApplicationException {
    ArrayList<Token> tokens = new ArrayList<Token>();
    if (inputString == null) {
      // Nothing to tokenize; avoids a bare NullPointerException from StringReader.
      return tokens;
    }
    TokenStream stream = buildTokenStream(new StringReader(inputString));
    try {
      for (Token current = stream.next(); current != null; current = stream.next()) {
        tokens.add(current);
      }
    } catch (IOException e) {
      throw new ApplicationException(e);
    } finally {
      // Always release the stream, on the normal path as well as after a failure.
      closeQuietly(stream);
    }
    return tokens;
  }

  /** Single definition of the chain shared by tokenStream and getToken. */
  private TokenStream buildTokenStream(Reader reader) {
    TokenStream chain = new MpdlTokenizer(reader, language, normalizer);
    chain = new MpdlFilter(chain); // filter to remove the hyphen in a token etc.
    chain = new LowerCaseFilter(chain);
    return chain;
  }

  /** Closes the stream, ignoring a failure of close() itself. */
  private static void closeQuietly(TokenStream stream) {
    try {
      stream.close();
    } catch (IOException ignored) {
      // The tokens have already been collected (or the original error is
      // already propagating); a failing close() must not replace that outcome.
    }
  }
}