comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizerAnalyzer.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children 2396a569e446
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
2
3 import java.io.IOException;
4 import java.io.Reader;
5 import java.io.StringReader;
6 import java.util.ArrayList;
7
8 import org.apache.lucene.analysis.Analyzer;
9 import org.apache.lucene.analysis.LowerCaseFilter;
10 import org.apache.lucene.analysis.Token;
11 import org.apache.lucene.analysis.TokenStream;
12
13 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
14 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
15
16 public class MpdlTokenizerAnalyzer extends Analyzer {
17 protected String language = MpdlConstants.DEFAULT_LANGUAGE;
18 protected MpdlNormalizer normalizer = null;
19
20 public MpdlTokenizerAnalyzer(String language) {
21 this.language = language;
22 this.normalizer = new MpdlNormalizer(language); // default normalizer
23 }
24
25 public MpdlTokenizerAnalyzer(MpdlNormalizer normalizer, String language) {
26 this.language = language;
27 this.normalizer = normalizer;
28 }
29
30 public TokenStream tokenStream(String fieldName, Reader reader) {
31 TokenStream result = new MpdlTokenizer(reader, language, normalizer);
32 result = new MpdlFilter(result); // filter to remove the hyphen in a token etc.
33 result = new LowerCaseFilter(result);
34 return result;
35 }
36
37 public ArrayList<Token> getToken(String inputString) throws ApplicationException {
38 ArrayList<Token> token = new ArrayList<Token>();
39 try {
40 Reader reader = new StringReader(inputString);
41 TokenStream result = new MpdlTokenizer(reader, language, normalizer);
42 result = new MpdlFilter(result); // filter to remove the hyphen in a token etc.
43 result = new LowerCaseFilter(result);
44 Token t = result.next();
45 while (t != null) {
46 token.add(t);
47 t = result.next();
48 }
49 } catch (IOException e) {
50 throw new ApplicationException(e);
51 }
52 return token;
53 }
54
55 }