|
0
|
1 /*
|
|
|
2 Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
|
|
|
3
|
|
|
4 Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
5 you may not use this file except in compliance with the License.
|
|
|
6 You may obtain a copy of the License at
|
|
|
7
|
|
|
8 http://www.apache.org/licenses/LICENSE-2.0
|
|
|
9
|
|
|
10 Unless required by applicable law or agreed to in writing, software
|
|
|
11 distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
13 See the License for the specific language governing permissions and
|
|
|
14 limitations under the License.
|
|
|
15
|
|
|
16 Dataverse Network - A web application to share, preserve and analyze research data.
|
|
|
17 Developed at the Institute for Quantitative Social Science, Harvard University.
|
|
|
18 Version 3.0.
|
|
|
19 */
|
|
|
20 package lia.analysis.positional;
|
|
|
21
|
|
|
22 import org.apache.lucene.analysis.Analyzer;
|
|
|
23 import org.apache.lucene.analysis.LowerCaseTokenizer;
|
|
|
24 import org.apache.lucene.analysis.PorterStemFilter;
|
|
|
25 import org.apache.lucene.analysis.StopAnalyzer;
|
|
|
26 import org.apache.lucene.analysis.StopFilter;
|
|
|
27 import org.apache.lucene.analysis.TokenStream;
|
|
|
28
|
|
|
29 import java.io.Reader;
|
|
|
30 import java.util.Set;
|
|
|
31
|
|
|
32 public class PositionalPorterStopAnalyzer extends Analyzer {
|
|
|
33 private Set stopWords;
|
|
|
34
|
|
|
35 public PositionalPorterStopAnalyzer() {
|
|
|
36 this(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
|
|
37 }
|
|
|
38
|
|
|
39 public PositionalPorterStopAnalyzer(Set stopWords) {
|
|
|
40 this.stopWords = stopWords;
|
|
|
41 }
|
|
|
42
|
|
|
43 public TokenStream tokenStream(String fieldName, Reader reader) {
|
|
|
44 StopFilter stopFilter = new StopFilter(true,
|
|
|
45 new LowerCaseTokenizer(reader),
|
|
|
46 stopWords);
|
|
|
47 stopFilter.setEnablePositionIncrements(true);
|
|
|
48 return new PorterStemFilter(stopFilter);
|
|
|
49 }
|
|
|
50 }
|