Mercurial > hg > LGDataverses
comparison src/main/java/lia/analysis/positional/PositionalPorterStopAnalyzer.java @ 0:fcb8807fbd84
First commit!
| author | "jurzua <jurzua@mpiwg-berlin.mpg.de>" |
|---|---|
| date | Tue, 10 Mar 2015 15:15:30 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fcb8807fbd84 |
|---|---|
| 1 /* | |
| 2 Copyright (C) 2005-2012, by the President and Fellows of Harvard College. | |
| 3 | |
| 4 Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 you may not use this file except in compliance with the License. | |
| 6 You may obtain a copy of the License at | |
| 7 | |
| 8 http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 | |
| 10 Unless required by applicable law or agreed to in writing, software | |
| 11 distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 See the License for the specific language governing permissions and | |
| 14 limitations under the License. | |
| 15 | |
| 16 Dataverse Network - A web application to share, preserve and analyze research data. | |
| 17 Developed at the Institute for Quantitative Social Science, Harvard University. | |
| 18 Version 3.0. | |
| 19 */ | |
| 20 package lia.analysis.positional; | |
| 21 | |
| 22 import org.apache.lucene.analysis.Analyzer; | |
| 23 import org.apache.lucene.analysis.LowerCaseTokenizer; | |
| 24 import org.apache.lucene.analysis.PorterStemFilter; | |
| 25 import org.apache.lucene.analysis.StopAnalyzer; | |
| 26 import org.apache.lucene.analysis.StopFilter; | |
| 27 import org.apache.lucene.analysis.TokenStream; | |
| 28 | |
| 29 import java.io.Reader; | |
| 30 import java.util.Set; | |
| 31 | |
| 32 public class PositionalPorterStopAnalyzer extends Analyzer { | |
| 33 private Set stopWords; | |
| 34 | |
| 35 public PositionalPorterStopAnalyzer() { | |
| 36 this(StopAnalyzer.ENGLISH_STOP_WORDS_SET); | |
| 37 } | |
| 38 | |
| 39 public PositionalPorterStopAnalyzer(Set stopWords) { | |
| 40 this.stopWords = stopWords; | |
| 41 } | |
| 42 | |
| 43 public TokenStream tokenStream(String fieldName, Reader reader) { | |
| 44 StopFilter stopFilter = new StopFilter(true, | |
| 45 new LowerCaseTokenizer(reader), | |
| 46 stopWords); | |
| 47 stopFilter.setEnablePositionIncrements(true); | |
| 48 return new PorterStemFilter(stopFilter); | |
| 49 } | |
| 50 } |
