mpdl-group: software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizer.java comparison

comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizer.java @ 16:257f67be5c00

diverse Fehlerbehebungen

author	Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date	Tue, 27 Sep 2011 16:40:57 +0200
parents	5df60f24e997
children

comparison

Legend: equal, deleted, inserted, replaced

-:e99964f390e4
+:257f67be5c00
 public class MpdlTokenizer extends Tokenizer {
 private static final int MAX_WORD_LEN = 255;
 private static final int IO_BUFFER_SIZE = 1024;
 private static String SPECIAL_NOT_WORD_DELIM_SYMBOL = new Character('\u2424').toString();
-private boolean regWithoutSemicolon = false;  // hack: in some cases there are words with a semicolon, then the normalization should be without semicolon
 private boolean isInNotWordDelimMode = false;
 private int offset = 0, bufferIndex = 0, dataLen = 0;
 private char[] buffer = new char[MAX_WORD_LEN];
 private char[] ioBuffer = new char[IO_BUFFER_SIZE];
 private MpdlNormalizer normalizer;
 super(input);
 this.language = language;
 this.normalizer = normalizer;
 }
-public void setRegWithoutSemicolon(boolean regWithoutSemicolon) {
-this.regWithoutSemicolon = regWithoutSemicolon;
-}
-public boolean isRegWithoutSemicolon() {
-return regWithoutSemicolon;
-}
 /** Returns true iff a character should be included in a token.  This
 * tokenizer generates as tokens adjacent sequences of characters which
 * satisfy this predicate.  Characters for which this is false are used to
 * define token boundaries and are not included in tokens. */
 protected boolean isTokenChar(char c) {
 boolean isTokenChar = true;
-if (isRegWithoutSemicolon() && c == ';')  // hack: special case for regularization and normalization; feel free to remove it later
-return true;
 switch (c) {
 case ' ': isTokenChar = false; break;
 case '.': isTokenChar = false; break;
 case ',': isTokenChar = false; break;
 case '!': isTokenChar = false; break;

Mercurial > hg > mpdl-group

comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizer.java @ 16:257f67be5c00