Mercurial > hg > mpdl-group
diff software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java @ 14:5df60f24e997
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 29 Aug 2011 17:40:02 +0200 |
parents | 1ec29fdd0db8 |
children |
line wrap: on
line diff
--- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java Tue Apr 19 16:51:59 2011 +0200 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java Mon Aug 29 17:40:02 2011 +0200 @@ -1,12 +1,11 @@ -/* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ +/* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */ /* * Normalization rules for English text * [this is a JFlex specification] * * Wolfgang Schmidle - * version 0.96 - * 2011-02-21 + * version 2011-07-12 * */ @@ -16,7 +15,7 @@ /** * This class is a scanner generated by * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 - * on 22.02.11 12:03 from the specification file + * on 21.07.11 11:22 from the specification file * <tt>MpdlNormalizerLexEN.lex</tt> */ public class MpdlNormalizerLexEN { @@ -40,14 +39,16 @@ * l is of the form l = 2*k, k a non negative integer */ private static final int ZZ_LEXSTATE[] = { - 0, 0, 1, 1, 2, 2, 1, 1 + 0, 0, 1, 1, 2, 2, 3, 3 }; /** * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\12\0\1\1\65\0\1\3\u013e\0\1\2\ufe80\0"; + "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+ + "\40\0\1\1\2\0\1\1\20\0\1\1\5\0\1\1\1\0\1\1"+ + "\u0101\0\1\4\ufe80\0"; /** * Translates characters to character classes @@ -60,10 +61,10 @@ private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\3\0\1\1\1\2\1\3\1\4\1\5"; + "\4\0\2\1\1\2\1\3\1\4\1\5\1\6"; private static int [] zzUnpackAction() { - int [] result = new int[8]; + int [] result = new int[11]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -88,10 +89,11 @@ private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\4\0\10\0\14\0\14\0\14\0\14\0\14"; + "\0\0\0\6\0\14\0\22\0\30\0\36\0\30\0\30"+ + "\0\30\0\30\0\30"; private static int [] zzUnpackRowMap() { - int [] result = new int[8]; + int [] result = new int[11]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -114,11 +116,13 @@ private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\4\1\0\1\4\1\5\1\4\1\6\1\7\1\5"+ - "\1\4\1\10\1\7\1\5\4\0"; + "\1\5\1\6\1\5\1\0\1\5\1\7\1\5\1\6"+ + "\1\5\1\10\1\11\1\7\1\5\1\6\1\5\1\12"+ + "\1\11\1\7\1\5\1\6\1\5\1\13\1\11\1\7"+ + "\10\0\1\5\3\0"; private static int [] zzUnpackTrans() { - int [] result = new int[16]; + int [] result = new int[36]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -156,10 +160,10 @@ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\3\0\5\11"; + "\4\0\1\11\1\1\5\11"; private static int [] zzUnpackAttribute() { - int [] result = new int[8]; + int [] result = new int[11]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -236,6 +240,8 @@ normalized += norm; } + private static final String LB = "[\u002d\u00ad] "; + /** * Creates a new scanner @@ -267,7 +273,7 @@ char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 14) { + while (i < 46) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -537,29 +543,36 @@ case 5: { switch (problem) { case 1: return ""; - default: return normalized; + default: return normalized.replaceAll(LB, ""); } } - case 6: break; + case 7: break; case 2: { problem = 1; add(yytext()); } - case 7: break; + case 8: break; case 4: { add("s"); } - case 8: break; + case 9: break; case 3: { switch (problem) { case 1: return original; default: return normalized; } } - case 9: break; + case 10: break; + case 6: + { switch (problem) { + case 1: return original; + default: return normalized.replaceAll(LB, "").toLowerCase(); + } + } + case 11: break; case 1: { add(yytext()); } - case 10: break; + case 12: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true;