Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | 4a3641ae14d2 |
children |
line wrap: on
line diff
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java Wed Dec 14 13:57:09 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java Tue Nov 27 12:35:19 2012 +0100 @@ -1,11 +1,11 @@ -/* The following code was generated by JFlex 1.4.3 on 05.09.11 10:34 */ +/* The following code was generated by JFlex 1.4.3 on 28.03.12 18:57 */ /* * Normalization rules for German text * [this is a JFlex specification] * * Wolfgang Schmidle - * version 2011-08-10 + * version 2011-07-12 * */ @@ -15,8 +15,8 @@ /** * This class is a scanner generated by * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 - * on 05.09.11 10:34 from the specification file - * <tt>MpdlNormalizerLexDE.lex</tt> + * on 28.03.12 18:57 from the specification file + * <tt>/Users/jwillenborg/test/jflexNew/MpdlNormalizerLexDE.lex</tt> */ public class MpdlNormalizerLexDE { @@ -27,13 +27,12 @@ private static final int ZZ_BUFFERSIZE = 16384; /** lexical states */ - public static final int SEARCH = 10; - public static final int DICT_ASCII = 6; - public static final int SEARCH_ASCII = 12; + public static final int SEARCH = 6; public static final int DICT = 4; public static final int YYINITIAL = 0; + public static final int CELEX = 8; public static final int DISP = 2; - public static final int GRIMM = 8; + public static final int GRIMM = 10; /** * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l @@ -42,18 +41,18 @@ * l is of the form l = 2*k, k a non negative integer */ private static final int ZZ_LEXSTATE[] = { - 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6 + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 }; /** * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\20"+ - "\32\4\6\0\1\11\2\4\1\5\12\4\1\13\5\4\1\7\5\4"+ - "\1\1\1\0\1\1\106\0\1\14\21\0\1\15\5\0\1\16\2\0"+ - 
"\1\17\4\0\1\14\21\0\1\15\5\0\1\16\202\0\1\6\u01e4\0"+ - "\1\12\1\0\1\10\ufc99\0"; + "\12\0\1\3\25\0\1\2\14\0\1\11\1\0\1\10\1\1\13\0"+ + "\1\6\1\0\1\7\1\0\1\24\32\4\6\0\1\15\2\4\1\5"+ + "\12\4\1\17\5\4\1\13\5\4\1\1\1\0\1\1\106\0\1\20"+ + "\21\0\1\21\5\0\1\22\2\0\1\23\4\0\1\20\21\0\1\21"+ + "\5\0\1\22\202\0\1\12\u01e4\0\1\16\1\0\1\14\ufc99\0"; /** * Translates characters to character classes @@ -66,12 +65,12 @@ private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\7\0\2\1\1\2\1\3\1\4\3\1\1\5\1\3"+ - "\3\1\1\6\1\7\1\10\1\11\1\12\1\13\1\14"+ - "\1\15\1\16\1\17"; + "\6\0\4\1\1\2\1\3\1\4\3\1\1\5\1\6"+ + "\3\3\3\1\1\7\1\10\1\11\1\12\1\13\2\0"+ + "\1\14\1\15\1\16\1\17\3\0\1\1\2\0"; private static int [] zzUnpackAction() { - int [] result = new int[30]; + int [] result = new int[41]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -96,13 +95,15 @@ private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+ - "\0\210\0\167\0\167\0\167\0\231\0\252\0\273\0\167"+ - "\0\210\0\314\0\335\0\356\0\167\0\167\0\167\0\167"+ - "\0\167\0\167\0\167\0\167\0\167\0\167"; + "\0\0\0\25\0\52\0\77\0\124\0\151\0\176\0\223"+ + "\0\250\0\275\0\176\0\176\0\176\0\322\0\347\0\374"+ + "\0\176\0\176\0\223\0\250\0\275\0\u0111\0\u0126\0\u013b"+ + "\0\176\0\176\0\176\0\176\0\176\0\u0150\0\250\0\176"+ + "\0\176\0\176\0\176\0\u0165\0\u017a\0\u018f\0\u0165\0\u01a4"+ + "\0\u01b9"; private static int [] zzUnpackRowMap() { - int [] result = new int[30]; + int [] result = new int[41]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -125,25 +126,29 @@ private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\10\1\11\1\10\1\0\1\10\1\11\1\12\1\11"+ - "\1\10\1\11\6\10\1\13\1\10\1\11\1\10\1\14"+ - 
"\1\10\1\11\1\12\1\15\1\10\1\16\1\10\1\17"+ - "\4\10\1\13\1\10\1\11\1\10\1\20\1\10\1\11"+ - "\1\12\1\15\1\10\1\16\1\10\1\17\4\10\2\13"+ - "\1\21\1\13\1\20\1\10\1\11\1\12\1\22\1\13"+ - "\1\23\1\13\1\24\1\25\1\26\1\27\1\30\1\13"+ - "\1\10\1\11\1\10\1\20\1\10\1\11\1\12\1\15"+ - "\1\10\1\16\1\10\1\17\3\10\1\31\1\13\1\10"+ - "\1\11\1\10\1\32\1\10\1\11\1\12\1\15\1\10"+ - "\1\16\1\10\1\17\4\10\2\13\1\21\1\13\1\32"+ - "\1\10\1\11\1\12\1\22\1\13\1\23\1\13\1\24"+ - "\1\25\1\26\1\27\1\30\1\13\23\0\1\10\20\0"+ - "\1\10\5\0\1\33\1\0\1\34\10\0\1\10\7\0"+ - "\1\35\20\0\1\36\10\0\1\10\5\0\1\33\1\0"+ - "\1\27\10\0\1\10\7\0\1\25\20\0\1\26\6\0"; + "\1\7\1\10\1\7\1\0\1\7\1\10\1\11\2\7"+ + "\1\12\1\13\1\10\1\7\1\10\6\7\1\14\1\7"+ + "\1\10\1\7\1\15\1\7\1\10\1\11\2\7\1\12"+ + "\1\13\1\16\1\7\1\17\1\7\1\20\4\7\1\14"+ + "\1\7\1\10\1\7\1\21\1\7\1\10\1\11\2\7"+ + "\1\12\1\13\1\16\1\7\1\17\1\7\1\20\4\7"+ + "\1\14\1\7\1\10\1\7\1\22\1\7\1\10\1\11"+ + "\2\7\1\12\1\13\1\16\1\7\1\17\1\7\1\20"+ + "\4\7\2\14\1\23\1\14\1\21\1\7\1\10\1\24"+ + "\2\14\1\25\1\13\1\26\1\14\1\27\1\14\1\30"+ + "\1\31\1\32\1\33\1\34\1\14\1\7\1\10\1\7"+ + "\1\21\1\7\1\10\1\11\2\7\1\12\1\13\1\16"+ + "\1\7\1\17\1\7\1\20\3\7\1\35\1\14\27\0"+ + "\1\7\22\0\6\36\2\0\15\36\2\0\1\7\3\0"+ + "\1\37\20\0\1\7\11\0\1\40\1\0\1\41\10\0"+ + "\1\7\13\0\1\42\24\0\1\43\10\0\1\7\11\0"+ + "\1\40\1\0\1\33\10\0\1\7\13\0\1\31\24\0"+ + "\1\32\6\0\6\36\1\0\1\44\1\45\14\36\6\0"+ + "\1\46\16\0\6\36\1\0\1\47\1\45\14\36\10\0"+ + "\1\50\14\0\6\51\2\0\23\51\1\0\1\7\15\51"; private static int [] zzUnpackTrans() { - int [] result = new int[255]; + int [] result = new int[462]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -181,10 +186,11 @@ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\7\0\1\11\1\1\3\11\3\1\1\11\4\1\12\11"; + "\6\0\1\11\3\1\3\11\3\1\2\11\6\1\5\11"+ + "\2\0\4\11\3\0\1\1\2\0"; private static int [] zzUnpackAttribute() { - int 
[] result = new int[41]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -252,8 +258,6 @@ private boolean zzEOFDone; /* user code: */ - public static final int CELEX = DICT_ASCII; - private String original = ""; private String normalized = ""; private int problem = 0; @@ -296,7 +300,7 @@ char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 88) { + while (i < 98) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -563,7 +567,7 @@ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { - case 10: + case 11: { add("sz"); } case 16: break; @@ -571,7 +575,7 @@ { problem = 1; add(yytext()); } case 17: break; - case 6: + case 7: { add("ae"); } case 18: break; @@ -590,11 +594,11 @@ { add("ü"); } case 21: break; - case 8: + case 9: { add("ue"); } case 22: break; - case 11: + case 6: { switch (problem) { case 1: return original; default: return normalized.replaceAll(LB, "").toLowerCase(); @@ -613,11 +617,11 @@ { add(yytext()); } case 26: break; - case 9: + case 10: { add("ss"); } case 27: break; - case 7: + case 8: { add("oe"); } case 28: break;