comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents 4a3641ae14d2
children
comparison
equal deleted inserted replaced
22:6a45a982c333 23:e845310098ba
1 /* The following code was generated by JFlex 1.4.3 on 05.09.11 10:34 */ 1 /* The following code was generated by JFlex 1.4.3 on 28.03.12 18:57 */
2 2
3 /* 3 /*
4 * Normalization rules for German text 4 * Normalization rules for German text
5 * [this is a JFlex specification] 5 * [this is a JFlex specification]
6 * 6 *
7 * Wolfgang Schmidle 7 * Wolfgang Schmidle
8 * version 2011-08-10 8 * version 2011-07-12
9 * 9 *
10 */ 10 */
11 11
12 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang; 12 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang;
13 13
14 14
15 /** 15 /**
16 * This class is a scanner generated by 16 * This class is a scanner generated by
17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
18 * on 05.09.11 10:34 from the specification file 18 * on 28.03.12 18:57 from the specification file
19 * <tt>MpdlNormalizerLexDE.lex</tt> 19 * <tt>/Users/jwillenborg/test/jflexNew/MpdlNormalizerLexDE.lex</tt>
20 */ 20 */
21 public class MpdlNormalizerLexDE { 21 public class MpdlNormalizerLexDE {
22 22
23 /** This character denotes the end of file */ 23 /** This character denotes the end of file */
24 public static final int YYEOF = -1; 24 public static final int YYEOF = -1;
25 25
26 /** initial size of the lookahead buffer */ 26 /** initial size of the lookahead buffer */
27 private static final int ZZ_BUFFERSIZE = 16384; 27 private static final int ZZ_BUFFERSIZE = 16384;
28 28
29 /** lexical states */ 29 /** lexical states */
30 public static final int SEARCH = 10; 30 public static final int SEARCH = 6;
31 public static final int DICT_ASCII = 6;
32 public static final int SEARCH_ASCII = 12;
33 public static final int DICT = 4; 31 public static final int DICT = 4;
34 public static final int YYINITIAL = 0; 32 public static final int YYINITIAL = 0;
33 public static final int CELEX = 8;
35 public static final int DISP = 2; 34 public static final int DISP = 2;
36 public static final int GRIMM = 8; 35 public static final int GRIMM = 10;
37 36
38 /** 37 /**
39 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l 38 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
40 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l 39 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
41 * at the beginning of a line 40 * at the beginning of a line
42 * l is of the form l = 2*k, k a non negative integer 41 * l is of the form l = 2*k, k a non negative integer
43 */ 42 */
44 private static final int ZZ_LEXSTATE[] = { 43 private static final int ZZ_LEXSTATE[] = {
45 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6 44 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5
46 }; 45 };
47 46
48 /** 47 /**
49 * Translates characters to character classes 48 * Translates characters to character classes
50 */ 49 */
51 private static final String ZZ_CMAP_PACKED = 50 private static final String ZZ_CMAP_PACKED =
52 "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\20"+ 51 "\12\0\1\3\25\0\1\2\14\0\1\11\1\0\1\10\1\1\13\0"+
53 "\32\4\6\0\1\11\2\4\1\5\12\4\1\13\5\4\1\7\5\4"+ 52 "\1\6\1\0\1\7\1\0\1\24\32\4\6\0\1\15\2\4\1\5"+
54 "\1\1\1\0\1\1\106\0\1\14\21\0\1\15\5\0\1\16\2\0"+ 53 "\12\4\1\17\5\4\1\13\5\4\1\1\1\0\1\1\106\0\1\20"+
55 "\1\17\4\0\1\14\21\0\1\15\5\0\1\16\202\0\1\6\u01e4\0"+ 54 "\21\0\1\21\5\0\1\22\2\0\1\23\4\0\1\20\21\0\1\21"+
56 "\1\12\1\0\1\10\ufc99\0"; 55 "\5\0\1\22\202\0\1\12\u01e4\0\1\16\1\0\1\14\ufc99\0";
57 56
58 /** 57 /**
59 * Translates characters to character classes 58 * Translates characters to character classes
60 */ 59 */
61 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); 60 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
64 * Translates DFA states to action switch labels. 63 * Translates DFA states to action switch labels.
65 */ 64 */
66 private static final int [] ZZ_ACTION = zzUnpackAction(); 65 private static final int [] ZZ_ACTION = zzUnpackAction();
67 66
68 private static final String ZZ_ACTION_PACKED_0 = 67 private static final String ZZ_ACTION_PACKED_0 =
69 "\7\0\2\1\1\2\1\3\1\4\3\1\1\5\1\3"+ 68 "\6\0\4\1\1\2\1\3\1\4\3\1\1\5\1\6"+
70 "\3\1\1\6\1\7\1\10\1\11\1\12\1\13\1\14"+ 69 "\3\3\3\1\1\7\1\10\1\11\1\12\1\13\2\0"+
71 "\1\15\1\16\1\17"; 70 "\1\14\1\15\1\16\1\17\3\0\1\1\2\0";
72 71
73 private static int [] zzUnpackAction() { 72 private static int [] zzUnpackAction() {
74 int [] result = new int[30]; 73 int [] result = new int[41];
75 int offset = 0; 74 int offset = 0;
76 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); 75 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
77 return result; 76 return result;
78 } 77 }
79 78
94 * Translates a state to a row index in the transition table 93 * Translates a state to a row index in the transition table
95 */ 94 */
96 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); 95 private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
97 96
98 private static final String ZZ_ROWMAP_PACKED_0 = 97 private static final String ZZ_ROWMAP_PACKED_0 =
99 "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+ 98 "\0\0\0\25\0\52\0\77\0\124\0\151\0\176\0\223"+
100 "\0\210\0\167\0\167\0\167\0\231\0\252\0\273\0\167"+ 99 "\0\250\0\275\0\176\0\176\0\176\0\322\0\347\0\374"+
101 "\0\210\0\314\0\335\0\356\0\167\0\167\0\167\0\167"+ 100 "\0\176\0\176\0\223\0\250\0\275\0\u0111\0\u0126\0\u013b"+
102 "\0\167\0\167\0\167\0\167\0\167\0\167"; 101 "\0\176\0\176\0\176\0\176\0\176\0\u0150\0\250\0\176"+
102 "\0\176\0\176\0\176\0\u0165\0\u017a\0\u018f\0\u0165\0\u01a4"+
103 "\0\u01b9";
103 104
104 private static int [] zzUnpackRowMap() { 105 private static int [] zzUnpackRowMap() {
105 int [] result = new int[30]; 106 int [] result = new int[41];
106 int offset = 0; 107 int offset = 0;
107 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); 108 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
108 return result; 109 return result;
109 } 110 }
110 111
123 * The transition table of the DFA 124 * The transition table of the DFA
124 */ 125 */
125 private static final int [] ZZ_TRANS = zzUnpackTrans(); 126 private static final int [] ZZ_TRANS = zzUnpackTrans();
126 127
127 private static final String ZZ_TRANS_PACKED_0 = 128 private static final String ZZ_TRANS_PACKED_0 =
128 "\1\10\1\11\1\10\1\0\1\10\1\11\1\12\1\11"+ 129 "\1\7\1\10\1\7\1\0\1\7\1\10\1\11\2\7"+
129 "\1\10\1\11\6\10\1\13\1\10\1\11\1\10\1\14"+ 130 "\1\12\1\13\1\10\1\7\1\10\6\7\1\14\1\7"+
130 "\1\10\1\11\1\12\1\15\1\10\1\16\1\10\1\17"+ 131 "\1\10\1\7\1\15\1\7\1\10\1\11\2\7\1\12"+
131 "\4\10\1\13\1\10\1\11\1\10\1\20\1\10\1\11"+ 132 "\1\13\1\16\1\7\1\17\1\7\1\20\4\7\1\14"+
132 "\1\12\1\15\1\10\1\16\1\10\1\17\4\10\2\13"+ 133 "\1\7\1\10\1\7\1\21\1\7\1\10\1\11\2\7"+
133 "\1\21\1\13\1\20\1\10\1\11\1\12\1\22\1\13"+ 134 "\1\12\1\13\1\16\1\7\1\17\1\7\1\20\4\7"+
134 "\1\23\1\13\1\24\1\25\1\26\1\27\1\30\1\13"+ 135 "\1\14\1\7\1\10\1\7\1\22\1\7\1\10\1\11"+
135 "\1\10\1\11\1\10\1\20\1\10\1\11\1\12\1\15"+ 136 "\2\7\1\12\1\13\1\16\1\7\1\17\1\7\1\20"+
136 "\1\10\1\16\1\10\1\17\3\10\1\31\1\13\1\10"+ 137 "\4\7\2\14\1\23\1\14\1\21\1\7\1\10\1\24"+
137 "\1\11\1\10\1\32\1\10\1\11\1\12\1\15\1\10"+ 138 "\2\14\1\25\1\13\1\26\1\14\1\27\1\14\1\30"+
138 "\1\16\1\10\1\17\4\10\2\13\1\21\1\13\1\32"+ 139 "\1\31\1\32\1\33\1\34\1\14\1\7\1\10\1\7"+
139 "\1\10\1\11\1\12\1\22\1\13\1\23\1\13\1\24"+ 140 "\1\21\1\7\1\10\1\11\2\7\1\12\1\13\1\16"+
140 "\1\25\1\26\1\27\1\30\1\13\23\0\1\10\20\0"+ 141 "\1\7\1\17\1\7\1\20\3\7\1\35\1\14\27\0"+
141 "\1\10\5\0\1\33\1\0\1\34\10\0\1\10\7\0"+ 142 "\1\7\22\0\6\36\2\0\15\36\2\0\1\7\3\0"+
142 "\1\35\20\0\1\36\10\0\1\10\5\0\1\33\1\0"+ 143 "\1\37\20\0\1\7\11\0\1\40\1\0\1\41\10\0"+
143 "\1\27\10\0\1\10\7\0\1\25\20\0\1\26\6\0"; 144 "\1\7\13\0\1\42\24\0\1\43\10\0\1\7\11\0"+
145 "\1\40\1\0\1\33\10\0\1\7\13\0\1\31\24\0"+
146 "\1\32\6\0\6\36\1\0\1\44\1\45\14\36\6\0"+
147 "\1\46\16\0\6\36\1\0\1\47\1\45\14\36\10\0"+
148 "\1\50\14\0\6\51\2\0\23\51\1\0\1\7\15\51";
144 149
145 private static int [] zzUnpackTrans() { 150 private static int [] zzUnpackTrans() {
146 int [] result = new int[255]; 151 int [] result = new int[462];
147 int offset = 0; 152 int offset = 0;
148 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); 153 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
149 return result; 154 return result;
150 } 155 }
151 156
179 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> 184 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
180 */ 185 */
181 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); 186 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
182 187
183 private static final String ZZ_ATTRIBUTE_PACKED_0 = 188 private static final String ZZ_ATTRIBUTE_PACKED_0 =
184 "\7\0\1\11\1\1\3\11\3\1\1\11\4\1\12\11"; 189 "\6\0\1\11\3\1\3\11\3\1\2\11\6\1\5\11"+
190 "\2\0\4\11\3\0\1\1\2\0";
185 191
186 private static int [] zzUnpackAttribute() { 192 private static int [] zzUnpackAttribute() {
187 int [] result = new int[30]; 193 int [] result = new int[41];
188 int offset = 0; 194 int offset = 0;
189 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); 195 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
190 return result; 196 return result;
191 } 197 }
192 198
250 256
251 /** denotes if the user-EOF-code has already been executed */ 257 /** denotes if the user-EOF-code has already been executed */
252 private boolean zzEOFDone; 258 private boolean zzEOFDone;
253 259
254 /* user code: */ 260 /* user code: */
255 public static final int CELEX = DICT_ASCII;
256
257 private String original = ""; 261 private String original = "";
258 private String normalized = ""; 262 private String normalized = "";
259 private int problem = 0; 263 private int problem = 0;
260 264
261 private void add (String norm) { 265 private void add (String norm) {
294 */ 298 */
295 private static char [] zzUnpackCMap(String packed) { 299 private static char [] zzUnpackCMap(String packed) {
296 char [] map = new char[0x10000]; 300 char [] map = new char[0x10000];
297 int i = 0; /* index in packed string */ 301 int i = 0; /* index in packed string */
298 int j = 0; /* index in unpacked array */ 302 int j = 0; /* index in unpacked array */
299 while (i < 88) { 303 while (i < 98) {
300 int count = packed.charAt(i++); 304 int count = packed.charAt(i++);
301 char value = packed.charAt(i++); 305 char value = packed.charAt(i++);
302 do map[j++] = value; while (--count > 0); 306 do map[j++] = value; while (--count > 0);
303 } 307 }
304 return map; 308 return map;
561 565
562 // store back cached position 566 // store back cached position
563 zzMarkedPos = zzMarkedPosL; 567 zzMarkedPos = zzMarkedPosL;
564 568
565 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { 569 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
566 case 10: 570 case 11:
567 { add("sz"); 571 { add("sz");
568 } 572 }
569 case 16: break; 573 case 16: break;
570 case 3: 574 case 3:
571 { problem = 1; add(yytext()); 575 { problem = 1; add(yytext());
572 } 576 }
573 case 17: break; 577 case 17: break;
574 case 6: 578 case 7:
575 { add("ae"); 579 { add("ae");
576 } 580 }
577 case 18: break; 581 case 18: break;
578 case 2: 582 case 2:
579 { add("s"); 583 { add("s");
588 case 20: break; 592 case 20: break;
589 case 13: 593 case 13:
590 { add("ü"); 594 { add("ü");
591 } 595 }
592 case 21: break; 596 case 21: break;
593 case 8: 597 case 9:
594 { add("ue"); 598 { add("ue");
595 } 599 }
596 case 22: break; 600 case 22: break;
597 case 11: 601 case 6:
598 { switch (problem) { 602 { switch (problem) {
599 case 1: return original; 603 case 1: return original;
600 default: return normalized.replaceAll(LB, "").toLowerCase(); 604 default: return normalized.replaceAll(LB, "").toLowerCase();
601 } 605 }
602 } 606 }
611 case 25: break; 615 case 25: break;
612 case 1: 616 case 1:
613 { add(yytext()); 617 { add(yytext());
614 } 618 }
615 case 26: break; 619 case 26: break;
616 case 9: 620 case 10:
617 { add("ss"); 621 { add("ss");
618 } 622 }
619 case 27: break; 623 case 27: break;
620 case 7: 624 case 8:
621 { add("oe"); 625 { add("oe");
622 } 626 }
623 case 28: break; 627 case 28: break;
624 case 15: 628 case 15:
625 { add("ö"); 629 { add("ö");