Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexDE.java @ 16:257f67be5c00
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Sep 2011 16:40:57 +0200 |
parents | 5df60f24e997 |
children |
comparison
equal
deleted
inserted
replaced
15:e99964f390e4 | 16:257f67be5c00 |
---|---|
1 /* The following code was generated by JFlex 1.4.3 on 03.08.11 18:24 */ | 1 /* The following code was generated by JFlex 1.4.3 on 05.09.11 10:34 */ |
2 | 2 |
3 /* | 3 /* |
4 * Normalization rules for German text | 4 * Normalization rules for German text |
5 * [this is a JFlex specification] | 5 * [this is a JFlex specification] |
6 * | 6 * |
7 * Wolfgang Schmidle | 7 * Wolfgang Schmidle |
8 * version 2011-07-12 | 8 * version 2011-08-10 |
9 * | 9 * |
10 */ | 10 */ |
11 | 11 |
12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; |
13 | 13 |
14 | 14 |
15 /** | 15 /** |
16 * This class is a scanner generated by | 16 * This class is a scanner generated by |
17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 | 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 |
18 * on 03.08.11 18:24 from the specification file | 18 * on 05.09.11 10:34 from the specification file |
19 * <tt>MpdlNormalizerLexDE.lex</tt> | 19 * <tt>MpdlNormalizerLexDE.lex</tt> |
20 */ | 20 */ |
21 public class MpdlNormalizerLexDE { | 21 public class MpdlNormalizerLexDE { |
22 | 22 |
23 /** This character denotes the end of file */ | 23 /** This character denotes the end of file */ |
25 | 25 |
26 /** initial size of the lookahead buffer */ | 26 /** initial size of the lookahead buffer */ |
27 private static final int ZZ_BUFFERSIZE = 16384; | 27 private static final int ZZ_BUFFERSIZE = 16384; |
28 | 28 |
29 /** lexical states */ | 29 /** lexical states */ |
30 public static final int SEARCH = 6; | 30 public static final int SEARCH = 10; |
31 public static final int DICT_ASCII = 6; | |
32 public static final int SEARCH_ASCII = 12; | |
31 public static final int DICT = 4; | 33 public static final int DICT = 4; |
32 public static final int YYINITIAL = 0; | 34 public static final int YYINITIAL = 0; |
33 public static final int CELEX = 8; | |
34 public static final int DISP = 2; | 35 public static final int DISP = 2; |
35 public static final int GRIMM = 10; | 36 public static final int GRIMM = 8; |
36 | 37 |
37 /** | 38 /** |
38 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l | 39 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l |
39 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l | 40 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l |
40 * at the beginning of a line | 41 * at the beginning of a line |
41 * l is of the form l = 2*k, k a non negative integer | 42 * l is of the form l = 2*k, k a non negative integer |
42 */ | 43 */ |
43 private static final int ZZ_LEXSTATE[] = { | 44 private static final int ZZ_LEXSTATE[] = { |
44 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 | 45 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6 |
45 }; | 46 }; |
46 | 47 |
47 /** | 48 /** |
48 * Translates characters to character classes | 49 * Translates characters to character classes |
49 */ | 50 */ |
63 * Translates DFA states to action switch labels. | 64 * Translates DFA states to action switch labels. |
64 */ | 65 */ |
65 private static final int [] ZZ_ACTION = zzUnpackAction(); | 66 private static final int [] ZZ_ACTION = zzUnpackAction(); |
66 | 67 |
67 private static final String ZZ_ACTION_PACKED_0 = | 68 private static final String ZZ_ACTION_PACKED_0 = |
68 "\6\0\2\1\1\2\1\3\1\4\3\1\1\5\1\6"+ | 69 "\7\0\2\1\1\2\1\3\1\4\3\1\1\5\1\3"+ |
69 "\1\3\3\1\1\7\1\10\1\11\1\12\1\13\1\14"+ | 70 "\3\1\1\6\1\7\1\10\1\11\1\12\1\13\1\14"+ |
70 "\1\15\1\16\1\17"; | 71 "\1\15\1\16\1\17"; |
71 | 72 |
72 private static int [] zzUnpackAction() { | 73 private static int [] zzUnpackAction() { |
73 int [] result = new int[29]; | 74 int [] result = new int[30]; |
74 int offset = 0; | 75 int offset = 0; |
75 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); | 76 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); |
76 return result; | 77 return result; |
77 } | 78 } |
78 | 79 |
94 */ | 95 */ |
95 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); | 96 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); |
96 | 97 |
97 private static final String ZZ_ROWMAP_PACKED_0 = | 98 private static final String ZZ_ROWMAP_PACKED_0 = |
98 "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+ | 99 "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+ |
99 "\0\146\0\146\0\146\0\210\0\231\0\252\0\146\0\146"+ | 100 "\0\210\0\167\0\167\0\167\0\231\0\252\0\273\0\167"+ |
100 "\0\167\0\273\0\314\0\335\0\146\0\146\0\146\0\146"+ | 101 "\0\210\0\314\0\335\0\356\0\167\0\167\0\167\0\167"+ |
101 "\0\146\0\146\0\146\0\146\0\146"; | 102 "\0\167\0\167\0\167\0\167\0\167\0\167"; |
102 | 103 |
103 private static int [] zzUnpackRowMap() { | 104 private static int [] zzUnpackRowMap() { |
104 int [] result = new int[29]; | 105 int [] result = new int[30]; |
105 int offset = 0; | 106 int offset = 0; |
106 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); | 107 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); |
107 return result; | 108 return result; |
108 } | 109 } |
109 | 110 |
122 * The transition table of the DFA | 123 * The transition table of the DFA |
123 */ | 124 */ |
124 private static final int [] ZZ_TRANS = zzUnpackTrans(); | 125 private static final int [] ZZ_TRANS = zzUnpackTrans(); |
125 | 126 |
126 private static final String ZZ_TRANS_PACKED_0 = | 127 private static final String ZZ_TRANS_PACKED_0 = |
127 "\1\7\1\10\1\7\1\0\1\7\1\10\1\11\1\10"+ | 128 "\1\10\1\11\1\10\1\0\1\10\1\11\1\12\1\11"+ |
128 "\1\7\1\10\6\7\1\12\1\7\1\10\1\7\1\13"+ | 129 "\1\10\1\11\6\10\1\13\1\10\1\11\1\10\1\14"+ |
129 "\1\7\1\10\1\11\1\14\1\7\1\15\1\7\1\16"+ | 130 "\1\10\1\11\1\12\1\15\1\10\1\16\1\10\1\17"+ |
130 "\4\7\1\12\1\7\1\10\1\7\1\17\1\7\1\10"+ | 131 "\4\10\1\13\1\10\1\11\1\10\1\20\1\10\1\11"+ |
131 "\1\11\1\14\1\7\1\15\1\7\1\16\4\7\1\12"+ | 132 "\1\12\1\15\1\10\1\16\1\10\1\17\4\10\2\13"+ |
132 "\1\7\1\10\1\7\1\20\1\7\1\10\1\11\1\14"+ | 133 "\1\21\1\13\1\20\1\10\1\11\1\12\1\22\1\13"+ |
133 "\1\7\1\15\1\7\1\16\4\7\2\12\1\21\1\12"+ | 134 "\1\23\1\13\1\24\1\25\1\26\1\27\1\30\1\13"+ |
134 "\1\17\1\7\1\10\1\11\1\22\1\12\1\23\1\12"+ | 135 "\1\10\1\11\1\10\1\20\1\10\1\11\1\12\1\15"+ |
135 "\1\24\1\25\1\26\1\27\1\30\1\12\1\7\1\10"+ | 136 "\1\10\1\16\1\10\1\17\3\10\1\31\1\13\1\10"+ |
136 "\1\7\1\17\1\7\1\10\1\11\1\14\1\7\1\15"+ | 137 "\1\11\1\10\1\32\1\10\1\11\1\12\1\15\1\10"+ |
137 "\1\7\1\16\3\7\1\31\1\12\23\0\1\7\20\0"+ | 138 "\1\16\1\10\1\17\4\10\2\13\1\21\1\13\1\32"+ |
138 "\1\7\5\0\1\32\1\0\1\33\10\0\1\7\7\0"+ | 139 "\1\10\1\11\1\12\1\22\1\13\1\23\1\13\1\24"+ |
139 "\1\34\20\0\1\35\10\0\1\7\5\0\1\32\1\0"+ | 140 "\1\25\1\26\1\27\1\30\1\13\23\0\1\10\20\0"+ |
140 "\1\27\10\0\1\7\7\0\1\25\20\0\1\26\6\0"; | 141 "\1\10\5\0\1\33\1\0\1\34\10\0\1\10\7\0"+ |
142 "\1\35\20\0\1\36\10\0\1\10\5\0\1\33\1\0"+ | |
143 "\1\27\10\0\1\10\7\0\1\25\20\0\1\26\6\0"; | |
141 | 144 |
142 private static int [] zzUnpackTrans() { | 145 private static int [] zzUnpackTrans() { |
143 int [] result = new int[238]; | 146 int [] result = new int[255]; |
144 int offset = 0; | 147 int offset = 0; |
145 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); | 148 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); |
146 return result; | 149 return result; |
147 } | 150 } |
148 | 151 |
176 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> | 179 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> |
177 */ | 180 */ |
178 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); | 181 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); |
179 | 182 |
180 private static final String ZZ_ATTRIBUTE_PACKED_0 = | 183 private static final String ZZ_ATTRIBUTE_PACKED_0 = |
181 "\6\0\1\11\1\1\3\11\3\1\2\11\4\1\11\11"; | 184 "\7\0\1\11\1\1\3\11\3\1\1\11\4\1\12\11"; |
182 | 185 |
183 private static int [] zzUnpackAttribute() { | 186 private static int [] zzUnpackAttribute() { |
184 int [] result = new int[29]; | 187 int [] result = new int[30]; |
185 int offset = 0; | 188 int offset = 0; |
186 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); | 189 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); |
187 return result; | 190 return result; |
188 } | 191 } |
189 | 192 |
247 | 250 |
248 /** denotes if the user-EOF-code has already been executed */ | 251 /** denotes if the user-EOF-code has already been executed */ |
249 private boolean zzEOFDone; | 252 private boolean zzEOFDone; |
250 | 253 |
251 /* user code: */ | 254 /* user code: */ |
255 public static final int CELEX = DICT_ASCII; | |
256 | |
252 private String original = ""; | 257 private String original = ""; |
253 private String normalized = ""; | 258 private String normalized = ""; |
254 private int problem = 0; | 259 private int problem = 0; |
255 | 260 |
256 private void add (String norm) { | 261 private void add (String norm) { |
556 | 561 |
557 // store back cached position | 562 // store back cached position |
558 zzMarkedPos = zzMarkedPosL; | 563 zzMarkedPos = zzMarkedPosL; |
559 | 564 |
560 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { | 565 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { |
561 case 11: | 566 case 10: |
562 { add("sz"); | 567 { add("sz"); |
563 } | 568 } |
564 case 16: break; | 569 case 16: break; |
565 case 3: | 570 case 3: |
566 { problem = 1; add(yytext()); | 571 { problem = 1; add(yytext()); |
567 } | 572 } |
568 case 17: break; | 573 case 17: break; |
569 case 7: | 574 case 6: |
570 { add("ae"); | 575 { add("ae"); |
571 } | 576 } |
572 case 18: break; | 577 case 18: break; |
573 case 2: | 578 case 2: |
574 { add("s"); | 579 { add("s"); |
583 case 20: break; | 588 case 20: break; |
584 case 13: | 589 case 13: |
585 { add("ü"); | 590 { add("ü"); |
586 } | 591 } |
587 case 21: break; | 592 case 21: break; |
588 case 9: | 593 case 8: |
589 { add("ue"); | 594 { add("ue"); |
590 } | 595 } |
591 case 22: break; | 596 case 22: break; |
592 case 6: | 597 case 11: |
593 { switch (problem) { | 598 { switch (problem) { |
594 case 1: return original; | 599 case 1: return original; |
595 default: return normalized.replaceAll(LB, "").toLowerCase(); | 600 default: return normalized.replaceAll(LB, "").toLowerCase(); |
596 } | 601 } |
597 } | 602 } |
606 case 25: break; | 611 case 25: break; |
607 case 1: | 612 case 1: |
608 { add(yytext()); | 613 { add(yytext()); |
609 } | 614 } |
610 case 26: break; | 615 case 26: break; |
611 case 10: | 616 case 9: |
612 { add("ss"); | 617 { add("ss"); |
613 } | 618 } |
614 case 27: break; | 619 case 27: break; |
615 case 8: | 620 case 7: |
616 { add("oe"); | 621 { add("oe"); |
617 } | 622 } |
618 case 28: break; | 623 case 28: break; |
619 case 15: | 624 case 15: |
620 { add("ö"); | 625 { add("ö"); |