Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexIT.java @ 14:5df60f24e997
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 29 Aug 2011 17:40:02 +0200 |
parents | 1ec29fdd0db8 |
children |
comparison
equal
deleted
inserted
replaced
13:469d927b9ca7 | 14:5df60f24e997 |
---|---|
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ | 1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */ |
2 | 2 |
3 /* | 3 /* |
4 * Normalization rules for Italian text | 4 * Normalization rules for Italian text |
5 * [this is a JFlex specification] | 5 * [this is a JFlex specification] |
6 * | 6 * |
7 * Wolfgang Schmidle | 7 * Wolfgang Schmidle |
8 * version 0.96 | 8 * version 2011-07-12 |
9 * 2011-02-21 | |
10 * | 9 * |
11 */ | 10 */ |
12 | 11 |
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; |
14 | 13 |
15 | 14 |
16 /** | 15 /** |
17 * This class is a scanner generated by | 16 * This class is a scanner generated by |
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 | 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 |
19 * on 22.02.11 12:03 from the specification file | 18 * on 21.07.11 11:22 from the specification file |
20 * <tt>MpdlNormalizerLexIT.lex</tt> | 19 * <tt>MpdlNormalizerLexIT.lex</tt> |
21 */ | 20 */ |
22 public class MpdlNormalizerLexIT { | 21 public class MpdlNormalizerLexIT { |
23 | 22 |
24 /** This character denotes the end of file */ | 23 /** This character denotes the end of file */ |
45 | 44 |
46 /** | 45 /** |
47 * Translates characters to character classes | 46 * Translates characters to character classes |
48 */ | 47 */ |
49 private static final String ZZ_CMAP_PACKED = | 48 private static final String ZZ_CMAP_PACKED = |
50 "\12\0\1\5\42\0\1\4\22\0\1\51\1\1\3\2\1\1\3\2"+ | 49 "\12\0\1\6\25\0\1\5\14\0\1\4\22\0\1\52\1\1\3\2"+ |
51 "\1\40\1\0\1\2\1\3\2\2\1\41\1\2\1\47\1\3\1\2"+ | 50 "\1\1\3\2\1\41\1\0\1\2\1\3\2\2\1\42\1\2\1\50"+ |
52 "\1\37\1\44\1\50\2\2\1\0\1\2\6\0\1\43\3\2\1\11"+ | 51 "\1\3\1\2\1\40\1\45\1\51\2\2\1\0\1\2\6\0\1\44"+ |
53 "\2\2\1\42\1\6\1\35\1\2\1\3\1\2\1\7\1\36\1\13"+ | 52 "\3\2\1\12\2\2\1\43\1\7\1\36\1\2\1\3\1\2\1\10"+ |
54 "\1\45\1\12\1\2\1\10\1\15\1\46\2\2\1\0\1\2\62\0"+ | 53 "\1\37\1\14\1\46\1\13\1\2\1\11\1\16\1\47\2\2\1\0"+ |
55 "\1\4\22\0\1\16\5\0\1\32\1\0\1\17\3\0\1\20\5\0"+ | 54 "\1\2\62\0\1\4\22\0\1\17\5\0\1\33\1\0\1\20\3\0"+ |
56 "\1\21\6\0\1\22\5\0\1\30\1\23\5\0\1\31\1\0\1\24"+ | 55 "\1\21\5\0\1\22\6\0\1\23\5\0\1\31\1\24\5\0\1\32"+ |
57 "\3\0\1\25\5\0\1\26\6\0\1\27\37\0\1\1\70\0\1\34"+ | 56 "\1\0\1\25\3\0\1\26\5\0\1\27\6\0\1\30\37\0\1\1"+ |
58 "\1\33\53\0\1\14\ufe80\0"; | 57 "\70\0\1\35\1\34\53\0\1\15\ufe80\0"; |
59 | 58 |
60 /** | 59 /** |
61 * Translates characters to character classes | 60 * Translates characters to character classes |
62 */ | 61 */ |
63 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); | 62 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); |
66 * Translates DFA states to action switch labels. | 65 * Translates DFA states to action switch labels. |
67 */ | 66 */ |
68 private static final int [] ZZ_ACTION = zzUnpackAction(); | 67 private static final int [] ZZ_ACTION = zzUnpackAction(); |
69 | 68 |
70 private static final String ZZ_ACTION_PACKED_0 = | 69 private static final String ZZ_ACTION_PACKED_0 = |
71 "\11\0\1\1\1\2\2\3\1\4\1\5\1\2\1\3"+ | 70 "\11\0\1\1\1\2\2\3\1\1\1\4\1\2\1\3"+ |
72 "\1\6\1\2\1\7\1\10\1\11\1\12\1\13\5\3"+ | 71 "\1\5\1\2\1\6\1\7\1\10\1\11\1\12\5\3"+ |
73 "\1\14\1\2\1\3\1\6\1\2\1\15\1\16\1\17"+ | 72 "\1\13\1\2\1\3\1\5\1\2\1\14\1\15\1\16"+ |
74 "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+ | 73 "\1\17\1\20\1\21\1\22\1\23\1\24\1\25\1\26"+ |
75 "\1\30\4\0\1\31\1\32\1\0\1\33\1\0\1\34"+ | 74 "\1\27\1\30\4\0\1\31\1\32\1\33\1\0\1\34"+ |
76 "\1\35\1\0\1\36\1\37\1\40\4\0\1\41\5\0"+ | 75 "\1\0\1\35\1\36\1\0\1\37\1\40\1\41\4\0"+ |
77 "\1\42\1\43\2\0\1\44\1\0\1\45\5\0\1\44"+ | 76 "\1\42\6\0\1\43\1\44\4\0\1\45\1\0\1\46"+ |
78 "\1\46\3\0\1\47"; | 77 "\10\0\1\47\4\0\1\45\2\0\1\50"; |
79 | 78 |
80 private static int [] zzUnpackAction() { | 79 private static int [] zzUnpackAction() { |
81 int [] result = new int[89]; | 80 int [] result = new int[100]; |
82 int offset = 0; | 81 int offset = 0; |
83 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); | 82 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); |
84 return result; | 83 return result; |
85 } | 84 } |
86 | 85 |
101 * Translates a state to a row index in the transition table | 100 * Translates a state to a row index in the transition table |
102 */ | 101 */ |
103 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); | 102 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); |
104 | 103 |
105 private static final String ZZ_ROWMAP_PACKED_0 = | 104 private static final String ZZ_ROWMAP_PACKED_0 = |
106 "\0\0\0\52\0\124\0\176\0\250\0\322\0\374\0\u0126"+ | 105 "\0\0\0\53\0\126\0\201\0\254\0\327\0\u0102\0\u012d"+ |
107 "\0\u0150\0\0\0\0\0\0\0\u017a\0\0\0\0\0\u01a4"+ | 106 "\0\u0158\0\0\0\0\0\0\0\u0183\0\u01ae\0\0\0\u01d9"+ |
108 "\0\u01ce\0\0\0\u01f8\0\0\0\0\0\0\0\0\0\0"+ | 107 "\0\u0204\0\0\0\u022f\0\0\0\0\0\0\0\0\0\0"+ |
109 "\0\u0222\0\u024c\0\u0276\0\u02a0\0\u02ca\0\0\0\u02f4\0\u031e"+ | 108 "\0\u025a\0\u0285\0\u02b0\0\u02db\0\u0306\0\0\0\u0331\0\u035c"+ |
110 "\0\u0348\0\u0372\0\u039c\0\0\0\0\0\0\0\0\0\0"+ | 109 "\0\u0387\0\u03b2\0\u03dd\0\0\0\0\0\0\0\0\0\0"+ |
111 "\0\0\0\0\0\0\0\0\0\0\0\0\0\u03c6\0\u03f0"+ | 110 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u0408"+ |
112 "\0\u041a\0\0\0\0\0\0\0\u0444\0\0\0\u046e\0\0"+ | 111 "\0\u0433\0\u045e\0\u0489\0\0\0\0\0\0\0\u04b4\0\0"+ |
113 "\0\0\0\u0498\0\0\0\0\0\0\0\u04c2\0\u04ec\0\u0516"+ | 112 "\0\u04df\0\0\0\0\0\u050a\0\0\0\0\0\0\0\u0535"+ |
114 "\0\u0540\0\0\0\u056a\0\u0594\0\u05be\0\u05e8\0\u0612\0\0"+ | 113 "\0\u0560\0\u058b\0\u05b6\0\0\0\u05e1\0\u060c\0\u0637\0\u0662"+ |
115 "\0\0\0\u063c\0\u031e\0\u0666\0\u0690\0\0\0\u06ba\0\u06e4"+ | 114 "\0\u068d\0\0\0\0\0\0\0\u06b8\0\u06e3\0\u070e\0\u035c"+ |
116 "\0\u070e\0\0\0\u0738\0\0\0\0\0\u0762\0\u078c\0\u07b6"+ | 115 "\0\u0739\0\u0764\0\0\0\u078f\0\u07ba\0\u07e5\0\0\0\u0810"+ |
117 "\0\0"; | 116 "\0\u083b\0\u0866\0\u0891\0\0\0\u08bc\0\u08e7\0\u0912\0\u093d"+ |
117 "\0\0\0\u0968\0\u0993\0\0"; | |
118 | 118 |
119 private static int [] zzUnpackRowMap() { | 119 private static int [] zzUnpackRowMap() { |
120 int [] result = new int[89]; | 120 int [] result = new int[100]; |
121 int offset = 0; | 121 int offset = 0; |
122 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); | 122 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); |
123 return result; | 123 return result; |
124 } | 124 } |
125 | 125 |
138 * The transition table of the DFA | 138 * The transition table of the DFA |
139 */ | 139 */ |
140 private static final int [] ZZ_TRANS = zzUnpackTrans(); | 140 private static final int [] ZZ_TRANS = zzUnpackTrans(); |
141 | 141 |
142 private static final String ZZ_TRANS_PACKED_0 = | 142 private static final String ZZ_TRANS_PACKED_0 = |
143 "\52\0\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+ | 143 "\53\0\1\12\1\13\1\14\1\15\1\16\1\12\1\17"+ |
144 "\1\14\1\21\1\13\1\15\1\14\1\22\1\23\5\12"+ | 144 "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+ |
145 "\2\13\1\12\2\13\1\24\1\25\1\26\1\27\1\30"+ | 145 "\5\12\2\13\1\12\2\13\1\24\1\25\1\26\1\27"+ |
146 "\1\12\1\13\1\31\2\13\1\14\1\13\1\23\1\32"+ | 146 "\1\30\1\12\1\13\1\31\2\13\1\14\1\13\1\23"+ |
147 "\1\33\1\34\1\35\1\36\1\12\1\13\1\14\1\15"+ | 147 "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+ |
148 "\1\16\1\17\1\37\1\14\1\21\1\13\1\15\1\40"+ | 148 "\1\15\1\16\1\12\1\17\1\37\1\14\1\21\1\13"+ |
149 "\1\41\1\42\5\12\2\13\1\12\2\13\1\24\1\25"+ | 149 "\1\15\1\40\1\41\1\42\5\12\2\13\1\12\2\13"+ |
150 "\1\26\1\27\1\30\1\12\1\13\1\31\2\13\1\43"+ | 150 "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+ |
151 "\1\13\1\42\1\32\1\33\1\34\1\35\1\36\1\12"+ | 151 "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+ |
152 "\1\13\1\14\1\15\1\16\1\44\1\20\1\14\1\21"+ | 152 "\1\36\1\12\1\13\1\14\1\15\1\16\1\12\1\44"+ |
153 "\1\13\1\15\1\14\1\22\1\23\1\45\1\46\1\47"+ | 153 "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+ |
154 "\1\50\1\51\1\52\1\53\1\54\1\55\1\56\1\24"+ | 154 "\1\45\1\46\1\47\1\50\1\51\1\52\1\53\1\54"+ |
155 "\1\25\1\26\1\27\1\30\1\12\1\13\1\31\2\13"+ | 155 "\1\55\1\56\1\24\1\25\1\26\1\27\1\30\1\12"+ |
156 "\1\14\1\13\1\23\1\32\1\33\1\34\1\35\1\36"+ | 156 "\1\13\1\31\2\13\1\14\1\13\1\23\1\32\1\33"+ |
157 "\1\12\1\13\1\14\1\15\1\16\1\44\1\37\1\14"+ | 157 "\1\34\1\35\1\36\1\12\1\13\1\14\1\15\1\16"+ |
158 "\1\12\1\44\1\37\1\14\1\21\1\13\1\15\1\40"+ | |
159 "\1\41\1\42\1\45\1\46\1\47\1\50\1\51\1\52"+ | |
160 "\1\53\1\54\1\55\1\56\1\24\1\25\1\26\1\27"+ | |
161 "\1\30\1\12\1\13\1\31\2\13\1\43\1\13\1\42"+ | |
162 "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+ | |
163 "\1\15\1\16\1\12\1\57\1\20\1\14\1\21\1\13"+ | |
164 "\1\15\1\14\1\22\1\23\1\45\1\46\1\47\1\50"+ | |
165 "\1\51\1\52\1\53\1\54\1\55\1\56\1\24\1\25"+ | |
166 "\1\26\1\27\1\30\1\12\1\13\1\31\2\13\1\14"+ | |
167 "\1\13\1\23\1\32\1\33\1\34\1\35\1\36\1\12"+ | |
168 "\1\13\1\14\1\15\1\16\1\12\1\57\1\37\1\14"+ | |
158 "\1\21\1\13\1\15\1\40\1\41\1\42\1\45\1\46"+ | 169 "\1\21\1\13\1\15\1\40\1\41\1\42\1\45\1\46"+ |
159 "\1\47\1\50\1\51\1\52\1\53\1\54\1\55\1\56"+ | 170 "\1\47\1\50\1\51\1\52\1\53\1\54\1\55\1\56"+ |
160 "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+ | 171 "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+ |
161 "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+ | 172 "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+ |
162 "\1\36\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+ | 173 "\1\36\7\0\1\60\4\0\1\61\1\62\42\0\1\63"+ |
163 "\1\14\1\21\1\13\1\15\1\14\1\22\1\23\1\45"+ | 174 "\114\0\1\64\1\0\1\64\6\0\1\65\103\0\1\66"+ |
164 "\1\46\1\47\1\50\1\51\1\52\1\53\1\54\1\55"+ | 175 "\23\0\1\67\44\0\1\70\5\0\1\70\2\0\1\70"+ |
165 "\1\56\1\24\1\25\1\26\1\27\1\30\1\12\1\13"+ | 176 "\3\0\1\70\5\0\2\70\1\0\2\70\1\0\3\70"+ |
166 "\1\31\2\13\1\14\1\13\1\23\1\32\1\33\1\34"+ | 177 "\2\0\1\70\1\0\2\70\1\0\2\70\46\0\1\71"+ |
167 "\1\35\1\36\1\12\1\13\1\14\1\15\1\16\1\17"+ | 178 "\60\0\1\72\5\0\2\73\1\74\3\0\2\73\1\0"+ |
168 "\1\37\1\14\1\21\1\13\1\15\1\40\1\41\1\42"+ | 179 "\3\73\13\0\1\73\6\0\1\73\2\0\1\73\2\0"+ |
169 "\1\45\1\46\1\47\1\50\1\51\1\52\1\53\1\54"+ | 180 "\4\73\50\0\1\75\1\0\1\76\3\0\2\77\1\100"+ |
170 "\1\55\1\56\1\24\1\25\1\26\1\27\1\30\1\12"+ | 181 "\3\0\2\77\1\0\3\77\13\0\1\77\6\0\1\77"+ |
171 "\1\13\1\31\2\13\1\43\1\13\1\42\1\32\1\33"+ | 182 "\2\0\1\77\2\0\4\77\11\0\1\101\25\0\1\66"+ |
172 "\1\34\1\35\1\36\6\0\1\57\4\0\1\60\1\61"+ | 183 "\26\0\1\102\52\0\1\102\3\0\1\103\35\0\1\104"+ |
173 "\41\0\1\62\113\0\1\63\1\0\1\63\36\0\1\64"+ | 184 "\5\0\1\104\2\0\1\104\3\0\1\104\5\0\2\104"+ |
174 "\22\0\1\65\44\0\1\66\4\0\1\66\2\0\1\66"+ | 185 "\1\0\2\104\1\0\3\104\2\0\1\104\1\0\2\104"+ |
175 "\3\0\1\66\5\0\2\66\1\0\2\66\1\0\3\66"+ | 186 "\1\0\2\104\44\0\1\105\4\0\1\106\16\0\1\107"+ |
176 "\2\0\1\66\1\0\2\66\1\0\2\66\45\0\1\67"+ | 187 "\54\0\1\110\52\0\1\110\3\0\1\111\40\0\1\112"+ |
177 "\57\0\1\70\5\0\2\71\1\72\2\0\2\71\1\0"+ | 188 "\105\0\1\113\55\0\1\114\15\0\1\115\52\0\1\116"+ |
178 "\3\71\13\0\1\71\6\0\1\71\2\0\1\71\2\0"+ | 189 "\51\0\1\117\4\0\1\120\54\0\1\121\43\0\1\122"+ |
179 "\4\71\47\0\1\73\1\0\1\74\3\0\2\75\1\76"+ | 190 "\7\0\1\120\44\0\1\123\52\0\1\123\1\124\1\125"+ |
180 "\2\0\2\75\1\0\3\75\13\0\1\75\6\0\1\75"+ | 191 "\46\0\1\126\4\0\1\61\54\0\1\127\43\0\1\130"+ |
181 "\2\0\1\75\2\0\4\75\10\0\1\77\25\0\1\64"+ | 192 "\7\0\1\61\40\0\2\73\4\0\2\73\1\0\3\73"+ |
182 "\25\0\1\100\51\0\1\100\3\0\1\101\35\0\1\102"+ | 193 "\13\0\1\73\6\0\1\73\2\0\1\73\2\0\4\73"+ |
183 "\4\0\1\102\2\0\1\102\3\0\1\102\5\0\2\102"+ | 194 "\3\0\2\77\4\0\2\77\1\0\3\77\13\0\1\77"+ |
184 "\1\0\2\102\1\0\3\102\2\0\1\102\1\0\2\102"+ | 195 "\6\0\1\77\2\0\1\77\2\0\4\77\6\0\1\131"+ |
185 "\1\0\2\102\43\0\1\103\4\0\1\104\15\0\1\105"+ | 196 "\51\0\1\132\53\0\1\133\53\0\1\134\50\0\1\135"+ |
186 "\53\0\1\106\51\0\1\106\3\0\1\107\72\0\1\110"+ | 197 "\3\0\1\136\47\0\1\137\52\0\1\140\56\0\1\120"+ |
187 "\54\0\1\111\12\0\2\71\3\0\2\71\1\0\3\71"+ | 198 "\46\0\1\141\61\0\1\120\43\0\1\142\104\0\1\143"+ |
188 "\13\0\1\71\6\0\1\71\2\0\1\71\2\0\4\71"+ | 199 "\24\0\1\61\55\0\1\61\46\0\1\136\50\0\1\144"+ |
189 "\3\0\2\75\3\0\2\75\1\0\3\75\13\0\1\75"+ | 200 "\44\0"; |
190 "\6\0\1\75\2\0\1\75\2\0\4\75\5\0\1\112"+ | |
191 "\3\0\1\113\53\0\1\114\43\0\1\115\6\0\1\113"+ | |
192 "\43\0\1\116\51\0\1\116\1\117\1\120\46\0\1\121"+ | |
193 "\3\0\1\60\53\0\1\122\43\0\1\123\6\0\1\60"+ | |
194 "\46\0\1\113\45\0\1\124\60\0\1\113\43\0\1\125"+ | |
195 "\50\0\1\126\2\0\1\127\52\0\1\60\54\0\1\60"+ | |
196 "\45\0\1\127\100\0\1\130\20\0\1\131\44\0"; | |
197 | 201 |
198 private static int [] zzUnpackTrans() { | 202 private static int [] zzUnpackTrans() { |
199 int [] result = new int[2016]; | 203 int [] result = new int[2494]; |
200 int offset = 0; | 204 int offset = 0; |
201 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); | 205 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); |
202 return result; | 206 return result; |
203 } | 207 } |
204 | 208 |
232 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> | 236 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> |
233 */ | 237 */ |
234 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); | 238 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); |
235 | 239 |
236 private static final String ZZ_ATTRIBUTE_PACKED_0 = | 240 private static final String ZZ_ATTRIBUTE_PACKED_0 = |
237 "\1\10\7\0\1\1\3\11\1\1\2\11\2\1\1\11"+ | 241 "\1\10\7\0\1\1\3\11\2\1\1\11\2\1\1\11"+ |
238 "\1\1\5\11\5\1\1\11\5\1\13\11\3\0\3\11"+ | 242 "\1\1\5\11\5\1\1\11\5\1\14\11\4\0\3\11"+ |
239 "\1\0\1\11\1\0\2\11\1\0\3\11\4\0\1\11"+ | 243 "\1\0\1\11\1\0\2\11\1\0\3\11\4\0\1\11"+ |
240 "\5\0\2\11\2\0\1\1\1\0\1\11\3\0\1\11"+ | 244 "\5\0\3\11\4\0\1\1\1\0\1\11\3\0\1\11"+ |
241 "\1\0\2\11\3\0\1\11"; | 245 "\4\0\1\11\4\0\1\11\2\0\1\11"; |
242 | 246 |
243 private static int [] zzUnpackAttribute() { | 247 private static int [] zzUnpackAttribute() { |
244 int [] result = new int[89]; | 248 int [] result = new int[100]; |
245 int offset = 0; | 249 int offset = 0; |
246 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); | 250 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); |
247 return result; | 251 return result; |
248 } | 252 } |
249 | 253 |
323 private void add (String norm) { | 327 private void add (String norm) { |
324 original += yytext(); | 328 original += yytext(); |
325 normalized += norm; | 329 normalized += norm; |
326 } | 330 } |
327 | 331 |
332 private static final String LB = "[\u002d\u00ad] "; | |
333 | |
328 | 334 |
329 /** | 335 /** |
330 * Creates a new scanner | 336 * Creates a new scanner |
331 * There is also a java.io.InputStream version of this constructor. | 337 * There is also a java.io.InputStream version of this constructor. |
332 * | 338 * |
354 */ | 360 */ |
355 private static char [] zzUnpackCMap(String packed) { | 361 private static char [] zzUnpackCMap(String packed) { |
356 char [] map = new char[0x10000]; | 362 char [] map = new char[0x10000]; |
357 int i = 0; /* index in packed string */ | 363 int i = 0; /* index in packed string */ |
358 int j = 0; /* index in unpacked array */ | 364 int j = 0; /* index in unpacked array */ |
359 while (i < 168) { | 365 while (i < 172) { |
360 int count = packed.charAt(i++); | 366 int count = packed.charAt(i++); |
361 char value = packed.charAt(i++); | 367 char value = packed.charAt(i++); |
362 do map[j++] = value; while (--count > 0); | 368 do map[j++] = value; while (--count > 0); |
363 } | 369 } |
364 return map; | 370 return map; |
654 | 660 |
655 // store back cached position | 661 // store back cached position |
656 zzMarkedPos = zzMarkedPosL; | 662 zzMarkedPos = zzMarkedPosL; |
657 | 663 |
658 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { | 664 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { |
659 case 32: | 665 case 33: |
660 // lookahead expression with fixed base length | 666 // lookahead expression with fixed base length |
661 zzMarkedPos = zzStartRead + 1; | 667 zzMarkedPos = zzStartRead + 1; |
662 { cv = CONS; add("U"); | 668 { cv = CONS; add("U"); |
663 } | 669 } |
664 case 40: break; | 670 case 41: break; |
665 case 15: | 671 case 14: |
666 { add("Á"); | 672 { add("Á"); |
667 } | 673 } |
668 case 41: break; | 674 case 42: break; |
669 case 39: | 675 case 40: |
670 // lookahead expression with fixed lookahead length | 676 // lookahead expression with fixed lookahead length |
671 yypushback(1); | 677 yypushback(1); |
672 { add(yytext()); | 678 { add(yytext()); |
673 } | 679 } |
674 case 42: break; | 680 case 43: break; |
675 case 38: | 681 case 39: |
676 // lookahead expression with fixed base length | 682 // lookahead expression with fixed base length |
677 zzMarkedPos = zzStartRead + 3; | 683 zzMarkedPos = zzStartRead + 3; |
678 { add(yytext()); | 684 { add(yytext()); |
679 } | 685 } |
680 case 43: break; | 686 case 44: break; |
681 case 37: | 687 case 38: |
682 // lookahead expression with fixed base length | 688 // lookahead expression with fixed base length |
683 zzMarkedPos = zzStartRead + 2; | 689 zzMarkedPos = zzStartRead + 2; |
684 { add(yytext()); | 690 { add(yytext()); |
685 } | 691 } |
686 case 44: break; | 692 case 45: break; |
693 case 26: | |
694 { add(yytext()); | |
695 } | |
696 case 46: break; | |
697 case 21: | |
698 { add("í"); | |
699 } | |
700 case 47: break; | |
701 case 8: | |
702 { cv = VOWEL; add("AE"); | |
703 } | |
704 case 48: break; | |
705 case 11: | |
706 { problem = 1; cv = 0; add(yytext()); | |
707 } | |
708 case 49: break; | |
687 case 4: | 709 case 4: |
688 { add(yytext()); | |
689 } | |
690 case 45: break; | |
691 case 22: | |
692 { add("í"); | |
693 } | |
694 case 46: break; | |
695 case 9: | |
696 { cv = VOWEL; add("AE"); | |
697 } | |
698 case 47: break; | |
699 case 5: | |
700 { switch (problem) { | 710 { switch (problem) { |
701 case 1: return original; | 711 case 1: return original; |
702 default: return normalized; | 712 default: return normalized; |
703 } | 713 } |
704 } | 714 } |
705 case 48: break; | 715 case 50: break; |
706 case 29: | 716 case 30: |
707 // lookahead expression with fixed base length | 717 // lookahead expression with fixed base length |
708 zzMarkedPos = zzStartRead + 1; | 718 zzMarkedPos = zzStartRead + 1; |
709 { cv = CONS; add("u"); | 719 { cv = CONS; add("u"); |
710 } | 720 } |
711 case 49: break; | 721 case 51: break; |
712 case 20: | 722 case 19: |
713 { add("á"); | 723 { add("á"); |
714 } | 724 } |
715 case 50: break; | 725 case 52: break; |
716 case 1: | 726 case 1: |
717 { cv = 0; add(yytext()); | 727 { cv = 0; add(yytext()); |
718 } | 728 } |
719 case 51: break; | 729 case 53: break; |
720 case 33: | 730 case 24: |
731 { switch (problem) { | |
732 case 1: return original; | |
733 default: return normalized.replaceAll(LB, "").toLowerCase(); | |
734 } | |
735 } | |
736 case 54: break; | |
737 case 34: | |
721 // lookahead expression with fixed base length | 738 // lookahead expression with fixed base length |
722 zzMarkedPos = zzStartRead + 1; | 739 zzMarkedPos = zzStartRead + 1; |
723 { cv = VOWEL; add(yytext().replaceAll("u", "v").replaceAll("U", "V")); | 740 { cv = VOWEL; add(yytext().replaceAll("u", "v").replaceAll("U", "V")); |
724 } | 741 } |
725 case 52: break; | 742 case 55: break; |
726 case 34: | 743 case 35: |
727 { cv = VOWEL; add("zio"); | 744 { cv = VOWEL; add("zio"); |
728 } | 745 } |
729 case 53: break; | 746 case 56: break; |
730 case 11: | 747 case 10: |
731 { cv = VOWEL; add("OE"); | 748 { cv = VOWEL; add("OE"); |
732 } | 749 } |
733 case 54: break; | 750 case 57: break; |
734 case 19: | 751 case 18: |
735 { add("Ú"); | 752 { add("Ú"); |
736 } | 753 } |
737 case 55: break; | 754 case 58: break; |
738 case 36: | 755 case 37: |
739 // general lookahead, find correct zzMarkedPos | 756 // general lookahead, find correct zzMarkedPos |
740 { int zzFState = 7; | 757 { int zzFState = 7; |
741 int zzFPos = zzStartRead; | 758 int zzFPos = zzStartRead; |
742 if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; } | 759 if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; } |
743 boolean zzFinL[] = zzFin; | 760 boolean zzFinL[] = zzFin; |
756 }; | 773 }; |
757 zzMarkedPos = zzFPos; | 774 zzMarkedPos = zzFPos; |
758 } | 775 } |
759 { cv = VOWEL; add(yytext().replace("ſ", "s")); | 776 { cv = VOWEL; add(yytext().replace("ſ", "s")); |
760 } | 777 } |
761 case 56: break; | 778 case 59: break; |
762 case 3: | 779 case 3: |
763 { cv = CONS; add(yytext()); | 780 { cv = CONS; add(yytext()); |
764 } | 781 } |
765 case 57: break; | 782 case 60: break; |
766 case 31: | 783 case 32: |
767 { cv = CONS; add("QU"); | 784 { cv = CONS; add("QU"); |
768 } | 785 } |
769 case 58: break; | 786 case 61: break; |
770 case 16: | 787 case 15: |
771 { add("É"); | 788 { add("É"); |
772 } | 789 } |
773 case 59: break; | 790 case 62: break; |
774 case 27: | 791 case 28: |
775 // lookahead expression with fixed base length | 792 // lookahead expression with fixed base length |
776 zzMarkedPos = zzStartRead + 1; | 793 zzMarkedPos = zzStartRead + 1; |
777 { switch(cv) { | 794 { switch(cv) { |
778 case VOWEL: add(yytext().replace("u", "v").replace("U", "V")); break; | 795 case VOWEL: add(yytext().replace("u", "v").replace("U", "V")); break; |
779 default: cv = VOWEL; add(yytext()); break; | 796 default: cv = VOWEL; add(yytext()); break; |
780 } | 797 } |
781 } | 798 } |
782 case 60: break; | 799 case 63: break; |
783 case 7: | 800 case 6: |
784 { cv = CONS; add("ss"); | 801 { cv = CONS; add("ss"); |
785 } | 802 } |
786 case 61: break; | 803 case 64: break; |
787 case 6: | 804 case 5: |
788 { cv = CONS; add("s"); | 805 { cv = CONS; add("s"); |
789 } | 806 } |
790 case 62: break; | 807 case 65: break; |
791 case 35: | 808 case 13: |
809 { switch (problem) { | |
810 case 1: return ""; | |
811 default: return normalized.replaceAll(LB, ""); | |
812 } | |
813 } | |
814 case 66: break; | |
815 case 36: | |
792 { cv = VOWEL; add("ZIO"); | 816 { cv = VOWEL; add("ZIO"); |
793 } | 817 } |
794 case 63: break; | 818 case 67: break; |
795 case 2: | 819 case 2: |
796 { cv = VOWEL; add(yytext()); | 820 { cv = VOWEL; add(yytext()); |
797 } | 821 } |
798 case 64: break; | 822 case 68: break; |
799 case 18: | 823 case 17: |
800 { add("Ó"); | 824 { add("Ó"); |
801 } | 825 } |
802 case 65: break; | 826 case 69: break; |
803 case 24: | 827 case 23: |
804 { add("ú"); | 828 { add("ú"); |
805 } | 829 } |
806 case 66: break; | 830 case 70: break; |
807 case 30: | 831 case 31: |
808 { cv = CONS; add("Qu"); | 832 { cv = CONS; add("Qu"); |
809 } | 833 } |
810 case 67: break; | 834 case 71: break; |
811 case 21: | 835 case 20: |
812 { add("é"); | 836 { add("é"); |
813 } | 837 } |
814 case 68: break; | 838 case 72: break; |
815 case 8: | 839 case 7: |
816 { cv = VOWEL; add("ae"); | 840 { cv = VOWEL; add("ae"); |
817 } | 841 } |
818 case 69: break; | 842 case 73: break; |
819 case 14: | 843 case 12: |
820 { switch (problem) { | |
821 case 1: return ""; | |
822 default: return normalized; | |
823 } | |
824 } | |
825 case 70: break; | |
826 case 13: | |
827 { add(""); | 844 { add(""); |
828 } | 845 } |
829 case 71: break; | 846 case 74: break; |
830 case 23: | 847 case 22: |
831 { add("ó"); | 848 { add("ó"); |
832 } | 849 } |
833 case 72: break; | 850 case 75: break; |
834 case 10: | 851 case 9: |
835 { cv = VOWEL; add("oe"); | 852 { cv = VOWEL; add("oe"); |
836 } | 853 } |
837 case 73: break; | 854 case 76: break; |
838 case 28: | 855 case 29: |
839 { cv = CONS; add("qu"); | 856 { cv = CONS; add("qu"); |
840 } | 857 } |
841 case 74: break; | 858 case 77: break; |
842 case 12: | |
843 { problem = 1; add(yytext()); | |
844 } | |
845 case 75: break; | |
846 case 25: | 859 case 25: |
847 { switch(cv) { | 860 { switch(cv) { |
848 case CONS: add(yytext().replace("v", "u").replace("V", "U")); break; | 861 case CONS: add(yytext().replace("v", "u").replace("V", "U")); break; |
849 default: cv = CONS; add(yytext()); break; | 862 default: cv = CONS; add(yytext()); break; |
850 } | 863 } |
851 } | 864 } |
852 case 76: break; | 865 case 78: break; |
853 case 26: | 866 case 27: |
854 { cv = VOWEL; add("ii"); | 867 { cv = VOWEL; add("ii"); |
855 } | 868 } |
856 case 77: break; | 869 case 79: break; |
857 case 17: | 870 case 16: |
858 { add("Í"); | 871 { add("Í"); |
859 } | 872 } |
860 case 78: break; | 873 case 80: break; |
861 default: | 874 default: |
862 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { | 875 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { |
863 zzAtEOF = true; | 876 zzAtEOF = true; |
864 return null; | 877 return null; |
865 } | 878 } |