Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexIT.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children | e845310098ba |
comparison
equal
deleted
inserted
replaced
18:dc5e9fcb3fdc | 19:4a3641ae14d2 |
---|---|
1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */ | |
2 | |
3 /* | |
4 * Normalization rules for Italian text | |
5 * [this is a JFlex specification] | |
6 * | |
7 * Wolfgang Schmidle | |
8 * version 2011-07-12 | |
9 * | |
10 */ | |
11 | |
12 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang; | |
13 | |
14 | |
/**
 * Scanner applying the MPDL normalization rules for Italian text.
 *
 * <p>This class was generated by <a href="http://www.jflex.de/">JFlex</a> 1.4.3
 * on 21.07.11 11:22 from the specification file <tt>MpdlNormalizerLexIT.lex</tt>.
 * Hand edits made here (locale-independent lower-casing, state reset in
 * {@link #yyreset(java.io.Reader)}, {@code StringBuilder} accumulation) should be
 * mirrored in the specification, otherwise they are lost on regeneration.
 *
 * <p>Usage: construct the scanner on a reader, select a mode with
 * {@link #yybegin(int)} ({@link #DISP}, {@link #DICT} or {@link #SEARCH}) and call
 * {@link #yylex()}. {@code yylex()} consumes input up to and including the next
 * {@code '\n'} and returns the text accumulated so far; without a terminating
 * {@code '\n'} it returns {@code null} at end of input. The default state
 * {@link #YYINITIAL} has no rules, so scanning any character in it raises the
 * "could not match input" {@link Error}.
 *
 * <p>What is returned at the end of a line depends on the mode:
 * <ul>
 *   <li>{@code DISP}: the normalized text, or the original text if a problem
 *       character ({@code '@'}) was seen;</li>
 *   <li>{@code DICT}: the normalized text with hyphenated line breaks removed,
 *       or the empty string if a problem character was seen;</li>
 *   <li>{@code SEARCH}: like {@code DICT} but lower-cased, or the original text
 *       if a problem character was seen.</li>
 * </ul>
 *
 * <p>Instances are stateful and not safe for concurrent use.
 */
public class MpdlNormalizerLexIT {

  /** This character denotes the end of file */
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
  private static final int ZZ_BUFFERSIZE = 16384;

  /** lexical states */
  public static final int SEARCH = 6;
  public static final int DICT = 4;
  public static final int YYINITIAL = 0;
  public static final int DISP = 2;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   *                  at the beginning of a line
   * l is of the form l = 2*k, k a non negative integer
   */
  private static final int[] ZZ_LEXSTATE = {
     0,  0,  1,  2,  3,  4,  5, 6
  };

  /**
   * Translates characters to character classes (run-length encoded as
   * count/value pairs).
   */
  private static final String ZZ_CMAP_PACKED =
    "\12\0\1\6\25\0\1\5\14\0\1\4\22\0\1\52\1\1\3\2"+
    "\1\1\3\2\1\41\1\0\1\2\1\3\2\2\1\42\1\2\1\50"+
    "\1\3\1\2\1\40\1\45\1\51\2\2\1\0\1\2\6\0\1\44"+
    "\3\2\1\12\2\2\1\43\1\7\1\36\1\2\1\3\1\2\1\10"+
    "\1\37\1\14\1\46\1\13\1\2\1\11\1\16\1\47\2\2\1\0"+
    "\1\2\62\0\1\4\22\0\1\17\5\0\1\33\1\0\1\20\3\0"+
    "\1\21\5\0\1\22\6\0\1\23\5\0\1\31\1\24\5\0\1\32"+
    "\1\0\1\25\3\0\1\26\5\0\1\27\6\0\1\30\37\0\1\1"+
    "\70\0\1\35\1\34\53\0\1\15\ufe80\0";

  /**
   * Translates characters to character classes
   */
  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);

  /**
   * Translates DFA states to action switch labels.
   */
  private static final int [] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ACTION_PACKED_0 =
    "\11\0\1\1\1\2\2\3\1\1\1\4\1\2\1\3"+
    "\1\5\1\2\1\6\1\7\1\10\1\11\1\12\5\3"+
    "\1\13\1\2\1\3\1\5\1\2\1\14\1\15\1\16"+
    "\1\17\1\20\1\21\1\22\1\23\1\24\1\25\1\26"+
    "\1\27\1\30\4\0\1\31\1\32\1\33\1\0\1\34"+
    "\1\0\1\35\1\36\1\0\1\37\1\40\1\41\4\0"+
    "\1\42\6\0\1\43\1\44\4\0\1\45\1\0\1\46"+
    "\10\0\1\47\4\0\1\45\2\0\1\50";

  private static int [] zzUnpackAction() {
    int [] result = new int[100];
    int offset = 0;
    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs of {@code packed} into
   * {@code result}, starting at {@code offset}.
   *
   * @return the index just past the last element written
   */
  private static int zzUnpackAction(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }


  /**
   * Translates a state to a row index in the transition table
   */
  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\53\0\126\0\201\0\254\0\327\0\u0102\0\u012d"+
    "\0\u0158\0\0\0\0\0\0\0\u0183\0\u01ae\0\0\0\u01d9"+
    "\0\u0204\0\0\0\u022f\0\0\0\0\0\0\0\0\0\0"+
    "\0\u025a\0\u0285\0\u02b0\0\u02db\0\u0306\0\0\0\u0331\0\u035c"+
    "\0\u0387\0\u03b2\0\u03dd\0\0\0\0\0\0\0\0\0\0"+
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u0408"+
    "\0\u0433\0\u045e\0\u0489\0\0\0\0\0\0\0\u04b4\0\0"+
    "\0\u04df\0\0\0\0\0\u050a\0\0\0\0\0\0\0\u0535"+
    "\0\u0560\0\u058b\0\u05b6\0\0\0\u05e1\0\u060c\0\u0637\0\u0662"+
    "\0\u068d\0\0\0\0\0\0\0\u06b8\0\u06e3\0\u070e\0\u035c"+
    "\0\u0739\0\u0764\0\0\0\u078f\0\u07ba\0\u07e5\0\0\0\u0810"+
    "\0\u083b\0\u0866\0\u0891\0\0\0\u08bc\0\u08e7\0\u0912\0\u093d"+
    "\0\0\0\u0968\0\u0993\0\0";

  private static int [] zzUnpackRowMap() {
    int [] result = new int[100];
    int offset = 0;
    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
    return result;
  }

  /**
   * Decodes {@code packed} as (high 16 bits, low 16 bits) character pairs into
   * {@code result}, starting at {@code offset}.
   *
   * @return the index just past the last element written
   */
  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
    int i = 0;  /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int high = packed.charAt(i++) << 16;
      result[j++] = high | packed.charAt(i++);
    }
    return j;
  }

  /**
   * The transition table of the DFA
   */
  private static final int [] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_TRANS_PACKED_0 =
    "\53\0\1\12\1\13\1\14\1\15\1\16\1\12\1\17"+
    "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+
    "\5\12\2\13\1\12\2\13\1\24\1\25\1\26\1\27"+
    "\1\30\1\12\1\13\1\31\2\13\1\14\1\13\1\23"+
    "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+
    "\1\15\1\16\1\12\1\17\1\37\1\14\1\21\1\13"+
    "\1\15\1\40\1\41\1\42\5\12\2\13\1\12\2\13"+
    "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+
    "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+
    "\1\36\1\12\1\13\1\14\1\15\1\16\1\12\1\44"+
    "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+
    "\1\45\1\46\1\47\1\50\1\51\1\52\1\53\1\54"+
    "\1\55\1\56\1\24\1\25\1\26\1\27\1\30\1\12"+
    "\1\13\1\31\2\13\1\14\1\13\1\23\1\32\1\33"+
    "\1\34\1\35\1\36\1\12\1\13\1\14\1\15\1\16"+
    "\1\12\1\44\1\37\1\14\1\21\1\13\1\15\1\40"+
    "\1\41\1\42\1\45\1\46\1\47\1\50\1\51\1\52"+
    "\1\53\1\54\1\55\1\56\1\24\1\25\1\26\1\27"+
    "\1\30\1\12\1\13\1\31\2\13\1\43\1\13\1\42"+
    "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+
    "\1\15\1\16\1\12\1\57\1\20\1\14\1\21\1\13"+
    "\1\15\1\14\1\22\1\23\1\45\1\46\1\47\1\50"+
    "\1\51\1\52\1\53\1\54\1\55\1\56\1\24\1\25"+
    "\1\26\1\27\1\30\1\12\1\13\1\31\2\13\1\14"+
    "\1\13\1\23\1\32\1\33\1\34\1\35\1\36\1\12"+
    "\1\13\1\14\1\15\1\16\1\12\1\57\1\37\1\14"+
    "\1\21\1\13\1\15\1\40\1\41\1\42\1\45\1\46"+
    "\1\47\1\50\1\51\1\52\1\53\1\54\1\55\1\56"+
    "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+
    "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+
    "\1\36\7\0\1\60\4\0\1\61\1\62\42\0\1\63"+
    "\114\0\1\64\1\0\1\64\6\0\1\65\103\0\1\66"+
    "\23\0\1\67\44\0\1\70\5\0\1\70\2\0\1\70"+
    "\3\0\1\70\5\0\2\70\1\0\2\70\1\0\3\70"+
    "\2\0\1\70\1\0\2\70\1\0\2\70\46\0\1\71"+
    "\60\0\1\72\5\0\2\73\1\74\3\0\2\73\1\0"+
    "\3\73\13\0\1\73\6\0\1\73\2\0\1\73\2\0"+
    "\4\73\50\0\1\75\1\0\1\76\3\0\2\77\1\100"+
    "\3\0\2\77\1\0\3\77\13\0\1\77\6\0\1\77"+
    "\2\0\1\77\2\0\4\77\11\0\1\101\25\0\1\66"+
    "\26\0\1\102\52\0\1\102\3\0\1\103\35\0\1\104"+
    "\5\0\1\104\2\0\1\104\3\0\1\104\5\0\2\104"+
    "\1\0\2\104\1\0\3\104\2\0\1\104\1\0\2\104"+
    "\1\0\2\104\44\0\1\105\4\0\1\106\16\0\1\107"+
    "\54\0\1\110\52\0\1\110\3\0\1\111\40\0\1\112"+
    "\105\0\1\113\55\0\1\114\15\0\1\115\52\0\1\116"+
    "\51\0\1\117\4\0\1\120\54\0\1\121\43\0\1\122"+
    "\7\0\1\120\44\0\1\123\52\0\1\123\1\124\1\125"+
    "\46\0\1\126\4\0\1\61\54\0\1\127\43\0\1\130"+
    "\7\0\1\61\40\0\2\73\4\0\2\73\1\0\3\73"+
    "\13\0\1\73\6\0\1\73\2\0\1\73\2\0\4\73"+
    "\3\0\2\77\4\0\2\77\1\0\3\77\13\0\1\77"+
    "\6\0\1\77\2\0\1\77\2\0\4\77\6\0\1\131"+
    "\51\0\1\132\53\0\1\133\53\0\1\134\50\0\1\135"+
    "\3\0\1\136\47\0\1\137\52\0\1\140\56\0\1\120"+
    "\46\0\1\141\61\0\1\120\43\0\1\142\104\0\1\143"+
    "\24\0\1\61\55\0\1\61\46\0\1\136\50\0\1\144"+
    "\44\0";

  private static int [] zzUnpackTrans() {
    int [] result = new int[2494];
    int offset = 0;
    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs of {@code packed} into
   * {@code result}, starting at {@code offset}. Every value is stored
   * decremented by one, so a packed 0 becomes -1 ("no transition").
   *
   * @return the index just past the last element written
   */
  private static int zzUnpackTrans(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }


  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String[] ZZ_ERROR_MSG = {
    "Unknown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
   * (bit 1: the state is accepting, bit 8: the state has no outgoing transitions)
   */
  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\1\10\7\0\1\1\3\11\2\1\1\11\2\1\1\11"+
    "\1\1\5\11\5\1\1\11\5\1\14\11\4\0\3\11"+
    "\1\0\1\11\1\0\2\11\1\0\3\11\4\0\1\11"+
    "\5\0\3\11\4\0\1\1\1\0\1\11\3\0\1\11"+
    "\4\0\1\11\4\0\1\11\2\0\1\11";

  private static int [] zzUnpackAttribute() {
    int [] result = new int[100];
    int offset = 0;
    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs of {@code packed} into
   * {@code result}, starting at {@code offset}.
   *
   * @return the index just past the last element written
   */
  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /** For the backwards DFA of general lookahead statements */
  private boolean [] zzFin = new boolean [ZZ_BUFFERSIZE+1];

  /* user code: */
  private static final int CONS = 1;
  private static final int VOWEL = 2;

  /** class of the previously emitted token: consonant = 1, vowel = 2, everything else = 0 */
  private int cv = 0;

  /** the input text as matched so far, unmodified */
  private final StringBuilder original = new StringBuilder();

  /** the normalized counterpart of {@link #original} */
  private final StringBuilder normalized = new StringBuilder();

  /** set to 1 once a problem character has been matched (rule 11) */
  private int problem = 0;

  /**
   * Records the currently matched text in {@link #original} and its
   * replacement in {@link #normalized}.
   *
   * @param norm the normalized form of the current match
   */
  private void add (String norm) {
    original.append(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
    normalized.append(norm);
  }

  /** Forgets everything accumulated by the rule actions. */
  private void resetNormalizerState() {
    cv = 0;
    problem = 0;
    original.setLength(0);
    normalized.setLength(0);
  }

  /** regular expression for a line break inside a word: hyphen or soft hyphen (U+00AD) plus a space */
  private static final String LB = "[\u002d\u00ad] ";

  /** {@link #LB} compiled once instead of on every returned token */
  private static final java.util.regex.Pattern LB_PATTERN =
      java.util.regex.Pattern.compile(LB);


  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  public MpdlNormalizerLexIT(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * <p>The bytes are decoded with the platform default charset (unchanged
   * generated behaviour). Since the rules match non-ASCII characters, prefer
   * the {@link #MpdlNormalizerLexIT(java.io.Reader)} constructor with a reader
   * created for an explicit charset.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public MpdlNormalizerLexIT(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed   the packed character translation table
   * @return         the unpacked character translation table
   */
  private static char [] zzUnpackCMap(String packed) {
    char [] map = new char[0x10000];
    int i = 0;  /* index in packed string  */
    int j = 0;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int  count = packed.charAt(i++);
      char value = packed.charAt(i++);
      do map[j++] = value; while (--count > 0);
    }
    return map;
  }


  /**
   * Refills the input buffer.
   *
   * @return      <code>false</code>, iff there was new input.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room (if you can) */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead-zzStartRead);

      /* translate stored positions */
      zzEndRead-= zzStartRead;
      zzCurrentPos-= zzStartRead;
      zzMarkedPos-= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
      /* if not: blow it up */
      char[] newBuffer = new char[zzCurrentPos*2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                            zzBuffer.length-zzEndRead);

    if (numRead > 0) {
      zzEndRead+= numRead;
      return false;
    }
    // unlikely but not impossible: read 0 characters, but not at end of stream
    if (numRead == 0) {
      int c = zzReader.read();
      if (c == -1) {
        return true;
      } else {
        zzBuffer[zzEndRead++] = (char) c;
        return false;
      }
    }

    // numRead < 0
    return true;
  }


  /**
   * Closes the input stream.
   *
   * @exception   java.io.IOException  if closing the reader fails
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;  /* invalidate buffer    */

    if (zzReader != null)
      zzReader.close();
  }


  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>YYINITIAL</tt>. The text accumulated by
   * the rule actions is discarded as well, so a reused scanner does not
   * prepend the previous input to its next result.
   *
   * @param reader   the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL  = true;
    zzAtEOF  = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;
    resetNormalizerState();
  }


  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }


  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }


  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  }


  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead+pos];
  }


  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos-zzStartRead;
  }


  /**
   * Reports an error that occurred while scanning.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * If this method is called, something is seriously wrong
   * (e.g. a JFlex bug producing a faulty scanner etc.).
   *
   * Usual syntax/scanner level error handling should be done
   * in error fallback rules.
   *
   * @param   errorCode  the code of the errormessage to display
   */
  private void zzScanError(int errorCode) {
    // Codes outside the message table fall back to the generic message.
    boolean known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    String message = ZZ_ERROR_MSG[known ? errorCode : ZZ_UNKNOWN_ERROR];

    // Error (not an exception type) is kept: existing callers may rely on it.
    throw new Error(message);
  }


  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  public void yypushback(int number)  {
    if ( number > yylength() )
      zzScanError(ZZ_PUSHBACK_2BIG);

    zzMarkedPos -= number;
  }


  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * <p>Most rules only accumulate text and keep scanning; a value is returned
   * only by the end-of-line rules (4, 13 and 24) or at end of input.
   *
   * @return      the text accumulated up to the end of the current line, or
   *              <code>null</code> at end of input
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  public java.lang.String yylex() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char [] zzBufferL = zzBuffer;
    char [] zzCMapL = ZZ_CMAP;

    int [] zzTransL = ZZ_TRANS;
    int [] zzRowMapL = ZZ_ROWMAP;
    int [] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      // Determine from the last character of the previous match whether the
      // next token starts at the beginning of a line.
      if (zzMarkedPosL > zzStartRead) {
        switch (zzBufferL[zzMarkedPosL-1]) {
        case '\n':
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          zzAtBOL = true;
          break;
        case '\r':
          // a '\r' only ends the line if it is not followed by '\n'
          if (zzMarkedPosL < zzEndReadL)
            zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
          else if (zzAtEOF)
            zzAtBOL = false;
          else {
            boolean eof = zzRefill();
            zzMarkedPosL = zzMarkedPos;
            zzEndReadL = zzEndRead;
            zzBufferL = zzBuffer;
            if (eof)
              zzAtBOL = false;
            else
              zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
          }
          break;
        default:
          zzAtBOL = false;
        }
      }
      zzAction = -1;

      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      if (zzAtBOL)
        zzState = ZZ_LEXSTATE[zzLexicalState+1];
      else
        zzState = ZZ_LEXSTATE[zzLexicalState];


      // Run the DFA for the longest match starting at zzStartRead.
      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL)
            zzInput = zzBufferL[zzCurrentPosL++];
          else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          }
          else {
            // store back cached positions
            zzCurrentPos  = zzCurrentPosL;
            zzMarkedPos   = zzMarkedPosL;
            boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL  = zzCurrentPos;
            zzMarkedPosL   = zzMarkedPos;
            zzBufferL      = zzBuffer;
            zzEndReadL     = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            else {
              zzInput = zzBufferL[zzCurrentPosL++];
            }
          }
          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
          if (zzNext == -1) break zzForAction;
          zzState = zzNext;

          int zzAttributes = zzAttrL[zzState];
          if ( (zzAttributes & 1) == 1 ) {
            // accepting state: remember it as the best match so far
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            // no outgoing transitions: the match cannot get any longer
            if ( (zzAttributes & 8) == 8 ) break zzForAction;
          }

        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      // Rule actions, ordered by rule number. The generated "case N: break;"
      // dummy labels are replaced by plain break statements.
      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 1:
          cv = 0; add(yytext());
          break;
        case 2:
          cv = VOWEL; add(yytext());
          break;
        case 3:
          cv = CONS; add(yytext());
          break;
        case 4:
          // end of line: normalized text, or the original if a problem was seen
          switch (problem) {
            case 1: return original.toString();
            default: return normalized.toString();
          }
        case 5:
          cv = CONS; add("s");
          break;
        case 6:
          cv = CONS; add("ss");
          break;
        case 7:
          cv = VOWEL; add("ae");
          break;
        case 8:
          cv = VOWEL; add("AE");
          break;
        case 9:
          cv = VOWEL; add("oe");
          break;
        case 10:
          cv = VOWEL; add("OE");
          break;
        case 11:
          // problem character: remember it and pass the text through
          problem = 1; cv = 0; add(yytext());
          break;
        case 12:
          // matched text is dropped from the normalized form
          add("");
          break;
        case 13:
          // end of line: empty string on problem, else the normalized text
          // with hyphenated line breaks removed
          switch (problem) {
            case 1: return "";
            default: return LB_PATTERN.matcher(normalized).replaceAll("");
          }
        case 14:
          add("\u00C1");  // capital A with acute (U+00C1)
          break;
        case 15:
          add("\u00C9");  // capital E with acute (U+00C9)
          break;
        case 16:
          add("\u00CD");  // capital I with acute (U+00CD)
          break;
        case 17:
          add("\u00D3");  // capital O with acute (U+00D3)
          break;
        case 18:
          add("\u00DA");  // capital U with acute (U+00DA)
          break;
        case 19:
          add("\u00E1");  // small a with acute (U+00E1)
          break;
        case 20:
          add("\u00E9");  // small e with acute (U+00E9)
          break;
        case 21:
          add("\u00ED");  // small i with acute (U+00ED)
          break;
        case 22:
          add("\u00F3");  // small o with acute (U+00F3)
          break;
        case 23:
          add("\u00FA");  // small u with acute (U+00FA)
          break;
        case 24:
          // end of line: original text on problem, else the normalized text
          // without hyphenated line breaks, lower-cased. Locale.ROOT keeps the
          // result independent of the platform locale (a Turkish default
          // locale would otherwise turn 'I' into a dotless i).
          switch (problem) {
            case 1: return original.toString();
            default: return LB_PATTERN.matcher(normalized).replaceAll("")
                                      .toLowerCase(java.util.Locale.ROOT);
          }
        case 25:
          // after a consonant, v/V is written as u/U
          switch(cv) {
            case CONS: add(yytext().replace("v", "u").replace("V", "U")); break;
            default: cv = CONS; add(yytext()); break;
          }
          break;
        case 26:
          add(yytext());
          break;
        case 27:
          cv = VOWEL; add("ii");
          break;
        case 28:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          // after a vowel, u/U is written as v/V
          switch(cv) {
            case VOWEL: add(yytext().replace("u", "v").replace("U", "V")); break;
            default: cv = VOWEL; add(yytext()); break;
          }
          break;
        case 29:
          cv = CONS; add("qu");
          break;
        case 30:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = CONS; add("u");
          break;
        case 31:
          cv = CONS; add("Qu");
          break;
        case 32:
          cv = CONS; add("QU");
          break;
        case 33:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = CONS; add("U");
          break;
        case 34:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = VOWEL; add(yytext().replace("u", "v").replace("U", "V"));
          break;
        case 35:
          cv = VOWEL; add("zio");
          break;
        case 36:
          cv = VOWEL; add("ZIO");
          break;
        case 37:
          // general lookahead, find correct zzMarkedPos
          {
            // forward pass: mark every position where the base expression can end
            int zzFState = 7;
            int zzFPos = zzStartRead;
            if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; }
            boolean[] zzFinL = zzFin;
            while (zzFState != -1 && zzFPos < zzMarkedPos) {
              if ((zzAttrL[zzFState] & 1) == 1) { zzFinL[zzFPos] = true; }
              zzInput = zzBufferL[zzFPos++];
              zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
            }
            if (zzFState != -1 && (zzAttrL[zzFState] & 1) == 1) { zzFinL[zzFPos] = true; }

            // backward pass: walk back from the end of the match until the
            // lookahead part and a marked end position coincide
            zzFState = 8;
            zzFPos = zzMarkedPos;
            while (!zzFinL[zzFPos] || (zzAttrL[zzFState] & 1) != 1) {
              zzInput = zzBufferL[--zzFPos];
              zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
            }
            zzMarkedPos = zzFPos;
          }
          // long s (U+017F) is written as a plain s
          cv = VOWEL; add(yytext().replace("\u017F", "s"));
          break;
        case 38:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add(yytext());
          break;
        case 39:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 3;
          add(yytext());
          break;
        case 40:
          // lookahead expression with fixed lookahead length
          yypushback(1);
          add(yytext());
          break;
        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          else {
            zzScanError(ZZ_NO_MATCH);
          }
      }
    }
  }


}