comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexIT.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children e845310098ba
comparison
equal deleted inserted replaced
18:dc5e9fcb3fdc 19:4a3641ae14d2
1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */
2
3 /*
4 * Normalization rules for Italian text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 2011-07-12
9 *
10 */
11
12 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang;
13
14
15 /**
16 * This class is a scanner generated by
17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
18 * on 21.07.11 11:22 from the specification file
19 * <tt>MpdlNormalizerLexIT.lex</tt>
20 */
21 public class MpdlNormalizerLexIT {
22
/**
 * This character denotes the end of file.
 * yylex() assigns it to its current input symbol when no more input is available.
 */
public static final int YYEOF = -1;

/** initial size of the lookahead buffer (also used to size the zzFin array) */
private static final int ZZ_BUFFERSIZE = 16384;

/**
 * lexical states
 * Each value is an even index into ZZ_LEXSTATE; the scanner starts in
 * YYINITIAL and is switched with yybegin(int).
 */
public static final int SEARCH = 6;
public static final int DICT = 4;
public static final int YYINITIAL = 0;
public static final int DISP = 2;

/**
 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
 * at the beginning of a line
 * l is of the form l = 2*k, k a non negative integer
 */
private static final int ZZ_LEXSTATE[] = {
  0, 0, 1, 2, 3, 4, 5, 6
};
44
/**
 * Translates characters to character classes.
 * Run-length encoded: the string is read as consecutive (count, class)
 * character pairs by zzUnpackCMap(String).
 */
private static final String ZZ_CMAP_PACKED =
  "\12\0\1\6\25\0\1\5\14\0\1\4\22\0\1\52\1\1\3\2"+
  "\1\1\3\2\1\41\1\0\1\2\1\3\2\2\1\42\1\2\1\50"+
  "\1\3\1\2\1\40\1\45\1\51\2\2\1\0\1\2\6\0\1\44"+
  "\3\2\1\12\2\2\1\43\1\7\1\36\1\2\1\3\1\2\1\10"+
  "\1\37\1\14\1\46\1\13\1\2\1\11\1\16\1\47\2\2\1\0"+
  "\1\2\62\0\1\4\22\0\1\17\5\0\1\33\1\0\1\20\3\0"+
  "\1\21\5\0\1\22\6\0\1\23\5\0\1\31\1\24\5\0\1\32"+
  "\1\0\1\25\3\0\1\26\5\0\1\27\6\0\1\30\37\0\1\1"+
  "\70\0\1\35\1\34\53\0\1\15\ufe80\0";

/**
 * Translates characters to character classes.
 * Unpacked form of ZZ_CMAP_PACKED, indexed by the input character in yylex().
 */
private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
63
/**
 * Translates DFA states to action switch labels.
 * yylex() looks up ZZ_ACTION[state] for the last accepting state and
 * switches on the result.
 */
private static final int [] ZZ_ACTION = zzUnpackAction();

/** Run-length encoded (count, value) pairs that zzUnpackAction() expands into ZZ_ACTION. */
private static final String ZZ_ACTION_PACKED_0 =
  "\11\0\1\1\1\2\2\3\1\1\1\4\1\2\1\3"+
  "\1\5\1\2\1\6\1\7\1\10\1\11\1\12\5\3"+
  "\1\13\1\2\1\3\1\5\1\2\1\14\1\15\1\16"+
  "\1\17\1\20\1\21\1\22\1\23\1\24\1\25\1\26"+
  "\1\27\1\30\4\0\1\31\1\32\1\33\1\0\1\34"+
  "\1\0\1\35\1\36\1\0\1\37\1\40\1\41\4\0"+
  "\1\42\6\0\1\43\1\44\4\0\1\45\1\0\1\46"+
  "\10\0\1\47\4\0\1\45\2\0\1\50";
78
79 private static int [] zzUnpackAction() {
80 int [] result = new int[100];
81 int offset = 0;
82 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
83 return result;
84 }
85
86 private static int zzUnpackAction(String packed, int offset, int [] result) {
87 int i = 0; /* index in packed string */
88 int j = offset; /* index in unpacked array */
89 int l = packed.length();
90 while (i < l) {
91 int count = packed.charAt(i++);
92 int value = packed.charAt(i++);
93 do result[j++] = value; while (--count > 0);
94 }
95 return j;
96 }
97
98
/**
 * Translates a state to a row index in the transition table.
 * yylex() adds the character class to ZZ_ROWMAP[state] to index ZZ_TRANS.
 */
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();

/**
 * Packed row map: each entry is stored as two characters, the high 16 bits
 * followed by the low 16 bits (see zzUnpackRowMap).
 */
private static final String ZZ_ROWMAP_PACKED_0 =
  "\0\0\0\53\0\126\0\201\0\254\0\327\0\u0102\0\u012d"+
  "\0\u0158\0\0\0\0\0\0\0\u0183\0\u01ae\0\0\0\u01d9"+
  "\0\u0204\0\0\0\u022f\0\0\0\0\0\0\0\0\0\0"+
  "\0\u025a\0\u0285\0\u02b0\0\u02db\0\u0306\0\0\0\u0331\0\u035c"+
  "\0\u0387\0\u03b2\0\u03dd\0\0\0\0\0\0\0\0\0\0"+
  "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u0408"+
  "\0\u0433\0\u045e\0\u0489\0\0\0\0\0\0\0\u04b4\0\0"+
  "\0\u04df\0\0\0\0\0\u050a\0\0\0\0\0\0\0\u0535"+
  "\0\u0560\0\u058b\0\u05b6\0\0\0\u05e1\0\u060c\0\u0637\0\u0662"+
  "\0\u068d\0\0\0\0\0\0\0\u06b8\0\u06e3\0\u070e\0\u035c"+
  "\0\u0739\0\u0764\0\0\0\u078f\0\u07ba\0\u07e5\0\0\0\u0810"+
  "\0\u083b\0\u0866\0\u0891\0\0\0\u08bc\0\u08e7\0\u0912\0\u093d"+
  "\0\0\0\u0968\0\u0993\0\0";
118
119 private static int [] zzUnpackRowMap() {
120 int [] result = new int[100];
121 int offset = 0;
122 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
123 return result;
124 }
125
126 private static int zzUnpackRowMap(String packed, int offset, int [] result) {
127 int i = 0; /* index in packed string */
128 int j = offset; /* index in unpacked array */
129 int l = packed.length();
130 while (i < l) {
131 int high = packed.charAt(i++) << 16;
132 result[j++] = high | packed.charAt(i++);
133 }
134 return j;
135 }
136
/**
 * The transition table of the DFA.
 * Indexed in yylex() by ZZ_ROWMAP[state] + character class; an entry of -1
 * means there is no transition.
 */
private static final int [] ZZ_TRANS = zzUnpackTrans();

/**
 * Run-length encoded (count, value) pairs for ZZ_TRANS. zzUnpackTrans
 * subtracts 1 from every value, so a packed 0 becomes -1 (no transition).
 */
private static final String ZZ_TRANS_PACKED_0 =
  "\53\0\1\12\1\13\1\14\1\15\1\16\1\12\1\17"+
  "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+
  "\5\12\2\13\1\12\2\13\1\24\1\25\1\26\1\27"+
  "\1\30\1\12\1\13\1\31\2\13\1\14\1\13\1\23"+
  "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+
  "\1\15\1\16\1\12\1\17\1\37\1\14\1\21\1\13"+
  "\1\15\1\40\1\41\1\42\5\12\2\13\1\12\2\13"+
  "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+
  "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+
  "\1\36\1\12\1\13\1\14\1\15\1\16\1\12\1\44"+
  "\1\20\1\14\1\21\1\13\1\15\1\14\1\22\1\23"+
  "\1\45\1\46\1\47\1\50\1\51\1\52\1\53\1\54"+
  "\1\55\1\56\1\24\1\25\1\26\1\27\1\30\1\12"+
  "\1\13\1\31\2\13\1\14\1\13\1\23\1\32\1\33"+
  "\1\34\1\35\1\36\1\12\1\13\1\14\1\15\1\16"+
  "\1\12\1\44\1\37\1\14\1\21\1\13\1\15\1\40"+
  "\1\41\1\42\1\45\1\46\1\47\1\50\1\51\1\52"+
  "\1\53\1\54\1\55\1\56\1\24\1\25\1\26\1\27"+
  "\1\30\1\12\1\13\1\31\2\13\1\43\1\13\1\42"+
  "\1\32\1\33\1\34\1\35\1\36\1\12\1\13\1\14"+
  "\1\15\1\16\1\12\1\57\1\20\1\14\1\21\1\13"+
  "\1\15\1\14\1\22\1\23\1\45\1\46\1\47\1\50"+
  "\1\51\1\52\1\53\1\54\1\55\1\56\1\24\1\25"+
  "\1\26\1\27\1\30\1\12\1\13\1\31\2\13\1\14"+
  "\1\13\1\23\1\32\1\33\1\34\1\35\1\36\1\12"+
  "\1\13\1\14\1\15\1\16\1\12\1\57\1\37\1\14"+
  "\1\21\1\13\1\15\1\40\1\41\1\42\1\45\1\46"+
  "\1\47\1\50\1\51\1\52\1\53\1\54\1\55\1\56"+
  "\1\24\1\25\1\26\1\27\1\30\1\12\1\13\1\31"+
  "\2\13\1\43\1\13\1\42\1\32\1\33\1\34\1\35"+
  "\1\36\7\0\1\60\4\0\1\61\1\62\42\0\1\63"+
  "\114\0\1\64\1\0\1\64\6\0\1\65\103\0\1\66"+
  "\23\0\1\67\44\0\1\70\5\0\1\70\2\0\1\70"+
  "\3\0\1\70\5\0\2\70\1\0\2\70\1\0\3\70"+
  "\2\0\1\70\1\0\2\70\1\0\2\70\46\0\1\71"+
  "\60\0\1\72\5\0\2\73\1\74\3\0\2\73\1\0"+
  "\3\73\13\0\1\73\6\0\1\73\2\0\1\73\2\0"+
  "\4\73\50\0\1\75\1\0\1\76\3\0\2\77\1\100"+
  "\3\0\2\77\1\0\3\77\13\0\1\77\6\0\1\77"+
  "\2\0\1\77\2\0\4\77\11\0\1\101\25\0\1\66"+
  "\26\0\1\102\52\0\1\102\3\0\1\103\35\0\1\104"+
  "\5\0\1\104\2\0\1\104\3\0\1\104\5\0\2\104"+
  "\1\0\2\104\1\0\3\104\2\0\1\104\1\0\2\104"+
  "\1\0\2\104\44\0\1\105\4\0\1\106\16\0\1\107"+
  "\54\0\1\110\52\0\1\110\3\0\1\111\40\0\1\112"+
  "\105\0\1\113\55\0\1\114\15\0\1\115\52\0\1\116"+
  "\51\0\1\117\4\0\1\120\54\0\1\121\43\0\1\122"+
  "\7\0\1\120\44\0\1\123\52\0\1\123\1\124\1\125"+
  "\46\0\1\126\4\0\1\61\54\0\1\127\43\0\1\130"+
  "\7\0\1\61\40\0\2\73\4\0\2\73\1\0\3\73"+
  "\13\0\1\73\6\0\1\73\2\0\1\73\2\0\4\73"+
  "\3\0\2\77\4\0\2\77\1\0\3\77\13\0\1\77"+
  "\6\0\1\77\2\0\1\77\2\0\4\77\6\0\1\131"+
  "\51\0\1\132\53\0\1\133\53\0\1\134\50\0\1\135"+
  "\3\0\1\136\47\0\1\137\52\0\1\140\56\0\1\120"+
  "\46\0\1\141\61\0\1\120\43\0\1\142\104\0\1\143"+
  "\24\0\1\61\55\0\1\61\46\0\1\136\50\0\1\144"+
  "\44\0";
201
202 private static int [] zzUnpackTrans() {
203 int [] result = new int[2494];
204 int offset = 0;
205 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
206 return result;
207 }
208
209 private static int zzUnpackTrans(String packed, int offset, int [] result) {
210 int i = 0; /* index in packed string */
211 int j = offset; /* index in unpacked array */
212 int l = packed.length();
213 while (i < l) {
214 int count = packed.charAt(i++);
215 int value = packed.charAt(i++);
216 value--;
217 do result[j++] = value; while (--count > 0);
218 }
219 return j;
220 }
221
222
/* error codes (indices into ZZ_ERROR_MSG, passed to zzScanError) */
private static final int ZZ_UNKNOWN_ERROR = 0;
private static final int ZZ_NO_MATCH = 1;
private static final int ZZ_PUSHBACK_2BIG = 2;

/* error messages for the codes above; zzScanError throws an Error carrying one of these */
private static final String ZZ_ERROR_MSG[] = {
  "Unkown internal scanner error",
  "Error: could not match input",
  "Error: pushback value was too large"
};
234
/**
 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>.
 * yylex() tests two bits: bit value 1 marks a state whose position is
 * recorded as the latest match, and bit value 8 (checked only when bit 1 is
 * set) stops the scan loop at once.
 */
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

/** Run-length encoded (count, value) pairs that zzUnpackAttribute() expands into ZZ_ATTRIBUTE. */
private static final String ZZ_ATTRIBUTE_PACKED_0 =
  "\1\10\7\0\1\1\3\11\2\1\1\11\2\1\1\11"+
  "\1\1\5\11\5\1\1\11\5\1\14\11\4\0\3\11"+
  "\1\0\1\11\1\0\2\11\1\0\3\11\4\0\1\11"+
  "\5\0\3\11\4\0\1\1\1\0\1\11\3\0\1\11"+
  "\4\0\1\11\4\0\1\11\2\0\1\11";
246
247 private static int [] zzUnpackAttribute() {
248 int [] result = new int[100];
249 int offset = 0;
250 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
251 return result;
252 }
253
254 private static int zzUnpackAttribute(String packed, int offset, int [] result) {
255 int i = 0; /* index in packed string */
256 int j = offset; /* index in unpacked array */
257 int l = packed.length();
258 while (i < l) {
259 int count = packed.charAt(i++);
260 int value = packed.charAt(i++);
261 do result[j++] = value; while (--count > 0);
262 }
263 return j;
264 }
265
/** the input device */
private java.io.Reader zzReader;

/** the current state of the DFA */
private int zzState;

/** the current lexical state (changed with yybegin, read with yystate) */
private int zzLexicalState = YYINITIAL;

/** this buffer contains the current text to be matched and is
    the source of the yytext() string; zzRefill() may replace it with a larger array */
private char zzBuffer[] = new char[ZZ_BUFFERSIZE];

/** the textposition at the last accepting state */
private int zzMarkedPos;

/** the current text position in the buffer */
private int zzCurrentPos;

/** startRead marks the beginning of the yytext() string in the buffer */
private int zzStartRead;

/** endRead marks the last character in the buffer, that has been read
    from input */
private int zzEndRead;

/** number of newlines encountered up to the start of the matched text */
private int yyline;

/** the number of characters up to the start of the matched text */
private int yychar;

/**
 * the number of characters from the last newline up to the start of the
 * matched text
 */
private int yycolumn;

/**
 * zzAtBOL == true <=> the scanner is currently at the beginning of a line
 */
private boolean zzAtBOL = true;

/** zzAtEOF == true <=> the scanner is at the EOF */
private boolean zzAtEOF;

/** denotes if the user-EOF-code has already been executed */
private boolean zzEOFDone;

/** For the backwards DFA of general lookahead statements
    (yylex() reallocates it when the buffer has grown) */
private boolean [] zzFin = new boolean [ZZ_BUFFERSIZE+1];
317
/* user code: */
// values stored in cv by the scanner actions
private static final int CONS = 1;
private static final int VOWEL = 2;
private int cv = 0; // consonant = 1, vowel = 2, everything else = 0

// text matched so far, exactly as read (appended to by add)
private String original = "";
// replacement text accumulated so far (appended to by add)
private String normalized = "";
// set to 1 by one scanner action; the returning actions then hand back
// original (or "" in one action) instead of normalized
private int problem = 0;
326
327 private void add (String norm) {
328 original += yytext();
329 normalized += norm;
330 }
331
// Regular expression: a hyphen-minus (U+002D) or soft hyphen (U+00AD) followed by one space.
// Two yylex() actions strip every match of it from the normalized text.
private static final String LB = "[\u002d\u00ad] ";
333
334
/**
 * Creates a new scanner that reads its input from a character stream.
 * A java.io.InputStream variant of this constructor also exists.
 *
 * @param in the java.io.Reader to read input from.
 */
public MpdlNormalizerLexIT(java.io.Reader in) {
  zzReader = in;
}
344
/**
 * Creates a new scanner that reads its input from a byte stream.
 * There is also java.io.Reader version of this constructor.
 *
 * The bytes are decoded as UTF-8. The previous implementation used the
 * platform default charset, so the same input could be scanned differently
 * on different machines. Callers whose input uses another encoding should
 * build their own Reader and use the Reader constructor.
 *
 * @param in the java.io.Inputstream to read input from.
 */
public MpdlNormalizerLexIT(java.io.InputStream in) {
  this(new java.io.InputStreamReader(in, java.nio.charset.Charset.forName("UTF-8")));
}
354
355 /**
356 * Unpacks the compressed character translation table.
357 *
358 * @param packed the packed character translation table
359 * @return the unpacked character translation table
360 */
361 private static char [] zzUnpackCMap(String packed) {
362 char [] map = new char[0x10000];
363 int i = 0; /* index in packed string */
364 int j = 0; /* index in unpacked array */
365 while (i < 172) {
366 int count = packed.charAt(i++);
367 char value = packed.charAt(i++);
368 do map[j++] = value; while (--count > 0);
369 }
370 return map;
371 }
372
373
374 /**
375 * Refills the input buffer.
376 *
377 * @return <code>false</code>, iff there was new input.
378 *
379 * @exception java.io.IOException if any I/O-Error occurs
380 */
381 private boolean zzRefill() throws java.io.IOException {
382
383 /* first: make room (if you can) */
384 if (zzStartRead > 0) {
385 System.arraycopy(zzBuffer, zzStartRead,
386 zzBuffer, 0,
387 zzEndRead-zzStartRead);
388
389 /* translate stored positions */
390 zzEndRead-= zzStartRead;
391 zzCurrentPos-= zzStartRead;
392 zzMarkedPos-= zzStartRead;
393 zzStartRead = 0;
394 }
395
396 /* is the buffer big enough? */
397 if (zzCurrentPos >= zzBuffer.length) {
398 /* if not: blow it up */
399 char newBuffer[] = new char[zzCurrentPos*2];
400 System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
401 zzBuffer = newBuffer;
402 }
403
404 /* finally: fill the buffer with new input */
405 int numRead = zzReader.read(zzBuffer, zzEndRead,
406 zzBuffer.length-zzEndRead);
407
408 if (numRead > 0) {
409 zzEndRead+= numRead;
410 return false;
411 }
412 // unlikely but not impossible: read 0 characters, but not at end of stream
413 if (numRead == 0) {
414 int c = zzReader.read();
415 if (c == -1) {
416 return true;
417 } else {
418 zzBuffer[zzEndRead++] = (char) c;
419 return false;
420 }
421 }
422
423 // numRead < 0
424 return true;
425 }
426
427
428 /**
429 * Closes the input stream.
430 */
431 public final void yyclose() throws java.io.IOException {
432 zzAtEOF = true; /* indicate end of file */
433 zzEndRead = zzStartRead; /* invalidate buffer */
434
435 if (zzReader != null)
436 zzReader.close();
437 }
438
439
440 /**
441 * Resets the scanner to read from a new input stream.
442 * Does not close the old reader.
443 *
444 * All internal variables are reset, the old input stream
445 * <b>cannot</b> be reused (internal buffer is discarded and lost).
446 * Lexical state is set to <tt>ZZ_INITIAL</tt>.
447 *
448 * @param reader the new input stream
449 */
450 public final void yyreset(java.io.Reader reader) {
451 zzReader = reader;
452 zzAtBOL = true;
453 zzAtEOF = false;
454 zzEOFDone = false;
455 zzEndRead = zzStartRead = 0;
456 zzCurrentPos = zzMarkedPos = 0;
457 yyline = yychar = yycolumn = 0;
458 zzLexicalState = YYINITIAL;
459 }
460
461
462 /**
463 * Returns the current lexical state.
464 */
465 public final int yystate() {
466 return zzLexicalState;
467 }
468
469
470 /**
471 * Enters a new lexical state
472 *
473 * @param newState the new lexical state
474 */
475 public final void yybegin(int newState) {
476 zzLexicalState = newState;
477 }
478
479
480 /**
481 * Returns the text matched by the current regular expression.
482 */
483 public final String yytext() {
484 return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
485 }
486
487
488 /**
489 * Returns the character at position <tt>pos</tt> from the
490 * matched text.
491 *
492 * It is equivalent to yytext().charAt(pos), but faster
493 *
494 * @param pos the position of the character to fetch.
495 * A value from 0 to yylength()-1.
496 *
497 * @return the character at position pos
498 */
499 public final char yycharat(int pos) {
500 return zzBuffer[zzStartRead+pos];
501 }
502
503
504 /**
505 * Returns the length of the matched text region.
506 */
507 public final int yylength() {
508 return zzMarkedPos-zzStartRead;
509 }
510
511
512 /**
513 * Reports an error that occured while scanning.
514 *
515 * In a wellformed scanner (no or only correct usage of
516 * yypushback(int) and a match-all fallback rule) this method
517 * will only be called with things that "Can't Possibly Happen".
518 * If this method is called, something is seriously wrong
519 * (e.g. a JFlex bug producing a faulty scanner etc.).
520 *
521 * Usual syntax/scanner level error handling should be done
522 * in error fallback rules.
523 *
524 * @param errorCode the code of the errormessage to display
525 */
526 private void zzScanError(int errorCode) {
527 String message;
528 try {
529 message = ZZ_ERROR_MSG[errorCode];
530 }
531 catch (ArrayIndexOutOfBoundsException e) {
532 message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
533 }
534
535 throw new Error(message);
536 }
537
538
539 /**
540 * Pushes the specified amount of characters back into the input stream.
541 *
542 * They will be read again by then next call of the scanning method
543 *
544 * @param number the number of characters to be read again.
545 * This number must not be greater than yylength()!
546 */
547 public void yypushback(int number) {
548 if ( number > yylength() )
549 zzScanError(ZZ_PUSHBACK_2BIG);
550
551 zzMarkedPos -= number;
552 }
553
554
555 /**
556 * Resumes scanning until the next regular expression is matched,
557 * the end of input is encountered or an I/O-Error occurs.
558 *
559 * @return the next token
560 * @exception java.io.IOException if any I/O-Error occurs
561 */
562 public java.lang.String yylex() throws java.io.IOException {
563 int zzInput;
564 int zzAction;
565
566 // cached fields:
567 int zzCurrentPosL;
568 int zzMarkedPosL;
569 int zzEndReadL = zzEndRead;
570 char [] zzBufferL = zzBuffer;
571 char [] zzCMapL = ZZ_CMAP;
572
573 int [] zzTransL = ZZ_TRANS;
574 int [] zzRowMapL = ZZ_ROWMAP;
575 int [] zzAttrL = ZZ_ATTRIBUTE;
576
577 while (true) {
578 zzMarkedPosL = zzMarkedPos;
579
580 if (zzMarkedPosL > zzStartRead) {
581 switch (zzBufferL[zzMarkedPosL-1]) {
582 case '\n':
583 case '\u000B':
584 case '\u000C':
585 case '\u0085':
586 case '\u2028':
587 case '\u2029':
588 zzAtBOL = true;
589 break;
590 case '\r':
591 if (zzMarkedPosL < zzEndReadL)
592 zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
593 else if (zzAtEOF)
594 zzAtBOL = false;
595 else {
596 boolean eof = zzRefill();
597 zzMarkedPosL = zzMarkedPos;
598 zzEndReadL = zzEndRead;
599 zzBufferL = zzBuffer;
600 if (eof)
601 zzAtBOL = false;
602 else
603 zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
604 }
605 break;
606 default:
607 zzAtBOL = false;
608 }
609 }
610 zzAction = -1;
611
612 zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
613
614 if (zzAtBOL)
615 zzState = ZZ_LEXSTATE[zzLexicalState+1];
616 else
617 zzState = ZZ_LEXSTATE[zzLexicalState];
618
619
620 zzForAction: {
621 while (true) {
622
623 if (zzCurrentPosL < zzEndReadL)
624 zzInput = zzBufferL[zzCurrentPosL++];
625 else if (zzAtEOF) {
626 zzInput = YYEOF;
627 break zzForAction;
628 }
629 else {
630 // store back cached positions
631 zzCurrentPos = zzCurrentPosL;
632 zzMarkedPos = zzMarkedPosL;
633 boolean eof = zzRefill();
634 // get translated positions and possibly new buffer
635 zzCurrentPosL = zzCurrentPos;
636 zzMarkedPosL = zzMarkedPos;
637 zzBufferL = zzBuffer;
638 zzEndReadL = zzEndRead;
639 if (eof) {
640 zzInput = YYEOF;
641 break zzForAction;
642 }
643 else {
644 zzInput = zzBufferL[zzCurrentPosL++];
645 }
646 }
647 int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
648 if (zzNext == -1) break zzForAction;
649 zzState = zzNext;
650
651 int zzAttributes = zzAttrL[zzState];
652 if ( (zzAttributes & 1) == 1 ) {
653 zzAction = zzState;
654 zzMarkedPosL = zzCurrentPosL;
655 if ( (zzAttributes & 8) == 8 ) break zzForAction;
656 }
657
658 }
659 }
660
661 // store back cached position
662 zzMarkedPos = zzMarkedPosL;
663
664 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
665 case 33:
666 // lookahead expression with fixed base length
667 zzMarkedPos = zzStartRead + 1;
668 { cv = CONS; add("U");
669 }
670 case 41: break;
671 case 14:
672 { add("Á");
673 }
674 case 42: break;
675 case 40:
676 // lookahead expression with fixed lookahead length
677 yypushback(1);
678 { add(yytext());
679 }
680 case 43: break;
681 case 39:
682 // lookahead expression with fixed base length
683 zzMarkedPos = zzStartRead + 3;
684 { add(yytext());
685 }
686 case 44: break;
687 case 38:
688 // lookahead expression with fixed base length
689 zzMarkedPos = zzStartRead + 2;
690 { add(yytext());
691 }
692 case 45: break;
693 case 26:
694 { add(yytext());
695 }
696 case 46: break;
697 case 21:
698 { add("í");
699 }
700 case 47: break;
701 case 8:
702 { cv = VOWEL; add("AE");
703 }
704 case 48: break;
705 case 11:
706 { problem = 1; cv = 0; add(yytext());
707 }
708 case 49: break;
709 case 4:
710 { switch (problem) {
711 case 1: return original;
712 default: return normalized;
713 }
714 }
715 case 50: break;
716 case 30:
717 // lookahead expression with fixed base length
718 zzMarkedPos = zzStartRead + 1;
719 { cv = CONS; add("u");
720 }
721 case 51: break;
722 case 19:
723 { add("á");
724 }
725 case 52: break;
726 case 1:
727 { cv = 0; add(yytext());
728 }
729 case 53: break;
730 case 24:
731 { switch (problem) {
732 case 1: return original;
733 default: return normalized.replaceAll(LB, "").toLowerCase();
734 }
735 }
736 case 54: break;
737 case 34:
738 // lookahead expression with fixed base length
739 zzMarkedPos = zzStartRead + 1;
740 { cv = VOWEL; add(yytext().replaceAll("u", "v").replaceAll("U", "V"));
741 }
742 case 55: break;
743 case 35:
744 { cv = VOWEL; add("zio");
745 }
746 case 56: break;
747 case 10:
748 { cv = VOWEL; add("OE");
749 }
750 case 57: break;
751 case 18:
752 { add("Ú");
753 }
754 case 58: break;
755 case 37:
756 // general lookahead, find correct zzMarkedPos
757 { int zzFState = 7;
758 int zzFPos = zzStartRead;
759 if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; }
760 boolean zzFinL[] = zzFin;
761 while (zzFState != -1 && zzFPos < zzMarkedPos) {
762 if ((zzAttrL[zzFState] & 1) == 1) { zzFinL[zzFPos] = true; }
763 zzInput = zzBufferL[zzFPos++];
764 zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
765 }
766 if (zzFState != -1 && (zzAttrL[zzFState] & 1) == 1) { zzFinL[zzFPos] = true; }
767
768 zzFState = 8;
769 zzFPos = zzMarkedPos;
770 while (!zzFinL[zzFPos] || (zzAttrL[zzFState] & 1) != 1) {
771 zzInput = zzBufferL[--zzFPos];
772 zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
773 };
774 zzMarkedPos = zzFPos;
775 }
776 { cv = VOWEL; add(yytext().replace("ſ", "s"));
777 }
778 case 59: break;
779 case 3:
780 { cv = CONS; add(yytext());
781 }
782 case 60: break;
783 case 32:
784 { cv = CONS; add("QU");
785 }
786 case 61: break;
787 case 15:
788 { add("É");
789 }
790 case 62: break;
791 case 28:
792 // lookahead expression with fixed base length
793 zzMarkedPos = zzStartRead + 1;
794 { switch(cv) {
795 case VOWEL: add(yytext().replace("u", "v").replace("U", "V")); break;
796 default: cv = VOWEL; add(yytext()); break;
797 }
798 }
799 case 63: break;
800 case 6:
801 { cv = CONS; add("ss");
802 }
803 case 64: break;
804 case 5:
805 { cv = CONS; add("s");
806 }
807 case 65: break;
808 case 13:
809 { switch (problem) {
810 case 1: return "";
811 default: return normalized.replaceAll(LB, "");
812 }
813 }
814 case 66: break;
815 case 36:
816 { cv = VOWEL; add("ZIO");
817 }
818 case 67: break;
819 case 2:
820 { cv = VOWEL; add(yytext());
821 }
822 case 68: break;
823 case 17:
824 { add("Ó");
825 }
826 case 69: break;
827 case 23:
828 { add("ú");
829 }
830 case 70: break;
831 case 31:
832 { cv = CONS; add("Qu");
833 }
834 case 71: break;
835 case 20:
836 { add("é");
837 }
838 case 72: break;
839 case 7:
840 { cv = VOWEL; add("ae");
841 }
842 case 73: break;
843 case 12:
844 { add("");
845 }
846 case 74: break;
847 case 22:
848 { add("ó");
849 }
850 case 75: break;
851 case 9:
852 { cv = VOWEL; add("oe");
853 }
854 case 76: break;
855 case 29:
856 { cv = CONS; add("qu");
857 }
858 case 77: break;
859 case 25:
860 { switch(cv) {
861 case CONS: add(yytext().replace("v", "u").replace("V", "U")); break;
862 default: cv = CONS; add(yytext()); break;
863 }
864 }
865 case 78: break;
866 case 27:
867 { cv = VOWEL; add("ii");
868 }
869 case 79: break;
870 case 16:
871 { add("Í");
872 }
873 case 80: break;
874 default:
875 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
876 zzAtEOF = true;
877 return null;
878 }
879 else {
880 zzScanError(ZZ_NO_MATCH);
881 }
882 }
883 }
884 }
885
886
887 }