comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexLA.java @ 9:1ec29fdd0db8

neue .lex Dateien für Normalisierung / externe Objekte update
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 22 Feb 2011 16:03:45 +0100
parents
children 5df60f24e997
comparison
equal deleted inserted replaced
8:d2a1c14fde31 9:1ec29fdd0db8
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:04 */
2
3 /*
4 * Normalization rules for Latin text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 0.96
9 * 2011-02-21
10 *
11 */
12
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14
15
/**
 * Normalizer for Latin word forms, implemented as a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 on 22.02.11 12:04 from the
 * specification file <tt>MpdlNormalizerLexLA.lex</tt>.
 *
 * <p>Usage as far as this class shows: create a scanner on the text, select a
 * mode with {@link #yybegin(int)} (one of {@link #DISP}, {@link #DICT},
 * {@link #SEARCH} or their RENAISSANCE variants) and call {@link #yylex()}.
 * The scanner consumes the input token by token, accumulating both the
 * original and the normalized spelling. When a returning rule fires (for
 * example on a line feed) it returns the accumulated normalized text; if an
 * unknown character was seen, the DISP/SEARCH modes return the original text
 * instead and the DICT modes return the empty string. At the end of the
 * input {@code yylex()} returns {@code null}.
 *
 * <p>NOTE: the following parts deviate by hand from the raw generator output
 * and will be lost when the file is regenerated:
 * <ul>
 *   <li>the general-lookahead action rewrites every entry of {@code zzFin}
 *       for the current match instead of only setting entries to
 *       {@code true};</li>
 *   <li>{@link #yyreset(java.io.Reader)} also clears the user-code state;</li>
 *   <li>the {@code InputStream} constructor decodes as UTF-8;</li>
 *   <li>the accumulated texts are kept in {@code StringBuilder}s.</li>
 * </ul>
 *
 * <p>Instances are stateful and not safe for concurrent use.
 */
public class MpdlNormalizerLexLA {

  /** This character denotes the end of file */
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
  private static final int ZZ_BUFFERSIZE = 16384;

  /** lexical states */
  public static final int RENAISSANCE_DICT = 10;
  public static final int RENAISSANCE_DISP = 8;
  public static final int SEARCH = 6;
  public static final int DICT = 4;
  public static final int YYINITIAL = 0;
  public static final int RENAISSANCE_SEARCH = 12;
  public static final int DISP = 2;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   *                  at the beginning of a line
   * l is of the form l = 2*k, k a non negative integer
   */
  private static final int[] ZZ_LEXSTATE = {
     0,  0,  1,  2,  3,  4,  1,  2,  1,  2,  3,  4,  1, 2
  };

  /**
   * Translates characters to character classes (run-length encoded as
   * pairs of count and class number).
   */
  private static final String ZZ_CMAP_PACKED =
    "\12\0\1\5\42\0\1\4\23\0\1\1\3\2\1\1\2\2\1\52"+
    "\1\1\1\0\1\2\1\3\2\2\1\1\1\2\1\45\1\3\2\2"+
    "\1\63\1\64\2\2\1\0\1\2\6\0\1\56\1\2\1\46\1\42"+
    "\1\10\2\2\1\50\1\13\1\26\1\2\1\47\1\37\1\12\1\60"+
    "\1\16\1\6\1\15\1\31\1\14\1\7\1\11\2\2\1\0\1\2"+
    "\62\0\1\4\30\0\1\24\30\0\1\22\1\36\1\30\1\54\3\0"+
    "\1\23\1\0\1\40\1\32\1\0\1\57\1\44\1\33\1\51\1\61"+
    "\2\0\1\41\1\34\1\53\4\0\1\43\1\35\1\55\1\62\34\0"+
    "\1\23\71\0\1\25\53\0\1\17\u0181\0\1\27\ud4fe\0\1\20\u0590\0"+
    "\1\21\u226e\0";

  /**
   * Translates characters to character classes
   */
  private static final char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);

  /**
   * Translates DFA states to action switch labels.
   */
  private static final int[] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ACTION_PACKED_0 =
    "\10\0\1\1\1\2\2\3\1\4\1\5\1\3\1\2"+
    "\1\3\1\2\1\6\1\1\1\7\1\10\1\11\1\12"+
    "\11\1\1\3\2\1\3\2\2\3\2\2\1\3\1\6"+
    "\3\3\1\1\1\2\1\13\4\0\1\14\1\15\1\16"+
    "\1\0\1\17\1\20\1\21\1\22\1\0\1\23\20\0"+
    "\1\24\3\0\1\25\3\0\1\26\1\0\1\27\3\0"+
    "\1\30\1\31\1\32\1\0\1\33\1\34\2\0\1\35"+
    "\16\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
    "\1\42\1\43\1\44\1\0\1\45\1\0\1\46\1\0"+
    "\1\47\1\0\1\50\3\0\1\51\10\0\1\52\6\0"+
    "\1\53\1\51\1\54\1\55\1\56\1\57\5\0";

  private static int[] zzUnpackAction() {
    int[] result = new int[166];
    int offset = 0;
    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   *
   * @param packed the packed table
   * @param offset the index in <code>result</code> to start writing at
   * @param result the array receiving the unpacked values
   * @return the index following the last value written
   */
  private static int zzUnpackAction(String packed, int offset, int[] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }


  /**
   * Translates a state to a row index in the transition table
   */
  private static final int[] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\65\0\152\0\237\0\324\0\u0109\0\u013e\0\u0173"+
    "\0\u01a8\0\u01a8\0\u01a8\0\u01dd\0\u01a8\0\u01a8\0\u0212\0\u0247"+
    "\0\u027c\0\u02b1\0\u01a8\0\u0173\0\u01a8\0\u01a8\0\u01a8\0\u01a8"+
    "\0\u02e6\0\u031b\0\u0350\0\u0385\0\u03ba\0\u03ef\0\u0424\0\u0459"+
    "\0\u048e\0\u04c3\0\u04f8\0\u052d\0\u0562\0\u0597\0\u05cc\0\u0601"+
    "\0\u0636\0\u066b\0\u06a0\0\u06d5\0\u070a\0\u073f\0\u0774\0\u07a9"+
    "\0\u07de\0\u0813\0\u01a8\0\u0848\0\u087d\0\u08b2\0\u01a8\0\u01a8"+
    "\0\u01a8\0\u01a8\0\u08e7\0\u01a8\0\u01a8\0\u01a8\0\u01a8\0\u091c"+
    "\0\u01a8\0\u0951\0\u0986\0\u09bb\0\u09f0\0\u0a25\0\u0a5a\0\u0a8f"+
    "\0\u0ac4\0\u0af9\0\u0b2e\0\u0b63\0\u0b98\0\u0bcd\0\u0c02\0\u0c37"+
    "\0\u0c6c\0\u01a8\0\u0ca1\0\u0cd6\0\u0d0b\0\u01a8\0\u0d40\0\u0d75"+
    "\0\u0daa\0\u01a8\0\u0ddf\0\u01a8\0\u0e14\0\u0e49\0\u0e7e\0\u01a8"+
    "\0\u01a8\0\u01a8\0\u0eb3\0\u01a8\0\u01a8\0\u0ee8\0\u0f1d\0\u01a8"+
    "\0\u0f52\0\u0f87\0\u0fbc\0\u0ff1\0\u1026\0\u105b\0\u1090\0\u10c5"+
    "\0\u10fa\0\u112f\0\u1164\0\u1199\0\u11ce\0\u07de\0\u01a8\0\u1203"+
    "\0\u01a8\0\u1238\0\u01a8\0\u126d\0\u01a8\0\u01a8\0\u01a8\0\u01a8"+
    "\0\u12a2\0\u01a8\0\u12d7\0\u01a8\0\u130c\0\u01a8\0\u1341\0\u01a8"+
    "\0\u1376\0\u13ab\0\u06d5\0\u13e0\0\u1415\0\u144a\0\u147f\0\u14b4"+
    "\0\u14e9\0\u01a8\0\u151e\0\u1553\0\u01a8\0\u1588\0\u15bd\0\u15f2"+
    "\0\u1627\0\u165c\0\u1691\0\u01a8\0\u01a8\0\u01a8\0\u01a8\0\u01a8"+
    "\0\u01a8\0\u16c6\0\u16fb\0\u1730\0\u1765\0\u179a";

  private static int[] zzUnpackRowMap() {
    int[] result = new int[166];
    int offset = 0;
    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands the packed row map: every entry is stored as two chars, the
   * high and the low 16 bits of the row offset.
   *
   * @param packed the packed table
   * @param offset the index in <code>result</code> to start writing at
   * @param result the array receiving the unpacked values
   * @return the index following the last value written
   */
  private static int zzUnpackRowMap(String packed, int offset, int[] result) {
    int i = 0;  /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int high = packed.charAt(i++) << 16;
      result[j++] = high | packed.charAt(i++);
    }
    return j;
  }

  /**
   * The transition table of the DFA
   */
  private static final int[] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_TRANS_PACKED_0 =
    "\20\0\1\10\44\0\1\11\1\12\1\13\1\14\1\15"+
    "\1\16\1\17\1\20\1\12\1\21\1\13\1\22\1\13"+
    "\1\14\1\13\1\23\1\24\1\11\1\25\1\26\1\27"+
    "\1\30\2\11\1\31\1\13\1\32\1\33\1\34\1\35"+
    "\1\36\1\13\1\37\1\40\1\13\1\41\1\11\1\42"+
    "\1\13\1\14\1\13\1\11\1\13\1\11\1\43\1\44"+
    "\1\45\1\11\1\46\2\11\1\47\1\50\1\11\1\12"+
    "\1\13\1\14\1\15\1\16\1\51\1\52\1\12\1\21"+
    "\1\13\1\53\1\13\1\14\1\54\1\55\1\24\1\11"+
    "\1\25\1\26\1\27\1\30\2\11\1\31\1\13\1\32"+
    "\1\33\1\34\1\35\1\36\1\13\1\37\1\40\1\13"+
    "\1\41\1\11\1\56\1\13\1\14\1\57\1\11\1\60"+
    "\1\61\1\43\1\44\1\45\1\11\1\46\2\11\1\62"+
    "\1\50\1\11\1\12\1\13\1\14\1\15\1\63\1\17"+
    "\1\20\1\12\1\21\1\13\1\22\1\13\1\14\1\13"+
    "\1\23\1\24\1\11\1\25\1\26\1\27\1\30\2\11"+
    "\1\31\1\13\1\32\1\33\1\34\1\35\1\36\1\13"+
    "\1\37\1\40\1\13\1\41\1\11\1\42\1\13\1\14"+
    "\1\13\1\11\1\13\1\11\1\43\1\44\1\45\1\11"+
    "\1\46\2\11\1\47\1\50\1\11\1\12\1\13\1\14"+
    "\1\15\1\63\1\51\1\52\1\12\1\21\1\13\1\53"+
    "\1\13\1\14\1\54\1\55\1\24\1\11\1\25\1\26"+
    "\1\27\1\30\2\11\1\31\1\13\1\32\1\33\1\34"+
    "\1\35\1\36\1\13\1\37\1\40\1\13\1\41\1\11"+
    "\1\56\1\13\1\14\1\57\1\11\1\60\1\61\1\43"+
    "\1\44\1\45\1\11\1\46\2\11\1\62\1\50\13\0"+
    "\1\64\2\0\1\65\1\66\51\0\1\67\101\0\1\70"+
    "\141\0\1\71\52\0\1\71\11\0\1\72\15\0\1\73"+
    "\36\0\1\74\5\0\2\74\2\0\1\74\42\0\1\74"+
    "\1\0\1\74\1\75\1\76\1\74\3\0\2\77\1\100"+
    "\1\0\1\77\2\0\2\77\1\0\4\77\2\0\1\77"+
    "\6\0\1\77\5\0\1\77\2\0\1\77\2\0\4\77"+
    "\1\0\1\77\11\0\1\77\26\0\1\101\44\0\1\102"+
    "\2\0\2\103\1\0\2\104\13\0\1\104\5\0\1\104"+
    "\33\0\1\105\2\0\2\106\1\0\2\107\13\0\1\107"+
    "\5\0\1\107\33\0\1\110\2\0\2\111\1\0\2\112"+
    "\13\0\1\112\5\0\1\112\33\0\1\113\2\0\2\114"+
    "\1\0\2\115\13\0\1\115\5\0\1\115\33\0\1\116"+
    "\1\0\1\117\2\120\1\0\2\121\13\0\1\121\5\0"+
    "\1\121\32\0\1\122\1\102\22\0\1\123\5\0\1\124"+
    "\6\0\1\125\23\0\1\126\1\105\5\0\1\127\1\130"+
    "\13\0\1\131\40\0\1\132\1\113\33\0\1\133\27\0"+
    "\1\134\23\0\1\135\5\0\1\136\7\0\1\137\26\0"+
    "\1\140\52\0\1\141\5\0\1\122\1\102\6\0\1\142"+
    "\100\0\1\143\112\0\1\26\64\0\1\30\1\0\1\144"+
    "\4\0\1\74\5\0\2\74\2\0\1\74\42\0\1\74"+
    "\1\0\1\74\2\0\1\74\3\0\2\145\1\146\1\0"+
    "\1\145\2\0\2\145\1\0\4\145\2\0\1\145\6\0"+
    "\1\145\5\0\1\145\2\0\1\145\2\0\4\145\1\0"+
    "\1\145\11\0\1\145\7\0\1\147\1\0\1\72\15\0"+
    "\1\73\36\0\1\150\5\0\2\150\2\0\1\150\42\0"+
    "\1\150\1\0\1\150\1\75\1\76\1\150\13\0\1\151"+
    "\13\0\1\101\46\0\1\152\63\0\1\153\1\152\63\0"+
    "\1\154\1\0\1\140\52\0\1\141\51\0\1\155\64\0"+
    "\1\156\20\0\1\132\60\0\1\150\5\0\2\150\2\0"+
    "\1\150\42\0\1\150\1\0\1\150\2\0\1\150\13\0"+
    "\1\157\62\0\1\160\63\0\1\161\1\160\63\0\1\162"+
    "\57\0\2\77\2\0\1\77\2\0\2\77\1\0\4\77"+
    "\2\0\1\77\6\0\1\77\5\0\1\77\2\0\1\77"+
    "\2\0\4\77\1\0\1\77\11\0\1\77\7\0\1\103"+
    "\65\0\1\163\62\0\1\102\2\0\2\103\61\0\1\106"+
    "\65\0\1\164\62\0\1\105\2\0\2\106\61\0\1\111"+
    "\65\0\1\165\62\0\1\110\2\0\2\111\61\0\1\114"+
    "\65\0\1\166\62\0\1\113\2\0\2\114\61\0\1\120"+
    "\62\0\1\167\67\0\1\170\62\0\1\116\2\0\2\120"+
    "\57\0\1\171\1\172\63\0\1\173\1\174\63\0\1\175"+
    "\64\0\1\176\64\0\1\177\64\0\1\200\1\201\63\0"+
    "\1\202\1\203\63\0\1\204\1\205\63\0\1\206\1\207"+
    "\63\0\1\210\64\0\1\204\61\0\2\145\2\0\1\145"+
    "\2\0\2\145\1\0\4\145\2\0\1\145\6\0\1\145"+
    "\5\0\1\145\2\0\1\145\2\0\4\145\1\0\1\145"+
    "\11\0\1\145\44\0\1\211\24\0\1\212\7\0\1\213"+
    "\65\0\1\214\53\0\1\215\11\0\1\213\112\0\1\216"+
    "\66\0\1\217\64\0\1\220\22\0\1\221\7\0\1\65"+
    "\65\0\1\222\53\0\1\223\11\0\1\65\56\0\1\224"+
    "\61\0\1\122\64\0\1\126\64\0\1\225\64\0\1\134"+
    "\66\0\1\226\64\0\1\227\64\0\1\230\64\0\1\231"+
    "\64\0\1\232\64\0\1\233\62\0\1\234\73\0\1\213"+
    "\54\0\1\235\76\0\1\213\53\0\1\236\64\0\1\237"+
    "\64\0\1\240\73\0\1\65\66\0\1\65\53\0\1\241"+
    "\67\0\1\242\64\0\1\243\64\0\1\244\64\0\1\245"+
    "\64\0\1\143\64\0\1\246\61\0\1\171\64\0\1\173"+
    "\64\0\1\200\64\0\1\202\64\0\1\206\57\0";

  private static int[] zzUnpackTrans() {
    int[] result = new int[6095];
    int offset = 0;
    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   * Values are stored incremented by one, so a packed 0 becomes -1
   * ("no transition").
   *
   * @param packed the packed table
   * @param offset the index in <code>result</code> to start writing at
   * @param result the array receiving the unpacked values
   * @return the index following the last value written
   */
  private static int zzUnpackTrans(String packed, int offset, int[] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }


  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String[] ZZ_ERROR_MSG = {
    "Unkown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
   * (bit 0: the state is accepting, bit 3: no further transition is tried
   * once the state is reached).
   */
  private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\6\0\1\1\1\0\3\11\1\1\2\11\4\1\1\11"+
    "\1\1\4\11\32\1\1\11\3\0\4\11\1\0\4\11"+
    "\1\0\1\11\20\0\1\11\3\0\1\11\3\0\1\11"+
    "\1\0\1\11\3\0\3\11\1\0\2\11\2\0\1\11"+
    "\16\0\1\11\1\0\1\11\1\0\1\11\1\0\4\11"+
    "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+
    "\3\0\1\1\5\0\1\11\2\0\1\11\6\0\6\11"+
    "\5\0";

  private static int[] zzUnpackAttribute() {
    int[] result = new int[166];
    int offset = 0;
    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   *
   * @param packed the packed table
   * @param offset the index in <code>result</code> to start writing at
   * @param result the array receiving the unpacked values
   * @return the index following the last value written
   */
  private static int zzUnpackAttribute(String packed, int offset, int[] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /** For the backwards DFA of general lookahead statements */
  private boolean[] zzFin = new boolean[ZZ_BUFFERSIZE+1];

  /* user code: */
  private static final int CONS = 1;
  private static final int VOWEL = 2;
  private int cv = 0; // consonant = 1, vowel = 2, everything else = 0

  /** the input text of all tokens matched so far, unchanged */
  private final StringBuilder original = new StringBuilder();

  /** the normalized spelling of all tokens matched so far */
  private final StringBuilder normalized = new StringBuilder();

  /** set to 1 as soon as a token matched the "unknown character" rule */
  private int problem = 0;

  /**
   * Records the token just matched: its text goes to the original form,
   * <code>norm</code> to the normalized form.
   *
   * @param norm the normalized replacement for the current token
   */
  private void add(String norm) {
    original.append(yytext());
    normalized.append(norm);
  }


  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  public MpdlNormalizerLexLA(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * The bytes are decoded as UTF-8 instead of the platform default
   * charset: the character table of this scanner contains characters
   * (e.g. U+0153, U+017F, U+0301) that single-byte default charsets cannot
   * deliver. Use the Reader constructor for input in any other encoding.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public MpdlNormalizerLexLA(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in, java.nio.charset.Charset.forName("UTF-8")));
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed   the packed character translation table
   * @return         the unpacked character translation table
   */
  private static char[] zzUnpackCMap(String packed) {
    char[] map = new char[0x10000];
    int i = 0;  /* index in packed string  */
    int j = 0;  /* index in unpacked array */
    int l = packed.length();  /* 184 for the table of this scanner */
    while (i < l) {
      int  count = packed.charAt(i++);
      char value = packed.charAt(i++);
      do map[j++] = value; while (--count > 0);
    }
    return map;
  }


  /**
   * Refills the input buffer.
   *
   * @return      <code>false</code>, iff there was new input.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room (if you can) */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead-zzStartRead);

      /* translate stored positions */
      zzEndRead-= zzStartRead;
      zzCurrentPos-= zzStartRead;
      zzMarkedPos-= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
      /* if not: blow it up */
      char[] newBuffer = new char[zzCurrentPos*2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                zzBuffer.length-zzEndRead);

    if (numRead > 0) {
      zzEndRead+= numRead;
      return false;
    }
    // unlikely but not impossible: read 0 characters, but not at end of stream
    if (numRead == 0) {
      int c = zzReader.read();
      if (c == -1) {
        return true;
      } else {
        zzBuffer[zzEndRead++] = (char) c;
        return false;
      }
    }

    // numRead < 0
    return true;
  }


  /**
   * Closes the input stream.
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;  /* invalidate buffer    */

    if (zzReader != null)
      zzReader.close();
  }


  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>YYINITIAL</tt>, so a mode has to be
   * selected again with {@link #yybegin(int)}.
   *
   * The texts accumulated by the normalization rules and the problem
   * flag are cleared as well, so that nothing of the previous input shows
   * up in the results for the new one.
   *
   * @param reader   the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL  = true;
    zzAtEOF  = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;

    /* user code state */
    cv = 0;
    problem = 0;
    original.setLength(0);
    normalized.setLength(0);
  }


  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }


  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }


  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  }


  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead+pos];
  }


  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos-zzStartRead;
  }


  /**
   * Reports an error that occured while scanning.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * If this method is called, something is seriously wrong
   * (e.g. a JFlex bug producing a faulty scanner etc.).
   *
   * Usual syntax/scanner level error handling should be done
   * in error fallback rules.
   *
   * @param   errorCode  the code of the errormessage to display;
   *                     codes without a message are reported as
   *                     unknown error
   */
  private void zzScanError(int errorCode) {
    boolean known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    String message = ZZ_ERROR_MSG[known ? errorCode : ZZ_UNKNOWN_ERROR];

    throw new Error(message);
  }


  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  public void yypushback(int number)  {
    if ( number > yylength() )
      zzScanError(ZZ_PUSHBACK_2BIG);

    zzMarkedPos -= number;
  }


  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * Most rules only extend the accumulated original and normalized texts
   * and keep scanning; only the rules with action 5 and 11 return.
   *
   * @return      the accumulated normalized text; if an unknown character
   *              was encountered, the accumulated original text (action 5)
   *              or the empty string (action 11) instead;
   *              <code>null</code> at the end of the input
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  public java.lang.String yylex() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char[] zzBufferL = zzBuffer;
    char[] zzCMapL = ZZ_CMAP;

    int[] zzTransL = ZZ_TRANS;
    int[] zzRowMapL = ZZ_ROWMAP;
    int[] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      // the character in front of the new token decides whether the token
      // starts at the beginning of a line
      if (zzMarkedPosL > zzStartRead) {
        switch (zzBufferL[zzMarkedPosL-1]) {
        case '\n':
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          zzAtBOL = true;
          break;
        case '\r':
          // a CR only ends the line if it is not followed by a LF
          if (zzMarkedPosL < zzEndReadL)
            zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
          else if (zzAtEOF)
            zzAtBOL = false;
          else {
            boolean eof = zzRefill();
            zzMarkedPosL = zzMarkedPos;
            zzEndReadL = zzEndRead;
            zzBufferL = zzBuffer;
            if (eof)
              zzAtBOL = false;
            else
              zzAtBOL = zzBufferL[zzMarkedPosL] != '\n';
          }
          break;
        default:
          zzAtBOL = false;
        }
      }
      zzAction = -1;

      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      if (zzAtBOL)
        zzState = ZZ_LEXSTATE[zzLexicalState+1];
      else
        zzState = ZZ_LEXSTATE[zzLexicalState];


      // run the DFA, remembering the last accepting state and its position
      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL)
            zzInput = zzBufferL[zzCurrentPosL++];
          else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          }
          else {
            // store back cached positions
            zzCurrentPos  = zzCurrentPosL;
            zzMarkedPos   = zzMarkedPosL;
            boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL  = zzCurrentPos;
            zzMarkedPosL   = zzMarkedPos;
            zzBufferL      = zzBuffer;
            zzEndReadL     = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            else {
              zzInput = zzBufferL[zzCurrentPosL++];
            }
          }
          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
          if (zzNext == -1) break zzForAction;
          zzState = zzNext;

          int zzAttributes = zzAttrL[zzState];
          if ( (zzAttributes & 1) == 1 ) {
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            if ( (zzAttributes & 8) == 8 ) break zzForAction;
          }

        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      // Actions that set zzMarkedPos first belong to lookahead rules: only
      // the leading part of the match is consumed, the rest is scanned again.
      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 39:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("um");
          break;
        case 28:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = CONS; add("U");
          break;
        case 4:
          add(yytext());
          break;
        case 46:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 3;
          add("Hic");
          break;
        case 9:
          cv = VOWEL; add("AE");
          break;
        case 1:
          // unknown character: remember the problem, keep the text as it is
          problem = 1; cv = 0; add(yytext());
          break;
        case 5:
          return (problem == 1 ? original : normalized).toString();
        case 18:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = CONS; add("u");
          break;
        case 21:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          add("e");
          break;
        case 29:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          cv = VOWEL; add(yytext().replace("u", "v").replace("U", "V"));
          break;
        case 34:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("et");
          break;
        case 41:
          // general lookahead, find correct zzMarkedPos
          { int zzFState = 5;
            int zzFPos = zzStartRead;
            if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; }
            boolean[] zzFinL = zzFin;
            // forward pass: mark every position at which the base expression
            // can end. Each position of the match is written (true or false),
            // because zzFin still holds the marks of earlier matches.
            while (zzFState != -1 && zzFPos < zzMarkedPos) {
              zzFinL[zzFPos] = ((zzAttrL[zzFState] & 1) == 1);
              zzInput = zzBufferL[zzFPos++];
              zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
            }
            if (zzFState != -1) { zzFinL[zzFPos++] = ((zzAttrL[zzFState] & 1) == 1); }
            while (zzFPos <= zzMarkedPos) { zzFinL[zzFPos++] = false; }

            // backward pass: find the last marked position from which the
            // lookahead expression matches the rest of the text
            zzFState = 6;
            zzFPos = zzMarkedPos;
            while (!zzFinL[zzFPos] || (zzAttrL[zzFState] & 1) != 1) {
              zzInput = zzBufferL[--zzFPos];
              zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];
            }
            zzMarkedPos = zzFPos;
          }
          // U+017F (long s), written as an escape so that the result does
          // not depend on the encoding the compiler assumes for this file
          cv = VOWEL; add(yytext().replace("\u017f", "s"));
          break;
        case 3:
          cv = CONS; add(yytext());
          break;
        case 27:
          cv = VOWEL; add("oi");
          break;
        case 25:
          cv = CONS; add("QU");
          break;
        case 15:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          if (cv == VOWEL) {
            add(yytext().replace("u", "v").replace("U", "V"));
          } else {
            cv = VOWEL; add(yytext());
          }
          break;
        case 7:
          cv = CONS; add("ss");
          break;
        case 6:
          cv = CONS; add("s");
          break;
        case 22:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          add("o");
          break;
        case 33:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("ac");
          break;
        case 2:
          cv = VOWEL; add(yytext());
          break;
        case 43:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 3;
          add("qui");
          break;
        case 35:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("er");
          break;
        case 24:
          cv = CONS; add("Qu");
          break;
        case 30:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("ve");
          break;
        case 38:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("us");
          break;
        case 32:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("am");
          break;
        case 8:
          cv = VOWEL; add("ae");
          break;
        case 11:
          return problem == 1 ? "" : normalized.toString();
        case 26:
          add("ar");
          break;
        case 45:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 3;
          add("hic");
          break;
        case 17:
          cv = VOWEL; add("uu");
          break;
        case 40:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("ul");
          break;
        case 20:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          add("a");
          break;
        case 10:
          cv = VOWEL; add("oe");
          break;
        case 16:
          cv = VOWEL; add("ui");
          break;
        case 14:
          cv = CONS; add("qu");
          break;
        case 47:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 4;
          add("que");
          break;
        case 23:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          add("u");
          break;
        case 36:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("es");
          break;
        case 44:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 3;
          add("Qui");
          break;
        case 42:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 1;
          add("i");
          break;
        case 12:
          add("X");
          break;
        case 13:
          if (cv == CONS) {
            add(yytext().replace("v", "u").replace("V", "U"));
          } else {
            cv = CONS; add(yytext());
          }
          break;
        case 19:
          cv = VOWEL; add("ii");
          break;
        case 31:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("as");
          break;
        case 37:
          // lookahead expression with fixed base length
          zzMarkedPos = zzStartRead + 2;
          add("od");
          break;
        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          else {
            zzScanError(ZZ_NO_MATCH);
          }
      }
    }
  }


}