comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexAR.java @ 9:1ec29fdd0db8

neue .lex Dateien für Normalisierung / externe Objekte update
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 22 Feb 2011 16:03:45 +0100
parents
children 5df60f24e997
comparison
equal deleted inserted replaced
8:d2a1c14fde31 9:1ec29fdd0db8
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:02 */
2
3 /*
4 * Normalization rules for Arabic text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 0.96
9 * 2011-02-21
10 *
11 */
12
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14
15
/**
 * Scanner applying the normalization rules for Arabic text. It was generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 on 22.02.11 12:02 from the
 * specification file <tt>MpdlNormalizerLexAR.lex</tt>.
 *
 * <p>The scanner consumes its input one character at a time and collects the
 * matched text in two accumulators ("original" and "normalized"). In this
 * version of the rules every character is copied unchanged into both; the
 * only special characters are:</p>
 * <ul>
 *   <li><code>'@'</code> - copied like any other character, but additionally
 *       raises the internal "problem" flag;</li>
 *   <li>line feed - ends the word and makes {@link #yylex()} return a result
 *       that depends on the lexical state ({@link #DISP}, {@link #DICT},
 *       {@link #SEARCH}) and on the problem flag.</li>
 * </ul>
 *
 * <p>A line feed is <b>not</b> matched in the {@link #YYINITIAL} state, so
 * callers are expected to select one of the other states with
 * {@link #yybegin(int)} before scanning newline-terminated input.</p>
 */
public class MpdlNormalizerLexAR {

  /** This character denotes the end of file */
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
  private static final int ZZ_BUFFERSIZE = 16384;

  /** lexical states */
  public static final int SEARCH = 6;
  public static final int DICT = 4;
  public static final int YYINITIAL = 0;
  public static final int DISP = 2;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   *                  at the beginning of a line
   * l is of the form l = 2*k, k a non negative integer
   *
   * DISP and SEARCH share DFA state 1; DICT uses DFA state 2.
   */
  private static final int ZZ_LEXSTATE[] = {
    0, 0, 1, 1, 2, 2, 1, 1
  };

  /**
   * Run-length encoded (count, class) pairs mapping characters to character
   * classes: class 1 is the line feed (char 10), class 2 is '@' (char 64),
   * every other character is class 0.
   */
  private static final String ZZ_CMAP_PACKED =
    "\12\0\1\1\65\0\1\2\uffbf\0";

  /** Translates characters to character classes */
  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);

  /** Translates DFA states to action switch labels. */
  private static final int [] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ACTION_PACKED_0 =
    "\3\0\1\1\1\2\1\3\1\4";

  private static int [] zzUnpackAction() {
    int [] result = new int[7];
    zzUnpackAction(ZZ_ACTION_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   *
   * @param packed the packed table
   * @param offset index in <code>result</code> at which to start writing
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackAction(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** Translates a state to a row index in the transition table */
  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\3\0\6\0\11\0\11\0\11\0\11";

  private static int [] zzUnpackRowMap() {
    int [] result = new int[7];
    zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands (high 16 bits, low 16 bits) character pairs into ints.
   *
   * @param packed the packed table
   * @param offset index in <code>result</code> at which to start writing
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int high = packed.charAt(i++) << 16;
      result[j++] = high | packed.charAt(i++);
    }
    return j;
  }

  /** The transition table of the DFA */
  private static final int [] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_TRANS_PACKED_0 =
    "\1\4\1\0\1\5\1\4\1\6\1\5\1\4\1\7"+
    "\1\5\3\0";

  private static int [] zzUnpackTrans() {
    int [] result = new int[12];
    zzUnpackTrans(ZZ_TRANS_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   * Values are stored shifted by one, so a packed 0 becomes -1 ("no
   * transition").
   *
   * @param packed the packed table
   * @param offset index in <code>result</code> at which to start writing
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackTrans(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String ZZ_ERROR_MSG[] = {
    "Unkown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>.
   * Bit 1 marks an accepting state, bit 8 an accepting state after which
   * scanning stops immediately (see the checks in {@link #yylex()}).
   */
  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\3\0\4\11";

  private static int [] zzUnpackAttribute() {
    int [] result = new int[7];
    zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into <code>result</code>.
   *
   * @param packed the packed table
   * @param offset index in <code>result</code> at which to start writing
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /* user code: */

  /** all text matched so far, exactly as it was read */
  private final StringBuilder original = new StringBuilder();

  /** the normalized form of all text matched so far */
  private final StringBuilder normalized = new StringBuilder();

  /** set to 1 as soon as an '@' has been matched, 0 otherwise */
  private int problem = 0;

  /**
   * Records the currently matched text together with its normalized form.
   * Builders are used because this is called once per matched character;
   * repeated String concatenation would be quadratic in the word length.
   *
   * @param norm the normalized replacement for the current match
   */
  private void add (String norm) {
    original.append(yytext());
    normalized.append(norm);
  }


  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  public MpdlNormalizerLexAR(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * <p>NOTE(review): the bytes are decoded with the platform default
   * charset. For Arabic text it is safer to use the Reader constructor with
   * an explicitly chosen encoding.</p>
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public MpdlNormalizerLexAR(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed   the packed character translation table
   * @return         the unpacked character translation table
   */
  private static char [] zzUnpackCMap(String packed) {
    char [] map = new char[0x10000];
    int i = 0;  /* index in packed string  */
    int j = 0;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int  count = packed.charAt(i++);
      char value = packed.charAt(i++);
      do map[j++] = value; while (--count > 0);
    }
    return map;
  }


  /**
   * Refills the input buffer.
   *
   * @return      <code>false</code>, iff there was new input;
   *              <code>true</code> when the end of the input was reached.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room (if you can) by moving the unread part to the front */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead-zzStartRead);

      /* translate stored positions */
      zzEndRead-= zzStartRead;
      zzCurrentPos-= zzStartRead;
      zzMarkedPos-= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
      /* if not: blow it up */
      char newBuffer[] = new char[zzCurrentPos*2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                zzBuffer.length-zzEndRead);

    if (numRead > 0) {
      zzEndRead+= numRead;
      return false;
    }
    // unlikely but not impossible: read 0 characters, but not at end of stream
    if (numRead == 0) {
      int c = zzReader.read();
      if (c == -1) {
        return true;
      }
      zzBuffer[zzEndRead++] = (char) c;
      return false;
    }

    // numRead < 0
    return true;
  }


  /**
   * Closes the input stream.
   *
   * @exception   java.io.IOException  if closing the reader fails
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;   /* invalidate buffer    */

    if (zzReader != null)
      zzReader.close();
  }


  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
   *
   * The text accumulated from the previous input and the problem flag are
   * cleared as well, so a reused scanner yields the same results as a
   * freshly constructed one.
   *
   * @param reader   the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL  = true;
    zzAtEOF  = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;

    /* user state: without this, results of the old input leak into the new one */
    original.setLength(0);
    normalized.setLength(0);
    problem = 0;
  }


  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }


  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }


  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  }


  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead+pos];
  }


  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos-zzStartRead;
  }


  /**
   * Reports an error that occured while scanning.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * If this method is called, something is seriously wrong
   * (e.g. a JFlex bug producing a faulty scanner etc.).
   *
   * Usual syntax/scanner level error handling should be done
   * in error fallback rules.
   *
   * This method never returns normally: it always throws an
   * {@link Error} carrying the message for the given code (or the
   * "unknown error" message when the code is out of range).
   *
   * @param   errorCode  the code of the errormessage to display
   */
  private void zzScanError(int errorCode) {
    boolean known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    String message = known ? ZZ_ERROR_MSG[errorCode]
                           : ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];

    throw new Error(message);
  }


  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  public void yypushback(int number)  {
    if ( number > yylength() )
      zzScanError(ZZ_PUSHBACK_2BIG);

    zzMarkedPos -= number;
  }


  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * <p>Every character other than a line feed is appended to the
   * accumulated text; an '@' additionally raises the problem flag. A line
   * feed ends the scan and yields:</p>
   * <ul>
   *   <li>in the DISP and SEARCH states: the original text if the problem
   *       flag is set, otherwise the normalized text;</li>
   *   <li>in the DICT state: the empty string if the problem flag is set,
   *       otherwise the normalized text.</li>
   * </ul>
   * <p>The accumulated text is not cleared when a result is returned; it is
   * only cleared by {@link #yyreset(java.io.Reader)}.</p>
   *
   * @return      the result for the line just completed, or
   *              <code>null</code> when the end of input is reached
   * @exception   java.io.IOException  if any I/O-Error occurs
   * @throws      Error if the input cannot be matched (e.g. a line feed while
   *              in the YYINITIAL state)
   */
  public java.lang.String yylex() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char [] zzBufferL = zzBuffer;
    char [] zzCMapL = ZZ_CMAP;

    int [] zzTransL = ZZ_TRANS;
    int [] zzRowMapL = ZZ_ROWMAP;
    int [] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      zzAction = -1;

      // the next match starts where the previous one ended
      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      zzState = ZZ_LEXSTATE[zzLexicalState];


      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL)
            zzInput = zzBufferL[zzCurrentPosL++];
          else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          }
          else {
            // store back cached positions
            zzCurrentPos  = zzCurrentPosL;
            zzMarkedPos   = zzMarkedPosL;
            boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL  = zzCurrentPos;
            zzMarkedPosL   = zzMarkedPos;
            zzBufferL      = zzBuffer;
            zzEndReadL     = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            else {
              zzInput = zzBufferL[zzCurrentPosL++];
            }
          }
          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
          if (zzNext == -1) break zzForAction;   // no transition: stop here
          zzState = zzNext;

          int zzAttributes = zzAttrL[zzState];
          if ( (zzAttributes & 1) == 1 ) {
            // accepting state: remember it as the longest match so far
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            if ( (zzAttributes & 8) == 8 ) break zzForAction;
          }

        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 1:   // ordinary character
          add(yytext());
          break;
        case 2:   // '@': keep the character but flag the word
          problem = 1;
          add(yytext());
          break;
        case 3:   // line feed in DISP / SEARCH
          return problem == 1 ? original.toString() : normalized.toString();
        case 4:   // line feed in DICT
          return problem == 1 ? "" : normalized.toString();
        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          else {
            zzScanError(ZZ_NO_MATCH);
          }
      }
    }
  }


}