comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEL.java @ 9:1ec29fdd0db8

neue .lex Dateien für Normalisierung / externe Objekte update
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 22 Feb 2011 16:03:45 +0100
parents
children 5df60f24e997
comparison
equal deleted inserted replaced
8:d2a1c14fde31 9:1ec29fdd0db8
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */
2
3 /*
4 * Normalization rules for Greek text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 0.96
9 * 2011-02-21
10 *
11 */
12
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14
15
/**
 * Scanner that normalizes one line of Greek text.
 *
 * <p>This class was originally generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 on 22.02.11 12:03 from the
 * specification file <tt>MpdlNormalizerLexEL.lex</tt>. The packed DFA tables
 * below are unchanged generator output. The surrounding code has been tidied
 * by hand, so NOTE(review): regenerating from the .lex file will overwrite
 * these changes unless they are ported to the specification.
 *
 * <p>Usage: construct the scanner on input that ends with a newline, select a
 * mode with {@link #yybegin(int)} ({@link #DISP}, {@link #DICT} or
 * {@link #SEARCH}) and call {@link #yylex()}.
 *
 * <p>What the tables encode:
 * <ul>
 * <li>In DISP, DICT and SEARCH the vowels with tonos (U+03AC to U+03AF and
 *     U+03CC to U+03CE) are replaced by their oxia counterparts
 *     (U+1F71, U+1F73, U+1F75, U+1F77, U+1F79, U+1F7B, U+1F7D).</li>
 * <li>In DICT and SEARCH a vowel with grave accent (U+1F70, U+1F72, U+1F74,
 *     U+1F76, U+1F78, U+1F7A, U+1F7C, U+1FB2, U+1FC2, U+1FF2) is replaced by
 *     its acute counterpart when it is followed directly by the newline, or
 *     by exactly one of nu (U+03BD), rho (U+03C1) or final sigma (U+03C2)
 *     and then the newline. In DISP such vowels are copied unchanged.</li>
 * <li>In DISP, DICT and SEARCH a Latin letter (A-Z, a-z) or '@' is copied and
 *     sets the "problem" flag.</li>
 * <li>Every other character except the newline is copied unchanged.</li>
 * <li>The newline ends the scan: DISP and SEARCH return the unmodified input
 *     if the problem flag is set, DICT returns the empty string in that
 *     case; otherwise all three return the normalized text.</li>
 * <li>YYINITIAL has no rule for the newline, so scanning a newline in that
 *     state raises the "could not match input" {@link Error}.</li>
 * </ul>
 *
 * <p>The accumulated text is not cleared when a line result is returned; it
 * is cleared only by {@link #yyreset(java.io.Reader)}.
 */
public class MpdlNormalizerLexEL {

  /** This character denotes the end of file */
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
  private static final int ZZ_BUFFERSIZE = 16384;

  /** lexical states */
  public static final int SEARCH = 6;
  public static final int DICT = 4;
  public static final int YYINITIAL = 0;
  public static final int DISP = 2;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   *                  at the beginning of a line
   * l is of the form l = 2*k, k a non negative integer
   */
  private static final int[] ZZ_LEXSTATE = {
    0, 0, 1, 1, 2, 2, 3, 3
  };

  /**
   * Run-length packed map from characters to character classes
   * (pairs of count and class).
   */
  private static final String ZZ_CMAP_PACKED =
    "\12\0\1\1\65\0\1\3\32\3\6\0\32\3\u0331\0\1\4\1\5"+
    "\1\6\1\7\15\0\1\2\3\0\2\2\11\0\1\10\1\11\1\12"+
    "\u1ba1\0\1\13\1\0\1\15\1\0\1\16\1\0\1\20\1\0\1\21"+
    "\1\0\1\22\1\0\1\23\65\0\1\14\17\0\1\17\57\0\1\24"+
    "\ue00d\0";

  /** Translates characters to character classes */
  private static final char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);

  private static final String ZZ_ACTION_PACKED_0 =
    "\4\0\1\1\1\2\1\3\1\4\1\5\1\6\1\7"+
    "\1\10\1\11\1\12\1\13\12\1\1\14\1\0\1\15"+
    "\1\0\1\16\1\0\1\17\1\0\1\20\1\0\1\21"+
    "\1\0\1\22\1\0\1\23\1\0\1\24\1\0\1\25"+
    "\1\0";

  /** Translates DFA states to action switch labels. */
  private static final int[] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\25\0\52\0\77\0\124\0\124\0\124\0\124"+
    "\0\124\0\124\0\124\0\124\0\124\0\124\0\124\0\151"+
    "\0\176\0\223\0\250\0\275\0\322\0\347\0\374\0\u0111"+
    "\0\u0126\0\124\0\u013b\0\124\0\u0150\0\124\0\u0165\0\124"+
    "\0\u017a\0\124\0\u018f\0\124\0\u01a4\0\124\0\u01b9\0\124"+
    "\0\u01ce\0\124\0\u01e3\0\124\0\u01f8";

  /** Translates a state to a row index in the transition table */
  private static final int[] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_TRANS_PACKED_0 =
    "\1\5\1\0\24\5\1\6\1\5\1\7\1\10\1\11"+
    "\1\12\1\13\1\14\1\15\1\16\13\5\1\17\1\5"+
    "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
    "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+
    "\1\30\1\31\1\5\1\6\1\5\1\7\1\10\1\11"+
    "\1\12\1\13\1\14\1\15\1\16\1\20\1\21\1\22"+
    "\1\23\1\24\1\25\1\26\1\27\1\30\1\31\26\0"+
    "\1\32\1\33\23\0\1\34\1\35\23\0\1\36\1\37"+
    "\23\0\1\40\1\41\23\0\1\42\1\43\23\0\1\44"+
    "\1\45\23\0\1\46\1\47\23\0\1\50\1\51\23\0"+
    "\1\52\1\53\23\0\1\54\1\55\23\0\1\32\24\0"+
    "\1\34\24\0\1\36\24\0\1\40\24\0\1\42\24\0"+
    "\1\44\24\0\1\46\24\0\1\50\24\0\1\52\24\0"+
    "\1\54\23\0";

  /** The transition table of the DFA */
  private static final int[] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\4\0\13\11\12\1\1\11\1\0\1\11\1\0\1\11"+
    "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+
    "\1\0\1\11\1\0\1\11\1\0\1\11\1\0";

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
   * (bit 1: accepting state, bit 8: no further transition possible).
   */
  private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute();

  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String[] ZZ_ERROR_MSG = {
    "Unkown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * Replacement text per action label; <code>null</code> for labels that do
   * not emit a fixed string. Labels 4-10 are the tonos rules, labels 12-21
   * the grave-accent lookahead rules. The strings are written as Unicode
   * escapes so that the compiled class does not depend on the encoding the
   * compiler assumes for this source file.
   */
  private static final String[] RULE_OUTPUT = {
    null, null, null, null,
    "\u1F71",   //  4: alpha with tonos
    "\u1F73",   //  5: epsilon with tonos
    "\u1F75",   //  6: eta with tonos
    "\u1F77",   //  7: iota with tonos
    "\u1F79",   //  8: omicron with tonos
    "\u1F7B",   //  9: upsilon with tonos
    "\u1F7D",   // 10: omega with tonos
    null,       // 11: end of line in DICT
    "\u1F71",   // 12: alpha with varia
    "\u1FB4",   // 13: alpha with varia and ypogegrammeni
    "\u1F73",   // 14: epsilon with varia
    "\u1F75",   // 15: eta with varia
    "\u1FC4",   // 16: eta with varia and ypogegrammeni
    "\u1F77",   // 17: iota with varia
    "\u1F79",   // 18: omicron with varia
    "\u1F7B",   // 19: upsilon with varia
    "\u1F7D",   // 20: omega with varia
    "\u1FF4"    // 21: omega with varia and ypogegrammeni
  };

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /* user code: */

  /** every matched character exactly as it was read */
  private final StringBuilder original = new StringBuilder();

  /** the matched characters after applying the normalization rules */
  private final StringBuilder normalized = new StringBuilder();

  /** set to 1 as soon as a Latin letter or '@' has been seen */
  private int problem = 0;

  /**
   * Records the current match: the matched text is appended to the
   * unmodified copy and <code>norm</code> to the normalized copy.
   *
   * @param norm the normalized form of the current match
   */
  private void add(String norm) {
    original.append(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
    normalized.append(norm);
  }


  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  public MpdlNormalizerLexEL(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner that decodes the stream as UTF-8.
   * There is also java.io.Reader version of this constructor.
   *
   * <p>The encoding is fixed instead of taken from the platform default:
   * the rules only act on Greek code points, which a non-Unicode default
   * charset would turn into other characters before the scanner sees them.
   * Callers with differently encoded data should use the Reader constructor.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public MpdlNormalizerLexEL(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in, java.nio.charset.Charset.forName("UTF-8")));
  }

  /**
   * Expands a run-length packed table. The packed string is a sequence of
   * (count, value) character pairs; each value, reduced by <code>bias</code>,
   * is written count times (at least once).
   *
   * @param packed the packed table
   * @param offset index in <code>result</code> at which to start writing
   * @param result the array receiving the unpacked values
   * @param bias   amount subtracted from every stored value
   * @return the index just past the last value written
   */
  private static int zzUnpackRuns(String packed, int offset, int[] result, int bias) {
    int out = offset;
    final int end = packed.length();
    int in = 0;
    while (in < end) {
      int count = packed.charAt(in++);
      final int value = packed.charAt(in++) - bias;
      do {
        result[out++] = value;
      } while (--count > 0);
    }
    return out;
  }

  /** Unpacks the state-to-action table (45 DFA states). */
  private static int[] zzUnpackAction() {
    int[] result = new int[45];
    zzUnpackRuns(ZZ_ACTION_PACKED_0, 0, result, 0);
    return result;
  }

  /**
   * Unpacks the state-to-row table (45 DFA states); every entry is stored
   * as two characters, high half first.
   */
  private static int[] zzUnpackRowMap() {
    int[] result = new int[45];
    final String packed = ZZ_ROWMAP_PACKED_0;
    final int end = packed.length();
    int out = 0;
    int in = 0;
    while (in < end) {
      final int high = packed.charAt(in++) << 16;
      result[out++] = high | packed.charAt(in++);
    }
    return result;
  }

  /**
   * Unpacks the transition table (525 entries); stored values are shifted
   * by one so that "no transition" (-1) can be stored as 0.
   */
  private static int[] zzUnpackTrans() {
    int[] result = new int[525];
    zzUnpackRuns(ZZ_TRANS_PACKED_0, 0, result, 1);
    return result;
  }

  /** Unpacks the state attribute table (45 DFA states). */
  private static int[] zzUnpackAttribute() {
    int[] result = new int[45];
    zzUnpackRuns(ZZ_ATTRIBUTE_PACKED_0, 0, result, 0);
    return result;
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed   the packed character translation table
   * @return         the unpacked character translation table
   */
  private static char[] zzUnpackCMap(String packed) {
    char[] map = new char[0x10000];
    final int end = packed.length();
    int in = 0;   /* index in packed string  */
    int out = 0;  /* index in unpacked array */
    while (in < end) {
      int count = packed.charAt(in++);
      final char value = packed.charAt(in++);
      do {
        map[out++] = value;
      } while (--count > 0);
    }
    return map;
  }


  /**
   * Refills the input buffer.
   *
   * @return      <code>true</code> if the end of the input was reached
   *              without reading anything, <code>false</code> if new input
   *              is available in the buffer.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room by dropping everything before the current match */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead - zzStartRead);

      /* translate stored positions */
      zzEndRead -= zzStartRead;
      zzCurrentPos -= zzStartRead;
      zzMarkedPos -= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? if not: double it */
    if (zzCurrentPos >= zzBuffer.length) {
      char[] newBuffer = new char[zzCurrentPos * 2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    final int numRead = zzReader.read(zzBuffer, zzEndRead,
                                      zzBuffer.length - zzEndRead);

    if (numRead > 0) {
      zzEndRead += numRead;
      return false;
    }

    if (numRead < 0) {
      return true;
    }

    // numRead == 0: unlikely but not impossible without being at the end of
    // the stream, so ask for a single character to find out
    final int c = zzReader.read();
    if (c == -1) {
      return true;
    }
    zzBuffer[zzEndRead++] = (char) c;
    return false;
  }


  /**
   * Closes the input stream.
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;   /* invalidate buffer    */

    if (zzReader != null) {
      zzReader.close();
    }
  }


  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>ZZ_INITIAL</tt>, so the caller has to select
   * the mode again with {@link #yybegin(int)}.
   *
   * The text accumulated so far and the problem flag are cleared as well, so
   * that results for the new input do not contain text of the old input.
   *
   * @param reader   the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL = true;
    zzAtEOF = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;

    original.setLength(0);
    normalized.setLength(0);
    problem = 0;
  }


  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }


  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }


  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
  }


  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead + pos];
  }


  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos - zzStartRead;
  }


  /**
   * Reports an error that occurred while scanning by throwing an
   * {@link Error} carrying the message for the given code.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * Note that this scanner has no rule for a newline in YYINITIAL, so a
   * newline scanned in that state ends up here.
   *
   * @param   errorCode  the code of the errormessage to display; unknown
   *                     codes are reported with the generic message
   */
  private void zzScanError(int errorCode) {
    final boolean known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    final String message = ZZ_ERROR_MSG[known ? errorCode : ZZ_UNKNOWN_ERROR];
    throw new Error(message);
  }


  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  public void yypushback(int number) {
    if (number > yylength()) {
      zzScanError(ZZ_PUSHBACK_2BIG);
    }

    zzMarkedPos -= number;
  }


  /**
   * Resumes scanning until a newline rule returns a result, the end of
   * input is encountered or an I/O-Error occurs.
   *
   * @return      the text collected up to the newline (see the class
   *              comment for which variant each mode returns), or
   *              <code>null</code> at the end of input
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  public java.lang.String yylex() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char[] zzBufferL = zzBuffer;
    final char[] zzCMapL = ZZ_CMAP;

    final int[] zzTransL = ZZ_TRANS;
    final int[] zzRowMapL = ZZ_ROWMAP;
    final int[] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      zzAction = -1;

      // the next match starts where the previous one ended
      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      zzState = ZZ_LEXSTATE[zzLexicalState];

      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL) {
            zzInput = zzBufferL[zzCurrentPosL++];
          } else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          } else {
            // store back cached positions
            zzCurrentPos = zzCurrentPosL;
            zzMarkedPos = zzMarkedPosL;
            final boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL = zzCurrentPos;
            zzMarkedPosL = zzMarkedPos;
            zzBufferL = zzBuffer;
            zzEndReadL = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            zzInput = zzBufferL[zzCurrentPosL++];
          }

          final int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
          if (zzNext == -1) {
            break zzForAction;
          }
          zzState = zzNext;

          final int zzAttributes = zzAttrL[zzState];
          if ((zzAttributes & 1) == 1) {
            // accepting state: remember it as the longest match so far
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            if ((zzAttributes & 8) == 8) {
              break zzForAction;
            }
          }
        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      final int zzRule = zzAction < 0 ? zzAction : ZZ_ACTION[zzAction];
      switch (zzRule) {
        case 1:
          // any other character: copy unchanged
          add(yytext());
          break;

        case 2:
          // end of line in DISP and SEARCH
          return problem == 1 ? original.toString() : normalized.toString();

        case 3:
          // Latin letter or '@': copy and flag the line
          problem = 1;
          add(yytext());
          break;

        case 4: case 5: case 6: case 7: case 8: case 9: case 10:
          // vowel with tonos
          add(RULE_OUTPUT[zzRule]);
          break;

        case 11:
          // end of line in DICT
          return problem == 1 ? "" : normalized.toString();

        case 12: case 13: case 14: case 15: case 16:
        case 17: case 18: case 19: case 20: case 21:
          // lookahead expression with fixed base length: only the accented
          // vowel is consumed, the trailing context is scanned again
          zzMarkedPos = zzStartRead + 1;
          add(RULE_OUTPUT[zzRule]);
          break;

        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          zzScanError(ZZ_NO_MATCH);
      }
    }
  }


}