comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEL.java @ 14:5df60f24e997

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Mon, 29 Aug 2011 17:40:02 +0200
parents 1ec29fdd0db8
children 257f67be5c00
comparison
equal deleted inserted replaced
13:469d927b9ca7 14:5df60f24e997
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ 1 /* The following code was generated by JFlex 1.4.3 on 03.08.11 18:23 */
2 2
3 /* 3 /*
4 * Normalization rules for Greek text 4 * Normalization rules for Greek text
5 * [this is a JFlex specification] 5 * [this is a JFlex specification]
6 * 6 *
7 * Wolfgang Schmidle 7 * Wolfgang Schmidle
8 * version 0.96 8 * version 2011-08-03
9 * 2011-02-21
10 * 9 *
11 */ 10 */
12 11
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14 13
15 14
16 /** 15 /**
17 * This class is a scanner generated by 16 * This class is a scanner generated by
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
19 * on 22.02.11 12:03 from the specification file 18 * on 03.08.11 18:23 from the specification file
20 * <tt>MpdlNormalizerLexEL.lex</tt> 19 * <tt>MpdlNormalizerLexEL.lex</tt>
21 */ 20 */
22 public class MpdlNormalizerLexEL { 21 public class MpdlNormalizerLexEL {
23 22
24 /** This character denotes the end of file */ 23 /** This character denotes the end of file */
29 28
30 /** lexical states */ 29 /** lexical states */
31 public static final int SEARCH = 6; 30 public static final int SEARCH = 6;
32 public static final int DICT = 4; 31 public static final int DICT = 4;
33 public static final int YYINITIAL = 0; 32 public static final int YYINITIAL = 0;
33 public static final int SIGMA = 8;
34 public static final int DISP = 2; 34 public static final int DISP = 2;
35 35
36 /** 36 /**
37 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l 37 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l 38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
39 * at the beginning of a line 39 * at the beginning of a line
40 * l is of the form l = 2*k, k a non negative integer 40 * l is of the form l = 2*k, k a non negative integer
41 */ 41 */
42 private static final int ZZ_LEXSTATE[] = { 42 private static final int ZZ_LEXSTATE[] = {
43 0, 0, 1, 1, 2, 2, 3, 3 43 0, 0, 1, 1, 2, 2, 3, 3, 4, 4
44 }; 44 };
45 45
46 /** 46 /**
47 * Translates characters to character classes 47 * Translates characters to character classes
48 */ 48 */
49 private static final String ZZ_CMAP_PACKED = 49 private static final String ZZ_CMAP_PACKED =
50 "\12\0\1\1\65\0\1\3\32\3\6\0\32\3\u0331\0\1\4\1\5"+ 50 "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+
51 "\1\6\1\7\15\0\1\2\3\0\2\2\11\0\1\10\1\11\1\12"+ 51 "\32\5\6\0\1\6\2\5\1\6\20\5\1\6\5\5\1\1\1\0"+
52 "\u1ba1\0\1\13\1\0\1\15\1\0\1\16\1\0\1\20\1\0\1\21"+ 52 "\1\1\u032e\0\1\7\1\10\1\11\1\12\15\0\1\4\3\0\1\4"+
53 "\1\0\1\22\1\0\1\23\65\0\1\14\17\0\1\17\57\0\1\24"+ 53 "\1\30\11\0\1\13\1\14\1\15\u1ba1\0\1\16\1\0\1\20\1\0"+
54 "\ue00d\0"; 54 "\1\21\1\0\1\23\1\0\1\24\1\0\1\25\1\0\1\26\65\0"+
55 "\1\17\17\0\1\22\57\0\1\27\ue00d\0";
55 56
56 /** 57 /**
57 * Translates characters to character classes 58 * Translates characters to character classes
58 */ 59 */
59 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); 60 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
62 * Translates DFA states to action switch labels. 63 * Translates DFA states to action switch labels.
63 */ 64 */
64 private static final int [] ZZ_ACTION = zzUnpackAction(); 65 private static final int [] ZZ_ACTION = zzUnpackAction();
65 66
66 private static final String ZZ_ACTION_PACKED_0 = 67 private static final String ZZ_ACTION_PACKED_0 =
67 "\4\0\1\1\1\2\1\3\1\4\1\5\1\6\1\7"+ 68 "\5\0\2\1\2\2\1\3\1\4\1\5\1\6\1\7"+
68 "\1\10\1\11\1\12\1\13\12\1\1\14\1\0\1\15"+ 69 "\1\10\1\11\1\12\1\13\12\1\1\14\1\15\1\16"+
69 "\1\0\1\16\1\0\1\17\1\0\1\20\1\0\1\21"+ 70 "\1\0\1\17\1\0\1\20\1\0\1\21\1\0\1\22"+
70 "\1\0\1\22\1\0\1\23\1\0\1\24\1\0\1\25"+ 71 "\1\0\1\23\1\0\1\24\1\0\1\25\1\0\1\26"+
71 "\1\0"; 72 "\1\0\1\27\1\0";
72 73
73 private static int [] zzUnpackAction() { 74 private static int [] zzUnpackAction() {
74 int [] result = new int[45]; 75 int [] result = new int[50];
75 int offset = 0; 76 int offset = 0;
76 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); 77 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
77 return result; 78 return result;
78 } 79 }
79 80
94 * Translates a state to a row index in the transition table 95 * Translates a state to a row index in the transition table
95 */ 96 */
96 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); 97 private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
97 98
98 private static final String ZZ_ROWMAP_PACKED_0 = 99 private static final String ZZ_ROWMAP_PACKED_0 =
99 "\0\0\0\25\0\52\0\77\0\124\0\124\0\124\0\124"+ 100 "\0\0\0\31\0\62\0\113\0\144\0\175\0\226\0\175"+
100 "\0\124\0\124\0\124\0\124\0\124\0\124\0\124\0\151"+ 101 "\0\226\0\175\0\175\0\175\0\175\0\175\0\175\0\175"+
101 "\0\176\0\223\0\250\0\275\0\322\0\347\0\374\0\u0111"+ 102 "\0\175\0\175\0\257\0\310\0\341\0\372\0\u0113\0\u012c"+
102 "\0\u0126\0\124\0\u013b\0\124\0\u0150\0\124\0\u0165\0\124"+ 103 "\0\u0145\0\u015e\0\u0177\0\u0190\0\175\0\175\0\175\0\u01a9"+
103 "\0\u017a\0\124\0\u018f\0\124\0\u01a4\0\124\0\u01b9\0\124"+ 104 "\0\175\0\u01c2\0\175\0\u01db\0\175\0\u01f4\0\175\0\u020d"+
104 "\0\u01ce\0\124\0\u01e3\0\124\0\u01f8"; 105 "\0\175\0\u0226\0\175\0\u023f\0\175\0\u0258\0\175\0\u0271"+
106 "\0\175\0\u028a";
105 107
106 private static int [] zzUnpackRowMap() { 108 private static int [] zzUnpackRowMap() {
107 int [] result = new int[45]; 109 int [] result = new int[50];
108 int offset = 0; 110 int offset = 0;
109 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); 111 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
110 return result; 112 return result;
111 } 113 }
112 114
125 * The transition table of the DFA 127 * The transition table of the DFA
126 */ 128 */
127 private static final int [] ZZ_TRANS = zzUnpackTrans(); 129 private static final int [] ZZ_TRANS = zzUnpackTrans();
128 130
129 private static final String ZZ_TRANS_PACKED_0 = 131 private static final String ZZ_TRANS_PACKED_0 =
130 "\1\5\1\0\24\5\1\6\1\5\1\7\1\10\1\11"+ 132 "\1\6\1\7\1\6\1\0\1\6\1\10\1\11\1\12"+
131 "\1\12\1\13\1\14\1\15\1\16\13\5\1\17\1\5"+ 133 "\1\13\1\14\1\15\1\16\1\17\1\20\14\6\1\7"+
132 "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ 134 "\1\6\1\21\1\6\1\10\1\11\1\12\1\13\1\14"+
133 "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+ 135 "\1\15\1\16\1\17\1\20\14\6\1\7\1\6\1\22"+
134 "\1\30\1\31\1\5\1\6\1\5\1\7\1\10\1\11"+ 136 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
135 "\1\12\1\13\1\14\1\15\1\16\1\20\1\21\1\22"+ 137 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+
136 "\1\23\1\24\1\25\1\26\1\27\1\30\1\31\26\0"+ 138 "\1\31\1\32\1\33\1\34\2\6\1\7\1\6\1\35"+
137 "\1\32\1\33\23\0\1\34\1\35\23\0\1\36\1\37"+ 139 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
138 "\23\0\1\40\1\41\23\0\1\42\1\43\23\0\1\44"+ 140 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+
139 "\1\45\23\0\1\46\1\47\23\0\1\50\1\51\23\0"+ 141 "\1\31\1\32\1\33\1\34\2\6\1\7\1\6\1\22"+
140 "\1\52\1\53\23\0\1\54\1\55\23\0\1\32\24\0"+ 142 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
141 "\1\34\24\0\1\36\24\0\1\40\24\0\1\42\24\0"+ 143 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+
142 "\1\44\24\0\1\46\24\0\1\50\24\0\1\52\24\0"+ 144 "\1\31\1\32\1\33\1\34\1\36\33\0\1\6\31\0"+
143 "\1\54\23\0"; 145 "\1\37\1\40\23\0\1\40\3\0\1\41\1\42\23\0"+
146 "\1\42\3\0\1\43\1\44\23\0\1\44\3\0\1\45"+
147 "\1\46\23\0\1\46\3\0\1\47\1\50\23\0\1\50"+
148 "\3\0\1\51\1\52\23\0\1\52\3\0\1\53\1\54"+
149 "\23\0\1\54\3\0\1\55\1\56\23\0\1\56\3\0"+
150 "\1\57\1\60\23\0\1\60\3\0\1\61\1\62\23\0"+
151 "\1\62\3\0\1\37\30\0\1\41\30\0\1\43\30\0"+
152 "\1\45\30\0\1\47\30\0\1\51\30\0\1\53\30\0"+
153 "\1\55\30\0\1\57\30\0\1\61\25\0";
144 154
145 private static int [] zzUnpackTrans() { 155 private static int [] zzUnpackTrans() {
146 int [] result = new int[525]; 156 int [] result = new int[675];
147 int offset = 0; 157 int offset = 0;
148 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); 158 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
149 return result; 159 return result;
150 } 160 }
151 161
179 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> 189 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
180 */ 190 */
181 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); 191 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
182 192
183 private static final String ZZ_ATTRIBUTE_PACKED_0 = 193 private static final String ZZ_ATTRIBUTE_PACKED_0 =
184 "\4\0\13\11\12\1\1\11\1\0\1\11\1\0\1\11"+ 194 "\5\0\1\11\1\1\1\11\1\1\11\11\12\1\3\11"+
185 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+ 195 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+
186 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0"; 196 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+
197 "\1\0\1\11\1\0";
187 198
188 private static int [] zzUnpackAttribute() { 199 private static int [] zzUnpackAttribute() {
189 int [] result = new int[45]; 200 int [] result = new int[50];
190 int offset = 0; 201 int offset = 0;
191 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); 202 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
192 return result; 203 return result;
193 } 204 }
194 205
261 private void add (String norm) { 272 private void add (String norm) {
262 original += yytext(); 273 original += yytext();
263 normalized += norm; 274 normalized += norm;
264 } 275 }
265 276
277 private static final String LB = "[\u002d\u00ad] ";
278
266 279
267 /** 280 /**
268 * Creates a new scanner 281 * Creates a new scanner
269 * There is also a java.io.InputStream version of this constructor. 282 * There is also a java.io.InputStream version of this constructor.
270 * 283 *
292 */ 305 */
293 private static char [] zzUnpackCMap(String packed) { 306 private static char [] zzUnpackCMap(String packed) {
294 char [] map = new char[0x10000]; 307 char [] map = new char[0x10000];
295 int i = 0; /* index in packed string */ 308 int i = 0; /* index in packed string */
296 int j = 0; /* index in unpacked array */ 309 int j = 0; /* index in unpacked array */
297 while (i < 82) { 310 while (i < 112) {
298 int count = packed.charAt(i++); 311 int count = packed.charAt(i++);
299 char value = packed.charAt(i++); 312 char value = packed.charAt(i++);
300 do map[j++] = value; while (--count > 0); 313 do map[j++] = value; while (--count > 0);
301 } 314 }
302 return map; 315 return map;
559 572
560 // store back cached position 573 // store back cached position
561 zzMarkedPos = zzMarkedPosL; 574 zzMarkedPos = zzMarkedPosL;
562 575
563 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { 576 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
564 case 21: 577 case 23:
565 // lookahead expression with fixed base length 578 // lookahead expression with fixed base length
566 zzMarkedPos = zzStartRead + 1; 579 zzMarkedPos = zzStartRead + 1;
567 { add("ῴ"); 580 { add("ῴ");
568 } 581 }
569 case 22: break; 582 case 24: break;
583 case 5:
584 { add("ή");
585 }
586 case 25: break;
587 case 17:
588 // lookahead expression with fixed base length
589 zzMarkedPos = zzStartRead + 1;
590 { add("ή");
591 }
592 case 26: break;
593 case 13:
594 { add("σ");
595 }
596 case 27: break;
570 case 6: 597 case 6:
571 { add("ή");
572 }
573 case 23: break;
574 case 15:
575 // lookahead expression with fixed base length
576 zzMarkedPos = zzStartRead + 1;
577 { add("ή");
578 }
579 case 24: break;
580 case 7:
581 { add("ί"); 598 { add("ί");
582 } 599 }
583 case 25: break; 600 case 28: break;
584 case 1: 601 case 1:
585 { add(yytext()); 602 { add(yytext());
586 } 603 }
587 case 26: break; 604 case 29: break;
605 case 22:
606 // lookahead expression with fixed base length
607 zzMarkedPos = zzStartRead + 1;
608 { add("ώ");
609 }
610 case 30: break;
611 case 11:
612 { switch (problem) {
613 case 1: return "";
614 default: return normalized.replaceAll(LB, "");
615 }
616 }
617 case 31: break;
618 case 19:
619 // lookahead expression with fixed base length
620 zzMarkedPos = zzStartRead + 1;
621 { add("ί");
622 }
623 case 32: break;
624 case 15:
625 // lookahead expression with fixed base length
626 zzMarkedPos = zzStartRead + 1;
627 { add("ᾴ");
628 }
629 case 33: break;
630 case 7:
631 { add("ό");
632 }
633 case 34: break;
634 case 14:
635 // lookahead expression with fixed base length
636 zzMarkedPos = zzStartRead + 1;
637 { add("ά");
638 }
639 case 35: break;
640 case 12:
641 { switch (problem) {
642 case 1: return original;
643 default: return normalized.replaceAll(LB, "").toLowerCase();
644 }
645 }
646 case 36: break;
647 case 8:
648 { add("ύ");
649 }
650 case 37: break;
651 case 2:
652 { problem = 1; add(yytext());
653 }
654 case 38: break;
588 case 20: 655 case 20:
589 // lookahead expression with fixed base length 656 // lookahead expression with fixed base length
590 zzMarkedPos = zzStartRead + 1; 657 zzMarkedPos = zzStartRead + 1;
591 { add("ώ"); 658 { add("ό");
592 } 659 }
593 case 27: break; 660 case 39: break;
594 case 17:
595 // lookahead expression with fixed base length
596 zzMarkedPos = zzStartRead + 1;
597 { add("ί");
598 }
599 case 28: break;
600 case 13:
601 // lookahead expression with fixed base length
602 zzMarkedPos = zzStartRead + 1;
603 { add("ᾴ");
604 }
605 case 29: break;
606 case 8:
607 { add("ό");
608 }
609 case 30: break;
610 case 12:
611 // lookahead expression with fixed base length
612 zzMarkedPos = zzStartRead + 1;
613 { add("ά");
614 }
615 case 31: break;
616 case 9:
617 { add("ύ");
618 }
619 case 32: break;
620 case 3: 661 case 3:
621 { problem = 1; add(yytext());
622 }
623 case 33: break;
624 case 18:
625 // lookahead expression with fixed base length
626 zzMarkedPos = zzStartRead + 1;
627 { add("ό");
628 }
629 case 34: break;
630 case 4:
631 { add("ά"); 662 { add("ά");
632 } 663 }
633 case 35: break; 664 case 40: break;
634 case 2: 665 case 10:
635 { switch (problem) { 666 { switch (problem) {
636 case 1: return original; 667 case 1: return original;
637 default: return normalized; 668 default: return normalized;
638 } 669 }
639 } 670 }
640 case 36: break; 671 case 41: break;
641 case 10: 672 case 9:
642 { add("ώ"); 673 { add("ώ");
643 } 674 }
644 case 37: break; 675 case 42: break;
645 case 14: 676 case 16:
646 // lookahead expression with fixed base length 677 // lookahead expression with fixed base length
647 zzMarkedPos = zzStartRead + 1; 678 zzMarkedPos = zzStartRead + 1;
648 { add("έ"); 679 { add("έ");
649 } 680 }
650 case 38: break; 681 case 43: break;
651 case 16: 682 case 18:
652 // lookahead expression with fixed base length 683 // lookahead expression with fixed base length
653 zzMarkedPos = zzStartRead + 1; 684 zzMarkedPos = zzStartRead + 1;
654 { add("ῄ"); 685 { add("ῄ");
655 } 686 }
656 case 39: break; 687 case 44: break;
657 case 5: 688 case 4:
658 { add("έ"); 689 { add("έ");
659 } 690 }
660 case 40: break; 691 case 45: break;
661 case 11: 692 case 21:
662 { switch (problem) {
663 case 1: return "";
664 default: return normalized;
665 }
666 }
667 case 41: break;
668 case 19:
669 // lookahead expression with fixed base length 693 // lookahead expression with fixed base length
670 zzMarkedPos = zzStartRead + 1; 694 zzMarkedPos = zzStartRead + 1;
671 { add("ύ"); 695 { add("ύ");
672 } 696 }
673 case 42: break; 697 case 46: break;
674 default: 698 default:
675 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { 699 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
676 zzAtEOF = true; 700 zzAtEOF = true;
677 return null; 701 return null;
678 } 702 }