Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEL.java @ 14:5df60f24e997
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 29 Aug 2011 17:40:02 +0200 |
parents | 1ec29fdd0db8 |
children | 257f67be5c00 |
comparison
equal
deleted
inserted
replaced
13:469d927b9ca7 | 14:5df60f24e997 |
---|---|
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ | 1 /* The following code was generated by JFlex 1.4.3 on 03.08.11 18:23 */ |
2 | 2 |
3 /* | 3 /* |
4 * Normalization rules for Greek text | 4 * Normalization rules for Greek text |
5 * [this is a JFlex specification] | 5 * [this is a JFlex specification] |
6 * | 6 * |
7 * Wolfgang Schmidle | 7 * Wolfgang Schmidle |
8 * version 0.96 | 8 * version 2011-08-03 |
9 * 2011-02-21 | |
10 * | 9 * |
11 */ | 10 */ |
12 | 11 |
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; |
14 | 13 |
15 | 14 |
16 /** | 15 /** |
17 * This class is a scanner generated by | 16 * This class is a scanner generated by |
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 | 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 |
19 * on 22.02.11 12:03 from the specification file | 18 * on 03.08.11 18:23 from the specification file |
20 * <tt>MpdlNormalizerLexEL.lex</tt> | 19 * <tt>MpdlNormalizerLexEL.lex</tt> |
21 */ | 20 */ |
22 public class MpdlNormalizerLexEL { | 21 public class MpdlNormalizerLexEL { |
23 | 22 |
24 /** This character denotes the end of file */ | 23 /** This character denotes the end of file */ |
29 | 28 |
30 /** lexical states */ | 29 /** lexical states */ |
31 public static final int SEARCH = 6; | 30 public static final int SEARCH = 6; |
32 public static final int DICT = 4; | 31 public static final int DICT = 4; |
33 public static final int YYINITIAL = 0; | 32 public static final int YYINITIAL = 0; |
33 public static final int SIGMA = 8; | |
34 public static final int DISP = 2; | 34 public static final int DISP = 2; |
35 | 35 |
36 /** | 36 /** |
37 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l | 37 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l |
38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l | 38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l |
39 * at the beginning of a line | 39 * at the beginning of a line |
40 * l is of the form l = 2*k, k a non negative integer | 40 * l is of the form l = 2*k, k a non negative integer |
41 */ | 41 */ |
42 private static final int ZZ_LEXSTATE[] = { | 42 private static final int ZZ_LEXSTATE[] = { |
43 0, 0, 1, 1, 2, 2, 3, 3 | 43 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 |
44 }; | 44 }; |
45 | 45 |
46 /** | 46 /** |
47 * Translates characters to character classes | 47 * Translates characters to character classes |
48 */ | 48 */ |
49 private static final String ZZ_CMAP_PACKED = | 49 private static final String ZZ_CMAP_PACKED = |
50 "\12\0\1\1\65\0\1\3\32\3\6\0\32\3\u0331\0\1\4\1\5"+ | 50 "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+ |
51 "\1\6\1\7\15\0\1\2\3\0\2\2\11\0\1\10\1\11\1\12"+ | 51 "\32\5\6\0\1\6\2\5\1\6\20\5\1\6\5\5\1\1\1\0"+ |
52 "\u1ba1\0\1\13\1\0\1\15\1\0\1\16\1\0\1\20\1\0\1\21"+ | 52 "\1\1\u032e\0\1\7\1\10\1\11\1\12\15\0\1\4\3\0\1\4"+ |
53 "\1\0\1\22\1\0\1\23\65\0\1\14\17\0\1\17\57\0\1\24"+ | 53 "\1\30\11\0\1\13\1\14\1\15\u1ba1\0\1\16\1\0\1\20\1\0"+ |
54 "\ue00d\0"; | 54 "\1\21\1\0\1\23\1\0\1\24\1\0\1\25\1\0\1\26\65\0"+ |
55 "\1\17\17\0\1\22\57\0\1\27\ue00d\0"; | |
55 | 56 |
56 /** | 57 /** |
57 * Translates characters to character classes | 58 * Translates characters to character classes |
58 */ | 59 */ |
59 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); | 60 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); |
62 * Translates DFA states to action switch labels. | 63 * Translates DFA states to action switch labels. |
63 */ | 64 */ |
64 private static final int [] ZZ_ACTION = zzUnpackAction(); | 65 private static final int [] ZZ_ACTION = zzUnpackAction(); |
65 | 66 |
66 private static final String ZZ_ACTION_PACKED_0 = | 67 private static final String ZZ_ACTION_PACKED_0 = |
67 "\4\0\1\1\1\2\1\3\1\4\1\5\1\6\1\7"+ | 68 "\5\0\2\1\2\2\1\3\1\4\1\5\1\6\1\7"+ |
68 "\1\10\1\11\1\12\1\13\12\1\1\14\1\0\1\15"+ | 69 "\1\10\1\11\1\12\1\13\12\1\1\14\1\15\1\16"+ |
69 "\1\0\1\16\1\0\1\17\1\0\1\20\1\0\1\21"+ | 70 "\1\0\1\17\1\0\1\20\1\0\1\21\1\0\1\22"+ |
70 "\1\0\1\22\1\0\1\23\1\0\1\24\1\0\1\25"+ | 71 "\1\0\1\23\1\0\1\24\1\0\1\25\1\0\1\26"+ |
71 "\1\0"; | 72 "\1\0\1\27\1\0"; |
72 | 73 |
73 private static int [] zzUnpackAction() { | 74 private static int [] zzUnpackAction() { |
74 int [] result = new int[45]; | 75 int [] result = new int[50]; |
75 int offset = 0; | 76 int offset = 0; |
76 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); | 77 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); |
77 return result; | 78 return result; |
78 } | 79 } |
79 | 80 |
94 * Translates a state to a row index in the transition table | 95 * Translates a state to a row index in the transition table |
95 */ | 96 */ |
96 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); | 97 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); |
97 | 98 |
98 private static final String ZZ_ROWMAP_PACKED_0 = | 99 private static final String ZZ_ROWMAP_PACKED_0 = |
99 "\0\0\0\25\0\52\0\77\0\124\0\124\0\124\0\124"+ | 100 "\0\0\0\31\0\62\0\113\0\144\0\175\0\226\0\175"+ |
100 "\0\124\0\124\0\124\0\124\0\124\0\124\0\124\0\151"+ | 101 "\0\226\0\175\0\175\0\175\0\175\0\175\0\175\0\175"+ |
101 "\0\176\0\223\0\250\0\275\0\322\0\347\0\374\0\u0111"+ | 102 "\0\175\0\175\0\257\0\310\0\341\0\372\0\u0113\0\u012c"+ |
102 "\0\u0126\0\124\0\u013b\0\124\0\u0150\0\124\0\u0165\0\124"+ | 103 "\0\u0145\0\u015e\0\u0177\0\u0190\0\175\0\175\0\175\0\u01a9"+ |
103 "\0\u017a\0\124\0\u018f\0\124\0\u01a4\0\124\0\u01b9\0\124"+ | 104 "\0\175\0\u01c2\0\175\0\u01db\0\175\0\u01f4\0\175\0\u020d"+ |
104 "\0\u01ce\0\124\0\u01e3\0\124\0\u01f8"; | 105 "\0\175\0\u0226\0\175\0\u023f\0\175\0\u0258\0\175\0\u0271"+ |
106 "\0\175\0\u028a"; | |
105 | 107 |
106 private static int [] zzUnpackRowMap() { | 108 private static int [] zzUnpackRowMap() { |
107 int [] result = new int[45]; | 109 int [] result = new int[50]; |
108 int offset = 0; | 110 int offset = 0; |
109 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); | 111 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); |
110 return result; | 112 return result; |
111 } | 113 } |
112 | 114 |
125 * The transition table of the DFA | 127 * The transition table of the DFA |
126 */ | 128 */ |
127 private static final int [] ZZ_TRANS = zzUnpackTrans(); | 129 private static final int [] ZZ_TRANS = zzUnpackTrans(); |
128 | 130 |
129 private static final String ZZ_TRANS_PACKED_0 = | 131 private static final String ZZ_TRANS_PACKED_0 = |
130 "\1\5\1\0\24\5\1\6\1\5\1\7\1\10\1\11"+ | 132 "\1\6\1\7\1\6\1\0\1\6\1\10\1\11\1\12"+ |
131 "\1\12\1\13\1\14\1\15\1\16\13\5\1\17\1\5"+ | 133 "\1\13\1\14\1\15\1\16\1\17\1\20\14\6\1\7"+ |
132 "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ | 134 "\1\6\1\21\1\6\1\10\1\11\1\12\1\13\1\14"+ |
133 "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+ | 135 "\1\15\1\16\1\17\1\20\14\6\1\7\1\6\1\22"+ |
134 "\1\30\1\31\1\5\1\6\1\5\1\7\1\10\1\11"+ | 136 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ |
135 "\1\12\1\13\1\14\1\15\1\16\1\20\1\21\1\22"+ | 137 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+ |
136 "\1\23\1\24\1\25\1\26\1\27\1\30\1\31\26\0"+ | 138 "\1\31\1\32\1\33\1\34\2\6\1\7\1\6\1\35"+ |
137 "\1\32\1\33\23\0\1\34\1\35\23\0\1\36\1\37"+ | 139 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ |
138 "\23\0\1\40\1\41\23\0\1\42\1\43\23\0\1\44"+ | 140 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+ |
139 "\1\45\23\0\1\46\1\47\23\0\1\50\1\51\23\0"+ | 141 "\1\31\1\32\1\33\1\34\2\6\1\7\1\6\1\22"+ |
140 "\1\52\1\53\23\0\1\54\1\55\23\0\1\32\24\0"+ | 142 "\1\6\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ |
141 "\1\34\24\0\1\36\24\0\1\40\24\0\1\42\24\0"+ | 143 "\1\17\1\20\1\23\1\24\1\25\1\26\1\27\1\30"+ |
142 "\1\44\24\0\1\46\24\0\1\50\24\0\1\52\24\0"+ | 144 "\1\31\1\32\1\33\1\34\1\36\33\0\1\6\31\0"+ |
143 "\1\54\23\0"; | 145 "\1\37\1\40\23\0\1\40\3\0\1\41\1\42\23\0"+ |
146 "\1\42\3\0\1\43\1\44\23\0\1\44\3\0\1\45"+ | |
147 "\1\46\23\0\1\46\3\0\1\47\1\50\23\0\1\50"+ | |
148 "\3\0\1\51\1\52\23\0\1\52\3\0\1\53\1\54"+ | |
149 "\23\0\1\54\3\0\1\55\1\56\23\0\1\56\3\0"+ | |
150 "\1\57\1\60\23\0\1\60\3\0\1\61\1\62\23\0"+ | |
151 "\1\62\3\0\1\37\30\0\1\41\30\0\1\43\30\0"+ | |
152 "\1\45\30\0\1\47\30\0\1\51\30\0\1\53\30\0"+ | |
153 "\1\55\30\0\1\57\30\0\1\61\25\0"; | |
144 | 154 |
145 private static int [] zzUnpackTrans() { | 155 private static int [] zzUnpackTrans() { |
146 int [] result = new int[525]; | 156 int [] result = new int[675]; |
147 int offset = 0; | 157 int offset = 0; |
148 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); | 158 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); |
149 return result; | 159 return result; |
150 } | 160 } |
151 | 161 |
179 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> | 189 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> |
180 */ | 190 */ |
181 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); | 191 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); |
182 | 192 |
183 private static final String ZZ_ATTRIBUTE_PACKED_0 = | 193 private static final String ZZ_ATTRIBUTE_PACKED_0 = |
184 "\4\0\13\11\12\1\1\11\1\0\1\11\1\0\1\11"+ | 194 "\5\0\1\11\1\1\1\11\1\1\11\11\12\1\3\11"+ |
185 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+ | 195 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+ |
186 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0"; | 196 "\1\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11"+ |
197 "\1\0\1\11\1\0"; | |
187 | 198 |
188 private static int [] zzUnpackAttribute() { | 199 private static int [] zzUnpackAttribute() { |
189 int [] result = new int[45]; | 200 int [] result = new int[50]; |
190 int offset = 0; | 201 int offset = 0; |
191 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); | 202 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); |
192 return result; | 203 return result; |
193 } | 204 } |
194 | 205 |
261 private void add (String norm) { | 272 private void add (String norm) { |
262 original += yytext(); | 273 original += yytext(); |
263 normalized += norm; | 274 normalized += norm; |
264 } | 275 } |
265 | 276 |
277 private static final String LB = "[\u002d\u00ad] "; | |
278 | |
266 | 279 |
267 /** | 280 /** |
268 * Creates a new scanner | 281 * Creates a new scanner |
269 * There is also a java.io.InputStream version of this constructor. | 282 * There is also a java.io.InputStream version of this constructor. |
270 * | 283 * |
292 */ | 305 */ |
293 private static char [] zzUnpackCMap(String packed) { | 306 private static char [] zzUnpackCMap(String packed) { |
294 char [] map = new char[0x10000]; | 307 char [] map = new char[0x10000]; |
295 int i = 0; /* index in packed string */ | 308 int i = 0; /* index in packed string */ |
296 int j = 0; /* index in unpacked array */ | 309 int j = 0; /* index in unpacked array */ |
297 while (i < 82) { | 310 while (i < 112) { |
298 int count = packed.charAt(i++); | 311 int count = packed.charAt(i++); |
299 char value = packed.charAt(i++); | 312 char value = packed.charAt(i++); |
300 do map[j++] = value; while (--count > 0); | 313 do map[j++] = value; while (--count > 0); |
301 } | 314 } |
302 return map; | 315 return map; |
559 | 572 |
560 // store back cached position | 573 // store back cached position |
561 zzMarkedPos = zzMarkedPosL; | 574 zzMarkedPos = zzMarkedPosL; |
562 | 575 |
563 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { | 576 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { |
564 case 21: | 577 case 23: |
565 // lookahead expression with fixed base length | 578 // lookahead expression with fixed base length |
566 zzMarkedPos = zzStartRead + 1; | 579 zzMarkedPos = zzStartRead + 1; |
567 { add("ῴ"); | 580 { add("ῴ"); |
568 } | 581 } |
569 case 22: break; | 582 case 24: break; |
583 case 5: | |
584 { add("ή"); | |
585 } | |
586 case 25: break; | |
587 case 17: | |
588 // lookahead expression with fixed base length | |
589 zzMarkedPos = zzStartRead + 1; | |
590 { add("ή"); | |
591 } | |
592 case 26: break; | |
593 case 13: | |
594 { add("σ"); | |
595 } | |
596 case 27: break; | |
570 case 6: | 597 case 6: |
571 { add("ή"); | |
572 } | |
573 case 23: break; | |
574 case 15: | |
575 // lookahead expression with fixed base length | |
576 zzMarkedPos = zzStartRead + 1; | |
577 { add("ή"); | |
578 } | |
579 case 24: break; | |
580 case 7: | |
581 { add("ί"); | 598 { add("ί"); |
582 } | 599 } |
583 case 25: break; | 600 case 28: break; |
584 case 1: | 601 case 1: |
585 { add(yytext()); | 602 { add(yytext()); |
586 } | 603 } |
587 case 26: break; | 604 case 29: break; |
605 case 22: | |
606 // lookahead expression with fixed base length | |
607 zzMarkedPos = zzStartRead + 1; | |
608 { add("ώ"); | |
609 } | |
610 case 30: break; | |
611 case 11: | |
612 { switch (problem) { | |
613 case 1: return ""; | |
614 default: return normalized.replaceAll(LB, ""); | |
615 } | |
616 } | |
617 case 31: break; | |
618 case 19: | |
619 // lookahead expression with fixed base length | |
620 zzMarkedPos = zzStartRead + 1; | |
621 { add("ί"); | |
622 } | |
623 case 32: break; | |
624 case 15: | |
625 // lookahead expression with fixed base length | |
626 zzMarkedPos = zzStartRead + 1; | |
627 { add("ᾴ"); | |
628 } | |
629 case 33: break; | |
630 case 7: | |
631 { add("ό"); | |
632 } | |
633 case 34: break; | |
634 case 14: | |
635 // lookahead expression with fixed base length | |
636 zzMarkedPos = zzStartRead + 1; | |
637 { add("ά"); | |
638 } | |
639 case 35: break; | |
640 case 12: | |
641 { switch (problem) { | |
642 case 1: return original; | |
643 default: return normalized.replaceAll(LB, "").toLowerCase(); | |
644 } | |
645 } | |
646 case 36: break; | |
647 case 8: | |
648 { add("ύ"); | |
649 } | |
650 case 37: break; | |
651 case 2: | |
652 { problem = 1; add(yytext()); | |
653 } | |
654 case 38: break; | |
588 case 20: | 655 case 20: |
589 // lookahead expression with fixed base length | 656 // lookahead expression with fixed base length |
590 zzMarkedPos = zzStartRead + 1; | 657 zzMarkedPos = zzStartRead + 1; |
591 { add("ώ"); | 658 { add("ό"); |
592 } | 659 } |
593 case 27: break; | 660 case 39: break; |
594 case 17: | |
595 // lookahead expression with fixed base length | |
596 zzMarkedPos = zzStartRead + 1; | |
597 { add("ί"); | |
598 } | |
599 case 28: break; | |
600 case 13: | |
601 // lookahead expression with fixed base length | |
602 zzMarkedPos = zzStartRead + 1; | |
603 { add("ᾴ"); | |
604 } | |
605 case 29: break; | |
606 case 8: | |
607 { add("ό"); | |
608 } | |
609 case 30: break; | |
610 case 12: | |
611 // lookahead expression with fixed base length | |
612 zzMarkedPos = zzStartRead + 1; | |
613 { add("ά"); | |
614 } | |
615 case 31: break; | |
616 case 9: | |
617 { add("ύ"); | |
618 } | |
619 case 32: break; | |
620 case 3: | 661 case 3: |
621 { problem = 1; add(yytext()); | |
622 } | |
623 case 33: break; | |
624 case 18: | |
625 // lookahead expression with fixed base length | |
626 zzMarkedPos = zzStartRead + 1; | |
627 { add("ό"); | |
628 } | |
629 case 34: break; | |
630 case 4: | |
631 { add("ά"); | 662 { add("ά"); |
632 } | 663 } |
633 case 35: break; | 664 case 40: break; |
634 case 2: | 665 case 10: |
635 { switch (problem) { | 666 { switch (problem) { |
636 case 1: return original; | 667 case 1: return original; |
637 default: return normalized; | 668 default: return normalized; |
638 } | 669 } |
639 } | 670 } |
640 case 36: break; | 671 case 41: break; |
641 case 10: | 672 case 9: |
642 { add("ώ"); | 673 { add("ώ"); |
643 } | 674 } |
644 case 37: break; | 675 case 42: break; |
645 case 14: | 676 case 16: |
646 // lookahead expression with fixed base length | 677 // lookahead expression with fixed base length |
647 zzMarkedPos = zzStartRead + 1; | 678 zzMarkedPos = zzStartRead + 1; |
648 { add("έ"); | 679 { add("έ"); |
649 } | 680 } |
650 case 38: break; | 681 case 43: break; |
651 case 16: | 682 case 18: |
652 // lookahead expression with fixed base length | 683 // lookahead expression with fixed base length |
653 zzMarkedPos = zzStartRead + 1; | 684 zzMarkedPos = zzStartRead + 1; |
654 { add("ῄ"); | 685 { add("ῄ"); |
655 } | 686 } |
656 case 39: break; | 687 case 44: break; |
657 case 5: | 688 case 4: |
658 { add("έ"); | 689 { add("έ"); |
659 } | 690 } |
660 case 40: break; | 691 case 45: break; |
661 case 11: | 692 case 21: |
662 { switch (problem) { | |
663 case 1: return ""; | |
664 default: return normalized; | |
665 } | |
666 } | |
667 case 41: break; | |
668 case 19: | |
669 // lookahead expression with fixed base length | 693 // lookahead expression with fixed base length |
670 zzMarkedPos = zzStartRead + 1; | 694 zzMarkedPos = zzStartRead + 1; |
671 { add("ύ"); | 695 { add("ύ"); |
672 } | 696 } |
673 case 42: break; | 697 case 46: break; |
674 default: | 698 default: |
675 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { | 699 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { |
676 zzAtEOF = true; | 700 zzAtEOF = true; |
677 return null; | 701 return null; |
678 } | 702 } |