comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexDE.java @ 16:257f67be5c00

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Sep 2011 16:40:57 +0200
parents 5df60f24e997
children
comparison
equal deleted inserted replaced
15:e99964f390e4 16:257f67be5c00
1 /* The following code was generated by JFlex 1.4.3 on 03.08.11 18:24 */ 1 /* The following code was generated by JFlex 1.4.3 on 05.09.11 10:34 */
2 2
3 /* 3 /*
4 * Normalization rules for German text 4 * Normalization rules for German text
5 * [this is a JFlex specification] 5 * [this is a JFlex specification]
6 * 6 *
7 * Wolfgang Schmidle 7 * Wolfgang Schmidle
8 * version 2011-07-12 8 * version 2011-08-10
9 * 9 *
10 */ 10 */
11 11
12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
13 13
14 14
15 /** 15 /**
16 * This class is a scanner generated by 16 * This class is a scanner generated by
17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
18 * on 03.08.11 18:24 from the specification file 18 * on 05.09.11 10:34 from the specification file
19 * <tt>MpdlNormalizerLexDE.lex</tt> 19 * <tt>MpdlNormalizerLexDE.lex</tt>
20 */ 20 */
21 public class MpdlNormalizerLexDE { 21 public class MpdlNormalizerLexDE {
22 22
23 /** This character denotes the end of file */ 23 /** This character denotes the end of file */
25 25
26 /** initial size of the lookahead buffer */ 26 /** initial size of the lookahead buffer */
27 private static final int ZZ_BUFFERSIZE = 16384; 27 private static final int ZZ_BUFFERSIZE = 16384;
28 28
29 /** lexical states */ 29 /** lexical states */
30 public static final int SEARCH = 6; 30 public static final int SEARCH = 10;
31 public static final int DICT_ASCII = 6;
32 public static final int SEARCH_ASCII = 12;
31 public static final int DICT = 4; 33 public static final int DICT = 4;
32 public static final int YYINITIAL = 0; 34 public static final int YYINITIAL = 0;
33 public static final int CELEX = 8;
34 public static final int DISP = 2; 35 public static final int DISP = 2;
35 public static final int GRIMM = 10; 36 public static final int GRIMM = 8;
36 37
37 /** 38 /**
38 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l 39 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
39 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l 40 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
40 * at the beginning of a line 41 * at the beginning of a line
41 * l is of the form l = 2*k, k a non negative integer 42 * l is of the form l = 2*k, k a non negative integer
42 */ 43 */
43 private static final int ZZ_LEXSTATE[] = { 44 private static final int ZZ_LEXSTATE[] = {
44 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 45 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6
45 }; 46 };
46 47
47 /** 48 /**
48 * Translates characters to character classes 49 * Translates characters to character classes
49 */ 50 */
63 * Translates DFA states to action switch labels. 64 * Translates DFA states to action switch labels.
64 */ 65 */
65 private static final int [] ZZ_ACTION = zzUnpackAction(); 66 private static final int [] ZZ_ACTION = zzUnpackAction();
66 67
67 private static final String ZZ_ACTION_PACKED_0 = 68 private static final String ZZ_ACTION_PACKED_0 =
68 "\6\0\2\1\1\2\1\3\1\4\3\1\1\5\1\6"+ 69 "\7\0\2\1\1\2\1\3\1\4\3\1\1\5\1\3"+
69 "\1\3\3\1\1\7\1\10\1\11\1\12\1\13\1\14"+ 70 "\3\1\1\6\1\7\1\10\1\11\1\12\1\13\1\14"+
70 "\1\15\1\16\1\17"; 71 "\1\15\1\16\1\17";
71 72
72 private static int [] zzUnpackAction() { 73 private static int [] zzUnpackAction() {
73 int [] result = new int[29]; 74 int [] result = new int[30];
74 int offset = 0; 75 int offset = 0;
75 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); 76 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
76 return result; 77 return result;
77 } 78 }
78 79
94 */ 95 */
95 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); 96 private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
96 97
97 private static final String ZZ_ROWMAP_PACKED_0 = 98 private static final String ZZ_ROWMAP_PACKED_0 =
98 "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+ 99 "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+
99 "\0\146\0\146\0\146\0\210\0\231\0\252\0\146\0\146"+ 100 "\0\210\0\167\0\167\0\167\0\231\0\252\0\273\0\167"+
100 "\0\167\0\273\0\314\0\335\0\146\0\146\0\146\0\146"+ 101 "\0\210\0\314\0\335\0\356\0\167\0\167\0\167\0\167"+
101 "\0\146\0\146\0\146\0\146\0\146"; 102 "\0\167\0\167\0\167\0\167\0\167\0\167";
102 103
103 private static int [] zzUnpackRowMap() { 104 private static int [] zzUnpackRowMap() {
104 int [] result = new int[29]; 105 int [] result = new int[30];
105 int offset = 0; 106 int offset = 0;
106 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); 107 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
107 return result; 108 return result;
108 } 109 }
109 110
122 * The transition table of the DFA 123 * The transition table of the DFA
123 */ 124 */
124 private static final int [] ZZ_TRANS = zzUnpackTrans(); 125 private static final int [] ZZ_TRANS = zzUnpackTrans();
125 126
126 private static final String ZZ_TRANS_PACKED_0 = 127 private static final String ZZ_TRANS_PACKED_0 =
127 "\1\7\1\10\1\7\1\0\1\7\1\10\1\11\1\10"+ 128 "\1\10\1\11\1\10\1\0\1\10\1\11\1\12\1\11"+
128 "\1\7\1\10\6\7\1\12\1\7\1\10\1\7\1\13"+ 129 "\1\10\1\11\6\10\1\13\1\10\1\11\1\10\1\14"+
129 "\1\7\1\10\1\11\1\14\1\7\1\15\1\7\1\16"+ 130 "\1\10\1\11\1\12\1\15\1\10\1\16\1\10\1\17"+
130 "\4\7\1\12\1\7\1\10\1\7\1\17\1\7\1\10"+ 131 "\4\10\1\13\1\10\1\11\1\10\1\20\1\10\1\11"+
131 "\1\11\1\14\1\7\1\15\1\7\1\16\4\7\1\12"+ 132 "\1\12\1\15\1\10\1\16\1\10\1\17\4\10\2\13"+
132 "\1\7\1\10\1\7\1\20\1\7\1\10\1\11\1\14"+ 133 "\1\21\1\13\1\20\1\10\1\11\1\12\1\22\1\13"+
133 "\1\7\1\15\1\7\1\16\4\7\2\12\1\21\1\12"+ 134 "\1\23\1\13\1\24\1\25\1\26\1\27\1\30\1\13"+
134 "\1\17\1\7\1\10\1\11\1\22\1\12\1\23\1\12"+ 135 "\1\10\1\11\1\10\1\20\1\10\1\11\1\12\1\15"+
135 "\1\24\1\25\1\26\1\27\1\30\1\12\1\7\1\10"+ 136 "\1\10\1\16\1\10\1\17\3\10\1\31\1\13\1\10"+
136 "\1\7\1\17\1\7\1\10\1\11\1\14\1\7\1\15"+ 137 "\1\11\1\10\1\32\1\10\1\11\1\12\1\15\1\10"+
137 "\1\7\1\16\3\7\1\31\1\12\23\0\1\7\20\0"+ 138 "\1\16\1\10\1\17\4\10\2\13\1\21\1\13\1\32"+
138 "\1\7\5\0\1\32\1\0\1\33\10\0\1\7\7\0"+ 139 "\1\10\1\11\1\12\1\22\1\13\1\23\1\13\1\24"+
139 "\1\34\20\0\1\35\10\0\1\7\5\0\1\32\1\0"+ 140 "\1\25\1\26\1\27\1\30\1\13\23\0\1\10\20\0"+
140 "\1\27\10\0\1\7\7\0\1\25\20\0\1\26\6\0"; 141 "\1\10\5\0\1\33\1\0\1\34\10\0\1\10\7\0"+
142 "\1\35\20\0\1\36\10\0\1\10\5\0\1\33\1\0"+
143 "\1\27\10\0\1\10\7\0\1\25\20\0\1\26\6\0";
141 144
142 private static int [] zzUnpackTrans() { 145 private static int [] zzUnpackTrans() {
143 int [] result = new int[238]; 146 int [] result = new int[255];
144 int offset = 0; 147 int offset = 0;
145 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); 148 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
146 return result; 149 return result;
147 } 150 }
148 151
176 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> 179 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
177 */ 180 */
178 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); 181 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
179 182
180 private static final String ZZ_ATTRIBUTE_PACKED_0 = 183 private static final String ZZ_ATTRIBUTE_PACKED_0 =
181 "\6\0\1\11\1\1\3\11\3\1\2\11\4\1\11\11"; 184 "\7\0\1\11\1\1\3\11\3\1\1\11\4\1\12\11";
182 185
183 private static int [] zzUnpackAttribute() { 186 private static int [] zzUnpackAttribute() {
184 int [] result = new int[29]; 187 int [] result = new int[30];
185 int offset = 0; 188 int offset = 0;
186 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); 189 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
187 return result; 190 return result;
188 } 191 }
189 192
247 250
248 /** denotes if the user-EOF-code has already been executed */ 251 /** denotes if the user-EOF-code has already been executed */
249 private boolean zzEOFDone; 252 private boolean zzEOFDone;
250 253
251 /* user code: */ 254 /* user code: */
255 public static final int CELEX = DICT_ASCII;
256
252 private String original = ""; 257 private String original = "";
253 private String normalized = ""; 258 private String normalized = "";
254 private int problem = 0; 259 private int problem = 0;
255 260
256 private void add (String norm) { 261 private void add (String norm) {
556 561
557 // store back cached position 562 // store back cached position
558 zzMarkedPos = zzMarkedPosL; 563 zzMarkedPos = zzMarkedPosL;
559 564
560 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { 565 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
561 case 11: 566 case 10:
562 { add("sz"); 567 { add("sz");
563 } 568 }
564 case 16: break; 569 case 16: break;
565 case 3: 570 case 3:
566 { problem = 1; add(yytext()); 571 { problem = 1; add(yytext());
567 } 572 }
568 case 17: break; 573 case 17: break;
569 case 7: 574 case 6:
570 { add("ae"); 575 { add("ae");
571 } 576 }
572 case 18: break; 577 case 18: break;
573 case 2: 578 case 2:
574 { add("s"); 579 { add("s");
583 case 20: break; 588 case 20: break;
584 case 13: 589 case 13:
585 { add("ü"); 590 { add("ü");
586 } 591 }
587 case 21: break; 592 case 21: break;
588 case 9: 593 case 8:
589 { add("ue"); 594 { add("ue");
590 } 595 }
591 case 22: break; 596 case 22: break;
592 case 6: 597 case 11:
593 { switch (problem) { 598 { switch (problem) {
594 case 1: return original; 599 case 1: return original;
595 default: return normalized.replaceAll(LB, "").toLowerCase(); 600 default: return normalized.replaceAll(LB, "").toLowerCase();
596 } 601 }
597 } 602 }
606 case 25: break; 611 case 25: break;
607 case 1: 612 case 1:
608 { add(yytext()); 613 { add(yytext());
609 } 614 }
610 case 26: break; 615 case 26: break;
611 case 10: 616 case 9:
612 { add("ss"); 617 { add("ss");
613 } 618 }
614 case 27: break; 619 case 27: break;
615 case 8: 620 case 7:
616 { add("oe"); 621 { add("oe");
617 } 622 }
618 case 28: break; 623 case 28: break;
619 case 15: 624 case 15:
620 { add("ö"); 625 { add("ö");