Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java @ 14:5df60f24e997
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 29 Aug 2011 17:40:02 +0200 |
parents | 1ec29fdd0db8 |
children |
comparison
equal
deleted
inserted
replaced
13:469d927b9ca7 | 14:5df60f24e997 |
---|---|
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ | 1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */ |
2 | 2 |
3 /* | 3 /* |
4 * Normalization rules for English text | 4 * Normalization rules for English text |
5 * [this is a JFlex specification] | 5 * [this is a JFlex specification] |
6 * | 6 * |
7 * Wolfgang Schmidle | 7 * Wolfgang Schmidle |
8 * version 0.96 | 8 * version 2011-07-12 |
9 * 2011-02-21 | |
10 * | 9 * |
11 */ | 10 */ |
12 | 11 |
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; |
14 | 13 |
15 | 14 |
16 /** | 15 /** |
17 * This class is a scanner generated by | 16 * This class is a scanner generated by |
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 | 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 |
19 * on 22.02.11 12:03 from the specification file | 18 * on 21.07.11 11:22 from the specification file |
20 * <tt>MpdlNormalizerLexEN.lex</tt> | 19 * <tt>MpdlNormalizerLexEN.lex</tt> |
21 */ | 20 */ |
22 public class MpdlNormalizerLexEN { | 21 public class MpdlNormalizerLexEN { |
23 | 22 |
24 /** This character denotes the end of file */ | 23 /** This character denotes the end of file */ |
38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l | 37 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l |
39 * at the beginning of a line | 38 * at the beginning of a line |
40 * l is of the form l = 2*k, k a non negative integer | 39 * l is of the form l = 2*k, k a non negative integer |
41 */ | 40 */ |
42 private static final int ZZ_LEXSTATE[] = { | 41 private static final int ZZ_LEXSTATE[] = { |
43 0, 0, 1, 1, 2, 2, 1, 1 | 42 0, 0, 1, 1, 2, 2, 3, 3 |
44 }; | 43 }; |
45 | 44 |
46 /** | 45 /** |
47 * Translates characters to character classes | 46 * Translates characters to character classes |
48 */ | 47 */ |
49 private static final String ZZ_CMAP_PACKED = | 48 private static final String ZZ_CMAP_PACKED = |
50 "\12\0\1\1\65\0\1\3\u013e\0\1\2\ufe80\0"; | 49 "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+ |
50 "\40\0\1\1\2\0\1\1\20\0\1\1\5\0\1\1\1\0\1\1"+ | |
51 "\u0101\0\1\4\ufe80\0"; | |
51 | 52 |
52 /** | 53 /** |
53 * Translates characters to character classes | 54 * Translates characters to character classes |
54 */ | 55 */ |
55 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); | 56 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); |
58 * Translates DFA states to action switch labels. | 59 * Translates DFA states to action switch labels. |
59 */ | 60 */ |
60 private static final int [] ZZ_ACTION = zzUnpackAction(); | 61 private static final int [] ZZ_ACTION = zzUnpackAction(); |
61 | 62 |
62 private static final String ZZ_ACTION_PACKED_0 = | 63 private static final String ZZ_ACTION_PACKED_0 = |
63 "\3\0\1\1\1\2\1\3\1\4\1\5"; | 64 "\4\0\2\1\1\2\1\3\1\4\1\5\1\6"; |
64 | 65 |
65 private static int [] zzUnpackAction() { | 66 private static int [] zzUnpackAction() { |
66 int [] result = new int[8]; | 67 int [] result = new int[11]; |
67 int offset = 0; | 68 int offset = 0; |
68 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); | 69 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); |
69 return result; | 70 return result; |
70 } | 71 } |
71 | 72 |
86 * Translates a state to a row index in the transition table | 87 * Translates a state to a row index in the transition table |
87 */ | 88 */ |
88 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); | 89 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); |
89 | 90 |
90 private static final String ZZ_ROWMAP_PACKED_0 = | 91 private static final String ZZ_ROWMAP_PACKED_0 = |
91 "\0\0\0\4\0\10\0\14\0\14\0\14\0\14\0\14"; | 92 "\0\0\0\6\0\14\0\22\0\30\0\36\0\30\0\30"+ |
93 "\0\30\0\30\0\30"; | |
92 | 94 |
93 private static int [] zzUnpackRowMap() { | 95 private static int [] zzUnpackRowMap() { |
94 int [] result = new int[8]; | 96 int [] result = new int[11]; |
95 int offset = 0; | 97 int offset = 0; |
96 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); | 98 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); |
97 return result; | 99 return result; |
98 } | 100 } |
99 | 101 |
112 * The transition table of the DFA | 114 * The transition table of the DFA |
113 */ | 115 */ |
114 private static final int [] ZZ_TRANS = zzUnpackTrans(); | 116 private static final int [] ZZ_TRANS = zzUnpackTrans(); |
115 | 117 |
116 private static final String ZZ_TRANS_PACKED_0 = | 118 private static final String ZZ_TRANS_PACKED_0 = |
117 "\1\4\1\0\1\4\1\5\1\4\1\6\1\7\1\5"+ | 119 "\1\5\1\6\1\5\1\0\1\5\1\7\1\5\1\6"+ |
118 "\1\4\1\10\1\7\1\5\4\0"; | 120 "\1\5\1\10\1\11\1\7\1\5\1\6\1\5\1\12"+ |
121 "\1\11\1\7\1\5\1\6\1\5\1\13\1\11\1\7"+ | |
122 "\10\0\1\5\3\0"; | |
119 | 123 |
120 private static int [] zzUnpackTrans() { | 124 private static int [] zzUnpackTrans() { |
121 int [] result = new int[16]; | 125 int [] result = new int[36]; |
122 int offset = 0; | 126 int offset = 0; |
123 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); | 127 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); |
124 return result; | 128 return result; |
125 } | 129 } |
126 | 130 |
154 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> | 158 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> |
155 */ | 159 */ |
156 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); | 160 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); |
157 | 161 |
158 private static final String ZZ_ATTRIBUTE_PACKED_0 = | 162 private static final String ZZ_ATTRIBUTE_PACKED_0 = |
159 "\3\0\5\11"; | 163 "\4\0\1\11\1\1\5\11"; |
160 | 164 |
161 private static int [] zzUnpackAttribute() { | 165 private static int [] zzUnpackAttribute() { |
162 int [] result = new int[8]; | 166 int [] result = new int[11]; |
163 int offset = 0; | 167 int offset = 0; |
164 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); | 168 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); |
165 return result; | 169 return result; |
166 } | 170 } |
167 | 171 |
234 private void add (String norm) { | 238 private void add (String norm) { |
235 original += yytext(); | 239 original += yytext(); |
236 normalized += norm; | 240 normalized += norm; |
237 } | 241 } |
238 | 242 |
243 private static final String LB = "[\u002d\u00ad] "; | |
244 | |
239 | 245 |
240 /** | 246 /** |
241 * Creates a new scanner | 247 * Creates a new scanner |
242 * There is also a java.io.InputStream version of this constructor. | 248 * There is also a java.io.InputStream version of this constructor. |
243 * | 249 * |
265 */ | 271 */ |
266 private static char [] zzUnpackCMap(String packed) { | 272 private static char [] zzUnpackCMap(String packed) { |
267 char [] map = new char[0x10000]; | 273 char [] map = new char[0x10000]; |
268 int i = 0; /* index in packed string */ | 274 int i = 0; /* index in packed string */ |
269 int j = 0; /* index in unpacked array */ | 275 int j = 0; /* index in unpacked array */ |
270 while (i < 14) { | 276 while (i < 46) { |
271 int count = packed.charAt(i++); | 277 int count = packed.charAt(i++); |
272 char value = packed.charAt(i++); | 278 char value = packed.charAt(i++); |
273 do map[j++] = value; while (--count > 0); | 279 do map[j++] = value; while (--count > 0); |
274 } | 280 } |
275 return map; | 281 return map; |
535 | 541 |
536 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { | 542 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { |
537 case 5: | 543 case 5: |
538 { switch (problem) { | 544 { switch (problem) { |
539 case 1: return ""; | 545 case 1: return ""; |
540 default: return normalized; | 546 default: return normalized.replaceAll(LB, ""); |
541 } | 547 } |
542 } | 548 } |
543 case 6: break; | 549 case 7: break; |
544 case 2: | 550 case 2: |
545 { problem = 1; add(yytext()); | 551 { problem = 1; add(yytext()); |
546 } | 552 } |
547 case 7: break; | 553 case 8: break; |
548 case 4: | 554 case 4: |
549 { add("s"); | 555 { add("s"); |
550 } | 556 } |
551 case 8: break; | 557 case 9: break; |
552 case 3: | 558 case 3: |
553 { switch (problem) { | 559 { switch (problem) { |
554 case 1: return original; | 560 case 1: return original; |
555 default: return normalized; | 561 default: return normalized; |
556 } | 562 } |
557 } | 563 } |
558 case 9: break; | 564 case 10: break; |
565 case 6: | |
566 { switch (problem) { | |
567 case 1: return original; | |
568 default: return normalized.replaceAll(LB, "").toLowerCase(); | |
569 } | |
570 } | |
571 case 11: break; | |
559 case 1: | 572 case 1: |
560 { add(yytext()); | 573 { add(yytext()); |
561 } | 574 } |
562 case 10: break; | 575 case 12: break; |
563 default: | 576 default: |
564 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { | 577 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { |
565 zzAtEOF = true; | 578 zzAtEOF = true; |
566 return null; | 579 return null; |
567 } | 580 } |