comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java @ 14:5df60f24e997

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Mon, 29 Aug 2011 17:40:02 +0200
parents 1ec29fdd0db8
children
comparison
equal deleted inserted replaced
13:469d927b9ca7 14:5df60f24e997
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */ 1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */
2 2
3 /* 3 /*
4 * Normalization rules for English text 4 * Normalization rules for English text
5 * [this is a JFlex specification] 5 * [this is a JFlex specification]
6 * 6 *
7 * Wolfgang Schmidle 7 * Wolfgang Schmidle
8 * version 0.96 8 * version 2011-07-12
9 * 2011-02-21
10 * 9 *
11 */ 10 */
12 11
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; 12 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14 13
15 14
16 /** 15 /**
17 * This class is a scanner generated by 16 * This class is a scanner generated by
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 17 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
19 * on 22.02.11 12:03 from the specification file 18 * on 21.07.11 11:22 from the specification file
20 * <tt>MpdlNormalizerLexEN.lex</tt> 19 * <tt>MpdlNormalizerLexEN.lex</tt>
21 */ 20 */
22 public class MpdlNormalizerLexEN { 21 public class MpdlNormalizerLexEN {
23 22
24 /** This character denotes the end of file */ 23 /** This character denotes the end of file */
38 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l 37 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
39 * at the beginning of a line 38 * at the beginning of a line
40 * l is of the form l = 2*k, k a non negative integer 39 * l is of the form l = 2*k, k a non negative integer
41 */ 40 */
42 private static final int ZZ_LEXSTATE[] = { 41 private static final int ZZ_LEXSTATE[] = {
43 0, 0, 1, 1, 2, 2, 1, 1 42 0, 0, 1, 1, 2, 2, 3, 3
44 }; 43 };
45 44
46 /** 45 /**
47 * Translates characters to character classes 46 * Translates characters to character classes
48 */ 47 */
49 private static final String ZZ_CMAP_PACKED = 48 private static final String ZZ_CMAP_PACKED =
50 "\12\0\1\1\65\0\1\3\u013e\0\1\2\ufe80\0"; 49 "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+
50 "\40\0\1\1\2\0\1\1\20\0\1\1\5\0\1\1\1\0\1\1"+
51 "\u0101\0\1\4\ufe80\0";
51 52
52 /** 53 /**
53 * Translates characters to character classes 54 * Translates characters to character classes
54 */ 55 */
55 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); 56 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
58 * Translates DFA states to action switch labels. 59 * Translates DFA states to action switch labels.
59 */ 60 */
60 private static final int [] ZZ_ACTION = zzUnpackAction(); 61 private static final int [] ZZ_ACTION = zzUnpackAction();
61 62
62 private static final String ZZ_ACTION_PACKED_0 = 63 private static final String ZZ_ACTION_PACKED_0 =
63 "\3\0\1\1\1\2\1\3\1\4\1\5"; 64 "\4\0\2\1\1\2\1\3\1\4\1\5\1\6";
64 65
65 private static int [] zzUnpackAction() { 66 private static int [] zzUnpackAction() {
66 int [] result = new int[8]; 67 int [] result = new int[11];
67 int offset = 0; 68 int offset = 0;
68 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); 69 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
69 return result; 70 return result;
70 } 71 }
71 72
86 * Translates a state to a row index in the transition table 87 * Translates a state to a row index in the transition table
87 */ 88 */
88 private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); 89 private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
89 90
90 private static final String ZZ_ROWMAP_PACKED_0 = 91 private static final String ZZ_ROWMAP_PACKED_0 =
91 "\0\0\0\4\0\10\0\14\0\14\0\14\0\14\0\14"; 92 "\0\0\0\6\0\14\0\22\0\30\0\36\0\30\0\30"+
93 "\0\30\0\30\0\30";
92 94
93 private static int [] zzUnpackRowMap() { 95 private static int [] zzUnpackRowMap() {
94 int [] result = new int[8]; 96 int [] result = new int[11];
95 int offset = 0; 97 int offset = 0;
96 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); 98 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
97 return result; 99 return result;
98 } 100 }
99 101
112 * The transition table of the DFA 114 * The transition table of the DFA
113 */ 115 */
114 private static final int [] ZZ_TRANS = zzUnpackTrans(); 116 private static final int [] ZZ_TRANS = zzUnpackTrans();
115 117
116 private static final String ZZ_TRANS_PACKED_0 = 118 private static final String ZZ_TRANS_PACKED_0 =
117 "\1\4\1\0\1\4\1\5\1\4\1\6\1\7\1\5"+ 119 "\1\5\1\6\1\5\1\0\1\5\1\7\1\5\1\6"+
118 "\1\4\1\10\1\7\1\5\4\0"; 120 "\1\5\1\10\1\11\1\7\1\5\1\6\1\5\1\12"+
121 "\1\11\1\7\1\5\1\6\1\5\1\13\1\11\1\7"+
122 "\10\0\1\5\3\0";
119 123
120 private static int [] zzUnpackTrans() { 124 private static int [] zzUnpackTrans() {
121 int [] result = new int[16]; 125 int [] result = new int[36];
122 int offset = 0; 126 int offset = 0;
123 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); 127 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
124 return result; 128 return result;
125 } 129 }
126 130
154 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> 158 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
155 */ 159 */
156 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); 160 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
157 161
158 private static final String ZZ_ATTRIBUTE_PACKED_0 = 162 private static final String ZZ_ATTRIBUTE_PACKED_0 =
159 "\3\0\5\11"; 163 "\4\0\1\11\1\1\5\11";
160 164
161 private static int [] zzUnpackAttribute() { 165 private static int [] zzUnpackAttribute() {
162 int [] result = new int[8]; 166 int [] result = new int[11];
163 int offset = 0; 167 int offset = 0;
164 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); 168 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
165 return result; 169 return result;
166 } 170 }
167 171
234 private void add (String norm) { 238 private void add (String norm) {
235 original += yytext(); 239 original += yytext();
236 normalized += norm; 240 normalized += norm;
237 } 241 }
238 242
243 private static final String LB = "[\u002d\u00ad] ";
244
239 245
240 /** 246 /**
241 * Creates a new scanner 247 * Creates a new scanner
242 * There is also a java.io.InputStream version of this constructor. 248 * There is also a java.io.InputStream version of this constructor.
243 * 249 *
265 */ 271 */
266 private static char [] zzUnpackCMap(String packed) { 272 private static char [] zzUnpackCMap(String packed) {
267 char [] map = new char[0x10000]; 273 char [] map = new char[0x10000];
268 int i = 0; /* index in packed string */ 274 int i = 0; /* index in packed string */
269 int j = 0; /* index in unpacked array */ 275 int j = 0; /* index in unpacked array */
270 while (i < 14) { 276 while (i < 46) {
271 int count = packed.charAt(i++); 277 int count = packed.charAt(i++);
272 char value = packed.charAt(i++); 278 char value = packed.charAt(i++);
273 do map[j++] = value; while (--count > 0); 279 do map[j++] = value; while (--count > 0);
274 } 280 }
275 return map; 281 return map;
535 541
536 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { 542 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
537 case 5: 543 case 5:
538 { switch (problem) { 544 { switch (problem) {
539 case 1: return ""; 545 case 1: return "";
540 default: return normalized; 546 default: return normalized.replaceAll(LB, "");
541 } 547 }
542 } 548 }
543 case 6: break; 549 case 7: break;
544 case 2: 550 case 2:
545 { problem = 1; add(yytext()); 551 { problem = 1; add(yytext());
546 } 552 }
547 case 7: break; 553 case 8: break;
548 case 4: 554 case 4:
549 { add("s"); 555 { add("s");
550 } 556 }
551 case 8: break; 557 case 9: break;
552 case 3: 558 case 3:
553 { switch (problem) { 559 { switch (problem) {
554 case 1: return original; 560 case 1: return original;
555 default: return normalized; 561 default: return normalized;
556 } 562 }
557 } 563 }
558 case 9: break; 564 case 10: break;
565 case 6:
566 { switch (problem) {
567 case 1: return original;
568 default: return normalized.replaceAll(LB, "").toLowerCase();
569 }
570 }
571 case 11: break;
559 case 1: 572 case 1:
560 { add(yytext()); 573 { add(yytext());
561 } 574 }
562 case 10: break; 575 case 12: break;
563 default: 576 default:
564 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { 577 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
565 zzAtEOF = true; 578 zzAtEOF = true;
566 return null; 579 return null;
567 } 580 }