comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexDE.java @ 9:1ec29fdd0db8

neue .lex Dateien für Normalisierung / externe Objekte update
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 22 Feb 2011 16:03:45 +0100
parents
children 5df60f24e997
comparison
equal deleted inserted replaced
8:d2a1c14fde31 9:1ec29fdd0db8
1 /* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */
2
3 /*
4 * Normalization rules for German text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 0.96
9 * 2011-02-21
10 *
11 */
12
13 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
14
15
16 /**
17 * This class is a scanner generated by
18 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
19 * on 22.02.11 12:03 from the specification file
20 * <tt>MpdlNormalizerLexDE.lex</tt>
21 */
22 public class MpdlNormalizerLexDE {
23
24 /** This character denotes the end of file */
25 public static final int YYEOF = -1;
26
27 /** initial size of the lookahead buffer */
28 private static final int ZZ_BUFFERSIZE = 16384;
29
30 /** lexical states; each value is an even index into ZZ_LEXSTATE (see below) */
31 public static final int SEARCH = 6;
32 public static final int DICT = 4;
33 public static final int YYINITIAL = 0;
34 public static final int CELEX = 8;
35 public static final int DISP = 2;
36 public static final int GRIMM = 10;
37
38 /**
39 * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
40 * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
41 * at the beginning of a line
42 * l is of the form l = 2*k, k a non negative integer
 *
 * Reading the table below: YYINITIAL starts in DFA state 0, DISP and
 * SEARCH both start in DFA state 1, DICT in state 2, CELEX in state 3
 * and GRIMM in state 4. The beginning-of-line entry equals the regular
 * entry for every lexical state.
43 */
44 private static final int ZZ_LEXSTATE[] = {
45 0, 0, 1, 1, 2, 2, 1, 1, 3, 3, 4, 4
46 };
47
48 /**
49 * Translates characters to character classes
 *
 * Packed form: consecutive (count, value) character pairs that
 * zzUnpackCMap expands into a table with one class per char value.
 * The largest class number appearing here is 13 (octal 15).
50 */
51 private static final String ZZ_CMAP_PACKED =
52 "\12\0\1\1\65\0\1\15\32\2\6\0\1\6\15\2\1\10\5\2"+
53 "\1\4\5\2\111\0\1\11\21\0\1\12\5\0\1\13\2\0\1\14"+
54 "\4\0\1\11\21\0\1\12\5\0\1\13\202\0\1\3\u01e4\0\1\7"+
55 "\1\0\1\5\ufc99\0";
56
57 /**
58 * Translates characters to character classes
 * (unpacked once, at class initialization, from ZZ_CMAP_PACKED)
59 */
60 private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
61
62 /**
63 * Translates DFA states to action switch labels.
64 */
65 private static final int [] ZZ_ACTION = zzUnpackAction();
66
// (count, value) pairs; zzUnpackAction() expands them into 25 entries.
// A value of 0 marks a state that has no action label.
67 private static final String ZZ_ACTION_PACKED_0 =
68 "\5\0\1\1\1\2\1\3\1\4\3\1\1\5\3\1"+
69 "\1\6\1\7\1\10\1\11\1\12\1\13\1\14\1\15"+
70 "\1\16";
71
72 private static int [] zzUnpackAction() {
73 int [] result = new int[25];
74 int offset = 0;
75 offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
76 return result;
77 }
78
79 private static int zzUnpackAction(String packed, int offset, int [] result) {
80 int i = 0; /* index in packed string */
81 int j = offset; /* index in unpacked array */
82 int l = packed.length();
83 while (i < l) {
84 int count = packed.charAt(i++);
85 int value = packed.charAt(i++);
86 do result[j++] = value; while (--count > 0);
87 }
88 return j;
89 }
90
91
92 /**
93 * Translates a state to a row index in the transition table
 *
 * yylex() adds the character class of the current input to this row
 * offset to index ZZ_TRANS.
94 */
95 private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
96
// Pairs of characters (high 16 bits, low 16 bits), one pair per DFA state,
// 25 states in total. All offsets below are multiples of 14.
97 private static final String ZZ_ROWMAP_PACKED_0 =
98 "\0\0\0\16\0\34\0\52\0\70\0\106\0\106\0\106"+
99 "\0\106\0\124\0\142\0\160\0\106\0\176\0\214\0\232"+
100 "\0\106\0\106\0\106\0\106\0\106\0\106\0\106\0\106"+
101 "\0\106";
102
103 private static int [] zzUnpackRowMap() {
104 int [] result = new int[25];
105 int offset = 0;
106 offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
107 return result;
108 }
109
110 private static int zzUnpackRowMap(String packed, int offset, int [] result) {
111 int i = 0; /* index in packed string */
112 int j = offset; /* index in unpacked array */
113 int l = packed.length();
114 while (i < l) {
115 int high = packed.charAt(i++) << 16;
116 result[j++] = high | packed.charAt(i++);
117 }
118 return j;
119 }
120
121 /**
122 * The transition table of the DFA
 *
 * Indexed in yylex() as ZZ_TRANS[ZZ_ROWMAP[state] + characterClass];
 * an entry of -1 means there is no transition.
123 */
124 private static final int [] ZZ_TRANS = zzUnpackTrans();
125
// (count, value + 1) pairs: zzUnpackTrans subtracts 1 from every value,
// so a packed 0 becomes the "no transition" marker -1. Expands to 168 entries.
126 private static final String ZZ_TRANS_PACKED_0 =
127 "\1\6\1\0\1\6\1\7\11\6\1\10\1\6\1\11"+
128 "\1\6\1\7\1\12\1\6\1\13\1\6\1\14\4\6"+
129 "\1\10\1\6\1\15\1\6\1\7\1\12\1\6\1\13"+
130 "\1\6\1\14\4\6\2\10\1\15\1\6\1\7\1\16"+
131 "\1\10\1\17\1\10\1\20\1\21\1\22\1\23\1\24"+
132 "\1\10\1\6\1\15\1\6\1\7\1\12\1\6\1\13"+
133 "\1\6\1\14\3\6\1\25\1\10\23\0\1\26\1\0"+
134 "\1\27\15\0\1\30\15\0\1\31\13\0\1\26\1\0"+
135 "\1\23\15\0\1\21\15\0\1\22\6\0";
136
137 private static int [] zzUnpackTrans() {
138 int [] result = new int[168];
139 int offset = 0;
140 offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
141 return result;
142 }
143
144 private static int zzUnpackTrans(String packed, int offset, int [] result) {
145 int i = 0; /* index in packed string */
146 int j = offset; /* index in unpacked array */
147 int l = packed.length();
148 while (i < l) {
149 int count = packed.charAt(i++);
150 int value = packed.charAt(i++);
151 value--;
152 do result[j++] = value; while (--count > 0);
153 }
154 return j;
155 }
156
157
158 /* error codes */
159 private static final int ZZ_UNKNOWN_ERROR = 0;
160 private static final int ZZ_NO_MATCH = 1;
161 private static final int ZZ_PUSHBACK_2BIG = 2;
162
163 /* error messages for the codes above */
164 private static final String ZZ_ERROR_MSG[] = {
165 "Unkown internal scanner error",
166 "Error: could not match input",
167 "Error: pushback value was too large"
168 };
169
170 /**
171 * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
 *
 * As tested in yylex(): bit 1 set means reaching the state records a
 * match (the state becomes the pending action); bit 8 additionally set
 * means scanning stops immediately after recording it.
172 */
173 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
174
// (count, value) pairs expanding to 25 entries, one per DFA state.
175 private static final String ZZ_ATTRIBUTE_PACKED_0 =
176 "\5\0\4\11\3\1\1\11\3\1\11\11";
177
178 private static int [] zzUnpackAttribute() {
179 int [] result = new int[25];
180 int offset = 0;
181 offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
182 return result;
183 }
184
185 private static int zzUnpackAttribute(String packed, int offset, int [] result) {
186 int i = 0; /* index in packed string */
187 int j = offset; /* index in unpacked array */
188 int l = packed.length();
189 while (i < l) {
190 int count = packed.charAt(i++);
191 int value = packed.charAt(i++);
192 do result[j++] = value; while (--count > 0);
193 }
194 return j;
195 }
196
197 /** the input device */
198 private java.io.Reader zzReader;
199
200 /** the current state of the DFA */
201 private int zzState;
202
203 /** the current lexical state */
204 private int zzLexicalState = YYINITIAL;
205
206 /** this buffer contains the current text to be matched and is
207 the source of the yytext() string */
208 private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
209
210 /** the textposition at the last accepting state */
211 private int zzMarkedPos;
212
213 /** the current text position in the buffer */
214 private int zzCurrentPos;
215
216 /** startRead marks the beginning of the yytext() string in the buffer */
217 private int zzStartRead;
218
219 /** endRead marks the last character in the buffer, that has been read
220 from input */
221 private int zzEndRead;
222
223 /** number of newlines encountered up to the start of the matched text */
224 private int yyline;
225
226 /** the number of characters up to the start of the matched text */
227 private int yychar;
228
229 /**
230 * the number of characters from the last newline up to the start of the
231 * matched text
232 */
233 private int yycolumn;
234
235 /**
236 * zzAtBOL == true <=> the scanner is currently at the beginning of a line
237 */
238 private boolean zzAtBOL = true;
239
240 /** zzAtEOF == true <=> the scanner is at the EOF */
241 private boolean zzAtEOF;
242
243 /** denotes if the user-EOF-code has already been executed */
244 private boolean zzEOFDone;
245
246 /* user code: */
// Concatenation of every matched text passed through add().
247 private String original = "";
// Concatenation of the replacement strings passed to add().
248 private String normalized = "";
// Set to 1 by one scanner action; later actions then return the original
// text (or "") instead of the normalized text. Never reset by yyreset().
249 private int problem = 0;
250
251 private void add (String norm) {
252 original += yytext();
253 normalized += norm;
254 }
255
256
257 /**
258 * Creates a new scanner that reads its input from the given reader.
259 * There is also a java.io.InputStream version of this constructor.
 * The reader is only stored here; nothing is read until scanning starts.
260 *
261 * @param in the java.io.Reader to read input from.
262 */
263 public MpdlNormalizerLexDE(java.io.Reader in) {
264 this.zzReader = in;
265 }
266
267 /**
268 * Creates a new scanner.
269 * There is also a java.io.Reader version of this constructor.
 *
 * NOTE(review): the stream is wrapped in an InputStreamReader without an
 * explicit charset, so bytes are decoded with the platform default
 * charset. For input containing umlauts or sharp s, callers that know the
 * encoding should prefer the Reader constructor with an explicitly
 * configured reader.
270 *
271 * @param in the java.io.InputStream to read input from.
272 */
273 public MpdlNormalizerLexDE(java.io.InputStream in) {
274 this(new java.io.InputStreamReader(in));
275 }
276
277 /**
278 * Unpacks the compressed character translation table.
279 *
280 * @param packed the packed character translation table
281 * @return the unpacked character translation table
282 */
283 private static char [] zzUnpackCMap(String packed) {
284 char [] map = new char[0x10000];
285 int i = 0; /* index in packed string */
286 int j = 0; /* index in unpacked array */
287 while (i < 66) {
288 int count = packed.charAt(i++);
289 char value = packed.charAt(i++);
290 do map[j++] = value; while (--count > 0);
291 }
292 return map;
293 }
294
295
296 /**
297 * Refills the input buffer.
298 *
299 * @return <code>false</code>, iff there was new input.
300 *
301 * @exception java.io.IOException if any I/O-Error occurs
302 */
303 private boolean zzRefill() throws java.io.IOException {
304
305 /* first: make room (if you can) */
306 if (zzStartRead > 0) {
307 System.arraycopy(zzBuffer, zzStartRead,
308 zzBuffer, 0,
309 zzEndRead-zzStartRead);
310
311 /* translate stored positions */
312 zzEndRead-= zzStartRead;
313 zzCurrentPos-= zzStartRead;
314 zzMarkedPos-= zzStartRead;
315 zzStartRead = 0;
316 }
317
318 /* is the buffer big enough? */
319 if (zzCurrentPos >= zzBuffer.length) {
320 /* if not: blow it up */
321 char newBuffer[] = new char[zzCurrentPos*2];
322 System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
323 zzBuffer = newBuffer;
324 }
325
326 /* finally: fill the buffer with new input */
327 int numRead = zzReader.read(zzBuffer, zzEndRead,
328 zzBuffer.length-zzEndRead);
329
330 if (numRead > 0) {
331 zzEndRead+= numRead;
332 return false;
333 }
334 // unlikely but not impossible: read 0 characters, but not at end of stream
335 if (numRead == 0) {
336 int c = zzReader.read();
337 if (c == -1) {
338 return true;
339 } else {
340 zzBuffer[zzEndRead++] = (char) c;
341 return false;
342 }
343 }
344
345 // numRead < 0
346 return true;
347 }
348
349
350 /**
351 * Closes the input stream.
352 */
353 public final void yyclose() throws java.io.IOException {
354 zzAtEOF = true; /* indicate end of file */
355 zzEndRead = zzStartRead; /* invalidate buffer */
356
357 if (zzReader != null)
358 zzReader.close();
359 }
360
361
362 /**
363 * Resets the scanner to read from a new input stream.
364 * Does not close the old reader.
365 *
366 * All internal variables are reset, the old input stream
367 * <b>cannot</b> be reused (internal buffer is discarded and lost).
368 * Lexical state is set to <tt>ZZ_INITIAL</tt>.
369 *
370 * @param reader the new input stream
371 */
372 public final void yyreset(java.io.Reader reader) {
373 zzReader = reader;
374 zzAtBOL = true;
375 zzAtEOF = false;
376 zzEOFDone = false;
377 zzEndRead = zzStartRead = 0;
378 zzCurrentPos = zzMarkedPos = 0;
379 yyline = yychar = yycolumn = 0;
380 zzLexicalState = YYINITIAL;
381 }
382
383
384 /**
385 * Returns the current lexical state.
 *
 * @return the value last passed to yybegin(int), or YYINITIAL if the
 *         state has not been changed since construction or yyreset
386 */
387 public final int yystate() {
388 return zzLexicalState;
389 }
390
391
392 /**
393 * Enters a new lexical state
 *
 * The value is stored without validation and is used by yylex() as an
 * index into ZZ_LEXSTATE; the public lexical state constants of this
 * class are the values the table provides entries for.
394 *
395 * @param newState the new lexical state
396 */
397 public final void yybegin(int newState) {
398 zzLexicalState = newState;
399 }
400
401
402 /**
403 * Returns the text matched by the current regular expression.
404 */
405 public final String yytext() {
406 return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
407 }
408
409
410 /**
411 * Returns the character at position <tt>pos</tt> from the
412 * matched text.
413 *
414 * It is equivalent to yytext().charAt(pos), but faster
 * (for positions inside the match; pos is not checked against the match
 * length, only against the bounds of the underlying buffer array)
415 *
416 * @param pos the position of the character to fetch.
417 * A value from 0 to yylength()-1.
418 *
419 * @return the character at position pos
420 */
421 public final char yycharat(int pos) {
422 return zzBuffer[zzStartRead+pos];
423 }
424
425
426 /**
427 * Returns the length of the matched text region.
 *
 * @return the number of characters between the start of the match and
 *         the marked (accepted) position
428 */
429 public final int yylength() {
430 return zzMarkedPos-zzStartRead;
431 }
432
433
434 /**
435 * Reports an error that occured while scanning.
436 *
437 * In a wellformed scanner (no or only correct usage of
438 * yypushback(int) and a match-all fallback rule) this method
439 * will only be called with things that "Can't Possibly Happen".
440 * If this method is called, something is seriously wrong
441 * (e.g. a JFlex bug producing a faulty scanner etc.).
442 *
443 * Usual syntax/scanner level error handling should be done
444 * in error fallback rules.
445 *
446 * @param errorCode the code of the errormessage to display
447 */
448 private void zzScanError(int errorCode) {
449 String message;
450 try {
451 message = ZZ_ERROR_MSG[errorCode];
452 }
453 catch (ArrayIndexOutOfBoundsException e) {
454 message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
455 }
456
457 throw new Error(message);
458 }
459
460
461 /**
462 * Pushes the specified amount of characters back into the input stream.
463 *
464 * They will be read again by then next call of the scanning method
465 *
466 * @param number the number of characters to be read again.
467 * This number must not be greater than yylength()!
468 */
469 public void yypushback(int number) {
470 if ( number > yylength() )
471 zzScanError(ZZ_PUSHBACK_2BIG);
472
473 zzMarkedPos -= number;
474 }
475
476
477 /**
478 * Resumes scanning until the next regular expression is matched,
479 * the end of input is encountered or an I/O-Error occurs.
 *
 * Most actions only accumulate text through add() and let the outer loop
 * continue with the next match. The method returns only from action 4
 * or action 5, or with <code>null</code> once end of input is reached
 * with no unmatched text left.
480 *
481 * @return the next token
 *         (action 4: the accumulated original text if a problem was
 *         flagged, otherwise the normalized text; action 5: the empty
 *         string if a problem was flagged, otherwise the normalized text;
 *         <code>null</code> at end of input)
482 * @exception java.io.IOException if any I/O-Error occurs
483 */
484 public java.lang.String yylex() throws java.io.IOException {
485 int zzInput;
486 int zzAction;
487
488 // cached fields:
489 int zzCurrentPosL;
490 int zzMarkedPosL;
491 int zzEndReadL = zzEndRead;
492 char [] zzBufferL = zzBuffer;
493 char [] zzCMapL = ZZ_CMAP;
494
495 int [] zzTransL = ZZ_TRANS;
496 int [] zzRowMapL = ZZ_ROWMAP;
497 int [] zzAttrL = ZZ_ATTRIBUTE;
498
// One iteration per match: run the DFA from the end of the previous
// match, then execute the action of the last accepting state seen.
499 while (true) {
500 zzMarkedPosL = zzMarkedPos;
501
// -1 means "no accepting state reached yet" for this match.
502 zzAction = -1;
503
// The new match starts where the previous one ended.
504 zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
505
506 zzState = ZZ_LEXSTATE[zzLexicalState];
507
508
509 zzForAction: {
510 while (true) {
511
// Fetch the next input character, refilling the buffer when it is
// exhausted; leave the DFA loop on end of input.
512 if (zzCurrentPosL < zzEndReadL)
513 zzInput = zzBufferL[zzCurrentPosL++];
514 else if (zzAtEOF) {
515 zzInput = YYEOF;
516 break zzForAction;
517 }
518 else {
519 // store back cached positions
520 zzCurrentPos = zzCurrentPosL;
521 zzMarkedPos = zzMarkedPosL;
522 boolean eof = zzRefill();
523 // get translated positions and possibly new buffer
524 zzCurrentPosL = zzCurrentPos;
525 zzMarkedPosL = zzMarkedPos;
526 zzBufferL = zzBuffer;
527 zzEndReadL = zzEndRead;
528 if (eof) {
529 zzInput = YYEOF;
530 break zzForAction;
531 }
532 else {
533 zzInput = zzBufferL[zzCurrentPosL++];
534 }
535 }
// Transition lookup: row offset of the current state plus the
// character class of the input; -1 means no transition exists.
536 int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
537 if (zzNext == -1) break zzForAction;
538 zzState = zzNext;
539
// Attribute bit 1: remember this state as the pending action and mark
// the match end here. Attribute bit 8: stop scanning right away.
540 int zzAttributes = zzAttrL[zzState];
541 if ( (zzAttributes & 1) == 1 ) {
542 zzAction = zzState;
543 zzMarkedPosL = zzCurrentPosL;
544 if ( (zzAttributes & 8) == 8 ) break zzForAction;
545 }
546
547 }
548 }
549
550 // store back cached position
551 zzMarkedPos = zzMarkedPosL;
552
// Dispatch on the action label of the accepting state; a negative
// zzAction (no match) goes to the default branch. Every action body is
// followed by its own "case N: break;" label, which ends the action so
// that it does not fall through into the next one.
553 switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
554 case 10:
555 { add("sz");
556 }
557 case 15: break;
558 case 3:
// Flags a problem and keeps the matched text unchanged.
559 { problem = 1; add(yytext());
560 }
561 case 16: break;
562 case 6:
563 { add("ae");
564 }
565 case 17: break;
566 case 2:
567 { add("s");
568 }
569 case 18: break;
570 case 4:
// Ends the scan: original text if a problem was flagged, else normalized.
571 { switch (problem) {
572 case 1: return original;
573 default: return normalized;
574 }
575 }
576 case 19: break;
577 case 12:
578 { add("ü");
579 }
580 case 20: break;
581 case 8:
582 { add("ue");
583 }
584 case 21: break;
585 case 11:
586 { add("u");
587 }
588 case 22: break;
589 case 13:
590 { add("ä");
591 }
592 case 23: break;
593 case 1:
// Copies the matched text through unchanged.
594 { add(yytext());
595 }
596 case 24: break;
597 case 9:
598 { add("ss");
599 }
600 case 25: break;
601 case 7:
602 { add("oe");
603 }
604 case 26: break;
605 case 14:
606 { add("ö");
607 }
608 case 27: break;
609 case 5:
// Ends the scan: empty string if a problem was flagged, else normalized.
610 { switch (problem) {
611 case 1: return "";
612 default: return normalized;
613 }
614 }
615 case 28: break;
616 default:
// No action selected: either a clean end of input (nothing consumed
// since the start of this match) or unmatched input, which is reported
// through zzScanError (it always throws).
617 if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
618 zzAtEOF = true;
619 return null;
620 }
621 else {
622 zzScanError(ZZ_NO_MATCH);
623 }
624 }
625 }
626 }
627
628
629 }