comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexAR.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children e845310098ba
comparison
equal deleted inserted replaced
18:dc5e9fcb3fdc 19:4a3641ae14d2
1 /* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */
2
3 /*
4 * Normalization rules for Arabic text
5 * [this is a JFlex specification]
6 *
7 * Wolfgang Schmidle
8 * version 2011-02-28
9 *
10 */
11
12 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang;
13
14
/**
 * Line-oriented normalizer for Arabic text. This class is a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 from the specification file
 * <tt>MpdlNormalizerLexAR.lex</tt>; hand edits made here should be mirrored in
 * that specification, otherwise regenerating the scanner will drop them.
 *
 * <p>Usage, as far as this class shows: construct the scanner over a reader,
 * select one of the lexical states {@link #DISP}, {@link #DICT} or
 * {@link #SEARCH} with {@link #yybegin(int)} and call {@link #yylex()}.
 * Every rule appends the matched text unchanged to two accumulators (the
 * original and the normalized text). Per the packed DFA tables, a line feed
 * ends the line and makes {@code yylex()} return, and an {@code '@'} character
 * flags the line as problematic:
 * <ul>
 *   <li>{@code DISP}: returns the accumulated text as is.</li>
 *   <li>{@code DICT}: returns the accumulated text with every hyphen-minus
 *       (U+002D) or soft hyphen (U+00AD) that is followed by a space removed
 *       together with that space; returns the empty string for a problematic
 *       line.</li>
 *   <li>{@code SEARCH}: like {@code DICT}, but a problematic line is returned
 *       unmodified.</li>
 * </ul>
 * In {@link #YYINITIAL} there is no rule for a line feed, so a line feed there
 * ends in the "could not match input" error.
 *
 * <p>The accumulators are cleared only by {@link #yyreset(java.io.Reader)};
 * consecutive {@code yylex()} calls on the same input keep appending to them.
 * Instances hold mutable scanning state and are not designed for concurrent use.
 */
public class MpdlNormalizerLexAR {

  /** This character denotes the end of file. */
  public static final int YYEOF = -1;

  /** Initial size of the lookahead buffer. */
  private static final int ZZ_BUFFERSIZE = 16384;

  /* lexical states */
  public static final int SEARCH = 6;
  public static final int DICT = 4;
  public static final int YYINITIAL = 0;
  public static final int DISP = 2;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l,
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   * at the beginning of a line.
   * l is of the form l = 2*k, k a non negative integer.
   */
  private static final int ZZ_LEXSTATE[] = {
    0, 0, 1, 1, 2, 2, 3, 3
  };

  /**
   * Run-length encoded (count, class) pairs mapping characters to character classes.
   */
  private static final String ZZ_CMAP_PACKED =
    "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\4"+
    "\40\0\1\1\2\0\1\1\20\0\1\1\5\0\1\1\1\0\1\1"+
    "\uff82\0";

  /** Translates characters to character classes. */
  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);

  /** Translates DFA states to action switch labels. */
  private static final int [] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ACTION_PACKED_0 =
    "\4\0\2\1\1\2\1\3\1\4\1\5";

  private static int [] zzUnpackAction() {
    int [] result = new int[10];
    zzUnpackAction(ZZ_ACTION_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into {@code result}.
   *
   * @param packed the packed table
   * @param offset index in {@code result} at which writing starts
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackAction(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** Translates a state to a row index in the transition table. */
  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\5\0\12\0\17\0\24\0\31\0\24\0\24"+
    "\0\24\0\24";

  private static int [] zzUnpackRowMap() {
    int [] result = new int[10];
    zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands (high, low) character pairs into 32-bit row offsets.
   *
   * @param packed the packed table
   * @param offset index in {@code result} at which writing starts
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int high = packed.charAt(i++) << 16;
      result[j++] = high | packed.charAt(i++);
    }
    return j;
  }

  /** The transition table of the DFA. */
  private static final int [] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_TRANS_PACKED_0 =
    "\1\5\1\6\1\5\1\0\1\7\1\5\1\6\1\5"+
    "\1\10\1\7\1\5\1\6\1\5\1\11\1\7\1\5"+
    "\1\6\1\5\1\12\1\7\7\0\1\5\2\0";

  private static int [] zzUnpackTrans() {
    int [] result = new int[30];
    zzUnpackTrans(ZZ_TRANS_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into {@code result};
   * each stored value is one higher than the real one so that "no transition"
   * (-1) can be encoded as 0.
   *
   * @param packed the packed table
   * @param offset index in {@code result} at which writing starts
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackTrans(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String ZZ_ERROR_MSG[] = {
    "Unkown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
   * (bit 0: accepting state, bit 3: no further transition possible).
   */
  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\4\0\1\11\1\1\4\11";

  private static int [] zzUnpackAttribute() {
    int [] result = new int[10];
    zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, result);
    return result;
  }

  /**
   * Expands run-length encoded (count, value) pairs into {@code result}.
   *
   * @param packed the packed table
   * @param offset index in {@code result} at which writing starts
   * @param result the array receiving the unpacked values
   * @return the index just past the last value written
   */
  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];

  /** the text position at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /** zzAtBOL == true <=> the scanner is currently at the beginning of a line */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /* user code: */

  /** Matched input text accumulated so far. */
  private final StringBuilder original = new StringBuilder();

  /** Normalized text accumulated so far. */
  private final StringBuilder normalized = new StringBuilder();

  /** Set to 1 once a rule has flagged the input as problematic, 0 otherwise. */
  private int problem = 0;

  /**
   * Appends the current match to the original text and {@code norm} to the
   * normalized text. Builders are used so that long lines do not cost
   * quadratic time in repeated string concatenation.
   *
   * @param norm the normalized form of the current match
   */
  private void add (String norm) {
    original.append(yytext());
    normalized.append(norm);
  }

  /** Regex for a line-break hyphen: hyphen-minus or soft hyphen, then a space. */
  private static final String LB = "[\u002d\u00ad] ";

  /** {@link #LB} compiled once instead of on every returned line. */
  private static final java.util.regex.Pattern LB_PATTERN =
      java.util.regex.Pattern.compile(LB);

  /** Returns the normalized text with all line-break hyphens removed. */
  private String normalizedWithoutLineBreaks() {
    return LB_PATTERN.matcher(normalized).replaceAll("");
  }

  /**
   * Creates a new scanner.
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param in the java.io.Reader to read input from.
   */
  public MpdlNormalizerLexAR(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * <p>NOTE(review): the stream is decoded with the platform default charset;
   * callers that need a specific encoding should wrap the stream themselves
   * and use the Reader constructor.
   *
   * @param in the java.io.InputStream to read input from.
   */
  public MpdlNormalizerLexAR(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed the packed character translation table
   * @return the unpacked character translation table
   */
  private static char [] zzUnpackCMap(String packed) {
    char [] map = new char[0x10000];
    int i = 0;  /* index in packed string  */
    int j = 0;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      char value = packed.charAt(i++);
      do map[j++] = value; while (--count > 0);
    }
    return map;
  }

  /**
   * Refills the input buffer.
   *
   * @return <code>false</code>, iff there was new input.
   *
   * @exception java.io.IOException if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room (if you can) */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead-zzStartRead);

      /* translate stored positions */
      zzEndRead-= zzStartRead;
      zzCurrentPos-= zzStartRead;
      zzMarkedPos-= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
      /* if not: blow it up */
      char newBuffer[] = new char[zzCurrentPos*2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                zzBuffer.length-zzEndRead);

    if (numRead > 0) {
      zzEndRead+= numRead;
      return false;
    }
    // unlikely but not impossible: read 0 characters, but not at end of stream
    if (numRead == 0) {
      int c = zzReader.read();
      if (c == -1) {
        return true;
      }
      zzBuffer[zzEndRead++] = (char) c;
      return false;
    }

    // numRead < 0
    return true;
  }

  /**
   * Closes the input stream.
   *
   * @exception java.io.IOException if closing the reader fails
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;   /* invalidate buffer    */

    if (zzReader != null) {
      zzReader.close();
    }
  }

  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>ZZ_INITIAL</tt>. The accumulated original and
   * normalized text and the problem flag are cleared as well, so text from
   * the previous input cannot leak into the results for the new one.
   *
   * @param reader the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL  = true;
    zzAtEOF  = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;

    // user state: start the new input with empty accumulators
    original.setLength(0);
    normalized.setLength(0);
    problem = 0;
  }

  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }

  /**
   * Enters a new lexical state.
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }

  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  }

  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster.
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead+pos];
  }

  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos-zzStartRead;
  }

  /**
   * Reports an error that occurred while scanning by throwing an
   * {@link Error} carrying the message for {@code errorCode}; codes outside
   * the message table map to the "unknown internal error" message.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   *
   * @param errorCode the code of the error message to report
   */
  private void zzScanError(int errorCode) {
    boolean known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.length;
    String message = known ? ZZ_ERROR_MSG[errorCode] : ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];

    throw new Error(message);
  }

  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method.
   *
   * @param number the number of characters to be read again.
   *               This number must not be greater than yylength()!
   */
  public void yypushback(int number) {
    if ( number > yylength() ) {
      zzScanError(ZZ_PUSHBACK_2BIG);
    }

    zzMarkedPos -= number;
  }

  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * @return the text of the line just completed, shaped by the active lexical
   *         state (see the class description), or <code>null</code> at end
   *         of input
   * @exception java.io.IOException if any I/O-Error occurs
   */
  public java.lang.String yylex() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char [] zzBufferL = zzBuffer;
    char [] zzCMapL = ZZ_CMAP;

    int [] zzTransL = ZZ_TRANS;
    int [] zzRowMapL = ZZ_ROWMAP;
    int [] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      zzAction = -1;

      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      zzState = ZZ_LEXSTATE[zzLexicalState];

      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL) {
            zzInput = zzBufferL[zzCurrentPosL++];
          }
          else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          }
          else {
            // store back cached positions
            zzCurrentPos  = zzCurrentPosL;
            zzMarkedPos   = zzMarkedPosL;
            boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL  = zzCurrentPos;
            zzMarkedPosL   = zzMarkedPos;
            zzBufferL      = zzBuffer;
            zzEndReadL     = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            zzInput = zzBufferL[zzCurrentPosL++];
          }

          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
          if (zzNext == -1) break zzForAction;
          zzState = zzNext;

          int zzAttributes = zzAttrL[zzState];
          if ( (zzAttributes & 1) == 1 ) {
            // accepting state: remember the longest match so far
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            if ( (zzAttributes & 8) == 8 ) break zzForAction;
          }

        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 1:
          // ordinary text: copied through unchanged
          add(yytext());
          break;
        case 2:
          // problem marker: remember it, still copy the text
          problem = 1;
          add(yytext());
          break;
        case 3:
          return problem == 1 ? original.toString() : normalized.toString();
        case 4:
          return problem == 1 ? "" : normalizedWithoutLineBreaks();
        case 5:
          return problem == 1 ? original.toString() : normalizedWithoutLineBreaks();
        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          else {
            zzScanError(ZZ_NO_MATCH);
          }
      }
    }
  }

}