diff software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents 4a3641ae14d2
children
line wrap: on
line diff
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java	Wed Dec 14 13:57:09 2011 +0100
+++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.java	Tue Nov 27 12:35:19 2012 +0100
@@ -1,11 +1,11 @@
-/* The following code was generated by JFlex 1.4.3 on 05.09.11 10:34 */
+/* The following code was generated by JFlex 1.4.3 on 28.03.12 18:57 */
 
 /*
  * Normalization rules for German text
  * [this is a JFlex specification]
  *
  * Wolfgang Schmidle 
- * version 2011-08-10
+ * version 2011-07-12
  *
  */
 
@@ -15,8 +15,8 @@
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
- * on 05.09.11 10:34 from the specification file
- * <tt>MpdlNormalizerLexDE.lex</tt>
+ * on 28.03.12 18:57 from the specification file
+ * <tt>/Users/jwillenborg/test/jflexNew/MpdlNormalizerLexDE.lex</tt>
  */
 public class MpdlNormalizerLexDE {
 
@@ -27,13 +27,12 @@
   private static final int ZZ_BUFFERSIZE = 16384;
 
   /** lexical states */
-  public static final int SEARCH = 10;
-  public static final int DICT_ASCII = 6;
-  public static final int SEARCH_ASCII = 12;
+  public static final int SEARCH = 6;
   public static final int DICT = 4;
   public static final int YYINITIAL = 0;
+  public static final int CELEX = 8;
   public static final int DISP = 2;
-  public static final int GRIMM = 8;
+  public static final int GRIMM = 10;
 
   /**
    * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -42,18 +41,18 @@
    * l is of the form l = 2*k, k a non negative integer
    */
   private static final int ZZ_LEXSTATE[] = { 
-     0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6, 6
+     0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5, 5
   };
 
   /** 
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\20"+
-    "\32\4\6\0\1\11\2\4\1\5\12\4\1\13\5\4\1\7\5\4"+
-    "\1\1\1\0\1\1\106\0\1\14\21\0\1\15\5\0\1\16\2\0"+
-    "\1\17\4\0\1\14\21\0\1\15\5\0\1\16\202\0\1\6\u01e4\0"+
-    "\1\12\1\0\1\10\ufc99\0";
+    "\12\0\1\3\25\0\1\2\14\0\1\11\1\0\1\10\1\1\13\0"+
+    "\1\6\1\0\1\7\1\0\1\24\32\4\6\0\1\15\2\4\1\5"+
+    "\12\4\1\17\5\4\1\13\5\4\1\1\1\0\1\1\106\0\1\20"+
+    "\21\0\1\21\5\0\1\22\2\0\1\23\4\0\1\20\21\0\1\21"+
+    "\5\0\1\22\202\0\1\12\u01e4\0\1\16\1\0\1\14\ufc99\0";
 
   /** 
    * Translates characters to character classes
@@ -66,12 +65,12 @@
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\7\0\2\1\1\2\1\3\1\4\3\1\1\5\1\3"+
-    "\3\1\1\6\1\7\1\10\1\11\1\12\1\13\1\14"+
-    "\1\15\1\16\1\17";
+    "\6\0\4\1\1\2\1\3\1\4\3\1\1\5\1\6"+
+    "\3\3\3\1\1\7\1\10\1\11\1\12\1\13\2\0"+
+    "\1\14\1\15\1\16\1\17\3\0\1\1\2\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[30];
+    int [] result = new int[41];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -96,13 +95,15 @@
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\21\0\42\0\63\0\104\0\125\0\146\0\167"+
-    "\0\210\0\167\0\167\0\167\0\231\0\252\0\273\0\167"+
-    "\0\210\0\314\0\335\0\356\0\167\0\167\0\167\0\167"+
-    "\0\167\0\167\0\167\0\167\0\167\0\167";
+    "\0\0\0\25\0\52\0\77\0\124\0\151\0\176\0\223"+
+    "\0\250\0\275\0\176\0\176\0\176\0\322\0\347\0\374"+
+    "\0\176\0\176\0\223\0\250\0\275\0\u0111\0\u0126\0\u013b"+
+    "\0\176\0\176\0\176\0\176\0\176\0\u0150\0\250\0\176"+
+    "\0\176\0\176\0\176\0\u0165\0\u017a\0\u018f\0\u0165\0\u01a4"+
+    "\0\u01b9";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[30];
+    int [] result = new int[41];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -125,25 +126,29 @@
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\10\1\11\1\10\1\0\1\10\1\11\1\12\1\11"+
-    "\1\10\1\11\6\10\1\13\1\10\1\11\1\10\1\14"+
-    "\1\10\1\11\1\12\1\15\1\10\1\16\1\10\1\17"+
-    "\4\10\1\13\1\10\1\11\1\10\1\20\1\10\1\11"+
-    "\1\12\1\15\1\10\1\16\1\10\1\17\4\10\2\13"+
-    "\1\21\1\13\1\20\1\10\1\11\1\12\1\22\1\13"+
-    "\1\23\1\13\1\24\1\25\1\26\1\27\1\30\1\13"+
-    "\1\10\1\11\1\10\1\20\1\10\1\11\1\12\1\15"+
-    "\1\10\1\16\1\10\1\17\3\10\1\31\1\13\1\10"+
-    "\1\11\1\10\1\32\1\10\1\11\1\12\1\15\1\10"+
-    "\1\16\1\10\1\17\4\10\2\13\1\21\1\13\1\32"+
-    "\1\10\1\11\1\12\1\22\1\13\1\23\1\13\1\24"+
-    "\1\25\1\26\1\27\1\30\1\13\23\0\1\10\20\0"+
-    "\1\10\5\0\1\33\1\0\1\34\10\0\1\10\7\0"+
-    "\1\35\20\0\1\36\10\0\1\10\5\0\1\33\1\0"+
-    "\1\27\10\0\1\10\7\0\1\25\20\0\1\26\6\0";
+    "\1\7\1\10\1\7\1\0\1\7\1\10\1\11\2\7"+
+    "\1\12\1\13\1\10\1\7\1\10\6\7\1\14\1\7"+
+    "\1\10\1\7\1\15\1\7\1\10\1\11\2\7\1\12"+
+    "\1\13\1\16\1\7\1\17\1\7\1\20\4\7\1\14"+
+    "\1\7\1\10\1\7\1\21\1\7\1\10\1\11\2\7"+
+    "\1\12\1\13\1\16\1\7\1\17\1\7\1\20\4\7"+
+    "\1\14\1\7\1\10\1\7\1\22\1\7\1\10\1\11"+
+    "\2\7\1\12\1\13\1\16\1\7\1\17\1\7\1\20"+
+    "\4\7\2\14\1\23\1\14\1\21\1\7\1\10\1\24"+
+    "\2\14\1\25\1\13\1\26\1\14\1\27\1\14\1\30"+
+    "\1\31\1\32\1\33\1\34\1\14\1\7\1\10\1\7"+
+    "\1\21\1\7\1\10\1\11\2\7\1\12\1\13\1\16"+
+    "\1\7\1\17\1\7\1\20\3\7\1\35\1\14\27\0"+
+    "\1\7\22\0\6\36\2\0\15\36\2\0\1\7\3\0"+
+    "\1\37\20\0\1\7\11\0\1\40\1\0\1\41\10\0"+
+    "\1\7\13\0\1\42\24\0\1\43\10\0\1\7\11\0"+
+    "\1\40\1\0\1\33\10\0\1\7\13\0\1\31\24\0"+
+    "\1\32\6\0\6\36\1\0\1\44\1\45\14\36\6\0"+
+    "\1\46\16\0\6\36\1\0\1\47\1\45\14\36\10\0"+
+    "\1\50\14\0\6\51\2\0\23\51\1\0\1\7\15\51";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[255];
+    int [] result = new int[462];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -181,10 +186,11 @@
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\7\0\1\11\1\1\3\11\3\1\1\11\4\1\12\11";
+    "\6\0\1\11\3\1\3\11\3\1\2\11\6\1\5\11"+
+    "\2\0\4\11\3\0\1\1\2\0";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[30];
+    int [] result = new int[41];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -252,8 +258,6 @@
   private boolean zzEOFDone;
 
   /* user code: */
-	public static final int CELEX = DICT_ASCII;
-
 	private String original = "";
 	private String normalized = "";
 	private int problem = 0;
@@ -296,7 +300,7 @@
     char [] map = new char[0x10000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 88) {
+    while (i < 98) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -563,7 +567,7 @@
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 10: 
+        case 11: 
           { add("sz");
           }
         case 16: break;
@@ -571,7 +575,7 @@
           { problem = 1; add(yytext());
           }
         case 17: break;
-        case 6: 
+        case 7: 
           { add("ae");
           }
         case 18: break;
@@ -590,11 +594,11 @@
           { add("ü");
           }
         case 21: break;
-        case 8: 
+        case 9: 
           { add("ue");
           }
         case 22: break;
-        case 11: 
+        case 6: 
           { switch (problem) {
 			case 1: return original;
 			default: return normalized.replaceAll(LB, "").toLowerCase();
@@ -613,11 +617,11 @@
           { add(yytext());
           }
         case 26: break;
-        case 9: 
+        case 10: 
           { add("ss");
           }
         case 27: break;
-        case 7: 
+        case 8: 
           { add("oe");
           }
         case 28: break;