diff software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java @ 14:5df60f24e997

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Mon, 29 Aug 2011 17:40:02 +0200
parents 1ec29fdd0db8
children
line wrap: on
line diff
--- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java	Tue Apr 19 16:51:59 2011 +0200
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexEN.java	Mon Aug 29 17:40:02 2011 +0200
@@ -1,12 +1,11 @@
-/* The following code was generated by JFlex 1.4.3 on 22.02.11 12:03 */
+/* The following code was generated by JFlex 1.4.3 on 21.07.11 11:22 */
 
 /*
  * Normalization rules for English text
  * [this is a JFlex specification]
  *
  * Wolfgang Schmidle 
- * version 0.96
- * 2011-02-21
+ * version 2011-07-12
  *
  */
 
@@ -16,7 +15,7 @@
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
- * on 22.02.11 12:03 from the specification file
+ * on 21.07.11 11:22 from the specification file
  * <tt>MpdlNormalizerLexEN.lex</tt>
  */
 public class MpdlNormalizerLexEN {
@@ -40,14 +39,16 @@
    * l is of the form l = 2*k, k a non negative integer
    */
   private static final int ZZ_LEXSTATE[] = { 
-     0,  0,  1,  1,  2,  2,  1, 1
+     0,  0,  1,  1,  2,  2,  3, 3
   };
 
   /** 
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\12\0\1\1\65\0\1\3\u013e\0\1\2\ufe80\0";
+    "\12\0\1\3\25\0\1\2\14\0\1\1\2\0\1\1\17\0\1\5"+
+    "\40\0\1\1\2\0\1\1\20\0\1\1\5\0\1\1\1\0\1\1"+
+    "\u0101\0\1\4\ufe80\0";
 
   /** 
    * Translates characters to character classes
@@ -60,10 +61,10 @@
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\3\0\1\1\1\2\1\3\1\4\1\5";
+    "\4\0\2\1\1\2\1\3\1\4\1\5\1\6";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[8];
+    int [] result = new int[11];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -88,10 +89,11 @@
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\4\0\10\0\14\0\14\0\14\0\14\0\14";
+    "\0\0\0\6\0\14\0\22\0\30\0\36\0\30\0\30"+
+    "\0\30\0\30\0\30";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[8];
+    int [] result = new int[11];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -114,11 +116,13 @@
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\4\1\0\1\4\1\5\1\4\1\6\1\7\1\5"+
-    "\1\4\1\10\1\7\1\5\4\0";
+    "\1\5\1\6\1\5\1\0\1\5\1\7\1\5\1\6"+
+    "\1\5\1\10\1\11\1\7\1\5\1\6\1\5\1\12"+
+    "\1\11\1\7\1\5\1\6\1\5\1\13\1\11\1\7"+
+    "\10\0\1\5\3\0";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[16];
+    int [] result = new int[36];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -156,10 +160,10 @@
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\3\0\5\11";
+    "\4\0\1\11\1\1\5\11";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[8];
+    int [] result = new int[11];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -236,6 +240,8 @@
 		normalized += norm;
 	}
 
+	private static final String LB = "[\u002d\u00ad] "; // Regex: one hyphen-minus (U+002D) or soft hyphen (U+00AD) followed by a single space; every match is deleted via normalized.replaceAll(LB, "") before the normalized text is returned. NOTE(review): "LB" presumably stands for a hyphenated line break - confirm against the .lex specification.
+
 
   /**
    * Creates a new scanner
@@ -267,7 +273,7 @@
     char [] map = new char[0x10000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 14) {
+    while (i < 46) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -537,29 +543,36 @@
         case 5: 
           { switch (problem) {
 			case 1: return "";
-			default: return normalized;
+			default: return normalized.replaceAll(LB, "");
 		}
           }
-        case 6: break;
+        case 7: break;
         case 2: 
           { problem = 1; add(yytext());
           }
-        case 7: break;
+        case 8: break;
         case 4: 
           { add("s");
           }
-        case 8: break;
+        case 9: break;
         case 3: 
           { switch (problem) {
 			case 1: return original;
 			default: return normalized;
 		}
           }
-        case 9: break;
+        case 10: break;
+        case 6: 
+          { switch (problem) {
+			case 1: return original;
+			default: return normalized.replaceAll(LB, "").toLowerCase();
+		}
+          }
+        case 11: break;
         case 1: 
           { add(yytext());
           }
-        case 10: break;
+        case 12: break;
         default: 
           if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
             zzAtEOF = true;