annotate software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2Buckwalter.lex @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
1 package de.mpg.mpiwg.berlin.mpdl.lt.general;
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
2
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
3 %%
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
4 %{
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
5 /*
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
6 * Betacode to Unicode conversion
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
7 */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
8
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
9 %}
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
10
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
11 %class Unicode2BuckwalterLex
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
12 %public
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
13 %type java.lang.String
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
14 %unicode
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
15 %%
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
16
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
17
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
18 "<"[^>]+">" { return yytext(); }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
19
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
20 "\u0621" { return "'"; } /* Hamza */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
21 "\u0622" { return "|"; } /* ALEF WITH MADDA ABOVE from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
22 "\u0623" { return ">"; } /* Hamza */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
23 "\u0624" { return "&"; } /* Hamza */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
24 "\u0625" { return "<"; } /* Alif + HamzaBelow */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
25 "\u0626" { return "}"; } /* Ya + HamzaAbove */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
26 "\u0627" { return "A"; } /* Alif */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
27 "\u0628" { return "b"; } /* Ba */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
28 "\u0629" { return "p"; } /* TaMarbuta */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
29 "\u062A" { return "t"; } /* Ta */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
30 "\u062B" { return "v"; } /* Tha */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
31 "\u062C" { return "j"; } /* Jeem */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
32 "\u062D" { return "H"; } /* HHa */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
33 "\u062E" { return "x"; } /* Kha */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
34 "\u062F" { return "d"; } /* Dal */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
35 "\u0630" { return "*"; } /* Thal */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
36 "\u0631" { return "r"; } /* Ra */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
37 "\u0632" { return "z"; } /* Zain */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
38 "\u0633" { return "s"; } /* Seen */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
39 "\u0634" { return "$"; } /* Sheen */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
40 "\u0635" { return "S"; } /* Sad */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
41 "\u0636" { return "D"; } /* DDad */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
42 "\u0637" { return "T"; } /* TTa */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
43 "\u0638" { return "Z"; } /* DTha */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
44 "\u0639" { return "E"; } /* Ain */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
45 "\u063A" { return "g"; } /* Ghain */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
46
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
47 "\u0640" { return "_"; } /* Tatweel */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
48 "\u0641" { return "f"; } /* Fa */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
49 "\u0642" { return "q"; } /* Qaf */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
50 "\u0643" { return "k"; } /* Kaf */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
51 "\u0644" { return "l"; } /* Lam */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
52 "\u0645" { return "m"; } /* Meem */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
53 "\u0646" { return "n"; } /* Noon */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
54 "\u0647" { return "h"; } /* Ha */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
55 "\u0648" { return "w"; } /* Waw */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
56 "\u0649" { return "Y"; } /* AlifMaksura */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
57 "\u064A" { return "y"; } /* Ya */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
58 "\u064B" { return "F"; } /* Fathatan */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
59 "\u064C" { return "N"; } /* Dammatan */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
60 "\u064D" { return "K"; } /* Kasratan */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
61 "\u064E" { return "a"; } /* Fatha */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
62 "\u064F" { return "u"; } /* Damma */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
63 "\u0650" { return "i"; } /* Kasra */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
64 "\u0651" { return "~"; } /* Shadda */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
65 "\u0652" { return "o"; } /* Sukun */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
66 "\u0653" { return "^"; } /* Maddah */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
67 "\u0654" { return "#"; } /* HamzaAbove */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
68
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
69 "\u0670" { return "`"; } /* AlifKhanjareeya */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
70 "\u0671" { return "{"; } /* Alif + HamzatWasl */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
71
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
72 "\u067E" { return "P"; } /* PEH from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
73 "\u0686" { return "J"; } /* TCHEH from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
74 "\u06A4" { return "V"; } /* VEH from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
75 "\u06AF" { return "G"; } /* GAF from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
76 "\u0698" { return "R"; } /* JEH from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
77 "\u061F" { return "?"; } /* QUESTION MARK from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
78
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
79 "\u06DC" { return ":"; } /* SmallHighSeen */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
80 "\u06DF" { return "@"; } /* SmallHighRoundedZero */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
81
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
82 "\u06E2" { return "["; } /* SmallHighMeemIsolatedForm */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
83 "\u06E3" { return ";"; } /* SmallLowSeen */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
84 "\u06E5" { return ","; } /* SmallWaw */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
85 "\u06E6" { return "."; } /* SmallYa */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
86 "\u06E8" { return "!"; } /* SmallHighNoon */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
87 "\u06EA" { return "-"; } /* EmptyCentreLowStop */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
88 "\u06EB" { return "+"; } /* EmptyCentreHighStop */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
89 "\u06EC" { return "%"; } /* RoundedHighStopWithFilledCentre */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
90 "\u06ED" { return "]"; } /* SmallLowMeem */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
91
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
92 [\&_]"vert;" { return "|"; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
93 [\&_]"lpar;" { return "("; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
94 [\&_]"rpar;" { return ")"; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
95 [\_\&]"lt;" { return "&lt;"; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
96 [\_\&]"gt;" { return "&gt;"; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
97 "&#039;" { return "'"; }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
98
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
99 "&"[a-zA-Z]+";" { return yytext(); }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
100
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
101 . { return yytext(); }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
102 \n { return yytext(); }
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
103
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
104 /* make problemes */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
105 /* "\u06E0" { return "\\""; } SmallHighUprightRectangularZero */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
106
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
107
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
108 /* double entries */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
109 /* "\u060C" { return ","; } COMMA from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
110 /* "\u061B" { return ";"; } SEMICOLON from AraMorph */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
111
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
112 /* not in buckwalter contained */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
113 /* \u0679 : ARABIC LETTER TTEH */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
114 /* \u0688 : ARABIC LETTER DDAL */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
115 /* \u06A9 : ARABIC LETTER KEHEH */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
116 /* \u0691 : ARABIC LETTER RREH */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
117 /* \u06BA : ARABIC LETTER NOON GHUNNA */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
118 /* \u06BE : ARABIC LETTER HEH DOACHASHMEE */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
119 /* \u06C1 : ARABIC LETTER HEH GOAL */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
120 /* \u06D2 : ARABIC LETTER YEH BARREE */
4a3641ae14d2 Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
121