Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Unicode2Buckwalter.lex @ 6:2396a569e446
new functions: externalObjects, normalizer, Unicode2Betacode
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 14:54:09 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:94305c504178 | 6:2396a569e446 |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.general; | |
2 | |
3 %% | |
4 %{ | |
5 /* | |
6 * Unicode to Buckwalter conversion | |
7 */ | |
8 | |
9 %} | |
10 | |
11 %class Unicode2BuckwalterLex | |
12 %public | |
13 %type java.lang.String | |
14 %unicode | |
15 %% | |
16 | |
17 | |
18 "<"[^>]+">" { return yytext(); } | |
19 | |
20 "\u0621" { return "'"; } /* Hamza */ | |
21 "\u0622" { return "|"; } /* ALEF WITH MADDA ABOVE from AraMorph */ | |
22 "\u0623" { return ">"; } /* Alif + HamzaAbove */ | |
23 "\u0624" { return "&"; } /* Waw + HamzaAbove */ | |
24 "\u0625" { return "<"; } /* Alif + HamzaBelow */ | |
25 "\u0626" { return "}"; } /* Ya + HamzaAbove */ | |
26 "\u0627" { return "A"; } /* Alif */ | |
27 "\u0628" { return "b"; } /* Ba */ | |
28 "\u0629" { return "p"; } /* TaMarbuta */ | |
29 "\u062A" { return "t"; } /* Ta */ | |
30 "\u062B" { return "v"; } /* Tha */ | |
31 "\u062C" { return "j"; } /* Jeem */ | |
32 "\u062D" { return "H"; } /* HHa */ | |
33 "\u062E" { return "x"; } /* Kha */ | |
34 "\u062F" { return "d"; } /* Dal */ | |
35 "\u0630" { return "*"; } /* Thal */ | |
36 "\u0631" { return "r"; } /* Ra */ | |
37 "\u0632" { return "z"; } /* Zain */ | |
38 "\u0633" { return "s"; } /* Seen */ | |
39 "\u0634" { return "$"; } /* Sheen */ | |
40 "\u0635" { return "S"; } /* Sad */ | |
41 "\u0636" { return "D"; } /* DDad */ | |
42 "\u0637" { return "T"; } /* TTa */ | |
43 "\u0638" { return "Z"; } /* DTha */ | |
44 "\u0639" { return "E"; } /* Ain */ | |
45 "\u063A" { return "g"; } /* Ghain */ | |
46 | |
47 "\u0640" { return "_"; } /* Tatweel */ | |
48 "\u0641" { return "f"; } /* Fa */ | |
49 "\u0642" { return "q"; } /* Qaf */ | |
50 "\u0643" { return "k"; } /* Kaf */ | |
51 "\u0644" { return "l"; } /* Lam */ | |
52 "\u0645" { return "m"; } /* Meem */ | |
53 "\u0646" { return "n"; } /* Noon */ | |
54 "\u0647" { return "h"; } /* Ha */ | |
55 "\u0648" { return "w"; } /* Waw */ | |
56 "\u0649" { return "Y"; } /* AlifMaksura */ | |
57 "\u064A" { return "y"; } /* Ya */ | |
58 "\u064B" { return "F"; } /* Fathatan */ | |
59 "\u064C" { return "N"; } /* Dammatan */ | |
60 "\u064D" { return "K"; } /* Kasratan */ | |
61 "\u064E" { return "a"; } /* Fatha */ | |
62 "\u064F" { return "u"; } /* Damma */ | |
63 "\u0650" { return "i"; } /* Kasra */ | |
64 "\u0651" { return "~"; } /* Shadda */ | |
65 "\u0652" { return "o"; } /* Sukun */ | |
66 "\u0653" { return "^"; } /* Maddah */ | |
67 "\u0654" { return "#"; } /* HamzaAbove */ | |
68 | |
69 "\u0670" { return "`"; } /* AlifKhanjareeya */ | |
70 "\u0671" { return "{"; } /* Alif + HamzatWasl */ | |
71 | |
72 "\u067E" { return "P"; } /* PEH from AraMorph */ | |
73 "\u0686" { return "J"; } /* TCHEH from AraMorph */ | |
74 "\u06A4" { return "V"; } /* VEH from AraMorph */ | |
75 "\u06AF" { return "G"; } /* GAF from AraMorph */ | |
76 "\u0698" { return "R"; } /* JEH from AraMorph */ | |
77 "\u061F" { return "?"; } /* QUESTION MARK from AraMorph */ | |
78 | |
79 "\u06DC" { return ":"; } /* SmallHighSeen */ | |
80 "\u06DF" { return "@"; } /* SmallHighRoundedZero */ | |
81 | |
82 "\u06E2" { return "["; } /* SmallHighMeemIsolatedForm */ | |
83 "\u06E3" { return ";"; } /* SmallLowSeen */ | |
84 "\u06E5" { return ","; } /* SmallWaw */ | |
85 "\u06E6" { return "."; } /* SmallYa */ | |
86 "\u06E8" { return "!"; } /* SmallHighNoon */ | |
87 "\u06EA" { return "-"; } /* EmptyCentreLowStop */ | |
88 "\u06EB" { return "+"; } /* EmptyCentreHighStop */ | |
89 "\u06EC" { return "%"; } /* RoundedHighStopWithFilledCentre */ | |
90 "\u06ED" { return "]"; } /* SmallLowMeem */ | |
91 | |
92 [\&_]"vert;" { return "|"; } | |
93 [\&_]"lpar;" { return "("; } | |
94 [\&_]"rpar;" { return ")"; } | |
95 [\_\&]"lt;" { return "<"; } | |
96 [\_\&]"gt;" { return ">"; } | |
97 "'" { return "'"; } | |
98 | |
99 "&"[a-zA-Z]+";" { return yytext(); } | |
100 | |
101 . { return yytext(); } | |
102 \n { return yytext(); } | |
103 | |
104 /* causes problems */ | |
105 /* "\u06E0" { return "\\""; } SmallHighUprightRectangularZero */ | |
106 | |
107 | |
108 /* duplicate entries (these output characters are already produced by rules above) */ | |
109 /* "\u060C" { return ","; } COMMA from AraMorph */ | |
110 /* "\u061B" { return ";"; } SEMICOLON from AraMorph */ | |
111 | |
112 /* not contained in Buckwalter */ | |
113 /* \u0679 : ARABIC LETTER TTEH */ | |
114 /* \u0688 : ARABIC LETTER DDAL */ | |
115 /* \u06A9 : ARABIC LETTER KEHEH */ | |
116 /* \u0691 : ARABIC LETTER RREH */ | |
117 /* \u06BA : ARABIC LETTER NOON GHUNNA */ | |
118 /* \u06BE : ARABIC LETTER HEH DOACHASHMEE */ | |
119 /* \u06C1 : ARABIC LETTER HEH GOAL */ | |
120 /* \u06D2 : ARABIC LETTER YEH BARREE */ | |
121 |