Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt/bin/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Buckwalter2Unicode.lex @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
22:6a45a982c333 | 23:e845310098ba |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.general; | |
2 | |
3 %% | |
4 %{ | |
5 /* | |
6 * Buckwalter to Unicode conversion | |
7 */ | |
8 | |
9 %} | |
10 | |
11 %class Buckwalter2UnicodeLex | |
12 %public | |
13 %type java.lang.String | |
14 %unicode | |
15 %% | |
16 | |
17 | |
18 "<"[^>]+">" { return yytext(); } /* markup tag: returned unchanged */ | |
19 | |
20 "'" { return "\u0621"; } /* Hamza */ | |
21 "|" { return "\u0622"; } /* ALEF WITH MADDA ABOVE from AraMorph */ | |
22 ">" { return "\u0623"; } /* Alif + HamzaAbove */ | |
23 "&" { return "\u0624"; } /* Waw + HamzaAbove */ | |
24 "<" { return "\u0625"; } /* Alif + HamzaBelow */ | |
25 "}" { return "\u0626"; } /* Ya + HamzaAbove */ | |
26 "A" { return "\u0627"; } /* Alif */ | |
27 "b" { return "\u0628"; } /* Ba */ | |
28 "p" { return "\u0629"; } /* TaMarbuta */ | |
29 "t" { return "\u062A"; } /* Ta */ | |
30 "v" { return "\u062B"; } /* Tha */ | |
31 "j" { return "\u062C"; } /* Jeem */ | |
32 "H" { return "\u062D"; } /* HHa */ | |
33 "x" { return "\u062E"; } /* Kha */ | |
34 "d" { return "\u062F"; } /* Dal */ | |
35 "*" { return "\u0630"; } /* Thal */ | |
36 "r" { return "\u0631"; } /* Ra */ | |
37 "z" { return "\u0632"; } /* Zain */ | |
38 "s" { return "\u0633"; } /* Seen */ | |
39 "$" { return "\u0634"; } /* Sheen */ | |
40 "S" { return "\u0635"; } /* Sad */ | |
41 "D" { return "\u0636"; } /* DDad */ | |
42 "T" { return "\u0637"; } /* TTa */ | |
43 "Z" { return "\u0638"; } /* DTha */ | |
44 "E" { return "\u0639"; } /* Ain */ | |
45 "g" { return "\u063A"; } /* Ghain */ | |
46 | |
47 "_" { return "\u0640"; } /* Tatweel */ | |
48 "f" { return "\u0641"; } /* Fa */ | |
49 "q" { return "\u0642"; } /* Qaf */ | |
50 "k" { return "\u0643"; } /* Kaf */ | |
51 "l" { return "\u0644"; } /* Lam */ | |
52 "m" { return "\u0645"; } /* Meem */ | |
53 "n" { return "\u0646"; } /* Noon */ | |
54 "h" { return "\u0647"; } /* Ha */ | |
55 "w" { return "\u0648"; } /* Waw */ | |
56 "Y" { return "\u0649"; } /* AlifMaksura */ | |
57 "y" { return "\u064A"; } /* Ya */ | |
58 "F" { return "\u064B"; } /* Fathatan */ | |
59 "N" { return "\u064C"; } /* Dammatan */ | |
60 "K" { return "\u064D"; } /* Kasratan */ | |
61 "a" { return "\u064E"; } /* Fatha */ | |
62 "u" { return "\u064F"; } /* Damma */ | |
63 "i" { return "\u0650"; } /* Kasra */ | |
64 "~" { return "\u0651"; } /* Shadda */ | |
65 "o" { return "\u0652"; } /* Sukun */ | |
66 "^" { return "\u0653"; } /* Maddah */ | |
67 "#" { return "\u0654"; } /* HamzaAbove */ | |
68 | |
69 "`" { return "\u0670"; } /* AlifKhanjareeya */ | |
70 "{" { return "\u0671"; } /* Alif + HamzatWasl */ | |
71 | |
72 "P" { return "\u067E"; } /* PEH from AraMorph */ | |
73 "J" { return "\u0686"; } /* TCHEH from AraMorph */ | |
74 "V" { return "\u06A4"; } /* VEH from AraMorph */ | |
75 "G" { return "\u06AF"; } /* GAF from AraMorph */ | |
76 "R" { return "\u0698"; } /* JEH from AraMorph */ | |
77 "?" { return "\u061F"; } /* QUESTION MARK from AraMorph */ | |
78 | |
79 ":" { return "\u06DC"; } /* SmallHighSeen */ | |
80 "@" { return "\u06DF"; } /* SmallHighRoundedZero */ | |
81 | |
82 "[" { return "\u06E2"; } /* SmallHighMeemIsolatedForm */ | |
83 ";" { return "\u06E3"; } /* SmallLowSeen */ | |
84 "," { return "\u06E5"; } /* SmallWaw */ | |
85 "." { return "\u06E6"; } /* SmallYa */ | |
86 "!" { return "\u06E8"; } /* SmallHighNoon */ | |
87 "-" { return "\u06EA"; } /* EmptyCentreLowStop */ | |
88 "+" { return "\u06EB"; } /* EmptyCentreHighStop */ | |
89 "%" { return "\u06EC"; } /* RoundedHighStopWithFilledCentre */ | |
90 "]" { return "\u06ED"; } /* SmallLowMeem */ | |
91 | |
92 [\&_]"vert;" { return "|"; } /* "vert;" entity prefixed by & or _ : literal vertical bar */ | |
93 [\&_]"lpar;" { return "("; } /* "lpar;" entity prefixed by & or _ : literal left parenthesis */ | |
94 [\&_]"rpar;" { return ")"; } /* "rpar;" entity prefixed by & or _ : literal right parenthesis */ | |
95 [\_\&]"lt;" { return "<"; } /* "lt;" entity prefixed by & or _ : literal less-than sign */ | |
96 [\_\&]"gt;" { return ">"; } /* "gt;" entity prefixed by & or _ : literal greater-than sign */ | |
97 "'" { return "'"; } /* NOTE(review): same pattern as the Hamza rule above, which is listed first and therefore wins; this rule looks unreachable - possibly an apostrophe entity was intended, confirm */ | |
98 | |
99 "&"[a-zA-Z]+";" { return yytext(); } /* any other alphabetic entity: returned unchanged */ | |
100 | |
101 . { return yytext(); } /* any character not matched by a rule above: returned unchanged */ | |
102 \n { return yytext(); } /* newline: returned unchanged */ | |
103 | |
104 /* causes problems */ | |
105 /* "\\"" { return "\u06E0"; } SmallHighUprightRectangularZero */ | |
106 | |
107 | |
108 /* duplicate entries */ | |
109 /* "," { return "\u060C"; } COMMA from AraMorph */ | |
110 /* ";" { return "\u061B"; } SEMICOLON from AraMorph */ | |
111 | |
112 /* not contained in Buckwalter */ | |
113 /* \u0679 : ARABIC LETTER TTEH */ | |
114 /* \u0688 : ARABIC LETTER DDAL */ | |
115 /* \u06A9 : ARABIC LETTER KEHEH */ | |
116 /* \u0691 : ARABIC LETTER RREH */ | |
117 /* \u06BA : ARABIC LETTER NOON GHUNNA */ | |
118 /* \u06BE : ARABIC LETTER HEH DOACHASHMEE */ | |
119 /* \u06C1 : ARABIC LETTER HEH GOAL */ | |
120 /* \u06D2 : ARABIC LETTER YEH BARREE */ | |
121 |