Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-lt/bin/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2Buckwalter.lex @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services/mpiwg-mpdl-lt/bin/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2Buckwalter.lex Tue Nov 27 12:35:19 2012 +0100 @@ -0,0 +1,121 @@ +package de.mpg.mpiwg.berlin.mpdl.lt.general; /* NOTE(review): the file lives under .../lt/text/transcode/ - confirm that this package name is intended */ + +%% +%{ + /* + * Arabic Unicode to Buckwalter transliteration; markup tags and unmapped characters are returned unchanged + */ + +%} + +%class Unicode2BuckwalterLex +%public +%type java.lang.String +%unicode +%% + + +"<"[^>]+">" { return yytext(); } /* markup tag: returned unchanged */ + +"\u0621" { return "'"; } /* Hamza */ +"\u0622" { return "|"; } /* ALEF WITH MADDA ABOVE from AraMorph */ +"\u0623" { return ">"; } /* Alif + HamzaAbove */ +"\u0624" { return "&"; } /* Waw + HamzaAbove */ +"\u0625" { return "<"; } /* Alif + HamzaBelow */ +"\u0626" { return "}"; } /* Ya + HamzaAbove */ +"\u0627" { return "A"; } /* Alif */ +"\u0628" { return "b"; } /* Ba */ +"\u0629" { return "p"; } /* TaMarbuta */ +"\u062A" { return "t"; } /* Ta */ +"\u062B" { return "v"; } /* Tha */ +"\u062C" { return "j"; } /* Jeem */ +"\u062D" { return "H"; } /* HHa */ +"\u062E" { return "x"; } /* Kha */ +"\u062F" { return "d"; } /* Dal */ +"\u0630" { return "*"; } /* Thal */ +"\u0631" { return "r"; } /* Ra */ +"\u0632" { return "z"; } /* Zain */ +"\u0633" { return "s"; } /* Seen */ +"\u0634" { return "$"; } /* Sheen */ +"\u0635" { return "S"; } /* Sad */ +"\u0636" { return "D"; } /* DDad */ +"\u0637" { return "T"; } /* TTa */ +"\u0638" { return "Z"; } /* DTha */ +"\u0639" { return "E"; } /* Ain */ +"\u063A" { return "g"; } /* Ghain */ + +"\u0640" { return "_"; } /* Tatweel */ +"\u0641" { return "f"; } /* Fa */ +"\u0642" { return "q"; } /* Qaf */ +"\u0643" { return "k"; } /* Kaf */ +"\u0644" { return "l"; } /* Lam */ +"\u0645" { return "m"; } /* Meem */ +"\u0646" { return "n"; } /* Noon */ +"\u0647" { return "h"; } /* Ha */ +"\u0648" { return "w"; } /* Waw */ +"\u0649" { return "Y"; } /* AlifMaksura */ +"\u064A" { return "y"; } /* Ya */ +"\u064B" { return "F"; } /* Fathatan */ +"\u064C" { return "N"; } /* Dammatan */ +"\u064D" { return "K"; } /* Kasratan */ +"\u064E" { return 
"a"; } /* Fatha */ +"\u064F" { return "u"; } /* Damma */ +"\u0650" { return "i"; } /* Kasra */ +"\u0651" { return "~"; } /* Shadda */ +"\u0652" { return "o"; } /* Sukun */ +"\u0653" { return "^"; } /* Maddah */ +"\u0654" { return "#"; } /* HamzaAbove */ + +"\u0670" { return "`"; } /* AlifKhanjareeya */ +"\u0671" { return "{"; } /* Alif + HamzatWasl */ + +"\u067E" { return "P"; } /* PEH from AraMorph */ +"\u0686" { return "J"; } /* TCHEH from AraMorph */ +"\u06A4" { return "V"; } /* VEH from AraMorph */ +"\u06AF" { return "G"; } /* GAF from AraMorph */ +"\u0698" { return "R"; } /* JEH from AraMorph */ +"\u061F" { return "?"; } /* QUESTION MARK from AraMorph */ + +"\u06DC" { return ":"; } /* SmallHighSeen */ +"\u06DF" { return "@"; } /* SmallHighRoundedZero */ + +"\u06E2" { return "["; } /* SmallHighMeemIsolatedForm */ +"\u06E3" { return ";"; } /* SmallLowSeen */ +"\u06E5" { return ","; } /* SmallWaw */ +"\u06E6" { return "."; } /* SmallYa */ +"\u06E8" { return "!"; } /* SmallHighNoon */ +"\u06EA" { return "-"; } /* EmptyCentreLowStop */ +"\u06EB" { return "+"; } /* EmptyCentreHighStop */ +"\u06EC" { return "%"; } /* RoundedHighStopWithFilledCentre */ +"\u06ED" { return "]"; } /* SmallLowMeem */ + +[\&_]"vert;" { return "|"; } /* entity escapes below accept either & or _ as the leading character */ +[\&_]"lpar;" { return "("; } +[\&_]"rpar;" { return ")"; } +[\_\&]"lt;" { return "<"; } +[\_\&]"gt;" { return ">"; } +"'" { return "'"; } /* apostrophe: returned unchanged */ + +"&"[a-zA-Z]+";" { return yytext(); } /* named entities not handled above: returned unchanged */ + +. 
{ return yytext(); } /* any other non-newline character: returned unchanged */ +\n { return yytext(); } /* newline: returned unchanged */ + +/* disabled because it causes problems (NOTE(review): presumably the mis-escaped string literal in the action - confirm) */ +/* "\u06E0" { return "\\""; } SmallHighUprightRectangularZero */ + + +/* disabled: their targets "," and ";" are already produced by the SmallWaw and SmallLowSeen rules above */ +/* "\u060C" { return ","; } COMMA from AraMorph */ +/* "\u061B" { return ";"; } SEMICOLON from AraMorph */ + +/* letters with no Buckwalter equivalent; no rule maps them, so they are returned unchanged */ +/* \u0679 : ARABIC LETTER TTEH */ +/* \u0688 : ARABIC LETTER DDAL */ +/* \u06A9 : ARABIC LETTER KEHEH */ +/* \u0691 : ARABIC LETTER RREH */ +/* \u06BA : ARABIC LETTER NOON GHUNNA */ +/* \u06BE : ARABIC LETTER HEH DOACHASHMEE */ +/* \u06C1 : ARABIC LETTER HEH GOAL */ +/* \u06D2 : ARABIC LETTER YEH BARREE */ +