Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2Buckwalter.lex @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.general;
/* NOTE(review): this spec is stored under .../mpdl/lt/text/transcode/ while the
   package declared above is ...mpdl.lt.general; confirm that the generated class
   is really meant to live in that package before changing either side. */

%%

%{
  /*
   * Unicode (Arabic script) to Buckwalter transliteration.
   *
   * Each call to yylex() consumes one token from the input and returns its
   * Buckwalter representation as a String. Input that has no mapping below
   * is returned unchanged.
   */
%}

%class Unicode2BuckwalterLex
%public
%type java.lang.String
%unicode

%%

/* Markup tags are passed through untouched; the negated class also accepts
   line breaks, so a tag may span several lines. */
"<"[^>]+">"          { return yytext(); }

/* Hamza forms and basic letters */
"\u0621"             { return "'"; }   /* Hamza */
"\u0622"             { return "|"; }   /* Alif + MaddaAbove, from AraMorph */
"\u0623"             { return ">"; }   /* Alif + HamzaAbove */
"\u0624"             { return "&"; }   /* Waw + HamzaAbove */
"\u0625"             { return "<"; }   /* Alif + HamzaBelow */
"\u0626"             { return "}"; }   /* Ya + HamzaAbove */
"\u0627"             { return "A"; }   /* Alif */
"\u0628"             { return "b"; }   /* Ba */
"\u0629"             { return "p"; }   /* TaMarbuta */
"\u062A"             { return "t"; }   /* Ta */
"\u062B"             { return "v"; }   /* Tha */
"\u062C"             { return "j"; }   /* Jeem */
"\u062D"             { return "H"; }   /* HHa */
"\u062E"             { return "x"; }   /* Kha */
"\u062F"             { return "d"; }   /* Dal */
"\u0630"             { return "*"; }   /* Thal */
"\u0631"             { return "r"; }   /* Ra */
"\u0632"             { return "z"; }   /* Zain */
"\u0633"             { return "s"; }   /* Seen */
"\u0634"             { return "$"; }   /* Sheen */
"\u0635"             { return "S"; }   /* Sad */
"\u0636"             { return "D"; }   /* DDad */
"\u0637"             { return "T"; }   /* TTa */
"\u0638"             { return "Z"; }   /* DTha */
"\u0639"             { return "E"; }   /* Ain */
"\u063A"             { return "g"; }   /* Ghain */
"\u0640"             { return "_"; }   /* Tatweel */
"\u0641"             { return "f"; }   /* Fa */
"\u0642"             { return "q"; }   /* Qaf */
"\u0643"             { return "k"; }   /* Kaf */
"\u0644"             { return "l"; }   /* Lam */
"\u0645"             { return "m"; }   /* Meem */
"\u0646"             { return "n"; }   /* Noon */
"\u0647"             { return "h"; }   /* Ha */
"\u0648"             { return "w"; }   /* Waw */
"\u0649"             { return "Y"; }   /* AlifMaksura */
"\u064A"             { return "y"; }   /* Ya */

/* Vowel marks and other diacritics */
"\u064B"             { return "F"; }   /* Fathatan */
"\u064C"             { return "N"; }   /* Dammatan */
"\u064D"             { return "K"; }   /* Kasratan */
"\u064E"             { return "a"; }   /* Fatha */
"\u064F"             { return "u"; }   /* Damma */
"\u0650"             { return "i"; }   /* Kasra */
"\u0651"             { return "~"; }   /* Shadda */
"\u0652"             { return "o"; }   /* Sukun */
"\u0653"             { return "^"; }   /* Maddah */
"\u0654"             { return "#"; }   /* HamzaAbove */
"\u0670"             { return "`"; }   /* AlifKhanjareeya */
"\u0671"             { return "{"; }   /* Alif + HamzatWasl */

/* Additional letters and punctuation taken over from AraMorph */
"\u067E"             { return "P"; }   /* PEH from AraMorph */
"\u0686"             { return "J"; }   /* TCHEH from AraMorph */
"\u06A4"             { return "V"; }   /* VEH from AraMorph */
"\u06AF"             { return "G"; }   /* GAF from AraMorph */
"\u0698"             { return "R"; }   /* JEH from AraMorph */
"\u061F"             { return "?"; }   /* QUESTION MARK from AraMorph */

/* Small annotation signs */
"\u06DC"             { return ":"; }   /* SmallHighSeen */
"\u06DF"             { return "@"; }   /* SmallHighRoundedZero */
"\u06E2"             { return "["; }   /* SmallHighMeemIsolatedForm */
"\u06E3"             { return ";"; }   /* SmallLowSeen */
"\u06E5"             { return ","; }   /* SmallWaw */
"\u06E6"             { return "."; }   /* SmallYa */
"\u06E8"             { return "!"; }   /* SmallHighNoon */
"\u06EA"             { return "-"; }   /* EmptyCentreLowStop */
"\u06EB"             { return "+"; }   /* EmptyCentreHighStop */
"\u06EC"             { return "%"; }   /* RoundedHighStopWithFilledCentre */
"\u06ED"             { return "]"; }   /* SmallLowMeem */

/* Named entities, introduced by either an ampersand or an underscore, are
   replaced by the character they stand for. These rules have to stay in front
   of the generic entity rule further down: both match the same text with the
   same length, and in that case the rule listed first is the one applied. */
[\&_]"vert;"         { return "|"; }
[\&_]"lpar;"         { return "("; }
[\&_]"rpar;"         { return ")"; }
[\_\&]"lt;"          { return "<"; }
[\_\&]"gt;"          { return ">"; }

"'"                  { return "'"; }

/* Any other alphabetic entity is kept exactly as written. */
"&"[a-zA-Z]+";"      { return yytext(); }

/* Fallback: everything without a mapping is returned unchanged. */
.                    { return yytext(); }

/* Line terminators are returned unchanged as well. Depending on the JFlex
   release, the dot above excludes either only the line feed or every Unicode
   line terminator (CR, VT, FF, NEL, LS, PS). All of them are listed here so
   that no input character is left without a rule; otherwise a carriage return
   in CRLF input would abort the scanner with a "could not match input" error. */
[\n\r\u000B\u000C\u0085\u2028\u2029]   { return yytext(); }

/* Disabled because it causes problems: */
/* "\u06E0" { return "\\""; } SmallHighUprightRectangularZero */

/* Disabled because their Buckwalter symbols are already produced by the rules
   for SmallWaw and SmallLowSeen above: */
/* "\u060C" { return ","; } COMMA from AraMorph */
/* "\u061B" { return ";"; } SEMICOLON from AraMorph */

/* Letters that are not contained in the Buckwalter scheme: */
/* \u0679 : ARABIC LETTER TTEH */
/* \u0688 : ARABIC LETTER DDAL */
/* \u06A9 : ARABIC LETTER KEHEH */
/* \u0691 : ARABIC LETTER RREH */
/* \u06BA : ARABIC LETTER NOON GHUNNA */
/* \u06BE : ARABIC LETTER HEH DOACHASHMEE */
/* \u06C1 : ARABIC LETTER HEH GOAL */
/* \u06D2 : ARABIC LETTER YEH BARREE */