Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Buckwalter2Unicode.lex @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children |
line wrap: on
line source
/*
 * JFlex specification for a scanner that converts Buckwalter-transliterated
 * Arabic (plain ASCII) into Arabic Unicode characters.
 *
 * Every call to yylex() consumes one token from the input and returns the
 * replacement text as a java.lang.String:
 *   - an angle-bracketed tag ("<" ... ">") is returned unchanged,
 *   - a Buckwalter symbol is returned as its Arabic code point,
 *   - the escapes &vert; &lpar; &rpar; &lt; &gt; (also with a leading "_"
 *     instead of "&") and &apos; are returned as the literal ASCII character,
 *   - any other named entity and any unmapped character is returned unchanged.
 *
 * NOTE(review): this spec is stored under .../lt/text/transcode in the
 * repository while the package declared below is ...lt.general - confirm
 * that the mismatch is intended before moving or renaming anything.
 */
package de.mpg.mpiwg.berlin.mpdl.lt.general;

%%

%{
  /*
   * Buckwalter transliteration to Unicode conversion
   */
%}

%class Buckwalter2UnicodeLex
%public
%type java.lang.String
%unicode

%%

/*
 * Rule priority: JFlex takes the longest match and, for matches of equal
 * length, the rule listed first. The order of the rules below is therefore
 * significant and must be preserved.
 */

/* Markup: a complete tag is copied through untouched. A "<" or ">" that is
   not part of such a tag falls through to the single-character rules below. */
"<"[^>]+">"        { return yytext(); }

/* Hamza forms and Alef variants */
"'"                { return "\u0621"; }   /* HAMZA */
"|"                { return "\u0622"; }   /* ALEF WITH MADDA ABOVE (from AraMorph) */
">"                { return "\u0623"; }   /* ALEF WITH HAMZA ABOVE */
"&"                { return "\u0624"; }   /* WAW WITH HAMZA ABOVE */
"<"                { return "\u0625"; }   /* ALEF WITH HAMZA BELOW */
"}"                { return "\u0626"; }   /* YEH WITH HAMZA ABOVE */

/* Letters */
"A"                { return "\u0627"; }   /* ALEF */
"b"                { return "\u0628"; }   /* BEH */
"p"                { return "\u0629"; }   /* TEH MARBUTA */
"t"                { return "\u062A"; }   /* TEH */
"v"                { return "\u062B"; }   /* THEH */
"j"                { return "\u062C"; }   /* JEEM */
"H"                { return "\u062D"; }   /* HAH */
"x"                { return "\u062E"; }   /* KHAH */
"d"                { return "\u062F"; }   /* DAL */
"*"                { return "\u0630"; }   /* THAL */
"r"                { return "\u0631"; }   /* REH */
"z"                { return "\u0632"; }   /* ZAIN */
"s"                { return "\u0633"; }   /* SEEN */
"$"                { return "\u0634"; }   /* SHEEN */
"S"                { return "\u0635"; }   /* SAD */
"D"                { return "\u0636"; }   /* DAD */
"T"                { return "\u0637"; }   /* TAH */
"Z"                { return "\u0638"; }   /* ZAH */
"E"                { return "\u0639"; }   /* AIN */
"g"                { return "\u063A"; }   /* GHAIN */
"_"                { return "\u0640"; }   /* TATWEEL */
"f"                { return "\u0641"; }   /* FEH */
"q"                { return "\u0642"; }   /* QAF */
"k"                { return "\u0643"; }   /* KAF */
"l"                { return "\u0644"; }   /* LAM */
"m"                { return "\u0645"; }   /* MEEM */
"n"                { return "\u0646"; }   /* NOON */
"h"                { return "\u0647"; }   /* HEH */
"w"                { return "\u0648"; }   /* WAW */
"Y"                { return "\u0649"; }   /* ALEF MAKSURA */
"y"                { return "\u064A"; }   /* YEH */

/* Vowel marks and other diacritics */
"F"                { return "\u064B"; }   /* FATHATAN */
"N"                { return "\u064C"; }   /* DAMMATAN */
"K"                { return "\u064D"; }   /* KASRATAN */
"a"                { return "\u064E"; }   /* FATHA */
"u"                { return "\u064F"; }   /* DAMMA */
"i"                { return "\u0650"; }   /* KASRA */
"~"                { return "\u0651"; }   /* SHADDA */
"o"                { return "\u0652"; }   /* SUKUN */
"^"                { return "\u0653"; }   /* MADDAH ABOVE */
"#"                { return "\u0654"; }   /* HAMZA ABOVE */
"`"                { return "\u0670"; }   /* SUPERSCRIPT ALEF (Alif Khanjareeya) */
"{"                { return "\u0671"; }   /* ALEF WASLA */

/* Additional letters and punctuation taken from AraMorph */
"P"                { return "\u067E"; }   /* PEH */
"J"                { return "\u0686"; }   /* TCHEH */
"V"                { return "\u06A4"; }   /* VEH */
"G"                { return "\u06AF"; }   /* GAF */
"R"                { return "\u0698"; }   /* JEH */
"?"                { return "\u061F"; }   /* ARABIC QUESTION MARK */

/* Small annotation signs */
":"                { return "\u06DC"; }   /* SMALL HIGH SEEN */
"@"                { return "\u06DF"; }   /* SMALL HIGH ROUNDED ZERO */
"["                { return "\u06E2"; }   /* SMALL HIGH MEEM ISOLATED FORM */
";"                { return "\u06E3"; }   /* SMALL LOW SEEN */
","                { return "\u06E5"; }   /* SMALL WAW */
"."                { return "\u06E6"; }   /* SMALL YEH */
"!"                { return "\u06E8"; }   /* SMALL HIGH NOON */
"-"                { return "\u06EA"; }   /* EMPTY CENTRE LOW STOP */
"+"                { return "\u06EB"; }   /* EMPTY CENTRE HIGH STOP */
"%"                { return "\u06EC"; }   /* ROUNDED HIGH STOP WITH FILLED CENTRE */
"]"                { return "\u06ED"; }   /* SMALL LOW MEEM */

/* Escapes for ASCII characters that would otherwise be transliterated.
   The first five accept either "&" or "_" as the leading character. */
[\&_]"vert;"       { return "|"; }
[\&_]"lpar;"       { return "("; }
[\&_]"rpar;"       { return ")"; }
[\&_]"lt;"         { return "<"; }
[\&_]"gt;"         { return ">"; }
/* A bare "'" pattern in this position can never match, because the HAMZA
   rule near the top has the same pattern and higher priority; the literal
   apostrophe is therefore reachable only through its entity form. */
"&apos;"           { return "'"; }

/* Any other named entity is copied through unchanged. */
"&"[a-zA-Z]+";"    { return yytext(); }

/* Fallback: copy every remaining character through unchanged. Depending on
   the JFlex version, "." excludes either only \n or all line terminators,
   so the terminators are listed explicitly to keep lone \r (CRLF input) and
   the Unicode line separators from raising a "could not match input" error. */
.|[\r\n\u000B\u000C\u0085\u2028\u2029]   { return yytext(); }

/* Disabled because it causes problems: */
/* "\\""  { return "\u06E0"; }   SMALL HIGH UPRIGHT RECTANGULAR ZERO */

/* Disabled because the pattern is already used by a rule above: */
/* ","  { return "\u060C"; }   ARABIC COMMA (from AraMorph) */
/* ";"  { return "\u061B"; }   ARABIC SEMICOLON (from AraMorph) */

/* Unicode letters that have no Buckwalter symbol: */
/* \u0679 : ARABIC LETTER TTEH */
/* \u0688 : ARABIC LETTER DDAL */
/* \u06A9 : ARABIC LETTER KEHEH */
/* \u0691 : ARABIC LETTER RREH */
/* \u06BA : ARABIC LETTER NOON GHUNNA */
/* \u06BE : ARABIC LETTER HEH DOACHASHMEE */
/* \u06C1 : ARABIC LETTER HEH GOAL */
/* \u06D2 : ARABIC LETTER YEH BARREE */