%%

%{
  /*
   * Buckwalter+ to Arabic script (Unicode) conversion
   * v. 1.3, Malcolm D. Hyman, 2002-10-18
   * [this is a jflex specification]
   */

  /*
   * Transliteration table used by the {LETTER} rule: the character at
   * index i is converted to the code point U+0621 + i.
   *
   * The table must therefore be positionally aligned with the Unicode
   * Arabic block.  U+063B..U+063F have no transliteration, so exactly
   * FIVE placeholder characters are required between 'g' (U+063A) and
   * '_' (U+0640).  With fewer placeholders every letter from '_' onward
   * is mapped to the wrong code point.  The placeholders are spaces,
   * which the LETTER class does not contain, so they are never looked up.
   */
  static public String ALPHA_TABLE =
      "'|>&<}AbptvjHxd*rzs$SDTZEg"   // U+0621 .. U+063A
      + "     "                      // U+063B .. U+063F (five placeholders)
      + "_fqklmnhwYy";               // U+0640 .. U+064A
%}

%class ArabicLex
%public
%implements LexS
%yylexthrow LexException
%type java.lang.String
%unicode
%char

// Every character listed here must also occur in ALPHA_TABLE.
LETTER=[\'|>&<\}AbptvjHxd*rzs$SDTZEg_fqklmnhwYy]

%%

// The word "Allah", optionally preceded by "^", becomes the single
// code point U+FDF2.
"^"?"Allah" {
    return "\ufdf2";
}

// Single transliterated letter: look it up by position in ALPHA_TABLE.
// LETTER only admits characters present in the table, so indexOf()
// cannot return -1 here.
{LETTER} {
    return String.valueOf((char) (0x0621 + ALPHA_TABLE.indexOf(yycharat(0))));
}

[FNKauio] { }   // consume with NOP

"^" { }

"-" { }

"~" {
    // TASHDID
    return "\u0651";
}

"`" {
    // DAGGER ALIF
    return "\u0670";
}

"{" {
    // WASLA-ON-ALIF
    return "\u0671";
}

("^"[aAbgdewzhtiklmncseofqxZD*r])+/[ .,;?] {
    // this rule matches sequences of diagram letters such as "ABGD";
    // each letter is written as "^" followed by the letter, and the
    // sequence must be followed by a space or one of . , ; ?
    // FIXME: text pane sometimes splits line after overscore
    StringBuffer emit = new StringBuffer();
    for (int i = 0; i < yylength(); i++) {
        switch (yycharat(i)) {
            case 'a': emit.append("\u0627"); break;  // ALIF
            case 'A': emit.append("\u0627"); break;  // ALIF
            case 'b': emit.append("\u0628"); break;  // BAA
            case 'g': emit.append("\u062c"); break;  // JEEM
            case 'd': emit.append("\u062f"); break;  // DAL
            case 'e': emit.append("\u0647"); break;  // HA
            case 'w': emit.append("\u0648"); break;  // WAW
            case 'z': emit.append("\u0632"); break;  // ZAIN
            case 'h': emit.append("\u062d"); break;  // HAA
            case 't': emit.append("\u0637"); break;  // TAH
            // NOTE(review): this previously emitted U+0640 (TATWEEL), which
            // contradicted the ALIF MAQSURAH label; U+0649 is the code point
            // that matches the label.
            case 'i': emit.append("\u0649"); break;  // ALIF MAQSURAH
            case 'k': emit.append("\u0643"); break;  // CAF
            case 'l': emit.append("\u0644"); break;  // LAM
            case 'm': emit.append("\u0645"); break;  // MEEM
            case 'n': emit.append("\u0646"); break;  // NOON
            case 'c': emit.append("\u0633"); break;  // SEEN
            case 's': emit.append("\u0633"); break;  // SEEN
            case 'o': emit.append("\u0639"); break;  // AIN
            case 'f': emit.append("\u0641"); break;  // FA
            case 'q': emit.append("\u0642"); break;  // QAF
            case 'x': emit.append("\u062e"); break;  // KHAA
            case 'Z': emit.append("\u0635"); break;  // SAD
            case 'D': emit.append("\u0636"); break;  // DAD
            case '*': emit.append("\u0638"); break;  // DHAH
            case 'r': emit.append("\u0631"); break;  // REH
            // The overscore is emitted before its letter; 'continue' skips
            // the non-joiner below so that it is appended only after letters.
            case '^': emit.append("\u0305"); continue;  // NON-SPACING OVERSCORE
            default:
                throw new LexException("Unknown letter in diagram at " + yychar);
        }
        emit.append("\u200c");  // ZERO-WIDTH NON-JOINER
    }
    return emit.toString();
}

"," {
    return "\u060c";  // ARABIC COMMA
}

";" {
    return "\u061b";  // ARABIC SEMICOLON
}

"?" {
    return "\u061f";  // ARABIC QUESTION MARK
}

"." {
    return ".\u200f";  // ensure period is in R-to-L context
}

// Anything not handled above is passed through unchanged.
.|\n {
    return yytext();
}