%%

%{
  /*
   * Beta Code to Unicode Romanization conversion,
   * v. 1.0, Malcolm D. Hyman, 2003-06-24
   * [this is a jflex specification]
   *
   * This Romanization should be round-trippable. The downside is that
   * we get some rather naff combinations of diacritics. One day, however,
   * fonts will be better at diacritic stacking.
   *
   * Supported subset of Beta Code:
   * -- Greek alphabet and diacritics
   * -- escapes not supported at all: $, &, @, <, >, {, }
   * -- punctuation (%): common items (%-%9 supported)
   * -- quotation marks ("): mostly supported
   * -- brackets ([]): [/]-[8/]8 supported
   * -- text symbols (#): support default, #2, #3, #5
   *
   * Extensions/modifications to Beta Code standard:
   * -- lowercase alphabetic characters treated as if uppercase
   */

  /** Text returned for an unnumbered "[" and "]" respectively. */
  static final String BRACEL = "{", BRACER = "}";

  /** Message used when a switch reaches a digit its own pattern should have excluded. */
  static final String INTERNAL_ERROR = "Internal lexer error";

  /**
   * uppercase: set by the "*" rule; makes the next compose() call capitalize
   * its argument.  Cleared by compose() and by every rule that emits
   * punctuation or reports an error.
   *
   * vLong: true when the token just emitted was an eta or omega.  The
   * stand-alone "|" (iota subscript) rule only accepts input while it is set.
   */
  boolean uppercase = false, vLong = false;

  /**
   * Applies a pending "*" capitalization to a romanized token.
   *
   * If the uppercase flag is set it is cleared and the first character of
   * s is upper-cased; otherwise s is returned unchanged.  Case mapping is
   * done with a fixed locale so the output does not depend on the JVM's
   * default locale (a Turkish default would otherwise map "i" to a dotted
   * capital I).
   *
   * @param s the romanized token; must not be null
   * @return s, with its first character upper-cased when a "*" was pending
   */
  public String compose(String s) {
    if (!uppercase) {
      return s;
    }
    uppercase = false;
    if (s.length() > 1) {
      // Only the leading character is capitalized (e.g. "th" becomes "Th").
      return Character.toUpperCase(s.charAt(0)) + s.substring(1);
    }
    return s.toUpperCase(java.util.Locale.ENGLISH);
  }
%}

%class GreekTranslitLex
%public
%implements LexS
%yylexthrow LexException
%type java.lang.String
%unicode
%char

%%

/* Every rule lives in the default state; no other lexical state is declared. */
<YYINITIAL> {

  /* ---- plain letters (a smooth breathing is matched and dropped) ---- */
  /* Lower-casing uses a fixed locale so "I" always becomes "i". */
  ")"[AaEeIiOoUu]   { vLong = false; return compose(yytext().substring(1, 2).toLowerCase(java.util.Locale.ENGLISH)); }
  [AaEeIiOoUu]")"?  { vLong = false; return compose(yytext().substring(0, 1).toLowerCase(java.util.Locale.ENGLISH)); }
  [BbGgDdZzKkLlMmNnPpRrSsTt]  { vLong = false; return compose(yytext().toLowerCase(java.util.Locale.ENGLISH)); }
  ")"?[Hh]")"?      { vLong = true;  return compose("\u0113"); }
  [Qq]              { vLong = false; return compose("th"); }
  [Cc]              { vLong = false; return compose("x"); }
  [Ff]              { vLong = false; return compose("ph"); }
  [Xx]              { vLong = false; return compose("kh"); }
  [Yy]              { vLong = false; return compose("ps"); }
  ")"?[Ww]")"?      { vLong = true;  return compose("\u014d"); }

  /* ---- alpha with iota subscript: emitted as long alpha + "i" ---- */
  /* "sic" is the original author's marker on vLong = false in these rules. */
  [Aa]")"?"|" {
      vLong = false; // sic
      return compose("\u0101i");
  }
  [Aa]"(|" {
      vLong = false; // sic
      return compose("h\u0101i");
  }
  [Aa]")"?"/|" {
      vLong = false; // sic
      return compose("\u0101\u0301i");
  }
  [Aa]")"?"\\|" {
      vLong = false; // sic
      return compose("\u0101\u0300i");
  }
  [Aa]")"?"=|" {
      vLong = false; // sic
      return compose("\u00e2i");
  }
  [Aa]"(/|" {
      vLong = false; // sic
      return compose("h\u0101\u0301i");
  }
  [Aa]"(\\|" {
      vLong = false; // sic
      return compose("h\u0101\u0300i");
  }
  [Aa]"(=|" {
      vLong = false; // sic
      return compose("h\u00e2i");
  }

  /* ---- rough breathing written before the vowel ---- */
  "("[Aa]           { vLong = false; return compose("ha"); }
  "("[Ee]           { vLong = false; return compose("he"); }
  "("[Hh]           { vLong = true;  return compose("h\u0113"); }
  "("[Ii]           { vLong = false; return compose("hi"); }
  "("[Oo]           { vLong = false; return compose("ho"); }
  "("[Uu]           { vLong = false; return compose("hu"); }
  "("[Ww]           { vLong = true;  return compose("h\u014d"); }

  /* ---- rough breathing written after the vowel ---- */
  [Aa]"("           { vLong = false; return compose("ha"); }
  [Ee]"("           { vLong = false; return compose("he"); }
  [Hh]"("           { vLong = true;  return compose("h\u0113"); }
  [Ii]"("           { vLong = false; return compose("hi"); }
  [Oo]"("           { vLong = false; return compose("ho"); }
  [Uu]"("           { vLong = false; return compose("hu"); }
  [Ww]"("           { vLong = true;  return compose("h\u014d"); }

  /* ---- (smooth breathing +) acute before the vowel ---- */
  ")"?"/"[Aa]       { vLong = false; return compose("\u00e1"); }
  ")"?"/"[Ee]       { vLong = false; return compose("\u00e9"); }
  ")"?"/"[Hh]       { vLong = true;  return compose("\u0113\u0301"); }
  ")"?"/"[Ii]       { vLong = false; return compose("\u00ed"); }
  ")"?"/"[Oo]       { vLong = false; return compose("\u00f3"); }
  ")"?"/"[Uu]       { vLong = false; return compose("\u00fa"); }
  ")"?"/"[Ww]       { vLong = true;  return compose("\u014d\u0301"); }

  /* ---- (smooth breathing +) grave before the vowel ---- */
  ")"?"\\"[Aa]      { vLong = false; return compose("\u00e0"); }
  ")"?"\\"[Ee]      { vLong = false; return compose("\u00e8"); }
  ")"?"\\"[Hh]      { vLong = true;  return compose("\u0113\u0300"); }
  ")"?"\\"[Ii]      { vLong = false; return compose("\u00ec"); }
  ")"?"\\"[Oo]      { vLong = false; return compose("\u00f2"); }
  ")"?"\\"[Uu]      { vLong = false; return compose("\u00f9"); }
  /* omega + grave takes the combining grave (U+0300), like eta above; */
  /* emitting the acute here would collide with the omega + acute rule. */
  ")"?"\\"[Ww]      { vLong = true;  return compose("\u014d\u0300"); }

  /* ---- (smooth breathing +) circumflex before the vowel ---- */
  ")"?"="[Aa]       { vLong = false; return compose("\u00e2"); }
  ")"?"="[Hh]       { vLong = true;  return compose("\u00ea"); }
  ")"?"="[Ii]       { vLong = false; return compose("\u00ee"); }
  ")"?"="[Uu]       { vLong = false; return compose("\u00fb"); }
  ")"?"="[Ww]       { vLong = true;  return compose("\u00f4"); }

  /* ---- (smooth breathing +) acute after the vowel ---- */
  [Aa]")"?"/"       { vLong = false; return compose("\u00e1"); }
  [Ee]")"?"/"       { vLong = false; return compose("\u00e9"); }
  [Hh]")"?"/"       { vLong = true;  return compose("\u0113\u0301"); }
  [Ii]")"?"/"       { vLong = false; return compose("\u00ed"); }
  [Oo]")"?"/"       { vLong = false; return compose("\u00f3"); }
  [Uu]")"?"/"       { vLong = false; return compose("\u00fa"); }
  [Ww]")"?"/"       { vLong = true;  return compose("\u014d\u0301"); }

  /* ---- (smooth breathing +) grave after the vowel ---- */
  [Aa]")"?"\\"      { vLong = false; return compose("\u00e0"); }
  [Ee]")"?"\\"      { vLong = false; return compose("\u00e8"); }
  [Hh]")"?"\\"      { vLong = true;  return compose("\u0113\u0300"); }
  [Ii]")"?"\\"      { vLong = false; return compose("\u00ec"); }
  [Oo]")"?"\\"      { vLong = false; return compose("\u00f2"); }
  [Uu]")"?"\\"      { vLong = false; return compose("\u00f9"); }
  [Ww]")"?"\\"      { vLong = true;  return compose("\u014d\u0300"); }

  /* ---- (smooth breathing +) circumflex after the vowel ---- */
  [Aa]")"?"="       { vLong = false; return compose("\u00e2"); }
  [Hh]")"?"="       { vLong = true;  return compose("\u00ea"); }
  [Ii]")"?"="       { vLong = false; return compose("\u00ee"); }
  [Uu]")"?"="       { vLong = false; return compose("\u00fb"); }
  [Ww]")"?"="       { vLong = true;  return compose("\u00f4"); }

  /* ---- rough breathing + acute before the vowel ---- */
  "(/"[Aa]          { vLong = false; return compose("h\u00e1"); }
  "(/"[Ee]          { vLong = false; return compose("h\u00e9"); }
  "(/"[Hh]          { vLong = true;  return compose("h\u0113\u0301"); }
  "(/"[Ii]          { vLong = false; return compose("h\u00ed"); }
  "(/"[Oo]          { vLong = false; return compose("h\u00f3"); }
  "(/"[Uu]          { vLong = false; return compose("h\u00fa"); }
  "(/"[Ww]          { vLong = true;  return compose("h\u014d\u0301"); }

  /* ---- rough breathing + grave before the vowel ---- */
  "(\\"[Aa]         { vLong = false; return compose("h\u00e0"); }
  "(\\"[Ee]         { vLong = false; return compose("h\u00e8"); }
  "(\\"[Hh]         { vLong = true;  return compose("h\u0113\u0300"); }
  "(\\"[Ii]         { vLong = false; return compose("h\u00ec"); }
  "(\\"[Oo]         { vLong = false; return compose("h\u00f2"); }
  "(\\"[Uu]         { vLong = false; return compose("h\u00f9"); }
  "(\\"[Ww]         { vLong = true;  return compose("h\u014d\u0300"); }

  /* ---- rough breathing + circumflex before the vowel ---- */
  "(="[Aa]          { vLong = false; return compose("h\u00e2"); }
  "(="[Hh]          { vLong = true;  return compose("h\u00ea"); }
  "(="[Ii]          { vLong = false; return compose("h\u00ee"); }
  "(="[Uu]          { vLong = false; return compose("h\u00fb"); }
  "(="[Ww]          { vLong = true;  return compose("h\u00f4"); }

  /* ---- rough breathing + acute after the vowel ---- */
  [Aa]"(/"          { vLong = false; return compose("h\u00e1"); }
  [Ee]"(/"          { vLong = false; return compose("h\u00e9"); }
  [Hh]"(/"          { vLong = true;  return compose("h\u0113\u0301"); }
  [Ii]"(/"          { vLong = false; return compose("h\u00ed"); }
  [Oo]"(/"          { vLong = false; return compose("h\u00f3"); }
  [Uu]"(/"          { vLong = false; return compose("h\u00fa"); }
  [Ww]"(/"          { vLong = true;  return compose("h\u014d\u0301"); }

  /* ---- rough breathing + grave after the vowel ---- */
  [Aa]"(\\"         { vLong = false; return compose("h\u00e0"); }
  [Ee]"(\\"         { vLong = false; return compose("h\u00e8"); }
  [Hh]"(\\"         { vLong = true;  return compose("h\u0113\u0300"); }
  [Ii]"(\\"         { vLong = false; return compose("h\u00ec"); }
  [Oo]"(\\"         { vLong = false; return compose("h\u00f2"); }
  [Uu]"(\\"         { vLong = false; return compose("h\u00f9"); }
  [Ww]"(\\"         { vLong = true;  return compose("h\u014d\u0300"); }

  /* ---- rough breathing + circumflex after the vowel ---- */
  [Aa]"(="          { vLong = false; return compose("h\u00e2"); }
  [Hh]"(="          { vLong = true;  return compose("h\u00ea"); }
  [Ii]"(="          { vLong = false; return compose("h\u00ee"); }
  [Uu]"(="          { vLong = false; return compose("h\u00fb"); }
  [Ww]"(="          { vLong = true;  return compose("h\u00f4"); }

  /* ---- stand-alone iota subscript: legal only right after eta/omega ---- */
  "|" {
      uppercase = false;
      boolean afterLongVowel = vLong;
      vLong = false;
      if (!afterLongVowel) {
          throw new LexException("illegal iota subscript: char " + yychar);
      }
      return "i";
  }

  /* ---- dieresis on iota and upsilon ---- */
  [Ii]"+"           { vLong = false; return compose("\u00ef"); }
  [Ii]"/+"          { vLong = false; return compose("\u00ef\u0301"); }
  [Ii]"\\+"         { vLong = false; return compose("\u00ef\u0300"); }
  [Ii]"=+"          { vLong = false; return compose("\u00ef\u0302"); }
  [Uu]"+"           { vLong = false; return compose("\u00fc"); }
  [Uu]"/+"          { vLong = false; return compose("\u00fc\u0301"); }
  [Uu]"\\+"         { vLong = false; return compose("\u00fc\u0300"); }
  [Uu]"=+"          { vLong = false; return compose("\u00fc\u0302"); }

  /* ---- rho with rough breathing ---- */
  [Rr]"("           { vLong = false; return compose("rh"); }

  /* ---- diphthongs with rough breathing (marks follow the second vowel) ---- */
  [Aa][Ii]"("       { vLong = false; return compose("hai"); }
  [Aa][Ii]"(/"      { vLong = false; return compose("ha\u00ed"); }
  [Aa][Ii]"(\\"     { vLong = false; return compose("ha\u00ec"); }
  [Aa][Ii]"(="      { vLong = false; return compose("ha\u00ee"); }
  [Ee][Ii]"("       { vLong = false; return compose("hei"); }
  [Ee][Ii]"(/"      { vLong = false; return compose("he\u00ed"); }
  [Ee][Ii]"(\\"     { vLong = false; return compose("he\u00ec"); }
  [Ee][Ii]"(="      { vLong = false; return compose("he\u00ee"); }
  [Oo][Ii]"("       { vLong = false; return compose("hoi"); }
  [Oo][Ii]"(/"      { vLong = false; return compose("ho\u00ed"); }
  [Oo][Ii]"(\\"     { vLong = false; return compose("ho\u00ec"); }
  [Oo][Ii]"(="      { vLong = false; return compose("ho\u00ee"); }
  [Aa][Uu]"("       { vLong = false; return compose("hau"); }
  [Aa][Uu]"(/"      { vLong = false; return compose("ha\u00fa"); }
  [Aa][Uu]"(\\"     { vLong = false; return compose("ha\u00f9"); }
  [Aa][Uu]"(="      { vLong = false; return compose("ha\u00fb"); }
  [Ee][Uu]"("       { vLong = false; return compose("heu"); }
  [Ee][Uu]"(/"      { vLong = false; return compose("he\u00fa"); }
  [Ee][Uu]"(\\"     { vLong = false; return compose("he\u00f9"); }
  [Ee][Uu]"(="      { vLong = false; return compose("he\u00fb"); }
  [Oo][Uu]"("       { vLong = false; return compose("hou"); }
  [Oo][Uu]"(/"      { vLong = false; return compose("ho\u00fa"); }
  [Oo][Uu]"(\\"     { vLong = false; return compose("ho\u00f9"); }
  [Oo][Uu]"(="      { vLong = false; return compose("ho\u00fb"); }

  /* ---- diacritics that no earlier rule attached to a letter ---- */
  "+" {
      uppercase = false;
      vLong = false;
      throw new LexException("illegal dieresis: char " + yychar);
  }
  [()] {
      uppercase = false;
      vLong = false;
      throw new LexException("illegal breathing: char " + yychar);
  }
  [/\\=] {
      uppercase = false;
      vLong = false;
      throw new LexException("illegal accent: char " + yychar);
  }

  /* ---- capitalization marker: emits nothing, arms compose() ---- */
  "*" {
      vLong = false;
      uppercase = true;
  }

  /* ---- text symbols: "#" alone or followed by 2, 3 or 5 ---- */
  "#"[235]? {
      vLong = false;
      if (yylength() == 1) {
          return compose("\u02b9");
      }
      switch (yycharat(1)) {
          case '2': return compose("\u03da");
          case '3': return compose("\u03de");
          case '5': return compose("\u03e0");
          default: throw new LexException(INTERNAL_ERROR);
      }
  }

  /* ---- opening brackets ---- */
  "["[1-8]? {
      uppercase = false;
      vLong = false;
      if (yylength() == 1) {
          return BRACEL;
      }
      switch (yycharat(1)) {
          case '1': return "(";
          case '2': return "<";
          case '3': return "[";
          case '4': return "\u301a";
          case '5': return "\u230a";
          case '6': return "\u2308";
          case '7': return "\u2308";
          case '8': return "\u230a";
          default: throw new LexException(INTERNAL_ERROR);
      }
  }

  /* ---- closing brackets ---- */
  "]"[1-8]? {
      uppercase = false;
      vLong = false;
      if (yylength() == 1) {
          return BRACER;
      }
      switch (yycharat(1)) {
          case '1': return ")";
          case '2': return ">";
          case '3': return "]";
          case '4': return "\u301b";
          case '5': return "\u230b";
          case '6': return "\u2309";
          case '7': return "\u230b";
          case '8': return "\u2309";
          default: throw new LexException(INTERNAL_ERROR);
      }
  }

  /* ---- quotation marks ---- */
  "\""[1-7]? {
      uppercase = false;
      vLong = false;
      if (yylength() == 1) {
          return "\"";
      }
      switch (yycharat(1)) {
          case '1': return "\u201e";
          case '2': return "\u201d";
          case '3': return "'";
          case '4': return "\u201a";
          case '5': return "\u2019";
          case '6': return "\u00ab";
          case '7': return "\u00bb";
          default: throw new LexException(INTERNAL_ERROR);
      }
  }

  /* ---- punctuation ---- */
  "%"[1-9]? {
      uppercase = false;
      vLong = false;
      if (yylength() == 1) {
          return "\u2020";
      }
      switch (yycharat(1)) {
          case '1': return "?";
          case '2': return "*";
          case '3': return "/";
          case '4': return "!";
          case '5': return "|";
          case '6': return "=";
          case '7': return "+";
          case '8': return "%";
          case '9': return "&";
          default: throw new LexException(INTERNAL_ERROR);
      }
  }

  /* ---- "_" and "^" are consumed and produce an empty token ---- */
  [_\^] {
      uppercase = false;
      vLong = false;
      return "";
  }

  /* ---- anything else is passed through unchanged ---- */
  .|\n {
      uppercase = false;
      vLong = false;
      return yytext();
  }
}