view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Betacode2Unicode.lex.old @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.general;

%%
%{
    /*
     * Betacode to Unicode conversion.
     *
     * Everything between the user-code markers is copied by JFlex into the
     * generated scanner class, so the members below are fields and methods
     * of that class and are visible to every rule action.
     */

  // Capitalisation flag: the asterisk rule sets it to 1, and
  // toUnicodeGreek() uses it as a multiplier and then resets it to 0.
  private int isUpper = 0;

  /**
   * Converts a lowercase Greek code point to a string, honouring a pending
   * capitalisation request.
   *
   * When {@code isUpper} is 1 the value is lowered by 0x0020 before the
   * conversion; when it is 0 the value is used unchanged. The flag is always
   * cleared, so it affects a single letter only.
   *
   * @param in code point of the lowercase letter
   * @return one-character string for the (possibly shifted) code point
   */
  private String toUnicodeGreek(int in) {
    final int shifted = in - 0x0020 * isUpper;
    isUpper = 0;
    return toUnicode(shifted);
  }

  /**
   * Converts a numeric character value to a one-character string.
   *
   * @param in character value; it is narrowed to {@code char}, so only the
   *           low 16 bits are used
   * @return string consisting of exactly that character
   */
  private String toUnicode(int in) {
    // String.valueOf(char) replaces the deprecated Character(char)
    // constructor and the intermediate boxed object; the result is the same.
    return String.valueOf((char) in);
  }

%}

/* Name of the generated scanner class. */
%class Betacode2UnicodeLex
/* The generated class is declared public. */
%public
/* The scanning method returns a java.lang.String (one converted token per call). */
%type java.lang.String
/* Scan over the full Unicode character set. */
%unicode
%%


/*
 * Markup: everything from a '<' up to and including the next '>' is returned
 * unchanged, so the content of a tag is never transliterated.
 */
"<"[^>]+">" { return yytext(); }

/* "j" and "v" (with or without the capital marker) are emitted as Latin letters. */
"*j"    { return "H"; }
"j"     { return "h"; }
"*v"    { return "F"; }
"v"    { return "f"; }
/*
 * Capital sigma (U+03A3) is matched as a two-character unit; being longer
 * than the bare asterisk rule it always wins, so the context-sensitive sigma
 * rules further down are not consulted for it.
 */
"*s"    { return toUnicode(0x03a3); }

/* Punctuation: "!" becomes a full stop, ":" becomes U+00B7 (middle dot). */
"!" 	{ return "."; }
":"   { return toUnicode(0x00B7); }  /* MPDL update  */

/*
 * Precomposed characters (U+1F00 - U+1FFC).
 *
 * Each rule maps one complete Betacode sequence to a single code point.
 * Diacritic keys as used in the patterns: ")" smooth breathing, "(" rough
 * breathing, backslash grave, "/" acute, "=" circumflex, "|" iota subscript,
 * "+" diaeresis, "^" breve, "_" macron. Patterns for capitals start with an
 * asterisk. Because the scanner prefers the longest match, these sequences
 * take precedence over the single-character rules that follow the table.
 */

/* alpha with breathing, optionally accented: U+1F00 - U+1F0F */
"a)"	{ return toUnicode(0x1F00); }
"a("	{ return toUnicode(0x1F01); }
"a)\\"	{ return toUnicode(0x1F02); }
"a(\\"	{ return toUnicode(0x1F03); }
"a)/"	{ return toUnicode(0x1F04); }
"a(/"	{ return toUnicode(0x1F05); }
"a)="	{ return toUnicode(0x1F06); }
"a(="	{ return toUnicode(0x1F07); }
"*)a"	{ return toUnicode(0x1F08); }
"*(a"	{ return toUnicode(0x1F09); }
"*)\\a"	{ return toUnicode(0x1F0A); }
"*(\\a"	{ return toUnicode(0x1F0B); }
"*)/a"	{ return toUnicode(0x1F0C); }
"*(/a"	{ return toUnicode(0x1F0D); }
"*)=a"	{ return toUnicode(0x1F0E); }
"*(=a"	{ return toUnicode(0x1F0F); }
/* epsilon with breathing, optionally grave or acute: U+1F10 - U+1F1D */
"e)"	{ return toUnicode(0x1F10); }
"e("	{ return toUnicode(0x1F11); }
"e)\\"	{ return toUnicode(0x1F12); }
"e(\\"	{ return toUnicode(0x1F13); }
"e)/"	{ return toUnicode(0x1F14); }
"e(/"	{ return toUnicode(0x1F15); }
"*)e"	{ return toUnicode(0x1F18); }
"*(e"	{ return toUnicode(0x1F19); }
"*)\\e"	{ return toUnicode(0x1F1A); }
"*(\\e"	{ return toUnicode(0x1F1B); }
"*)/e"	{ return toUnicode(0x1F1C); }
"*(/e"	{ return toUnicode(0x1F1D); }
/* eta with breathing, optionally accented: U+1F20 - U+1F2F */
"h)"	{ return toUnicode(0x1F20); }
"h("	{ return toUnicode(0x1F21); }
"h)\\"	{ return toUnicode(0x1F22); }
"h(\\"	{ return toUnicode(0x1F23); }
"h)/"	{ return toUnicode(0x1F24); }
"h(/"	{ return toUnicode(0x1F25); }
"h)="	{ return toUnicode(0x1F26); }
"h(="	{ return toUnicode(0x1F27); }
"*)h"	{ return toUnicode(0x1F28); }
"*(h"	{ return toUnicode(0x1F29); }
"*)\\h"	{ return toUnicode(0x1F2A); }
"*(\\h"	{ return toUnicode(0x1F2B); }
"*)/h"	{ return toUnicode(0x1F2C); }
"*(/h"	{ return toUnicode(0x1F2D); }
"*)=h"	{ return toUnicode(0x1F2E); }
"*(=h"	{ return toUnicode(0x1F2F); }
/* iota with breathing, optionally accented: U+1F30 - U+1F3F */
"i)"	{ return toUnicode(0x1F30); }
"i("	{ return toUnicode(0x1F31); }
"i)\\"	{ return toUnicode(0x1F32); }
"i(\\"	{ return toUnicode(0x1F33); }
"i)/"	{ return toUnicode(0x1F34); }
"i(/"	{ return toUnicode(0x1F35); }
"i)="	{ return toUnicode(0x1F36); }
"i(="	{ return toUnicode(0x1F37); }
"*)i"	{ return toUnicode(0x1F38); }
"*(i"	{ return toUnicode(0x1F39); }
"*)\\i"	{ return toUnicode(0x1F3A); }
"*(\\i"	{ return toUnicode(0x1F3B); }
"*)/i"	{ return toUnicode(0x1F3C); }
"*(/i"	{ return toUnicode(0x1F3D); }
"*)=i"	{ return toUnicode(0x1F3E); }
"*(=i"	{ return toUnicode(0x1F3F); }
/* omicron with breathing, optionally grave or acute: U+1F40 - U+1F4D */
"o)"	{ return toUnicode(0x1F40); }
"o("	{ return toUnicode(0x1F41); }
"o)\\"	{ return toUnicode(0x1F42); }
"o(\\"	{ return toUnicode(0x1F43); }
"o)/"	{ return toUnicode(0x1F44); }
"o(/"	{ return toUnicode(0x1F45); }
"*)o"	{ return toUnicode(0x1F48); }
"*(o"	{ return toUnicode(0x1F49); }
"*)\\o"	{ return toUnicode(0x1F4A); }
"*(\\o"	{ return toUnicode(0x1F4B); }
"*)/o"	{ return toUnicode(0x1F4C); }
"*(/o"	{ return toUnicode(0x1F4D); }
/*
 * upsilon with breathing, optionally accented: U+1F50 - U+1F5F.
 * For the capital only the rough-breathing forms are listed.
 */
"u)"	{ return toUnicode(0x1F50); }
"u("	{ return toUnicode(0x1F51); }
"u)\\"	{ return toUnicode(0x1F52); }
"u(\\"	{ return toUnicode(0x1F53); }
"u)/"	{ return toUnicode(0x1F54); }
"u(/"	{ return toUnicode(0x1F55); }
"u)="	{ return toUnicode(0x1F56); }
"u(="	{ return toUnicode(0x1F57); }
"*(u"	{ return toUnicode(0x1F59); }
"*(\\u"	{ return toUnicode(0x1F5B); }
"*(/u"	{ return toUnicode(0x1F5D); }
"*(=u"	{ return toUnicode(0x1F5F); }
/* omega with breathing, optionally accented: U+1F60 - U+1F6F */
"w)"	{ return toUnicode(0x1F60); }
"w("	{ return toUnicode(0x1F61); }
"w)\\"	{ return toUnicode(0x1F62); }
"w(\\"	{ return toUnicode(0x1F63); }
"w)/"	{ return toUnicode(0x1F64); }
"w(/"	{ return toUnicode(0x1F65); }
"w)="	{ return toUnicode(0x1F66); }
"w(="	{ return toUnicode(0x1F67); }
"*)w"	{ return toUnicode(0x1F68); }
"*(w"	{ return toUnicode(0x1F69); }
"*)\\w"	{ return toUnicode(0x1F6A); }
"*(\\w"	{ return toUnicode(0x1F6B); }
"*)/w"	{ return toUnicode(0x1F6C); }
"*(/w"	{ return toUnicode(0x1F6D); }
"*)=w"	{ return toUnicode(0x1F6E); }
"*(=w"	{ return toUnicode(0x1F6F); }
/* lowercase vowels with grave or acute only: U+1F70 - U+1F7D */
"a\\"	{ return toUnicode(0x1F70); }
"a/"	{ return toUnicode(0x1F71); }
"e\\"	{ return toUnicode(0x1F72); }
"e/"	{ return toUnicode(0x1F73); }
"h\\"	{ return toUnicode(0x1F74); }
"h/"	{ return toUnicode(0x1F75); }
"i\\"	{ return toUnicode(0x1F76); }
"i/"	{ return toUnicode(0x1F77); }
"o\\"	{ return toUnicode(0x1F78); }
"o/"	{ return toUnicode(0x1F79); }
"u\\"	{ return toUnicode(0x1F7A); }
"u/"	{ return toUnicode(0x1F7B); }
"w\\"	{ return toUnicode(0x1F7C); }
"w/"	{ return toUnicode(0x1F7D); }
/* alpha with breathing and iota subscript: U+1F80 - U+1F8F */
"a)|"	{ return toUnicode(0x1F80); }
"a(|"	{ return toUnicode(0x1F81); }
"a)\\|"	{ return toUnicode(0x1F82); }
"a(\\|"	{ return toUnicode(0x1F83); }
"a)/|"	{ return toUnicode(0x1F84); }
"a(/|"	{ return toUnicode(0x1F85); }
"a)=|"	{ return toUnicode(0x1F86); }
"a(=|"	{ return toUnicode(0x1F87); }
"*)|a"	{ return toUnicode(0x1F88); }
"*(|a"	{ return toUnicode(0x1F89); }
"*)\\|a"	{ return toUnicode(0x1F8A); }
"*(\\|a"	{ return toUnicode(0x1F8B); }
"*)/|a"	{ return toUnicode(0x1F8C); }
"*(/|a"	{ return toUnicode(0x1F8D); }
"*)=|a"	{ return toUnicode(0x1F8E); }
"*(=|a"	{ return toUnicode(0x1F8F); }
/* eta with breathing and iota subscript: U+1F90 - U+1F9F */
"h)|"	{ return toUnicode(0x1F90); }
"h(|"	{ return toUnicode(0x1F91); }
"h)\\|"	{ return toUnicode(0x1F92); }
"h(\\|"	{ return toUnicode(0x1F93); }
"h)/|"	{ return toUnicode(0x1F94); }
"h(/|"	{ return toUnicode(0x1F95); }
"h)=|"	{ return toUnicode(0x1F96); }
"h(=|"	{ return toUnicode(0x1F97); }
"*)|h"	{ return toUnicode(0x1F98); }
"*(|h"	{ return toUnicode(0x1F99); }
"*)\\|h"	{ return toUnicode(0x1F9A); }
"*(\\|h"	{ return toUnicode(0x1F9B); }
"*)/|h"	{ return toUnicode(0x1F9C); }
"*(/|h"	{ return toUnicode(0x1F9D); }
"*)=|h"	{ return toUnicode(0x1F9E); }
"*(=|h"	{ return toUnicode(0x1F9F); }
/* omega with breathing and iota subscript: U+1FA0 - U+1FAF */
"w)|"	{ return toUnicode(0x1FA0); }
"w(|"	{ return toUnicode(0x1FA1); }
"w)\\|"	{ return toUnicode(0x1FA2); }
"w(\\|"	{ return toUnicode(0x1FA3); }
"w)/|"	{ return toUnicode(0x1FA4); }
"w(/|"	{ return toUnicode(0x1FA5); }
"w)=|"	{ return toUnicode(0x1FA6); }
"w(=|"	{ return toUnicode(0x1FA7); }
"*)|w"	{ return toUnicode(0x1FA8); }
"*(|w"	{ return toUnicode(0x1FA9); }
"*)\\|w"	{ return toUnicode(0x1FAA); }
"*(\\|w"	{ return toUnicode(0x1FAB); }
"*)/|w"	{ return toUnicode(0x1FAC); }
"*(/|w"	{ return toUnicode(0x1FAD); }
"*)=|w"	{ return toUnicode(0x1FAE); }
"*(=|w"	{ return toUnicode(0x1FAF); }
/* alpha without breathing (length marks, accents, iota subscript): U+1FB0 - U+1FBC */
"a^"	{ return toUnicode(0x1FB0); }
"a_"	{ return toUnicode(0x1FB1); }
"a\\|"	{ return toUnicode(0x1FB2); }
"a|"	{ return toUnicode(0x1FB3); }
"a/|"	{ return toUnicode(0x1FB4); }
"a="	{ return toUnicode(0x1FB6); }
"a=|"	{ return toUnicode(0x1FB7); }
"*a^"	{ return toUnicode(0x1FB8); }
"*a_"	{ return toUnicode(0x1FB9); }
"*a\\"	{ return toUnicode(0x1FBA); }
"*a/"	{ return toUnicode(0x1FBB); }
"*a|"	{ return toUnicode(0x1FBC); }
/* eta without breathing, plus capital epsilon and eta with accent: U+1FC2 - U+1FCC */
"h\\|"	{ return toUnicode(0x1FC2); }
"h|"	{ return toUnicode(0x1FC3); }
"h/|"	{ return toUnicode(0x1FC4); }
"h="	{ return toUnicode(0x1FC6); }
"h=|"	{ return toUnicode(0x1FC7); }
"*e\\"	{ return toUnicode(0x1FC8); }
"*e/"	{ return toUnicode(0x1FC9); }
"*h\\"	{ return toUnicode(0x1FCA); }
"*h/"	{ return toUnicode(0x1FCB); }
"*h|"	{ return toUnicode(0x1FCC); }
/* iota without breathing (length marks, diaeresis, accents): U+1FD0 - U+1FDB */
"i^"	{ return toUnicode(0x1FD0); }
"i_"	{ return toUnicode(0x1FD1); }
"i+\\"	{ return toUnicode(0x1FD2); }
"i+/"	{ return toUnicode(0x1FD3); }
"i="	{ return toUnicode(0x1FD6); }
"i+="	{ return toUnicode(0x1FD7); }
"*i^"	{ return toUnicode(0x1FD8); }
"*i_"	{ return toUnicode(0x1FD9); }
"*i\\"	{ return toUnicode(0x1FDA); }
"*i/"	{ return toUnicode(0x1FDB); }
/* upsilon without breathing, and rho with breathing: U+1FE0 - U+1FEC */
"u^"	{ return toUnicode(0x1FE0); }
"u_"	{ return toUnicode(0x1FE1); }
"u+\\"	{ return toUnicode(0x1FE2); }
"u+/"	{ return toUnicode(0x1FE3); }
"r)"	{ return toUnicode(0x1FE4); }
"r("	{ return toUnicode(0x1FE5); }
"u="	{ return toUnicode(0x1FE6); }
"u+="	{ return toUnicode(0x1FE7); }
"*u^"	{ return toUnicode(0x1FE8); }
"*u_"	{ return toUnicode(0x1FE9); }
"*u\\"	{ return toUnicode(0x1FEA); }
"*u/"	{ return toUnicode(0x1FEB); }
"*(r"	{ return toUnicode(0x1FEC); }
/*
 * omega without breathing, plus capital omicron with accent: U+1FF2 - U+1FFC.
 * The last four rules are not in code-point order; since all patterns are
 * distinct literal strings, the order has no effect on matching.
 */
"w\\|"	{ return toUnicode(0x1FF2); }
"w|"	{ return toUnicode(0x1FF3); }
"w/|"	{ return toUnicode(0x1FF4); }
"*w\\"	{ return toUnicode(0x1FFA); }
"*w/"	{ return toUnicode(0x1FFB); }
"*w|"	{ return toUnicode(0x1FFC); }
"w="	{ return toUnicode(0x1FF6); }
"w=|"	{ return toUnicode(0x1FF7); }
"*o\\"	{ return toUnicode(0x1FF8); }
"*o/"	{ return toUnicode(0x1FF9); }

/*
 * Capital marker: a lone asterisk (one that is not the start of a longer
 * sequence with its own rule) produces no output. It only raises isUpper so
 * that the next letter handled by toUnicodeGreek() is emitted as a capital.
 * The action does not return, so scanning continues with the next match.
 * The action must be enclosed in braces to be a valid JFlex rule.
 */
"*"			{ isUpper = 1; }

/*
 * Stand-alone diacritics: a diacritic key that is not part of one of the
 * precomposed sequences is emitted as a combining mark.
 */
"\\"	{ return toUnicode(0x0300); }  /* combining grave accent */
"/"		{ return toUnicode(0x0301); }  /* combining acute accent */
"_"		{ return toUnicode(0x0304); }  /* combining macron */
"^"		{ return toUnicode(0x0306); }  /* combining breve */
"+"		{ return toUnicode(0x0308); }  /* combining diaeresis */
"="		{ return toUnicode(0x0302); }  /* combining circumflex accent */
")"		{ return toUnicode(0x0313); }  /* combining comma above */
"("		{ return toUnicode(0x0314); }  /* combining reversed comma above */
"?"		{ return toUnicode(0x0323); }  /* combining dot below */
"|"		{ return toUnicode(0x0345); }  /* combining Greek ypogegrammeni */

/*
 * Plain letters alpha to rho. Each rule passes the lowercase code point to
 * toUnicodeGreek(), which lowers it by 0x20 when a capital marker is pending.
 */
"a"		{ return toUnicodeGreek(0x03b1); }  /* alpha */
"b"		{ return toUnicodeGreek(0x03b2); }  /* beta */
"g"		{ return toUnicodeGreek(0x03b3); }  /* gamma */
"d"		{ return toUnicodeGreek(0x03b4); }  /* delta */
"e"		{ return toUnicodeGreek(0x03b5); }  /* epsilon */
"z"		{ return toUnicodeGreek(0x03b6); }  /* zeta */
"h"		{ return toUnicodeGreek(0x03b7); }  /* eta */
"q"		{ return toUnicodeGreek(0x03b8); }  /* theta */
"i"		{ return toUnicodeGreek(0x03b9); }  /* iota */
"k"		{ return toUnicodeGreek(0x03ba); }  /* kappa */
"l"		{ return toUnicodeGreek(0x03bb); }  /* lambda */
"m"		{ return toUnicodeGreek(0x03bc); }  /* mu */
"n"		{ return toUnicodeGreek(0x03bd); }  /* nu */
"c"		{ return toUnicodeGreek(0x03be); }  /* xi */
"o"		{ return toUnicodeGreek(0x03bf); }  /* omicron */
"p"		{ return toUnicodeGreek(0x03c0); }  /* pi */
"r"		{ return toUnicodeGreek(0x03c1); }  /* rho */

/*
 * Sigma. The form depends on what follows the "s": U+03C3 is the medial
 * form, U+03C2 the final form. The text after "/" in a pattern is lookahead
 * only and is not consumed. Among the rules that match, the one with the
 * longest overall match (lookahead included) wins; ties go to the rule
 * listed first.
 *
 * NOTE(review): an "s" that is the very last character of the input has no
 * following character to look at and therefore falls through to the last
 * rule (medial form) - confirm that callers terminate their input suitably.
 */

/* "s1" explicitly requests the medial form. */
"s1"	{ return toUnicode(0x03c3); } /* mdh 2002-01-07 */
/* Before a double hyphen: final form. */
"s"/\-\-	{ return toUnicode(0x03c2); }
/*
 * Before an escaped greater-than sign followed by a word character: medial
 * form. A stray brace that had slipped into the middle of this lookahead
 * expression has been removed.
 */
"s"/\&gt;[a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
/*
 * Before an escaped less-than sign: final form.
 * NOTE(review): unlike the other final-form rules this one goes through
 * toUnicodeGreek(), so a pending capital marker would shift the code point -
 * confirm whether that is intended.
 */
"s"/\&lt; { return toUnicodeGreek(0x03c2); }  /* MPDL update  */
/* Before a square bracket followed by a word character: medial form. */
"s"/[\[\]][a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
/* Before an optional "?" and then a character that cannot continue a word: final form. */
"s"/\??[^a-z0-9*=\/()\'\-\[\?] { return toUnicode(0x03c2); }
/* Anything else: medial form. */
"s"		{ return toUnicodeGreek(0x03c3); }

/*
 * Plain letters tau to omega, handled like the other plain letters: the
 * lowercase code point goes through toUnicodeGreek().
 */
"t"		{ return toUnicodeGreek(0x03c4); }  /* tau */
"u"		{ return toUnicodeGreek(0x03c5); }  /* upsilon */
"f"		{ return toUnicodeGreek(0x03c6); }  /* phi */
"x"		{ return toUnicodeGreek(0x03c7); }  /* chi */
"y"		{ return toUnicodeGreek(0x03c8); }  /* psi */
"w"		{ return toUnicodeGreek(0x03c9); }  /* omega */

/*
 * Entity-like sequences. Each of the first five rules accepts either "&" or
 * "_" as the leading character.
 */
/* vert / lpar / rpar are replaced by the literal character. */
[\&_]"vert;"   { return "|"; }
[\&_]"lpar;"   { return "("; }
[\&_]"rpar;"   { return ")"; }
/* lt / gt are always emitted in their "&"-escaped form, whichever prefix was read. */
[\_\&]"lt;"    { return "&lt;"; }
[\_\&]"gt;"    { return "&gt;"; }
/* Numeric reference 039 becomes a plain apostrophe. */
"&#039;"       { return "'"; }  /* MPDL update  */

/* Any other alphabetic entity is passed through unchanged rather than transliterated. */
"&"[a-zA-Z]+";"  { return yytext(); }

/*
 * Catch-all: any single character not handled by an earlier rule is returned
 * unchanged. The empty negated class matches every character, including all
 * line terminators. The previous pair of rules (dot plus newline) left
 * carriage return and the other Unicode line terminators unmatched on JFlex
 * versions where the dot excludes them, which makes the scanner fail on such
 * input.
 */
[^]     { return yytext(); }