comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex.old @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.lt.general;
2
3 %%
4 %{
5 /*
6 * Betacode to Unicode conversion
7 */
8
9 private int isUpper = 0;
10
11 private String toUnicodeGreek(int in) {
12 String retStr = toUnicode(in - (isUpper * 0x0020));
13 isUpper = 0;
14 return retStr;
15 }
16
17 private String toUnicode(int in) {
18 char c = (char) in;
19 Character ch = new Character(c);
20 String retString = ch.toString();
21 return retString;
22 }
23
24 %}
25
26 %class Betacode2UnicodeLex
27 %public
28 %type java.lang.String
29 %unicode
30 %%
31
32
/* Anything from "<" up to the next ">" (at least one character in between)
   is returned unchanged, so its content is not run through the rules below. */
33 "<"[^>]+">" { return yytext(); }
34
/* "j" and "v" are returned as the Latin letters h and f; with a leading "*"
   the capital Latin letters H and F are returned. */
35 "*j" { return "H"; }
36 "j" { return "h"; }
37 "*v" { return "F"; }
38 "v" { return "f"; }
/* "*s" is returned directly as U+03A3 (Greek capital sigma), bypassing the
   context-dependent sigma rules further down. */
39 "*s" { return toUnicode(0x03a3); }
40
/* Punctuation: "!" becomes a full stop, ":" becomes U+00B7 (middle dot). */
41 "!" { return "."; }
42 ":" { return toUnicode(0x00B7); } /* MPDL update */
43
/*
 * Precomposed polytonic Greek (values in the range U+1F00 - U+1FFC).
 *
 * Each rule matches a vowel (or rho) together with its marks and returns a
 * single precomposed character. Lower-case patterns are written letter first,
 * marks after ("a)/"); capital patterns with a breathing are written
 * "*" + marks + letter ("*)/a"). The scanner prefers the longest match, so
 * these multi-character patterns take precedence over the single-character
 * letter and diacritic rules that follow this table.
 *
 * Marks used in the patterns: ")" and "(" breathings, "\\" grave, "/" acute,
 * "=" circumflex, "|" iota subscript, "+" diaeresis, "^" breve, "_" macron.
 */
/* Vowels with a breathing, optionally followed by an accent. */
44 "a)" { return toUnicode(0x1F00); }
45 "a(" { return toUnicode(0x1F01); }
46 "a)\\" { return toUnicode(0x1F02); }
47 "a(\\" { return toUnicode(0x1F03); }
48 "a)/" { return toUnicode(0x1F04); }
49 "a(/" { return toUnicode(0x1F05); }
50 "a)=" { return toUnicode(0x1F06); }
51 "a(=" { return toUnicode(0x1F07); }
52 "*)a" { return toUnicode(0x1F08); }
53 "*(a" { return toUnicode(0x1F09); }
54 "*)\\a" { return toUnicode(0x1F0A); }
55 "*(\\a" { return toUnicode(0x1F0B); }
56 "*)/a" { return toUnicode(0x1F0C); }
57 "*(/a" { return toUnicode(0x1F0D); }
58 "*)=a" { return toUnicode(0x1F0E); }
59 "*(=a" { return toUnicode(0x1F0F); }
60 "e)" { return toUnicode(0x1F10); }
61 "e(" { return toUnicode(0x1F11); }
62 "e)\\" { return toUnicode(0x1F12); }
63 "e(\\" { return toUnicode(0x1F13); }
64 "e)/" { return toUnicode(0x1F14); }
65 "e(/" { return toUnicode(0x1F15); }
66 "*)e" { return toUnicode(0x1F18); }
67 "*(e" { return toUnicode(0x1F19); }
68 "*)\\e" { return toUnicode(0x1F1A); }
69 "*(\\e" { return toUnicode(0x1F1B); }
70 "*)/e" { return toUnicode(0x1F1C); }
71 "*(/e" { return toUnicode(0x1F1D); }
72 "h)" { return toUnicode(0x1F20); }
73 "h(" { return toUnicode(0x1F21); }
74 "h)\\" { return toUnicode(0x1F22); }
75 "h(\\" { return toUnicode(0x1F23); }
76 "h)/" { return toUnicode(0x1F24); }
77 "h(/" { return toUnicode(0x1F25); }
78 "h)=" { return toUnicode(0x1F26); }
79 "h(=" { return toUnicode(0x1F27); }
80 "*)h" { return toUnicode(0x1F28); }
81 "*(h" { return toUnicode(0x1F29); }
82 "*)\\h" { return toUnicode(0x1F2A); }
83 "*(\\h" { return toUnicode(0x1F2B); }
84 "*)/h" { return toUnicode(0x1F2C); }
85 "*(/h" { return toUnicode(0x1F2D); }
86 "*)=h" { return toUnicode(0x1F2E); }
87 "*(=h" { return toUnicode(0x1F2F); }
88 "i)" { return toUnicode(0x1F30); }
89 "i(" { return toUnicode(0x1F31); }
90 "i)\\" { return toUnicode(0x1F32); }
91 "i(\\" { return toUnicode(0x1F33); }
92 "i)/" { return toUnicode(0x1F34); }
93 "i(/" { return toUnicode(0x1F35); }
94 "i)=" { return toUnicode(0x1F36); }
95 "i(=" { return toUnicode(0x1F37); }
96 "*)i" { return toUnicode(0x1F38); }
97 "*(i" { return toUnicode(0x1F39); }
98 "*)\\i" { return toUnicode(0x1F3A); }
99 "*(\\i" { return toUnicode(0x1F3B); }
100 "*)/i" { return toUnicode(0x1F3C); }
101 "*(/i" { return toUnicode(0x1F3D); }
102 "*)=i" { return toUnicode(0x1F3E); }
103 "*(=i" { return toUnicode(0x1F3F); }
104 "o)" { return toUnicode(0x1F40); }
105 "o(" { return toUnicode(0x1F41); }
106 "o)\\" { return toUnicode(0x1F42); }
107 "o(\\" { return toUnicode(0x1F43); }
108 "o)/" { return toUnicode(0x1F44); }
109 "o(/" { return toUnicode(0x1F45); }
110 "*)o" { return toUnicode(0x1F48); }
111 "*(o" { return toUnicode(0x1F49); }
112 "*)\\o" { return toUnicode(0x1F4A); }
113 "*(\\o" { return toUnicode(0x1F4B); }
114 "*)/o" { return toUnicode(0x1F4C); }
115 "*(/o" { return toUnicode(0x1F4D); }
116 "u)" { return toUnicode(0x1F50); }
117 "u(" { return toUnicode(0x1F51); }
118 "u)\\" { return toUnicode(0x1F52); }
119 "u(\\" { return toUnicode(0x1F53); }
120 "u)/" { return toUnicode(0x1F54); }
121 "u(/" { return toUnicode(0x1F55); }
122 "u)=" { return toUnicode(0x1F56); }
123 "u(=" { return toUnicode(0x1F57); }
/* Capital upsilon appears here with "(" only; no "*)…u" patterns are listed. */
124 "*(u" { return toUnicode(0x1F59); }
125 "*(\\u" { return toUnicode(0x1F5B); }
126 "*(/u" { return toUnicode(0x1F5D); }
127 "*(=u" { return toUnicode(0x1F5F); }
128 "w)" { return toUnicode(0x1F60); }
129 "w(" { return toUnicode(0x1F61); }
130 "w)\\" { return toUnicode(0x1F62); }
131 "w(\\" { return toUnicode(0x1F63); }
132 "w)/" { return toUnicode(0x1F64); }
133 "w(/" { return toUnicode(0x1F65); }
134 "w)=" { return toUnicode(0x1F66); }
135 "w(=" { return toUnicode(0x1F67); }
136 "*)w" { return toUnicode(0x1F68); }
137 "*(w" { return toUnicode(0x1F69); }
138 "*)\\w" { return toUnicode(0x1F6A); }
139 "*(\\w" { return toUnicode(0x1F6B); }
140 "*)/w" { return toUnicode(0x1F6C); }
141 "*(/w" { return toUnicode(0x1F6D); }
142 "*)=w" { return toUnicode(0x1F6E); }
143 "*(=w" { return toUnicode(0x1F6F); }
/* Lower-case vowels carrying only a grave or acute accent. */
144 "a\\" { return toUnicode(0x1F70); }
145 "a/" { return toUnicode(0x1F71); }
146 "e\\" { return toUnicode(0x1F72); }
147 "e/" { return toUnicode(0x1F73); }
148 "h\\" { return toUnicode(0x1F74); }
149 "h/" { return toUnicode(0x1F75); }
150 "i\\" { return toUnicode(0x1F76); }
151 "i/" { return toUnicode(0x1F77); }
152 "o\\" { return toUnicode(0x1F78); }
153 "o/" { return toUnicode(0x1F79); }
154 "u\\" { return toUnicode(0x1F7A); }
155 "u/" { return toUnicode(0x1F7B); }
156 "w\\" { return toUnicode(0x1F7C); }
157 "w/" { return toUnicode(0x1F7D); }
/* alpha, eta and omega with breathing, optional accent and iota subscript. */
158 "a)|" { return toUnicode(0x1F80); }
159 "a(|" { return toUnicode(0x1F81); }
160 "a)\\|" { return toUnicode(0x1F82); }
161 "a(\\|" { return toUnicode(0x1F83); }
162 "a)/|" { return toUnicode(0x1F84); }
163 "a(/|" { return toUnicode(0x1F85); }
164 "a)=|" { return toUnicode(0x1F86); }
165 "a(=|" { return toUnicode(0x1F87); }
166 "*)|a" { return toUnicode(0x1F88); }
167 "*(|a" { return toUnicode(0x1F89); }
168 "*)\\|a" { return toUnicode(0x1F8A); }
169 "*(\\|a" { return toUnicode(0x1F8B); }
170 "*)/|a" { return toUnicode(0x1F8C); }
171 "*(/|a" { return toUnicode(0x1F8D); }
172 "*)=|a" { return toUnicode(0x1F8E); }
173 "*(=|a" { return toUnicode(0x1F8F); }
174 "h)|" { return toUnicode(0x1F90); }
175 "h(|" { return toUnicode(0x1F91); }
176 "h)\\|" { return toUnicode(0x1F92); }
177 "h(\\|" { return toUnicode(0x1F93); }
178 "h)/|" { return toUnicode(0x1F94); }
179 "h(/|" { return toUnicode(0x1F95); }
180 "h)=|" { return toUnicode(0x1F96); }
181 "h(=|" { return toUnicode(0x1F97); }
182 "*)|h" { return toUnicode(0x1F98); }
183 "*(|h" { return toUnicode(0x1F99); }
184 "*)\\|h" { return toUnicode(0x1F9A); }
185 "*(\\|h" { return toUnicode(0x1F9B); }
186 "*)/|h" { return toUnicode(0x1F9C); }
187 "*(/|h" { return toUnicode(0x1F9D); }
188 "*)=|h" { return toUnicode(0x1F9E); }
189 "*(=|h" { return toUnicode(0x1F9F); }
190 "w)|" { return toUnicode(0x1FA0); }
191 "w(|" { return toUnicode(0x1FA1); }
192 "w)\\|" { return toUnicode(0x1FA2); }
193 "w(\\|" { return toUnicode(0x1FA3); }
194 "w)/|" { return toUnicode(0x1FA4); }
195 "w(/|" { return toUnicode(0x1FA5); }
196 "w)=|" { return toUnicode(0x1FA6); }
197 "w(=|" { return toUnicode(0x1FA7); }
198 "*)|w" { return toUnicode(0x1FA8); }
199 "*(|w" { return toUnicode(0x1FA9); }
200 "*)\\|w" { return toUnicode(0x1FAA); }
201 "*(\\|w" { return toUnicode(0x1FAB); }
202 "*)/|w" { return toUnicode(0x1FAC); }
203 "*(/|w" { return toUnicode(0x1FAD); }
204 "*)=|w" { return toUnicode(0x1FAE); }
205 "*(=|w" { return toUnicode(0x1FAF); }
/* Remaining forms without a breathing: breve/macron, circumflex, iota
   subscript, diaeresis combinations, rho with breathing, and capitals
   written "*" + letter + mark. */
206 "a^" { return toUnicode(0x1FB0); }
207 "a_" { return toUnicode(0x1FB1); }
208 "a\\|" { return toUnicode(0x1FB2); }
209 "a|" { return toUnicode(0x1FB3); }
210 "a/|" { return toUnicode(0x1FB4); }
211 "a=" { return toUnicode(0x1FB6); }
212 "a=|" { return toUnicode(0x1FB7); }
213 "*a^" { return toUnicode(0x1FB8); }
214 "*a_" { return toUnicode(0x1FB9); }
215 "*a\\" { return toUnicode(0x1FBA); }
216 "*a/" { return toUnicode(0x1FBB); }
217 "*a|" { return toUnicode(0x1FBC); }
218 "h\\|" { return toUnicode(0x1FC2); }
219 "h|" { return toUnicode(0x1FC3); }
220 "h/|" { return toUnicode(0x1FC4); }
221 "h=" { return toUnicode(0x1FC6); }
222 "h=|" { return toUnicode(0x1FC7); }
223 "*e\\" { return toUnicode(0x1FC8); }
224 "*e/" { return toUnicode(0x1FC9); }
225 "*h\\" { return toUnicode(0x1FCA); }
226 "*h/" { return toUnicode(0x1FCB); }
227 "*h|" { return toUnicode(0x1FCC); }
228 "i^" { return toUnicode(0x1FD0); }
229 "i_" { return toUnicode(0x1FD1); }
230 "i+\\" { return toUnicode(0x1FD2); }
231 "i+/" { return toUnicode(0x1FD3); }
232 "i=" { return toUnicode(0x1FD6); }
233 "i+=" { return toUnicode(0x1FD7); }
234 "*i^" { return toUnicode(0x1FD8); }
235 "*i_" { return toUnicode(0x1FD9); }
236 "*i\\" { return toUnicode(0x1FDA); }
237 "*i/" { return toUnicode(0x1FDB); }
238 "u^" { return toUnicode(0x1FE0); }
239 "u_" { return toUnicode(0x1FE1); }
240 "u+\\" { return toUnicode(0x1FE2); }
241 "u+/" { return toUnicode(0x1FE3); }
242 "r)" { return toUnicode(0x1FE4); }
243 "r(" { return toUnicode(0x1FE5); }
244 "u=" { return toUnicode(0x1FE6); }
245 "u+=" { return toUnicode(0x1FE7); }
246 "*u^" { return toUnicode(0x1FE8); }
247 "*u_" { return toUnicode(0x1FE9); }
248 "*u\\" { return toUnicode(0x1FEA); }
249 "*u/" { return toUnicode(0x1FEB); }
250 "*(r" { return toUnicode(0x1FEC); }
251 "w\\|" { return toUnicode(0x1FF2); }
252 "w|" { return toUnicode(0x1FF3); }
253 "w/|" { return toUnicode(0x1FF4); }
254 "*w\\" { return toUnicode(0x1FFA); }
255 "*w/" { return toUnicode(0x1FFB); }
256 "*w|" { return toUnicode(0x1FFC); }
257 "w=" { return toUnicode(0x1FF6); }
258 "w=|" { return toUnicode(0x1FF7); }
259 "*o\\" { return toUnicode(0x1FF8); }
260 "*o/" { return toUnicode(0x1FF9); }
261
/*
 * Capital marker: a "*" that was not consumed by one of the longer "*..."
 * patterns sets isUpper and produces no output (the action does not return,
 * so scanning continues). toUnicodeGreek() applies and clears the flag on
 * the next base letter. The action is wrapped in braces, as the rule syntax
 * requires.
 */
262 "*" { isUpper = 1; }
263
/*
 * Diacritics that were not absorbed into a precomposed pattern are emitted as
 * combining characters:
 *   \  U+0300 grave            /  U+0301 acute
 *   _  U+0304 macron           ^  U+0306 breve
 *   +  U+0308 diaeresis        =  U+0302 circumflex
 *   )  U+0313 comma above      (  U+0314 reversed comma above
 *   ?  U+0323 dot below        |  U+0345 ypogegrammeni (iota subscript)
 * NOTE(review): "=" maps to U+0302 rather than U+0342 (combining Greek
 * perispomeni) - confirm this is intended.
 */
264 "\\" { return toUnicode(0x0300); }
265 "/" { return toUnicode(0x0301); }
266 "_" { return toUnicode(0x0304); }
267 "^" { return toUnicode(0x0306); }
268 "+" { return toUnicode(0x0308); }
269 "=" { return toUnicode(0x0302); }
270 ")" { return toUnicode(0x0313); }
271 "(" { return toUnicode(0x0314); }
272 "?" { return toUnicode(0x0323); }
273 "|" { return toUnicode(0x0345); }
274
/*
 * Plain letters alpha to rho (U+03B1 - U+03C1). Each Latin key is mapped to
 * the lower-case Greek code point and passed through toUnicodeGreek(), which
 * subtracts 0x20 when a preceding "*" has set isUpper.
 * Less obvious keys: h = eta, q = theta, c = xi.
 */
275 "a" { return toUnicodeGreek(0x03b1); }
276 "b" { return toUnicodeGreek(0x03b2); }
277 "g" { return toUnicodeGreek(0x03b3); }
278 "d" { return toUnicodeGreek(0x03b4); }
279 "e" { return toUnicodeGreek(0x03b5); }
280 "z" { return toUnicodeGreek(0x03b6); }
281 "h" { return toUnicodeGreek(0x03b7); }
282 "q" { return toUnicodeGreek(0x03b8); }
283 "i" { return toUnicodeGreek(0x03b9); }
284 "k" { return toUnicodeGreek(0x03ba); }
285 "l" { return toUnicodeGreek(0x03bb); }
286 "m" { return toUnicodeGreek(0x03bc); }
287 "n" { return toUnicodeGreek(0x03bd); }
288 "c" { return toUnicodeGreek(0x03be); }
289 "o" { return toUnicodeGreek(0x03bf); }
290 "p" { return toUnicodeGreek(0x03c0); }
291 "r" { return toUnicodeGreek(0x03c1); }
292
/*
 * Sigma. The form depends on what follows the "s"; the part after "/" is
 * trailing context and is not consumed.
 *   U+03C3 = small sigma (medial form), U+03C2 = small final sigma.
 *
 *   "s1"                               -> U+03C3, explicit
 *   s before "--"                      -> U+03C2
 *   s before "&gt;" + word character   -> U+03C3
 *   s before "&lt;"                    -> U+03C2
 *   s before "[" or "]" + word char    -> U+03C3
 *   s before optional "?" and a character outside the listed set -> U+03C2
 *   any other s (including one with nothing after it) -> U+03C3
 *
 * The "&gt;" rule previously had a stray " }" between "\&gt;" and the
 * character class, which ended the pattern early; it now reads like the
 * bracket rule two lines below it.
 * NOTE(review): the "&lt;" rule sends U+03C2 through toUnicodeGreek() while
 * the other final-sigma rules use toUnicode() - confirm which is intended.
 */
293 "s1" { return toUnicode(0x03c3); } /* mdh 2002-01-07 */
294 "s"/\-\- { return toUnicode(0x03c2); }
295 "s"/\&gt;[a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
296 "s"/\&lt; { return toUnicodeGreek(0x03c2); } /* MPDL update */
297 "s"/[\[\]][a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
298 "s"/\??[^a-z0-9*=\/()\'\-\[\?] { return toUnicode(0x03c2); }
299 "s" { return toUnicodeGreek(0x03c3); }
300
/*
 * Plain letters tau to omega (U+03C4 - U+03C9), converted through
 * toUnicodeGreek() so that a pending "*" yields the capital letter.
 * Less obvious keys: f = phi, x = chi, y = psi, w = omega.
 */
301 "t" { return toUnicodeGreek(0x03c4); }
302 "u" { return toUnicodeGreek(0x03c5); }
303 "f" { return toUnicodeGreek(0x03c6); }
304 "x" { return toUnicodeGreek(0x03c7); }
305 "y" { return toUnicodeGreek(0x03c8); }
306 "w" { return toUnicodeGreek(0x03c9); }
307
/*
 * Named entities, accepted with either "&" or "_" as the leading character.
 * vert/lpar/rpar are replaced by the literal character; lt/gt are returned
 * in the "&...;" form.
 */
308 [\&_]"vert;" { return "|"; }
309 [\&_]"lpar;" { return "("; }
310 [\&_]"rpar;" { return ")"; }
311 [\_\&]"lt;" { return "&lt;"; }
312 [\_\&]"gt;" { return "&gt;"; }
/* The numeric entity for the apostrophe is returned as a plain "'". */
313 "&#039;" { return "'"; } /* MPDL update */
314
/* Any other alphabetic entity ("&" letters ";") is returned unchanged. */
315 "&"[a-zA-Z]+";" { return yytext(); }
316
/* Fallback: every character not matched above, and newlines, are returned
   unchanged. */
317 . { return yytext(); }
318 \n { return yytext(); }