Mercurial > hg > mpdl-group
annotate software/mpdl-services/mpiwg-mpdl-lt/bin/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexAll.lex @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
rev | line source |
---|---|
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 /* |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 * Normalization rules for all languages |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 * [this is a JFlex specification] |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 * |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 * Wolfgang Schmidle |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 * 2011-01-25 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 * |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 */ |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 %% |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 %public |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 %class MpdlNormalizerLexAll |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 %type java.lang.String |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17 %unicode |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 // %debug |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 %states LA, ZH |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 %{ |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 int cv = 0; // consonant = 1, vowel = 2, everything else = 0 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24 %} |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26 VOWEL=[AEIOUaeiouÆæęàèòùœ] |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 CONS=[BCDFGHKLMNPQRSTVWXZbcdfghklmnpqrstvwxzſß] |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 LR=[lLrR] |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 QUE=(que)? |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 END=\n |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 %% |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 <LA> { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 // 1. simple replacements |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38 // 1.1 single characters |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 ſ { cv = 1; return "s"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 ß { cv = 1; return "ss"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 [æę] { cv = 2; return "ae"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 Æ { cv = 2; return "AE"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 œ { cv = 2; return "oe"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 // 1.2 character combinations |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 ij { cv = 2; return "ii"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 // 2. diacritics |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 // 2.1 superfluous diacritics in single words |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 ^ hîc {END} { return "hic"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 // 2.2 superfluous diacritics at the end of a word |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 // 2.2.1 common cases |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 à / {QUE} {END} { return "a"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 àm / {QUE} {END} { return "am"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 às / {QUE} {END} { return "as"; } // (-àsque will likely never occur) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57 // à / [ms]? {QUE} {END} { return "a"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 è / {QUE} {END} { return "e"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 ò / {QUE} {END} { return "o"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 òd / {QUE} {END} { return "od"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61 ùm / {QUE} {END} { return "um"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 ùs / {QUE} {END} { return "us"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 // 2.3 superfluous diacritics within a word |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 // 2.3.1 common cases |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 aë { cv = 2; return "ae"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 oë { cv = 2; return "oe"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 // 2.3.2 rare cases |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 oï { cv = 2; return "oi"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 uï { cv = 2; return "ui"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 // 2.3.3 extremely rare cases |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 uü { cv = 2; return "uu"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 // 3. rules for u and v |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76 // 3.1 rules for u |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 u/{VOWEL} { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79 switch(cv) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 case 2: return "v"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 default: cv = 2; return "u"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 U/{VOWEL} { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85 switch(cv) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 case 2: return "V"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 default: cv = 2; return "U"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 // 3.2 rules for v |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 qv { cv = 1; return "qu"; } // the replaced v still counts as consonant |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 Qv { cv = 1; return "Qu"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 QV { cv = 1; return "QU"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 {LR}v { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 switch(cv) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 case 1: return yytext().replace("v", "u"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 default: cv = 1; return yytext(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 {LR}V { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 switch(cv) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 case 1: return yytext().replace("V", "U"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 default: cv = 1; return yytext(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110 v/{CONS} { cv = 1; return "u"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 V/{CONS} { cv = 1; return "U"; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 // default |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116 {VOWEL} { cv = 2; return yytext(); } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 {CONS} { cv = 1; return yytext(); } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 \n { cv = 0; return ""; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119 . { cv = 0; return yytext(); } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123 <ZH> { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125 // Codepoint < FFFF |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127 竒 { return "奇"; } // 7AD2 --> 5947 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128 旹 { return "時"; } // 65F9 --> 6642 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 歴 { return "歷"; } // 6B74 --> 6B77 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130 精 { return "精"; } // FA1D --> 7CBE (FA1D is a compatibility ideograph) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 // Codepoint > FFFF |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
134 庶 { return "庶"; } // 2F88D --> 5EB6 (2F88D is a compatibility ideograph) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
135 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
136 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
137 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
138 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
139 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
140 // default (can be overridden by individual languages) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
141 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
142 \n { return ""; } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
143 . { return yytext(); } |