comparison software/mpdl-services/mpiwg-mpdl-lt/bin/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexTemplate.lex @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents
children
comparison
equal deleted inserted replaced
22:6a45a982c333 23:e845310098ba
1 /*
2 * Template for normalization rules
3 * [this is a JFlex specification]
4 *
5 * Wolfgang Schmidle
6 * version 2011-07-12
7 *
8 */
9
10 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
11
12 %%
13
14 %public
15 %class MpdlNormalizerLexTemplate
16 %type java.lang.String
17 %unicode
18
19 // Language: list of ISO codes
20
21 %states DISP, DICT, SEARCH
22
23 %{
24 private String original = ""; // concatenation of every matched text that went through add()
25 private String normalized = ""; // concatenation of the replacement strings passed to add()
26 private int problem = 0; // set to 1 by the "@" rule; the {END} rules switch on it to pick the return value
27
28 private void add (String norm) { // records one match: its raw text and its normalized form, appended in step
29 original += yytext(); // yytext() is the text matched by the rule whose action called add()
30 normalized += norm; // norm is the replacement that rule chose for the matched text
31 }
32
33 private static final String LB = "[\u002d\u00ad] "; // Java regex: hyphen (U+002D) or soft hyphen (U+00AD) followed by one space; mirrors the {LB} macro and is used by the DICT and SEARCH {END} rules to strip such sequences
34 %}
35
36 hyphen = [-\u{00ad}] // hyphen and soft hyphen
37 LB = {hyphen} \u0020 // a hyphen or soft hyphen followed by a single space (U+0020)
38 // lb = ({hyphen} \u0020)?
39
40 END = \n // newline; matching it fires the per-state {END} rules that return the result
41
42 %%
43
44 <DISP, DICT, SEARCH> {
45
46 ſ { add("s"); } // sample rule: long s is recorded as "s" in the normalized text in all three states
47
48 }
49
50
51 // default rules
52
53 @ { problem = 1; add(yytext()); } // "@" marks the input as problematic; the character itself is copied unchanged
54 {LB} { add(yytext()); } // hyphen/soft hyphen + space is copied unchanged here (DICT and SEARCH remove it from the normalized text at {END})
55 . { add(yytext()); } // fallback: any other character matched by "." is copied unchanged
56
57
58 // at the end, determine which string to return
59
60 <DISP> {
61
62 {END} { // newline reached: choose what to hand back to the caller
63 switch (problem) {
64 case 1: return original; // "@" was seen: return the raw text collected in original
65 default: return normalized; // otherwise return the normalized text (hyphen + space sequences are kept)
66 }
67 }
68 }
69
70 <DICT> {
71
72 {END} { // newline reached: choose what to hand back to the caller
73 switch (problem) {
74 case 1: return ""; // "@" was seen: return the empty string
75 default: return normalized.replaceAll(LB, ""); // normalized text with every hyphen/soft hyphen + space sequence removed
76 }
77 }
78 }
79
80 <SEARCH> {
81
82 {END} { // newline reached: choose what to hand back to the caller
83 switch (problem) {
84 case 1: return original; // "@" was seen: return the raw text collected in original (not lower-cased)
85 default: return normalized.replaceAll(LB, "").toLowerCase(java.util.Locale.ROOT); // strip hyphen/soft hyphen + space, then lower-case with Locale.ROOT so the result does not depend on the JVM default locale
86 }
87 }
88 }
89