Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexNL.lex @ 9:1ec29fdd0db8
neue .lex Dateien für Normalisierung / externe Objekte update
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 22 Feb 2011 16:03:45 +0100 |
parents | |
children | 5df60f24e997 |
comparison
equal
deleted
inserted
replaced
8:d2a1c14fde31 | 9:1ec29fdd0db8 |
---|---|
1 /* | |
2 * Normalization rules for Dutch text | |
3 * [this is a JFlex specification] | |
4 * | |
5 * Wolfgang Schmidle | |
6 * version 0.96 | |
7 * 2011-02-21 | |
8 * | |
9 */ | |
10 | |
11 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | |
12 | |
13 %% | |
14 | |
15 %public | |
16 %class MpdlNormalizerLexNL | |
17 %type java.lang.String | |
18 %unicode | |
19 | |
20 // Dutch: nl | |
21 | |
22 %states DISP, DICT, SEARCH | |
23 | |
// User code copied into the generated scanner class: accumulators for one scan plus the helper the rules call.
24 %{ | |
// Concatenation of the matched input text, exactly as scanned.
25 private String original = ""; | |
// Concatenation of the replacement text supplied by the rules.
26 private String normalized = ""; | |
// Set to 1 by the '@' rule; inspected by the END rules when choosing the return value.
27 private int problem = 0; | |
28 | |
// Appends the current match (yytext()) to 'original' and the given 'norm' to 'normalized'.
29 private void add (String norm) { | |
30 original += yytext(); | |
31 normalized += norm; | |
32 } | |
33 %} | |
34 | |
// Macro: END is a single newline; the END rules below return the scan result when it is matched.
35 END = \n | |
36 | |
37 %% | |
38 | |
// Rules active in the DISP, DICT and SEARCH states: the long s (ſ) is normalized to a plain "s".
39 <DISP, DICT, SEARCH> { | |
40 | |
41 ſ { add("s"); } | |
42 | |
43 } | |
44 | |
45 | |
46 // default | |
47 | |
// '@' is copied through unchanged but flags the input (problem = 1).
// NOTE(review): why '@' marks a problem is not visible in this file; confirm with the caller.
48 @ { problem = 1; add(yytext()); } | |
// Any other single character (JFlex '.' does not match a newline) is copied through unchanged.
49 . { add(yytext()); } | |
50 | |
51 | |
// End of input in the DISP and SEARCH states: if a problem was flagged, return the
// unmodified input text; otherwise return the normalized text.
52 <DISP, SEARCH> { | |
53 | |
54 {END} { | |
55 switch (problem) { | |
56 case 1: return original; | |
57 default: return normalized; | |
58 } | |
59 } | |
60 } | |
61 | |
// End of input in the DICT state: if a problem was flagged, return the empty string;
// otherwise return the normalized text.
62 <DICT> { | |
63 | |
64 {END} { | |
65 switch (problem) { | |
66 case 1: return ""; | |
67 default: return normalized; | |
68 } | |
69 } | |
70 } | |
71 | |
72 | |
73 /* | |
74 | |
75 Assumptions: | |
76 - the routine is called word by word, with a \n at the end of the string | |
77 - words split across line breaks have already been rejoined | |
78 | |
79 TO DO: | |
80 | |
81 NL: complete? | |
82 | |
83 */ |