Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexAR.lex @ 9:1ec29fdd0db8
neue .lex Dateien für Normalisierung / externe Objekte update
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 22 Feb 2011 16:03:45 +0100 |
parents | |
children | 5df60f24e997 |
comparison
equal
deleted
inserted
replaced
8:d2a1c14fde31 | 9:1ec29fdd0db8 |
---|---|
1 /* | |
2 * Normalization rules for Arabic text | |
3 * [this is a JFlex specification] | |
4 * | |
5 * Wolfgang Schmidle | |
6 * version 0.96 | |
7 * 2011-02-21 | |
8 * | |
9 */ | |
10 | |
11 package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang; | |
12 | |
13 %% | |
14 | |
15 %public | |
16 %class MpdlNormalizerLexAR | |
17 %type java.lang.String | |
18 %unicode | |
19 | |
20 // Arabic: ar | |
21 | |
22 %states DISP, DICT, SEARCH | |
23 | |
24 %{ | |
25 private String original = ""; /* raw matched input, concatenated by add() */ | |
26 private String normalized = ""; /* normalized output, concatenated by add() */ | |
27 private int problem = 0; /* set to 1 by the '@' rule; the END rules switch on it to pick the return value */ | |
28 | |
29 private void add (String norm) { /* record one match: its raw text and the normalized form passed in as norm */ | |
30 original += yytext(); /* the text of the current match, unchanged */ | |
31 normalized += norm; /* the replacement chosen by the calling rule */ | |
32 } | |
33 %} | |
34 | |
35 END = \n | |
36 | |
37 %% | |
38 | |
39 @ { /* '@' flags the input as problematic; the character itself is copied through unchanged */ problem = 1; add(yytext()); } | |
40 . { /* fallback: every other character matched by '.' is copied through unchanged */ add(yytext()); } | |
41 | |
42 | |
43 <DISP, SEARCH> { | |
44 | |
45 {END} { /* END is \n: the terminator was reached, so hand the result back to the caller */ | |
46 switch (problem) { | |
47 case 1: return original; /* an '@' was seen: return the input exactly as it was matched */ | |
48 default: return normalized; /* no problem flagged: return the normalized text */ | |
49 } | |
50 } | |
51 } | |
52 | |
53 <DICT> { | |
54 | |
55 {END} { /* END is \n: the terminator was reached, so hand the result back to the caller */ | |
56 switch (problem) { | |
57 case 1: return ""; /* an '@' was seen: in the DICT state an empty string is returned instead of the input */ | |
58 default: return normalized; /* no problem flagged: return the normalized text */ | |
59 } | |
60 } | |
61 } | |
62 | |
63 | |
64 /* | |
65 | |
66 Assumptions: | |
67 - the routine is called word by word, with a \n at the end of the string | |
68 - words that were split across line breaks have already been rejoined | |
69 | |
70 TO DO: | |
71 | |
72 AR: still missing | |
73 | |
74 */ |