Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlNormalizerLexTemplate.lex @ 14:5df60f24e997
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 29 Aug 2011 17:40:02 +0200 |
parents | |
children |
line wrap: on
line source
/*
 * Template for normalization rules
 * [this is a JFlex specification]
 *
 * Wolfgang Schmidle
 * version 2011-07-12
 *
 * The generated scanner reads characters up to a newline and then returns
 * one String. Which String is returned depends on the lexical state the
 * scanner is in when the newline is matched:
 *
 *   DISP    normalized text (or the untouched input if a problem was seen)
 *   DICT    normalized text with "hyphen + space" sequences removed
 *           (or the empty string if a problem was seen)
 *   SEARCH  like DICT, additionally lower-cased
 *           (or the untouched input if a problem was seen)
 *
 * The newline is only matched inside DISP, DICT or SEARCH and nothing in
 * this file enters those states, so presumably the caller selects one with
 * yybegin() before calling yylex() -- TODO confirm against the caller.
 */

package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;

%%

%public
%class MpdlNormalizerLexTemplate
%type java.lang.String
%unicode

// Language: list of ISO codes

%states DISP, DICT, SEARCH

%{
	/** Every character consumed so far, exactly as it appeared in the input. */
	private final StringBuilder original = new StringBuilder();

	/** The replacement text collected for the characters consumed so far. */
	private final StringBuilder normalized = new StringBuilder();

	/** 0 while nothing suspicious was seen; set to 1 by the '@' rule. */
	private int problem = 0;

	// NOTE(review): the three fields above are never reset after a result has
	// been returned, so a second yylex() call on the same instance continues
	// to append to them. Looks like one instance per input line is expected
	// -- confirm.

	/**
	 * Records the current match: the matched text goes to the original
	 * buffer, its replacement goes to the normalized buffer.
	 *
	 * @param norm replacement for the text returned by yytext()
	 */
	private void add (String norm) {
		// StringBuilder instead of String += : add() runs once per token,
		// repeated concatenation would copy the whole buffer each time.
		original.append(yytext());
		normalized.append(norm);
	}

	/**
	 * Regex for a hyphen (U+002D) or soft hyphen (U+00AD) followed by a
	 * space; Java counterpart of the LB macro below. Compiled once and
	 * shared by all scanner instances.
	 */
	private static final java.util.regex.Pattern LB = java.util.regex.Pattern.compile("[\u002d\u00ad] ");

	/** Returns the normalized text with every LB sequence removed. */
	private String withoutLineBreaks() {
		return LB.matcher(normalized).replaceAll("");
	}
%}

hyphen = [-\u{00ad}]  // hyphen and soft hyphen
LB = {hyphen} \u0020
// lb = ({hyphen} \u0020)?

END = \n

%%

<DISP, DICT, SEARCH> {

ſ { add("s"); }  // sample rule

}

// default rules

// '@' flags the whole line as problematic; the character itself is kept.
@ { problem = 1; add(yytext()); }

// Hyphen + space is consumed as one token and copied unchanged; the DICT and
// SEARCH results strip it later.
{LB} { add(yytext()); }

// Any other character except newline is copied unchanged.
. { add(yytext()); }

// at the end, determine which string to return

<DISP> {

{END} {
	switch (problem) {
		case 1: return original.toString();
		default: return normalized.toString();
	}
}

}

<DICT> {

{END} {
	switch (problem) {
		case 1: return "";
		default: return withoutLineBreaks();
	}
}

}

<SEARCH> {

{END} {
	switch (problem) {
		case 1: return original.toString();
		// Locale.ROOT keeps the search key independent of the JVM default
		// locale (e.g. no dotless i under a Turkish locale).
		default: return withoutLineBreaks().toLowerCase(java.util.Locale.ROOT);
	}
}

}