mpdl-group: software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java comparison

comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java @ 23:e845310098ba

diverse Korrekturen

author	Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date	Tue, 27 Nov 2012 12:35:19 +0100
parents	7d6d969b10cf
children

comparison

Legend: equal (unmarked) | deleted (-) | inserted (+) | replaced (- followed by +)

-:6a45a982c333
+:e845310098ba
 package de.mpg.mpiwg.berlin.mpdl.lt.text.norm;
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.ArrayList;
 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexAR;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexDE;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexFR;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexIT;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexLA;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexNL;
 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.lang.MpdlNormalizerLexZH;
-import de.mpg.mpiwg.berlin.mpdl.lt.text.reg.Regularization;
-import de.mpg.mpiwg.berlin.mpdl.lt.text.reg.RegularizationManager;
 public class Normalizer {
 public static int NONE = -1;  // no normalization
 public static int DISPLAY = 1;  // normalization in DISPLAY mode
 public static int DICTIONARY = 2;  // normalization in DICTIONARY mode
 */
 public String normalize(String s) throws ApplicationException {
 String normStr = s;
 if (useSpecialNormFunction())
 normStr = removeSpecialNWDMarks(normStr);
-if (useRegFunction()) {
-// try to regularize the string to the norm form over predefined regularizations
-RegularizationManager regManager = RegularizationManager.getInstance();
-ArrayList<Regularization> regs = regManager.findRegsByOrig(language, s);
-if (regs != null && regs.size() > 0) {
-Regularization reg = regs.get(0);  // only one: the first one
-String regNormStr = reg.getNorm();
-normStr = regNormStr;
-}
-}
 if (useNormFunction()) {
 // normalize the string by string replacements
 if (normMode == DICTIONARY) {
 normStr = normalize(normStr, DICTIONARY);
 } else if (normMode == DISPLAY) {
 if (useSpecialNormFunction())
 normStr = insertSpecialNWDMarks(normStr);
 return normStr;
 }
-private boolean useRegFunction() {
-boolean useReg = false;
-for (int i=0; i< normFunctions.length; i++) {
-String function = normFunctions[i];
-if (function.equals("reg"))
-return true;
-}
-return useReg;
-}
 private boolean useNormFunction() {
 boolean useNorm = false;
 for (int i=0; i< normFunctions.length; i++) {
 String function = normFunctions[i];
 if (function.equals("norm") || function.equals("specialNorm"))
 } else if (Language.getInstance().isFrench(language)) {
 MpdlNormalizerLexFR mpdlNormalizerLex = new MpdlNormalizerLexFR(strReader);
 if (mode == DISPLAY)
 mpdlNormalizerLex.yybegin(MpdlNormalizerLexFR.DISP);
 else if (mode == DICTIONARY)
-mpdlNormalizerLex.yybegin(MpdlNormalizerLexFR.DICT_ASCII);
+mpdlNormalizerLex.yybegin(MpdlNormalizerLexFR.DICT);
 else if (mode == SEARCH)
 mpdlNormalizerLex.yybegin(MpdlNormalizerLexFR.SEARCH);
 while (token != null) {
 token = mpdlNormalizerLex.yylex();
 if (token != null)

Mercurial > hg > mpdl-group

comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java @ 23:e845310098ba