Mercurial > hg > openmind
changeset 15:728549225b02
first version of new normalizer.
author | casties |
---|---|
date | Tue, 12 May 2015 08:32:23 +0000 |
parents | 034df8d5c923 |
children | c009ce2e60be |
files | src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java |
diffstat | 1 files changed, 22 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Holds the replacement table of a normalizer for transliterated Arabic text.
 *
 * <p>The table maps a replacement string to the pattern of character sequences
 * it stands in for (for example {@code "sh"} is represented by {@code "s"}).
 * The code that applies the table is not part of this class yet; presumably
 * every match of a pattern is meant to be replaced by its key.
 */
public class ArabicTranslitNormalizer {

    /**
     * Replacement string mapped to the pattern matching every sequence that
     * should be rewritten to it.
     *
     * <p>Because the replacement is the key, all sequences sharing one
     * replacement must live in a single alternation; a second {@code put}
     * with the same key would silently discard the earlier pattern.
     */
    protected static Map<String, Pattern> multiRepPat = new HashMap<String, Pattern>();

    // Static initializer: the table belongs to the class, so it is filled once
    // at class-load time and is usable without ever creating an instance.
    static {
        // "ch" and "dj" both normalize to "j", so they share one pattern.
        multiRepPat.put("j", Pattern.compile("ch|dj"));
        multiRepPat.put("t", Pattern.compile("th"));
        multiRepPat.put("h", Pattern.compile("kh"));
        multiRepPat.put("d", Pattern.compile("dh"));
        multiRepPat.put("s", Pattern.compile("sh"));
        multiRepPat.put("g", Pattern.compile("gh"));
        // aẗ\b, at\b, ah\b -> a
        // UNICODE_CHARACTER_CLASS makes \b treat non-ASCII letters such as
        // U+1E97 as word characters on every JDK; without it, newer JDKs use
        // an ASCII-only \b and "aẗ" at the end of a word no longer matches.
        multiRepPat.put("a", Pattern.compile("a\u1E97\\b|at\\b|ah\\b",
                Pattern.UNICODE_CHARACTER_CLASS));
    }
}