Mercurial > hg > openmind
changeset 96:e8cee8cf2f52
small bugfix and minimal tests for ArabicTranslitNormalizer.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 27 Aug 2018 16:34:46 +0200 |
parents | 2c01cdc9b34a |
children | 2c49ddbe95ac |
files | src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java src/test/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizerTest.java |
diffstat | 2 files changed, 26 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java Mon Jun 25 18:12:00 2018 +0200
+++ b/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java Mon Aug 27 16:34:46 2018 +0200
@@ -69,6 +69,9 @@
         if (StringUtils.isEmpty(text)) {
             return text;
         }
+
+        // normalize Unicode
+        text = Normalizer.normalize(text, Form.NFC);
 
         // everything is lowercase TODO: locale?
         text = text.toLowerCase();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizerTest.java Mon Aug 27 16:34:46 2018 +0200
@@ -0,0 +1,23 @@
+package org.mpi.openmind.repository.utils;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * @author casties
+ *
+ */
+public class ArabicTranslitNormalizerTest {
+
+    /**
+     * Test method for {@link org.mpi.openmind.repository.utils.ArabicTranslitNormalizer#normalize(java.lang.String)}.
+     */
+    @Test
+    public void testNormalize() {
+        assertEquals("t/t h/h d/d s/s j/j a", ArabicTranslitNormalizer.normalize("Ṯ/ṯ Ḫ/ḫ Ḏ/ḏ Š/š Ġ/ġ ỳ"));
+        assertEquals("t/t h/h d/d s/s j/j a", ArabicTranslitNormalizer.normalize("Th/th Kh/kh Dh/dh Sh/sh Gh/gh a"));
+        assertEquals("fath fath", ArabicTranslitNormalizer.normalize("fatḥ FATḤ"));
+    }
+
+}