changeset 96:e8cee8cf2f52

small bugfix and minimal tests for ArabicTranslitNormalizer.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 27 Aug 2018 16:34:46 +0200
parents 2c01cdc9b34a
children 2c49ddbe95ac
files src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java src/test/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizerTest.java
diffstat 2 files changed, 26 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java	Mon Jun 25 18:12:00 2018 +0200
+++ b/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java	Mon Aug 27 16:34:46 2018 +0200
@@ -69,6 +69,9 @@
         if (StringUtils.isEmpty(text)) {
             return text;
         }
+
+        // normalize Unicode
+        text = Normalizer.normalize(text, Form.NFC);
         
         // everything is lowercase TODO: locale?
         text = text.toLowerCase();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizerTest.java	Mon Aug 27 16:34:46 2018 +0200
@@ -0,0 +1,23 @@
+package org.mpi.openmind.repository.utils;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * Minimal tests for {@link ArabicTranslitNormalizer#normalize(String)}.
+ * @author casties
+ */
+public class ArabicTranslitNormalizerTest {
+
+	/**
+	 * Checks {@link org.mpi.openmind.repository.utils.ArabicTranslitNormalizer#normalize(java.lang.String)} on sample inputs.
+	 */
+	@Test
+	public void testNormalize() {
+        assertEquals("t/t h/h d/d s/s j/j a", ArabicTranslitNormalizer.normalize("Ṯ/ṯ Ḫ/ḫ Ḏ/ḏ Š/š Ġ/ġ ỳ")); // letters with diacritics, upper/lower pairs
+        assertEquals("t/t h/h d/d s/s j/j a", ArabicTranslitNormalizer.normalize("Th/th Kh/kh Dh/dh Sh/sh Gh/gh a")); // two-letter spellings; same expected output as above
+        assertEquals("fath fath", ArabicTranslitNormalizer.normalize("fatḥ FATḤ")); // lower- and upper-case input give the same word
+	}
+
+}