changeset 89:8adfa8679991

New implementation of the transliteration-to-romanization rules in RomanizationLoC, with a test (!).
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 26 Feb 2018 14:39:49 +0100
parents ad505ef703ed
children 4b6c0b368f46
files src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java src/test/java/openmind/RomanizationLocTest.java
diffstat 4 files changed, 97 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java	Fri Feb 23 21:43:29 2018 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java	Mon Feb 26 14:39:49 2018 +0100
@@ -50,7 +50,7 @@
 	 */
 	public static String unicodeNormalize(String text) {
 	    if (!Normalizer.isNormalized(text, Normalizer.Form.NFC)) {
-	        Normalizer.normalize(text, Normalizer.Form.NFC);
+	        text = Normalizer.normalize(text, Normalizer.Form.NFC);
 	    }
 	    return text;
 	}
--- a/src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java	Fri Feb 23 21:43:29 2018 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java	Mon Feb 26 14:39:49 2018 +0100
@@ -339,19 +339,18 @@
 		test("bi al-tamām̄");
 		test("wa al-kamāl");
 		test("bi tarīq");
-*/
+
 		//Group 4
 		test("li al-shirbini");
 		test("li’l-Shirbīnī");
 		test("li-’l-Shirbīnī");
 		test("li tajrīd");
-/*		
+
 		//Group 5
 		test("aš-šams");
 		test("aḏ-ḏams");
         test("fi’l-kitāb");
         test("fi-’l-kitāb");
-		*/
 		
 		
 		//Group 6
@@ -359,12 +358,15 @@
         test("shirbini");
         test("shirazi");
 
-		/*
 		//Group 7
+		test("’Abdullāh");
+        test("ʿAbdullah");
 		test("’Abd allāh");
+        
+        test("ʿAli b. ʿAbdullah");
+		
 		
 		*/
-		
 		//test("al-Jawharaẗ al-bahiyyaẗ fī maʿrifaẗ al-awqāt al-layliyyaẗ wa-ʾl-nahāriyyaẗ");
 		
 		
@@ -373,7 +375,7 @@
 		
 		//char ch = 'Á';
 		//System.out.println(String.format("%04x", (int) ch));
-		//test("Al-Futūḥāt al-Wahbīyaẗ fī Ỳarḥ al-Risālaẗ al-Fatḥīyaẗ fī al-ʿamal bi-al-rubʿ al-mujayyab");
+		test("Al-Futūḥāt al-Wahbīyaẗ fī Ỳarḥ al-Risālaẗ al-Fatḥīyaẗ fī al-ʿamal bi-al-rubʿ al-mujayyab");
 	
 		//test("wa-ʾl-nahār");
 		//test("li-l-ʿIlm");
--- a/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Fri Feb 23 21:43:29 2018 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Mon Feb 26 14:39:49 2018 +0100
@@ -178,7 +178,7 @@
     }
     
     /*
-	 * rule 6
+	 * rule 6 currently unused
 	 * 
 	 * λh; λ= [t; k; d; s; g] => λʹh
 	 */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/java/openmind/RomanizationLocTest.java	Mon Feb 26 14:39:49 2018 +0100
@@ -0,0 +1,87 @@
+package openmind;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+import org.mpi.openmind.repository.utils.RomanizationLoC;
+
+/**
+ * Tests the {@link RomanizationLoC} conversion from ISMI transliteration to Library of Congress (LoC) romanization.
+ * 
+ * @author casties
+ *
+ */
+public class RomanizationLocTest {
+
+    /*
+     * Test methods for {@link org.mpi.openmind.repository.utils.RomanizationLoC#convert(java.lang.String)}.
+     */
+    
+    @Test
+    public void testConvertRule1() {
+        assertEquals("Th/th Kh/kh Dh/dh Sh/sh Gh/gh á", RomanizationLoC.convert("Ṯ/ṯ Ḫ/ḫ Ḏ/ḏ Š/š Ġ/ġ ỳ"));
+    }
+    
+    @Test
+    public void testConvertRule2() {
+        assertEquals("al-risalah", RomanizationLoC.convert("al-risalaẗ"));
+        assertEquals("risalat al-kabir", RomanizationLoC.convert("risalaẗ al-kabir"));
+        assertEquals("risalat    al-kabir", RomanizationLoC.convert("risalaẗ    al-kabir"));
+        assertEquals("risalat al-kabirah", RomanizationLoC.convert("risalaẗ al-kabiraẗ"));
+        assertEquals("risalah", RomanizationLoC.convert("risalaẗ"));
+        assertEquals("risalah risalah", RomanizationLoC.convert("risalaẗ risalaẗ"));
+        assertEquals("risalatan", RomanizationLoC.convert("risalaẗan"));
+    }
+
+    @Test
+    public void testConvertRule3() {
+        assertEquals("bi-al-tamām̄", RomanizationLoC.convert("bi al-tamām̄"));
+        assertEquals("wa-al-kamāl", RomanizationLoC.convert("wa al-kamāl"));
+        assertEquals("bi-tarīq", RomanizationLoC.convert("bi tarīq"));
+    }
+
+    @Test
+    public void testConvertRule4() {
+        assertEquals("lil-shirbini", RomanizationLoC.convert("li al-shirbini"));
+        assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li’l-Shirbīnī"));
+        assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li-’l-Shirbīnī"));
+        assertEquals("lil-Shirbīnī", RomanizationLoC.convert("liʾl-Shirbīnī"));
+        assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li-ʾl-Shirbīnī"));
+        assertEquals("li-tajrīd", RomanizationLoC.convert("li tajrīd"));
+    }
+
+    @Test
+    public void testConvertRule5() {
+        assertEquals("fi al-kitāb", RomanizationLoC.convert("fi’l-kitāb"));
+        assertEquals("fi al-kitāb", RomanizationLoC.convert("fi-’l-kitāb"));
+        assertEquals("fi al-kitāb", RomanizationLoC.convert("fiʾl-kitāb"));
+        assertEquals("fi al-kitāb", RomanizationLoC.convert("fi-ʾl-kitāb"));
+        assertEquals("al-shams", RomanizationLoC.convert("aš-šams"));
+        assertEquals("al-dhams", RomanizationLoC.convert("aḏ-ḏams"));
+    }
+    
+    /*
+     * Rule 6 is currently unused in RomanizationLoC, so this test is disabled.
+    @Test
+    public void testConvertRule6() {
+        assertEquals("Adʹham", RomanizationLoC.convert("Adham"));
+    }
+     */
+
+    /*
+     * Rule 7 is not used, so this test is disabled.
+    @Test
+    public void testConvertRule7() {
+        assertEquals("ʿAbd Allāh", RomanizationLoC.convert("‘Abdullāh"));
+        assertEquals("ʿAbd Allāh", RomanizationLoC.convert("ʿAbdullah"));
+        assertEquals("ʿAbd Allāh", RomanizationLoC.convert("ʿAbd allāh"));
+        assertEquals("ʿAli b. ʿAbd Allāh", RomanizationLoC.convert("ʿAli b. ʿAbdullah"));
+    }
+    */
+
+    @Test
+    public void testConvertRules() {
+        assertEquals("Risālah fī al-ʿamal bi-rubʿ al-muqanṭarāt al-shamālīyah", RomanizationLoC.convert("Risālaẗ fī al-ʿamal bi-rubʿ al-muqanṭarāt al-šamālīyaẗ"));
+        assertEquals("Mukhtaṣarah fī ṣanʿah baʿḍ al-ālāt al-raṣadiyyah wa-al-ʿamal bi-hā", RomanizationLoC.convert("Muḫtaṣaraẗ fī ṣanʿaẗ baʿḍ al-ālāt al-raṣadiyyaẗ wa-ʾl-ʿamal bi-hā"));
+    }
+}