# HG changeset patch # User Robert Casties # Date 1519652389 -3600 # Node ID 8adfa8679991f8ebbc2bd81bb3116976dd2df0a6 # Parent ad505ef703ed5b637c672df836b6612aa0d7e59f new implementation of translit-to-romanization rules in RomanizationLoc with test(!). diff -r ad505ef703ed -r 8adfa8679991 src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java --- a/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java Fri Feb 23 21:43:29 2018 +0100 +++ b/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java Mon Feb 26 14:39:49 2018 +0100 @@ -50,7 +50,7 @@ */ public static String unicodeNormalize(String text) { if (!Normalizer.isNormalized(text, Normalizer.Form.NFC)) { - Normalizer.normalize(text, Normalizer.Form.NFC); + text = Normalizer.normalize(text, Normalizer.Form.NFC); } return text; } diff -r ad505ef703ed -r 8adfa8679991 src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java --- a/src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java Fri Feb 23 21:43:29 2018 +0100 +++ b/src/main/java/org/mpi/openmind/repository/utils/OldRomanizationLoC.java Mon Feb 26 14:39:49 2018 +0100 @@ -339,19 +339,18 @@ test("bi al-tamām̄"); test("wa al-kamāl"); test("bi tarīq"); -*/ + //Group 4 test("li al-shirbini"); test("li’l-Shirbīnī"); test("li-’l-Shirbīnī"); test("li tajrīd"); -/* + //Group 5 test("aš-šams"); test("aḏ-ḏams"); test("fi’l-kitāb"); test("fi-’l-kitāb"); - */ //Group 6 @@ -359,12 +358,15 @@ test("shirbini"); test("shirazi"); - /* //Group 7 + test("’Abdullāh"); + test("ʿAbdullah"); test("’Abd allāh"); + + test("ʿAli b. ʿAbdullah"); + */ - //test("al-Jawharaẗ al-bahiyyaẗ fī maʿrifaẗ al-awqāt al-layliyyaẗ wa-ʾl-nahāriyyaẗ"); @@ -373,7 +375,7 @@ //char ch = 'Á'; //System.out.println(String.format("%04x", (int) ch)); - //test("Al-Futūḥāt al-Wahbīyaẗ fī Ỳarḥ al-Risālaẗ al-Fatḥīyaẗ fī al-ʿamal bi-al-rubʿ al-mujayyab"); + test("Al-Futūḥāt al-Wahbīyaẗ fī Ỳarḥ al-Risālaẗ al-Fatḥīyaẗ fī al-ʿamal bi-al-rubʿ al-mujayyab"); //test("wa-ʾl-nahār"); //test("li-l-ʿIlm"); diff -r ad505ef703ed -r 8adfa8679991 src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java --- a/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java Fri Feb 23 21:43:29 2018 +0100 +++ b/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java Mon Feb 26 14:39:49 2018 +0100 @@ -178,7 +178,7 @@ } /* - * rule 6 + * rule 6 currently unused * * λh; λ= [t; k; d; s; g] => λʹh */ diff -r ad505ef703ed -r 8adfa8679991 src/test/java/openmind/RomanizationLocTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/test/java/openmind/RomanizationLocTest.java Mon Feb 26 14:39:49 2018 +0100 @@ -0,0 +1,87 @@ +package openmind; + +import static org.junit.Assert.*; + +import org.junit.Test; +import org.mpi.openmind.repository.utils.RomanizationLoC; + +/** + * Test RomanizationLoC conversion from ISMI transliteration to Library of Congress romanization. + * + * @author casties + * + */ +public class RomanizationLocTest { + + /* + * Test methods for {@link org.mpi.openmind.repository.utils.RomanizationLoC#convert(java.lang.String)}. + */ + + @Test + public void testConvertRule1() { + assertEquals("Th/th Kh/kh Dh/dh Sh/sh Gh/gh á", RomanizationLoC.convert("Ṯ/ṯ Ḫ/ḫ Ḏ/ḏ Š/š Ġ/ġ ỳ")); + } + + @Test + public void testConvertRule2() { + assertEquals("al-risalah", RomanizationLoC.convert("al-risalaẗ")); + assertEquals("risalat al-kabir", RomanizationLoC.convert("risalaẗ al-kabir")); + assertEquals("risalat al-kabir", RomanizationLoC.convert("risalaẗ al-kabir")); + assertEquals("risalat al-kabirah", RomanizationLoC.convert("risalaẗ al-kabiraẗ")); + assertEquals("risalah", RomanizationLoC.convert("risalaẗ")); + assertEquals("risalah risalah", RomanizationLoC.convert("risalaẗ risalaẗ")); + assertEquals("risalatan", RomanizationLoC.convert("risalaẗan")); + } + + @Test + public void testConvertRule3() { + assertEquals("bi-al-tamām̄", RomanizationLoC.convert("bi al-tamām̄")); + assertEquals("wa-al-kamāl", RomanizationLoC.convert("wa al-kamāl")); + assertEquals("bi-tarīq", RomanizationLoC.convert("bi tarīq")); + } + + @Test + public void testConvertRule4() { + assertEquals("lil-shirbini", RomanizationLoC.convert("li al-shirbini")); + assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li’l-Shirbīnī")); + assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li-’l-Shirbīnī")); + assertEquals("lil-Shirbīnī", RomanizationLoC.convert("liʾl-Shirbīnī")); + assertEquals("lil-Shirbīnī", RomanizationLoC.convert("li-ʾl-Shirbīnī")); + assertEquals("li-tajrīd", RomanizationLoC.convert("li tajrīd")); + } + + @Test + public void testConvertRule5() { + assertEquals("fi al-kitāb", RomanizationLoC.convert("fi’l-kitāb")); + assertEquals("fi al-kitāb", RomanizationLoC.convert("fi-’l-kitāb")); + assertEquals("fi al-kitāb", RomanizationLoC.convert("fiʾl-kitāb")); + assertEquals("fi al-kitāb", RomanizationLoC.convert("fi-ʾl-kitāb")); + assertEquals("al-shams", RomanizationLoC.convert("aš-šams")); + assertEquals("al-dhams", RomanizationLoC.convert("aḏ-ḏams")); + } + + /* + * rule 6 not used + @Test + public void testConvertRule6() { + assertEquals("Adʹham", RomanizationLoC.convert("Adham")); + } + */ + + /* + * rule 7 not used + @Test + public void testConvertRule7() { + assertEquals("ʿAbd Allāh", RomanizationLoC.convert("‘Abdullāh")); + assertEquals("ʿAbd Allāh", RomanizationLoC.convert("ʿAbdullah")); + assertEquals("ʿAbd Allāh", RomanizationLoC.convert("ʿAbd allāh")); + assertEquals("ʿAli b. ʿAbd Allāh", RomanizationLoC.convert("ʿAli b. ʿAbdullah")); + } + */ + + @Test + public void testConvertRules() { + assertEquals("Risālah fī al-ʿamal bi-rubʿ al-muqanṭarāt al-shamālīyah", RomanizationLoC.convert("Risālaẗ fī al-ʿamal bi-rubʿ al-muqanṭarāt al-šamālīyaẗ")); + assertEquals("Mukhtaṣarah fī ṣanʿah baʿḍ al-ālāt al-raṣadiyyah wa-al-ʿamal bi-hā", RomanizationLoC.convert("Muḫtaṣaraẗ fī ṣanʿaẗ baʿḍ al-ālāt al-raṣadiyyaẗ wa-ʾl-ʿamal bi-hā")); + } +}