annotate src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java @ 89:8adfa8679991

new implementation of translit-to-romanization rules in RomanizationLoc with test(!).
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 26 Feb 2018 14:39:49 +0100
parents ad505ef703ed
children 2c01cdc9b34a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package org.mpi.openmind.repository.utils;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 import java.util.HashMap;
jurzua
parents:
diff changeset
4 import java.util.Map;
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
5 import java.util.Map.Entry;
1
jurzua
parents:
diff changeset
6 import java.util.regex.Matcher;
jurzua
parents:
diff changeset
7 import java.util.regex.Pattern;
jurzua
parents:
diff changeset
8
jurzua
parents:
diff changeset
9 import org.apache.commons.lang.StringUtils;
jurzua
parents:
diff changeset
10
jurzua
parents:
diff changeset
11 /**
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
12 * Convert ISMI transliteration into LOC romanization/transcription.
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
13 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
14 * See document: translit-to-romanization-2.0.doc by Chantal Wahbi
1
jurzua
parents:
diff changeset
15 * http://www.loc.gov/catdir/cpso/romanization/arabic.pdf
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
16 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
17 * @author cwahbi, jurzua, casties
1
jurzua
parents:
diff changeset
18 *
jurzua
parents:
diff changeset
19 */
jurzua
parents:
diff changeset
20 public class RomanizationLoC {
jurzua
parents:
diff changeset
21
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
22 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
23 * rule 1
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
24 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
25 private static Map<String, String> rule1_map = new HashMap<String, String>();
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
26 static{
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
27 //rules 1.a to 1.f
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
28 rule1_map.put("\u1E6F", "\u0074\u0068");//ṯ -> th
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
29 rule1_map.put("\u1E6E", "\u0054\u0068");//Ṯ -> Th
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
30
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
31 rule1_map.put("\u1E2B", "\u006B\u0068");//ḫ -> kh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
32 rule1_map.put("\u1E2A", "\u004B\u0068");//Ḫ -> Kh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
33
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
34 rule1_map.put("\u1E0F", "\u0064\u0068");//ḏ -> dh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
35 rule1_map.put("\u1E0E", "\u0044\u0068");//Ḏ -> Dh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
36
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
37 rule1_map.put("\u0161", "\u0073\u0068");//š -> sh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
38 rule1_map.put("\u0160", "\u0053\u0068");//Š -> Sh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
39
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
40 rule1_map.put("\u0121", "\u0067\u0068");//ġ -> gh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
41 rule1_map.put("\u0120", "\u0047\u0068");//Ġ -> Gh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
42
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
43 rule1_map.put("\u1EF3", "\u00E1");//ỳ -> á
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
44 //CONVERSIONMAP.put("\u1EF2", "\u00C1");//Ỳ -> Á
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
45 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
46
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
47
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
48 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
49 * rule 2
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
50 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
51 private static Pattern rule2a_pattern = Pattern.compile("\\b((?!al-)\\S+)ẗ(\\s+)(al-)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
52 private static Pattern rule2b_pattern = Pattern.compile("(\\S+)ẗ(\\s+|(?!al-)\\S*)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
53 private static Pattern rule2c_pattern = Pattern.compile("\\b(al-)(\\S+)ẗ\\b");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
54 private static Pattern rule2d_pattern = Pattern.compile("(\\S+)ẗan\\b");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
55
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
56 public static String ruleGroup2(String text) {
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
57
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
58 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
59 * Rule 2c
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
60 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
61 * al-Xẗ => al-Xh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
62 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
63 Matcher matcher_c = rule2c_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
64 text = matcher_c.replaceAll("$1$2h");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
65
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
66 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
67 * rule 2.d
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
68 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
69 * Xẗan -> Xtan
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
70 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
71 Matcher matcher_d = rule2d_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
72 text = matcher_d.replaceAll("$1tan");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
73
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
74 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
75 * rule 2a
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
76 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
77 * [Not beginnig with: al-] Xẗ al-X => Xt al-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
78 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
79 Matcher matcher_a = rule2a_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
80 text = matcher_a.replaceAll("$1t$2$3");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
81
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
82 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
83 * rule 2b
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
84 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
85 * Xẗ [Not followed by: al-X] => Xh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
86 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
87 Matcher matcher_b = rule2b_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
88 text = matcher_b.replaceAll("$1h$2");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
89
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
90 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
91 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
92
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
93
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
94 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
95 * rule 3
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
96 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
97 //private static Pattern rule3a_pattern = Pattern.compile(begin_or_space + "(bi|wa|ka)(\\s+)(al-)(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
98 private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|wa|ka)(\\s+)(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
99
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
100 public static String ruleGroup3(String text) {
1
jurzua
parents:
diff changeset
101
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
102 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
103 * rule 3.A
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
104 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
105 * P al-X; P=[ bi; wa; ka] => P-al-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
106 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
107 /* rule 3a is subsumed by 3b
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
108 Matcher matcher_a = rule3a_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
109 text = matcher_a.replaceAll("$1$2-$4");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
110 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
111
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
112 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
113 * rule 3.B
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
114 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
115 * P X; P=[ bi; wa; ka] => P-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
116 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
117 Matcher matcher_b = rule3b_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
118 text = matcher_b.replaceAll("$1-$3");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
119
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
120 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
121 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
122
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
123
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
124 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
125 * rule 4
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
126 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
127 private static Pattern rule4a_pattern = Pattern.compile("\\b(li )(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
128 private static Pattern rule4b_pattern = Pattern.compile("\\b(li al-|liʾl-|li-ʾl-|li-l-)(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
129
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
130 public static String ruleGroup4(String text){
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
131
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
132 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
133 * rule 4.B
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
134 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
135 * [li al-X; li’l-X; li-’l-X; li-l-X] => lil-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
136 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
137 Matcher matcher_b = rule4b_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
138 text = matcher_b.replaceAll("lil-$2");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
139
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
140 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
141 * rule 4.A
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
142 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
143 * li X => li-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
144 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
145 Matcher matcher_a = rule4a_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
146 text = matcher_a.replaceAll("li-$2");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
147
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
148 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
149 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
150
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
151
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
152 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
153 * rule 5
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
154 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
155 private static Pattern rule5a_pattern = Pattern.compile("(-?ʾl-)(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
156 private static Pattern rule5b_pattern = Pattern.compile("\\b(a|A)(t-(t)|ṯ-(ṯ)|d-(d)|ḏ-(ḏ)|r-(r)|z-(z)|s-(s)|š-(š)|ṣ-(ṣ)|ḍ-(ḍ)|ṭ-(ṭ)|ẓ-(ẓ)|l-(l)|n-(n))(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
157
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
158 public static String ruleGroup5(String text){
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
159
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
160 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
161 * rule 5a
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
162 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
163 * [’l-X; X-’l-X] => al-X
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
164 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
165 Matcher matcher_a = rule5a_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
166 text = matcher_a.replaceAll(" al-$2");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
167
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
168 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
169 * rule 5b
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
170 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
171 * aY-YX; Y=Sun letters[t;ṯ;d;ḏ;r;z;s;š;ṣ;ḍ;ṭ;ẓ;l;n] => al-YX
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
172 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
173 Matcher matcher_b = rule5b_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
174 // the groups 3-16 will be empty except the real match
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
175 text = matcher_b.replaceAll("$1l-$3$4$5$6$7$8$9$10$11$12$13$14$15$16$17");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
176
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
177 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
178 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
179
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
180 /*
89
8adfa8679991 new implementation of translit-to-romanization rules in RomanizationLoc with test(!).
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 88
diff changeset
181 * rule 6 currently unused
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
182 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
183 * λh; λ= [t; k; d; s; g] => λʹh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
184 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
185 private static Pattern rule6_pattern = Pattern.compile("(\\S+)(t|k|d|s|g)h(\\S+)");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
186 //private static Pattern rule6_pattern = Pattern.compile("([\\w|ā|ī|’|ā|š]*)(t|k|d|s|g)h([\\w|ā|ī|’|ā|š]*)");
1
jurzua
parents:
diff changeset
187
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
188 public static String ruleGroup6(String text) {
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
189 Matcher matcher = rule6_pattern.matcher(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
190 text = matcher.replaceAll("$1$2ʹh$3");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
191 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
192 }
1
jurzua
parents:
diff changeset
193
88
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
194 /*
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
195 * rule 7 currently unused
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
196 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
197 * X[illāh; ullāh; allah; allāh; - Allāh; Allah; ullah] => X Allāh
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
198 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
199 * [ l; b; bism]illāh => [lillāh; billāh; bismillāh] (stay unchanged)
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
200 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
201 public static String ruleGroup7(String text){
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
202
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
203 String rule_7_1_allah = "illāh|ullāh|allah|allāh|-Allāh|Allah|ullah|illah";
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
204
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
205 int count = 0;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
206 while(text.matches("(.*)(\\S+)(" + rule_7_1_allah + ")(.*)") && count<10){
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
207 text = text.replaceAll("(.*)(\\S+)(" + rule_7_1_allah + ")(.*)", "$1$2 Allāh$4");
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
208 count++;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
209 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
210
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
211 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
212
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
213 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
214
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
215
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
216 /**
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
217 * Convert (ISMI-) transliterated arabic text into (LoC romanized) transcribed text.
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
218 *
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
219 * @param text
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
220 * @return
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
221 */
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
222 public static String convert(final String text) {
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
223
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
224 if (StringUtils.isEmpty(text))
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
225 return text;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
226
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
227 // make sure we have composed unicode
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
228 String romanizedText = NormalizerUtils.unicodeNormalize(text);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
229 // make sure we have standard ayn and hamza
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
230 romanizedText = NormalizerUtils.aynHamzaNormalizer(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
231
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
232 romanizedText = ruleGroup2(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
233 romanizedText = ruleGroup4(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
234 romanizedText = ruleGroup5(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
235 //romanizedText = ruleGroup6(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
236 romanizedText = ruleGroup3(romanizedText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
237 // replacementText = ruleGroup7(replacementText);
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
238
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
239 // rule 1
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
240 for (Entry<String, String> tr : rule1_map.entrySet()) {
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
241 if (romanizedText.contains(tr.getKey())) {
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
242 romanizedText = romanizedText.replace(tr.getKey(), tr.getValue());
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
243 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
244 }
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
245
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
246 return romanizedText;
ad505ef703ed new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 71
diff changeset
247 }
1
jurzua
parents:
diff changeset
248
jurzua
parents:
diff changeset
249 }