Mercurial > hg > openmind
annotate src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java @ 89:8adfa8679991
new implementation of translit-to-romanization rules in RomanizationLoc with test(!).
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 26 Feb 2018 14:39:49 +0100 |
parents | ad505ef703ed |
children | 2c01cdc9b34a |
rev | line source |
---|---|
1 | 1 package org.mpi.openmind.repository.utils; |
2 | |
3 import java.util.HashMap; | |
4 import java.util.Map; | |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
5 import java.util.Map.Entry; |
1 | 6 import java.util.regex.Matcher; |
7 import java.util.regex.Pattern; | |
8 | |
9 import org.apache.commons.lang.StringUtils; | |
10 | |
11 /** | |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
12 * Convert ISMI transliteration into LOC romanization/transcription. |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
13 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
14 * See document: translit-to-romanization-2.0.doc by Chantal Wahbi |
1 | 15 * http://www.loc.gov/catdir/cpso/romanization/arabic.pdf |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
16 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
17 * @author cwahbi, jurzua, casties |
1 | 18 * |
19 */ | |
20 public class RomanizationLoC { | |
21 | |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
22 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
23 * rule 1 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
24 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
25 private static Map<String, String> rule1_map = new HashMap<String, String>(); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
26 static{ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
27 //rules 1.a to 1.f |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
28 rule1_map.put("\u1E6F", "\u0074\u0068");//ṯ -> th |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
29 rule1_map.put("\u1E6E", "\u0054\u0068");//Ṯ -> Th |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
30 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
31 rule1_map.put("\u1E2B", "\u006B\u0068");//ḫ -> kh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
32 rule1_map.put("\u1E2A", "\u004B\u0068");//Ḫ -> Kh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
33 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
34 rule1_map.put("\u1E0F", "\u0064\u0068");//ḏ -> dh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
35 rule1_map.put("\u1E0E", "\u0044\u0068");//Ḏ -> Dh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
36 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
37 rule1_map.put("\u0161", "\u0073\u0068");//š -> sh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
38 rule1_map.put("\u0160", "\u0053\u0068");//Š -> Sh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
39 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
40 rule1_map.put("\u0121", "\u0067\u0068");//ġ -> gh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
41 rule1_map.put("\u0120", "\u0047\u0068");//Ġ -> Gh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
42 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
43 rule1_map.put("\u1EF3", "\u00E1");//ỳ -> á |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
44 //CONVERSIONMAP.put("\u1EF2", "\u00C1");//Ỳ -> Á |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
45 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
46 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
47 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
48 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
49 * rule 2 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
50 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
51 private static Pattern rule2a_pattern = Pattern.compile("\\b((?!al-)\\S+)ẗ(\\s+)(al-)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
52 private static Pattern rule2b_pattern = Pattern.compile("(\\S+)ẗ(\\s+|(?!al-)\\S*)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
53 private static Pattern rule2c_pattern = Pattern.compile("\\b(al-)(\\S+)ẗ\\b"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
54 private static Pattern rule2d_pattern = Pattern.compile("(\\S+)ẗan\\b"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
55 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
56 public static String ruleGroup2(String text) { |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
57 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
58 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
59 * Rule 2c |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
60 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
61 * al-Xẗ => al-Xh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
62 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
63 Matcher matcher_c = rule2c_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
64 text = matcher_c.replaceAll("$1$2h"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
65 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
66 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
67 * rule 2.d |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
68 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
69 * Xẗan -> Xtan |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
70 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
71 Matcher matcher_d = rule2d_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
72 text = matcher_d.replaceAll("$1tan"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
73 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
74 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
75 * rule 2a |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
76 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
77 * [Not beginnig with: al-] Xẗ al-X => Xt al-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
78 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
79 Matcher matcher_a = rule2a_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
80 text = matcher_a.replaceAll("$1t$2$3"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
81 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
82 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
83 * rule 2b |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
84 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
85 * Xẗ [Not followed by: al-X] => Xh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
86 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
87 Matcher matcher_b = rule2b_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
88 text = matcher_b.replaceAll("$1h$2"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
89 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
90 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
91 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
92 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
93 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
94 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
95 * rule 3 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
96 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
97 //private static Pattern rule3a_pattern = Pattern.compile(begin_or_space + "(bi|wa|ka)(\\s+)(al-)(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
98 private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|wa|ka)(\\s+)(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
99 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
100 public static String ruleGroup3(String text) { |
1 | 101 |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
102 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
103 * rule 3.A |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
104 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
105 * P al-X; P=[ bi; wa; ka] => P-al-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
106 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
107 /* rule 3a is subsumed by 3b |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
108 Matcher matcher_a = rule3a_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
109 text = matcher_a.replaceAll("$1$2-$4"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
110 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
111 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
112 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
113 * rule 3.B |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
114 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
115 * P X; P=[ bi; wa; ka] => P-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
116 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
117 Matcher matcher_b = rule3b_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
118 text = matcher_b.replaceAll("$1-$3"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
119 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
120 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
121 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
122 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
123 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
124 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
125 * rule 4 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
126 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
127 private static Pattern rule4a_pattern = Pattern.compile("\\b(li )(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
128 private static Pattern rule4b_pattern = Pattern.compile("\\b(li al-|liʾl-|li-ʾl-|li-l-)(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
129 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
130 public static String ruleGroup4(String text){ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
131 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
132 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
133 * rule 4.B |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
134 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
135 * [li al-X; li’l-X; li-’l-X; li-l-X] => lil-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
136 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
137 Matcher matcher_b = rule4b_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
138 text = matcher_b.replaceAll("lil-$2"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
139 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
140 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
141 * rule 4.A |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
142 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
143 * li X => li-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
144 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
145 Matcher matcher_a = rule4a_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
146 text = matcher_a.replaceAll("li-$2"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
147 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
148 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
149 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
150 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
151 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
152 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
153 * rule 5 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
154 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
155 private static Pattern rule5a_pattern = Pattern.compile("(-?ʾl-)(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
156 private static Pattern rule5b_pattern = Pattern.compile("\\b(a|A)(t-(t)|ṯ-(ṯ)|d-(d)|ḏ-(ḏ)|r-(r)|z-(z)|s-(s)|š-(š)|ṣ-(ṣ)|ḍ-(ḍ)|ṭ-(ṭ)|ẓ-(ẓ)|l-(l)|n-(n))(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
157 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
158 public static String ruleGroup5(String text){ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
159 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
160 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
161 * rule 5a |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
162 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
163 * [’l-X; X-’l-X] => al-X |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
164 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
165 Matcher matcher_a = rule5a_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
166 text = matcher_a.replaceAll(" al-$2"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
167 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
168 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
169 * rule 5b |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
170 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
171 * aY-YX; Y=Sun letters[t;ṯ;d;ḏ;r;z;s;š;ṣ;ḍ;ṭ;ẓ;l;n] => al-YX |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
172 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
173 Matcher matcher_b = rule5b_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
174 // the groups 3-16 will be empty except the real match |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
175 text = matcher_b.replaceAll("$1l-$3$4$5$6$7$8$9$10$11$12$13$14$15$16$17"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
176 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
177 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
178 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
179 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
180 /* |
89
8adfa8679991
new implementation of translit-to-romanization rules in RomanizationLoc with test(!).
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
88
diff
changeset
|
181 * rule 6 currently unused |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
182 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
183 * λh; λ= [t; k; d; s; g] => λʹh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
184 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
185 private static Pattern rule6_pattern = Pattern.compile("(\\S+)(t|k|d|s|g)h(\\S+)"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
186 //private static Pattern rule6_pattern = Pattern.compile("([\\w|ā|ī|’|ā|š]*)(t|k|d|s|g)h([\\w|ā|ī|’|ā|š]*)"); |
1 | 187 |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
188 public static String ruleGroup6(String text) { |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
189 Matcher matcher = rule6_pattern.matcher(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
190 text = matcher.replaceAll("$1$2ʹh$3"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
191 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
192 } |
1 | 193 |
88
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
194 /* |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
195 * rule 7 currently unused |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
196 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
197 * X[illāh; ullāh; allah; allāh; - Allāh; Allah; ullah] => X Allāh |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
198 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
199 * [ l; b; bism]illāh => [lillāh; billāh; bismillāh] (stay unchanged) |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
200 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
201 public static String ruleGroup7(String text){ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
202 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
203 String rule_7_1_allah = "illāh|ullāh|allah|allāh|-Allāh|Allah|ullah|illah"; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
204 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
205 int count = 0; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
206 while(text.matches("(.*)(\\S+)(" + rule_7_1_allah + ")(.*)") && count<10){ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
207 text = text.replaceAll("(.*)(\\S+)(" + rule_7_1_allah + ")(.*)", "$1$2 Allāh$4"); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
208 count++; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
209 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
210 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
211 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
212 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
213 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
214 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
215 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
216 /** |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
217 * Convert (ISMI-) transliterated arabic text into (LoC romanized) transcribed text. |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
218 * |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
219 * @param text |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
220 * @return |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
221 */ |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
222 public static String convert(final String text) { |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
223 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
224 if (StringUtils.isEmpty(text)) |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
225 return text; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
226 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
227 // make sure we have composed unicode |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
228 String romanizedText = NormalizerUtils.unicodeNormalize(text); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
229 // make sure we have standard ayn and hamza |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
230 romanizedText = NormalizerUtils.aynHamzaNormalizer(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
231 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
232 romanizedText = ruleGroup2(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
233 romanizedText = ruleGroup4(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
234 romanizedText = ruleGroup5(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
235 //romanizedText = ruleGroup6(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
236 romanizedText = ruleGroup3(romanizedText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
237 // replacementText = ruleGroup7(replacementText); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
238 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
239 // rule 1 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
240 for (Entry<String, String> tr : rule1_map.entrySet()) { |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
241 if (romanizedText.contains(tr.getKey())) { |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
242 romanizedText = romanizedText.replace(tr.getKey(), tr.getValue()); |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
243 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
244 } |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
245 |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
246 return romanizedText; |
ad505ef703ed
new implementation of translit-to-romanization rules in RomanizationLoc.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
71
diff
changeset
|
247 } |
1 | 248 |
249 } |