# HG changeset patch # User Robert Casties # Date 1529943120 -7200 # Node ID 2c01cdc9b34a37ed7f572bef09b2947aec8490dd # Parent 869fb6bb36417533c83715cea6f676c96f1b48e0 Fix problem with Persian in RomanizationLoc. diff -r 869fb6bb3641 -r 2c01cdc9b34a src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java --- a/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java Wed Jun 13 15:01:37 2018 +0200 +++ b/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java Mon Jun 25 18:12:00 2018 +0200 @@ -95,7 +95,7 @@ * rule 3 */ //private static Pattern rule3a_pattern = Pattern.compile(begin_or_space + "(bi|wa|ka)(\\s+)(al-)(\\S+)"); - private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|wa|ka)(\\s+)(\\S+)"); + private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|ka)(\\s+)(\\S+)"); public static String ruleGroup3(String text) { @@ -113,6 +113,8 @@ * rule 3.B * * P X; P=[ bi; wa; ka] => P-X + * + * 25.6.2018: Do not use for wa (problem with Persian) */ Matcher matcher_b = rule3b_pattern.matcher(text); text = matcher_b.replaceAll("$1-$3");