changeset 95:2c01cdc9b34a

Fix problem with Persian in RomanizationLoC.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 25 Jun 2018 18:12:00 +0200
parents 869fb6bb3641
children e8cee8cf2f52
files src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java
diffstat 1 files changed, 3 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Wed Jun 13 15:01:37 2018 +0200
+++ b/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Mon Jun 25 18:12:00 2018 +0200
@@ -95,7 +95,7 @@
      * rule 3
      */
     //private static Pattern rule3a_pattern = Pattern.compile(begin_or_space + "(bi|wa|ka)(\\s+)(al-)(\\S+)");
-    private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|wa|ka)(\\s+)(\\S+)");
+    private static Pattern rule3b_pattern = Pattern.compile("\\b(bi|ka)(\\s+)(\\S+)");
 
     public static String ruleGroup3(String text) {
 
@@ -113,6 +113,8 @@
          * rule 3.B
          * 
          * P X; P=[ bi; wa; ka] => P-X
+         * 
+         * 25.6.2018: "wa" is no longer joined by this rule (it caused problems with Persian text).
          */
         Matcher matcher_b = rule3b_pattern.matcher(text);
         text = matcher_b.replaceAll("$1-$3");