changeset 71:aeb29e362a67

New ArabicNormalizer. NormalizerUtils.normalize() now does both translit and arabic normalization. 108: arabic normalization is not applied Task-Url: https://it-dev.mpiwg-berlin.mpg.de/tracs/ismi/ticket/108
author casties
date Thu, 02 Feb 2017 17:58:52 +0100
parents b5a22b9ab9c6
children 3490a2237118
files src/main/java/org/mpi/openmind/cache/WrapperService.java src/main/java/org/mpi/openmind/repository/bo/Node.java src/main/java/org/mpi/openmind/repository/utils/ArabicNormalizer.java src/main/java/org/mpi/openmind/repository/utils/ArabicNormalizerUtils.java src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java src/main/java/org/mpi/openmind/repository/utils/OldNormalizerUtils.java src/main/java/org/mpi/openmind/repository/utils/ReplacementPattern.java src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java src/main/java/org/mpi/openmind/scripts/NormalizeOW.java
diffstat 10 files changed, 449 insertions(+), 343 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/cache/WrapperService.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/cache/WrapperService.java	Thu Feb 02 17:58:52 2017 +0100
@@ -25,7 +25,6 @@
 import org.mpi.openmind.repository.services.PersistenceService;
 import org.mpi.openmind.repository.services.utils.AttributeFilter;
 import org.mpi.openmind.repository.services.utils.EditIntent;
-import org.mpi.openmind.repository.utils.ArabicTranslitNormalizer;
 import org.mpi.openmind.repository.utils.ImportOM3Util;
 import org.mpi.openmind.repository.utils.NormalizerUtils;
 import org.mpi.openmind.repository.utils.RomanizationLoC;
@@ -161,7 +160,7 @@
 		int count = 0;
 		if (StringUtils.isNotEmpty(term)) {
 			// TODO: better normalization
-			String normalizedTerm = ArabicTranslitNormalizer.normalize(term);
+			String normalizedTerm = NormalizerUtils.normalize(term);
 			for (AttributeFilter filter : filters) {
 				if (mustBreak) {
 					break;
--- a/src/main/java/org/mpi/openmind/repository/bo/Node.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/bo/Node.java	Thu Feb 02 17:58:52 2017 +0100
@@ -21,7 +21,6 @@
 import javax.persistence.Transient;
 
 import org.apache.commons.codec.binary.Base64;
-import org.mpi.openmind.repository.utils.ArabicNormalizerUtils;
 import org.mpi.openmind.repository.utils.NormalizerUtils;
 import org.mpi.openmind.repository.utils.RomanizationLoC;
 
@@ -198,12 +197,12 @@
     public void setOwnValue(String ownValue) {
         this.ownValue = ownValue;
         this.normalizedOwnValue = NormalizerUtils.normalize(ownValue);
-        this.normalizedArabicOwnValue = ArabicNormalizerUtils.normalize(ownValue);
+        this.normalizedArabicOwnValue = NormalizerUtils.normalizeArabic(ownValue);
     }
     
     public void autoNormalize(){
     	this.normalizedOwnValue = NormalizerUtils.normalize(ownValue);
-        this.normalizedArabicOwnValue = ArabicNormalizerUtils.normalize(ownValue);
+        this.normalizedArabicOwnValue = NormalizerUtils.normalizeArabic(ownValue);
     }
 
     public String getRomanizationLoC(){
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/java/org/mpi/openmind/repository/utils/ArabicNormalizer.java	Thu Feb 02 17:58:52 2017 +0100
@@ -0,0 +1,38 @@
+package org.mpi.openmind.repository.utils;
+
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Normalizes text by decomposing it (NFKD) and deleting every Unicode mark character.
+ * @author casties
+ */
+public class ArabicNormalizer {
+
+	/** matches one or more consecutive characters of the Unicode Mark category (\p{M}) */
+    protected static Pattern markPattern = Pattern.compile("\\p{M}+");
+    
+    /**
+     * Returns the de-vowelised form of the given text.
+     * 
+     * The text is decomposed with NFKD (a compatibility decomposition, so compatibility
+     * characters are folded too); all marks are then removed and the result is not re-composed.
+     * 
+     * @param text the string to normalize; null or empty input is returned unchanged
+     * @return the decomposed text without mark characters (marks of any script are removed)
+     */
+    public static String normalize(String text) {
+        if (StringUtils.isEmpty(text)) {
+            return text;
+        }
+        
+        // NFKD separates base letters from their marks; the pattern then deletes the marks
+        text = Normalizer.normalize(text, Form.NFKD);
+        text = markPattern.matcher(text).replaceAll("");
+        
+        return text;
+    }
+}
--- a/src/main/java/org/mpi/openmind/repository/utils/ArabicNormalizerUtils.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/ArabicNormalizerUtils.java	Thu Feb 02 17:58:52 2017 +0100
@@ -1,6 +1,5 @@
 package org.mpi.openmind.repository.utils;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -78,16 +77,9 @@
 		if(StringUtils.isEmpty(w))
 			return w;
 		
-		
 		/*
 		 * Replacing combination of vowels
-		 
-		for(String key : wildCardStringMap.keySet()){
-			List<String> list = wildCardStringMap.get(key);
-			for(String term : list){
-				w = w.replace(term, key);
-			}
-		}*/
+		 */
 		
 		for(String key : wildCardCharMap.keySet()){
 			Character[] list = wildCardCharMap.get(key);
--- a/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/ArabicTranslitNormalizer.java	Thu Feb 02 17:58:52 2017 +0100
@@ -2,11 +2,12 @@
 
 import java.text.Normalizer;
 import java.text.Normalizer.Form;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Map.Entry;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Pattern;
 
+import org.apache.commons.lang.StringUtils;
+
 /**
  * @author casties
  *
@@ -15,49 +16,57 @@
  */
 public class ArabicTranslitNormalizer {
 
-    protected static Map<String, Pattern> apostrophePatterns = new HashMap<String, Pattern>();
+    protected static List<ReplacementPattern> apostrophePatterns = new ArrayList<ReplacementPattern>();
     static {
         // `, ʿ, ʾ, ‘, ’ -> '
         //apostrophePatterns.put("'", Pattern.compile("\u0060|\u02BE|\u02BF|\u2018|\u2019"));
-    	// remove `, ʿ, ʾ, ‘, ’, '
-        apostrophePatterns.put("", Pattern.compile("'|\u0060|\u02BE|\u02BF|\u2018|\u2019"));
+    	// remove apostrophes `, ʿ, ʾ, ‘, ’, '
+        apostrophePatterns.add(new ReplacementPattern("", Pattern.compile("'|\u0060|\u02BE|\u02BF|\u2018|\u2019")));
     }    
     
-    protected static Map<String, Pattern> twoletterPatterns = new HashMap<String, Pattern>();
+    protected static List<ReplacementPattern> twoletterPatterns = new ArrayList<ReplacementPattern>();
     static {
-        twoletterPatterns.put("j", Pattern.compile("ch"));
-        twoletterPatterns.put("j", Pattern.compile("dj"));
-        twoletterPatterns.put("t", Pattern.compile("th"));
-        twoletterPatterns.put("h", Pattern.compile("kh"));
-        twoletterPatterns.put("d", Pattern.compile("dh"));
-        twoletterPatterns.put("s", Pattern.compile("sh"));
-        twoletterPatterns.put("g", Pattern.compile("gh"));
+        twoletterPatterns.add(new ReplacementPattern("j", Pattern.compile("ch")));
+        twoletterPatterns.add(new ReplacementPattern("j", Pattern.compile("dj")));
+        twoletterPatterns.add(new ReplacementPattern("t", Pattern.compile("th")));
+        twoletterPatterns.add(new ReplacementPattern("h", Pattern.compile("kh")));
+        twoletterPatterns.add(new ReplacementPattern("d", Pattern.compile("dh")));
+        twoletterPatterns.add(new ReplacementPattern("s", Pattern.compile("sh")));
+        twoletterPatterns.add(new ReplacementPattern("g", Pattern.compile("gh")));
     }
 
-    protected static Map<String, Pattern> wordpartPatterns = new HashMap<String, Pattern>();
+    protected static List<ReplacementPattern> wordpartPatterns = new ArrayList<ReplacementPattern>();
     static {
         // aẗ\b, at\b, ah\b -> a
-        wordpartPatterns.put("a", Pattern.compile("a\u1E97\\b|at\\b|ah\\b"));
+        wordpartPatterns.add(new ReplacementPattern("a", Pattern.compile("a\u1E97\\b|at\\b|ah\\b")));
         // 'abd + space -> 'abd
-        //wordpartPatterns.put("'abd", Pattern.compile("'abd "));
-        wordpartPatterns.put("abd", Pattern.compile("abd "));
+        // now without apostrophe
+        wordpartPatterns.add(new ReplacementPattern("abd", Pattern.compile("abd ")));
     }
 
-    protected static Map<String, Pattern> letterdiacritPatterns = new HashMap<String, Pattern>();
+    protected static List<ReplacementPattern> letterdiacritPatterns = new ArrayList<ReplacementPattern>();
     static {
         // ỳ -> a
-        letterdiacritPatterns.put("a", Pattern.compile("\u1EF3"));
+        letterdiacritPatterns.add(new ReplacementPattern("a", Pattern.compile("\u1EF3")));
     }
 
-    protected static Map<String, Pattern> letterPatterns = new HashMap<String, Pattern>();
+    protected static List<ReplacementPattern> letterPatterns = new ArrayList<ReplacementPattern>();
     static {
-        letterPatterns.put("j", Pattern.compile("g|c"));
+        letterPatterns.add(new ReplacementPattern("j", Pattern.compile("g|c")));
     }
     
     protected static Pattern diacriticsPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
     
+    /**
+     * Returns String normalized according to arabic transliteration rules.
+     * 
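+     * @see <a href="https://it-dev.mpiwg-berlin.mpg.de/tracs/OpenMind3/wiki/normalize_arabic_translit">normalize_arabic_translit (OpenMind3 wiki)</a>
+     * 
+     * @param text the string to normalize; null or empty input is returned unchanged
+     * @return the lower-cased text with the transliteration replacement rules applied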
+     */
     public static String normalize(String text) {
-        if (text == null || text.isEmpty()) {
+        if (StringUtils.isEmpty(text)) {
             return text;
         }
         
@@ -65,31 +74,23 @@
         text = text.toLowerCase();
         
         // replace "apostrophes"
-        for (Entry<String, Pattern> entry : apostrophePatterns.entrySet()) {
-            Pattern pattern = entry.getValue();
-            String replacement = entry.getKey();
-            text = pattern.matcher(text).replaceAll(replacement);
+        for (ReplacementPattern entry : apostrophePatterns) {
+            text = entry.getPattern().matcher(text).replaceAll(entry.getReplacement());
         }
         
         // replace two-letter combinations
-        for (Entry<String, Pattern> entry : twoletterPatterns.entrySet()) {
-            Pattern pattern = entry.getValue();
-            String replacement = entry.getKey();
-            text = pattern.matcher(text).replaceAll(replacement);
+        for (ReplacementPattern entry : twoletterPatterns) {
+            text = entry.getPattern().matcher(text).replaceAll(entry.getReplacement());
         }
         
         // replace word-parts
-        for (Entry<String, Pattern> entry : wordpartPatterns.entrySet()) {
-            Pattern pattern = entry.getValue();
-            String replacement = entry.getKey();
-            text = pattern.matcher(text).replaceAll(replacement);
+        for (ReplacementPattern entry : wordpartPatterns) {
+            text = entry.getPattern().matcher(text).replaceAll(entry.getReplacement());
         }
 
         // replace letters with diacritics
-        for (Entry<String, Pattern> entry : letterdiacritPatterns.entrySet()) {
-            Pattern pattern = entry.getValue();
-            String replacement = entry.getKey();
-            text = pattern.matcher(text).replaceAll(replacement);
+        for (ReplacementPattern entry : letterdiacritPatterns) {
+            text = entry.getPattern().matcher(text).replaceAll(entry.getReplacement());
         }
 
         // remove diacritics by de-composing and removing diacritical marks
@@ -97,10 +98,8 @@
         text = diacriticsPattern.matcher(text).replaceAll("");
         
         // replace letters
-        for (Entry<String, Pattern> entry : letterPatterns.entrySet()) {
-            Pattern pattern = entry.getValue();
-            String replacement = entry.getKey();
-            text = pattern.matcher(text).replaceAll(replacement);
+        for (ReplacementPattern entry : letterPatterns) {
+            text = entry.getPattern().matcher(text).replaceAll(entry.getReplacement());
         }
 
         return text;
--- a/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/NormalizerUtils.java	Thu Feb 02 17:58:52 2017 +0100
@@ -1,291 +1,27 @@
 package org.mpi.openmind.repository.utils;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.lang.StringUtils;
-
 public class NormalizerUtils {
 
-	public static Map<String, List<String>> wildCardStringMap = new HashMap<String, List<String>>();
 	
-	static{
-		List<String> list;
-		StringBuilder sb = new StringBuilder();
-		
-		list = new ArrayList<String>();
-		Character c = 0x1E6F;
-		sb.append(c);
-		list.add(sb.toString());//ṯ
-		list.add("th");
-		wildCardStringMap.put("T", list);
-		
-		list = new ArrayList<String>();
-		c = 0x1E2b;
-		list.add(c + "");//ḫ
-		list.add("kh");
-		wildCardStringMap.put("H", list);
-		
-		list = new ArrayList<String>();
-		c = 0x1E0f;
-		list.add(c + "");//ḏ
-		list.add("dh");
-		wildCardStringMap.put("D", list);
-		
-		list = new ArrayList<String>();
-		c = 0x0161;
-		list.add(c + "");//š
-		list.add("sh");
-		wildCardStringMap.put("S", list);
-		
-		list = new ArrayList<String>();
-		c = 0x0121;
-		list.add(c + "");//ġ
-		list.add("gh");
-		wildCardStringMap.put("G", list);
-		
-		list = new ArrayList<String>();
-		c = 0x1E97;
-		list.add("a" + c + " ");//aẗSPACE
-		list.add("at ");
-		list.add("ah ");
-		list.add("a ");
-		wildCardStringMap.put("A ", list);
-		
-		list = new ArrayList<String>();
-		c = 0x1ef3;
-		list.add(c + "");//ỳ
-		c = 0x00E1;
-		list.add(c + "");//á
-		c = 0x0101;
-		list.add(c + "");//ā
-		c = 0x00E0;
-		list.add(c + "");//à
-		/*
-		//Chantal list for A
-		c = 0x0065;
-		list.add(c + "");//e
-		c = 0x0101;
-		list.add(c + "");//ā
-		c = 0x00E2;
-		list.add(c + "");//â
-		*/
-		wildCardStringMap.put("A", list);
-		
-		/*
-		list = new ArrayList<String>();
-		c = 0x0062;
-		list.add(c + "");//b
-		c = 0x0070;
-		list.add(c + "");//p
-		wildCardStringMap.put("B", list);
-		*/
+	public static String normalize(String w) { // full normalization: transliteration rules, then mark removal
+		String atn = ArabicTranslitNormalizer.normalize(w); // step 1: apply the Arabic transliteration rules
+		String an = ArabicNormalizer.normalize(atn); // step 2: strip Unicode marks from the step-1 result
+	    return an;
 	}
 	
-	public static Map<String, Character[]> wildCardCharMap = new HashMap<String, Character[]>();
-
-	// " ` ′ ‘ ’ ‛ ' ʻ ʼ ʽ ˋ ʾ ʿ
-	public static Character[] apostrophes = {
-			0x22, 0x60, 0x2032, 0x2018, 0x2019, 0x201B, 0x27, 0x2BB, 0x2BC, 0x2BD, 0x2CB, 0x2BE, 0x2BF };
-	//IN: Aa Áá  Àà  Ââ  Ǎǎ  Ăă  Ãã  Ảả  Ȧȧ  Ạạ  Ää  Åå  Ḁḁ  Āā  Ąą  
-	//OUT: ᶏ  Ⱥⱥ  Ȁȁ  Ấấ  Ầầ  Ẫẫ  Ẩẩ  Ậậ  Ắắ  Ằằ  Ẵẵ  Ẳẳ  Ặặ  Ǻǻ  Ǡǡ  Ǟǟ  Ȁȁ  Ȃȃ
-	public static Character[] AList = {
-			0x41, 0x61, 0xC1, 0xE1, 0xC0, 0xE0, 0xC2, 0xE2, 0x1CD, 
-			0x1CE, 0x102, 0x103, 0xC3, 0xE3, 0x1EA2, 0x1EA3, 0x226, 
-			0x227, 0x1EA0, 0x1EA1, 0xC4, 0xE4, 0xC5, 0xE5, 0x1E00, 
-			0x1E01, 0x100, 0x101, 0x104, 0x105 };
-	
-	static{
-
-		wildCardCharMap.put("", apostrophes);		
-		wildCardCharMap.put("A", AList);
-		
-		//IN: Bb Ḃḃ  Ḅḅ  Ḇḇ  Ɓɓ  ʙ  Bb 
-		//OUT: Ƃƃ  ᵬ  ᶀ  ʙ  Bb  ȸ Ƀƀ  
-		Character[] BList = {
-				0x42, 0x62, 0x1E02, 0x1E03, 0x1E04, 0x1E05, 0x1E06, 
-				0x1E07, 0x181, 0x253, 0x299, 0xFF22, 0xFF42,
-		};
-		wildCardCharMap.put("B", BList);
-		
-		//Ćć  Ĉĉ  Čč  Ċċ  C̄c̄  Ç(ç problem with this)  Ḉḉ  Ȼȼ  Ƈƈ  ɕ  ᴄ  Cc
-		Character[] CList = {
-				0x43, 0x63, 0x106, 0x107, 0x108, 0x109, 0x10C, 0x10D, 
-				0x10A, 0x10B, 0x43, 0xC7, 0xE7, 0x1E08, 0x1E09, 0x23B, 
-				0x23C, 0x187, 0x188, 0x255, 0x1D04, 0xFF23, 0xFF43
-		};
-		wildCardCharMap.put("C", CList);
-		
-		//IN: Dd Ďď  Ḋḋ  Ḑḑ  Ḍḍ  Ḓḓ  Ḏḏ  Dd  
-		//OUT: Đđ  D̦d̦  Ɖɖ  Ɗɗ  Ƌƌ  ᵭ  ᶁ  ᶑ  ȡ  ᴅ
-		Character[] DList = {
-				0x44, 0x64, 0x10E, 0x10F, 0x1E0A, 0x1E0B, 0x1E10, 
-				0x1E11, 0x1E0C, 0x1E0D, 0x1E12, 0x1E13, 0x1E0E, 
-				0x1E0F, 0xFF24, 0xFF44
-		};
-		wildCardCharMap.put("D", DList);
-
-		//IN: Ee Éé  Èè  Êê  Ḙḙ  Ěě  Ĕĕ  Ẽẽ  Ḛḛ  Ẻẻ  Ėė  Ëë  Ēē  Ȩȩ  Ęę  Ȅȅ  Ếế  Ềề  Ễễ  Ểể  Ḝḝ  Ḗḗ  Ḕḕ  Ȇȇ  Ẹẹ  Ệệ ᴇ  Ee  
-		//OUT: Ææ  Ǽǽ  Ǣǣ  Œœ ᶒ  Ɇɇ
-		Character[] EList = {
-				0x45, 0x65, 0xC9, 0xE9, 0xC8, 0xE8, 0xCA, 0xEA, 
-				0x1E18, 0x1E19, 0x11A, 0x11B, 0x114, 0x115, 
-				0x1EBC, 0x1EBD, 0x1E1A, 0x1E1B, 0x1EBA, 0x1EBB, 
-				0x116, 0x117, 0xCB, 0xEB, 0x112, 0x113, 0x228, 
-				0x229, 0x118, 0x119, 0x204, 0x205, 0x1EBE, 0x1EBF,
-				0x1EC0, 0x1EC1, 0x1EC4, 0x1EC5, 0x1EC2, 0x1EC3,
-				0x1E1C, 0x1E1D, 0x1E16, 0x1E17, 0x1E14, 0x1E15, 
-				0x206, 0x207, 0x1EB8, 0x1EB9, 0x1EC6, 0x1EC7, 
-				0x1D07, 0xFF25, 0xFF45
-		};
-		wildCardCharMap.put("E", EList);
-		
-		//Ii Íí  Ìì  Ĭĭ  Îî  Ǐǐ  Ïï  Ḯḯ  Ĩĩ  Įį  Īī  Ỉỉ  Ȉȉ  Ȋȋ  Ịị  Ḭḭ
-		Character[] IList = {
-				0x49, 0x69, 0xCD, 0xED, 0xCC, 0xEC, 0x12C, 0x12D, 0xCE, 
-				0xEE, 0x1CF, 0x1D0, 0xCF, 0xEF, 0x1E2E, 0x1E2F, 0x128, 
-				0x129, 0x12E, 0x12F, 0x12A, 0x12B, 0x1EC8, 0x1EC9, 0x208, 
-				0x209, 0x20A, 0x20B, 0x1ECA, 0x1ECB, 0x1E2C, 0x1E2D 
-		};
-		wildCardCharMap.put("I", IList);
-		
-		//IN: Gg Ǵǵ  Ğğ  Ĝĝ  Ǧǧ  Ġġ  Ģģ  Ḡḡ  Ǥǥ  Gg 
-		//OUT: Ɠɠ  ᶃ  ɢ 
-		Character[] GList = {
-				0x47, 0x67, 0x1F4, 0x1F5, 0x11E, 0x11F, 0x11C, 0x11D, 
-				0x1E6, 0x1E7, 0x120, 0x121, 0x122, 0x123, 0x1E20, 0x1E21, 
-				0x1E4, 0x1E5, 0xFF27, 0xFF47
-		};
-		wildCardCharMap.put("G", GList);
-		
-		//Nn Ńń  Ǹǹ  Ňň  Ññ  Ṅṅ  Ņņ  Ṇṇ  Ṋṋ  Ṉṉ
-		Character[] NList = {
-				0x4E, 0x6E, 0x143, 0x144, 0x1F8, 0x1F9, 0x147, 0x148, 
-				0xD1, 0xF1, 0x1E44, 0x1E45, 0x145, 0x146, 0x1E46, 
-				0x1E47, 0x1E4A, 0x1E4B, 0x1E48, 0x1E49
-		};
-		wildCardCharMap.put("N", NList);
-		
-		//H h Ĥ ĥ Ȟ ȟ Ḧ ḧ Ḣ ḣ Ḩ ḩ Ḥ ḥ Ḫ ḫ H ̱ ẖ Ħ ħ Ⱨ ⱨ
-		Character[] HList = {
-				0x48, 0x68, 0x124, 0x125, 0x21E, 0x21F, 0x1E26, 0x1E27, 
-				0x1E22, 0x1E23, 0x1E28, 0x1E29, 0x1E24, 0x1E25, 0x1E2A, 
-				0x1E2B, 0x48, 0x1E96, 0x126, 0x127, 0x2C67, 0x2C68
-		};
-		wildCardCharMap.put("H", HList);
-		
-		//Oo  Óó  Òò  Ŏŏ  Ôô  Ốố  Ồồ  Ỗỗ  Ổổ  Ǒǒ  Öö  Ȫȫ  Őő  Õõ  Ṍṍ  Ṏṏ  Ȭȭ  Ȯȯ  Ȱȱ  Øø  Ǿǿ  Ǫǫ  Ǭǭ  Ōō  Ṓṓ  Ṑṑ  Ỏỏ  Ȍȍ  Ȏȏ  Ơơ  Ớớ  Ờờ  Ỡỡ  Ởở  Ợợ  Ọọ  Ộộ
-		Character[] OLIST = {
-				0x4F, 0x6F, 0xD3, 0xF3, 0xD2, 0xF2, 0x14E, 0x14F, 0xD4, 
-				0xF4, 0x1ED0, 0x1ED1, 0x1ED2, 0x1ED3, 0x1ED6, 0x1ED7, 
-				0x1ED4, 0x1ED5, 0x1D1, 0x1D2, 0xD6, 0xF6, 0x22A, 0x22B, 
-				0x150, 0x151, 0xD5, 0xF5, 0x1E4C, 0x1E4D, 0x1E4E, 0x1E4F, 
-				0x22C, 0x22D, 0x22E, 0x22F, 0x230, 0x231, 0xD8, 0xF8, 0x1FE, 
-				0x1FF, 0x1EA, 0x1EB, 0x1EC, 0x1ED, 0x14C, 0x14D, 0x1E52, 
-				0x1E53, 0x1E50, 0x1E51, 0x1ECE, 0x1ECF, 0x20C, 0x20D, 
-				0x20E, 0x20F, 0x1A0, 0x1A1, 0x1EDA, 0x1EDB, 0x1EDC, 0x1EDD, 
-				0x1EE0, 0x1EE1, 0x1EDE, 0x1EDF, 0x1EE2, 0x1EE3, 0x1ECC, 
-				0x1ECD, 0x1ED8, 0x1ED9
-		};
-		wildCardCharMap.put("O", OLIST);
-		
-		Character[] RList = {
-				0x52, 0x72, 0x154, 0x155, 0x158, 0x159, 0x1E58, 0x1E59, 
-				0x156, 0x157, 0x210, 0x211, 0x212, 0x213, 0x1E5A, 0x1E5B, 
-				0x1E5C, 0x1E5D, 0x1E5E, 0x1E5F, 0x27C, 0x27E, 0x280, 0xFF32, 0xFF52
-		};
-		wildCardCharMap.put("R", RList);
-		
-		
-		//IN: Ss Śś  Ṥṥ  Ŝŝ  Šš  Ṧṧ  Ṡṡẛ  Şş  Ṣṣ  Ṩṩ  Șș  S̩̩  
-		//OUT: ᵴ  ᶊ  ʂ  ȿ  ꜱ  Ss s
-		Character[] SList = {
-				0x53, 0x73, 0x15A, 0x15B, 0x1E64, 0x1E65, 0x15C, 0x15D, 
-				0x160, 0x161, 0x1E66, 0x1E67, 0x1E60, 0x1E61, 0x15E, 0x15F, 
-				0x1E62, 0x1E63, 0x1E68, 0x1E69, 0x218, 0x219, 0x53
-		};
-		wildCardCharMap.put("S", SList);
-		
-		
-		//IN: Tt Ťť  Ṫṫ  Ţţ  Ṭṭ  Țț  Ṱṱ  Ṯṯ Tt
-		//OUT: Ŧŧ  Ⱦⱦ  Ƭƭ  Ʈʈ  T̈ẗ  ᵵ  ƫ  ȶ  ᶙ  ᴛ
-		Character[] TList = {
-				0x54, 0x74, 0x164, 0x165, 0x1E6A, 0x1E6B, 0x162, 0x163, 
-				0x1E6C, 0x1E6D, 0x21A, 0x21B, 0x1E70, 0x1E71, 0x1E6E, 
-				0x1E6F, 0xFF34, 0xFF54
-		};
-		wildCardCharMap.put("T", TList);
-		
-		//IN: Uu Úú  Ùù  Ŭŭ  Ûû  Ǔǔ  Ůů  Üü  Ǘǘ  Ǜǜ  Ǚǚ  Ǖǖ  Űű  Ũũ  Ṹṹ  Ųų  Ūū  
-		//OUT: Ṻṻ  Ủủ  Ȕȕ  Ȗȗ  Ưư  Ứứ  Ừừ  Ữữ  Ửử  Ựự  Ụụ  Ṳṳ  Ṷṷ  Ṵṵ  Ʉʉ  ᵾ  ᶙ  ᴜ  Uu
-		Character[] UList ={
-				0x55, 0x75, 0xDA, 0xFA, 0xD9, 0xF9, 0x16C, 0x16D, 0xDB, 0xFB, 0x1D3, 
-				0x1D4, 0x16E, 0x16F, 0xDC, 0xFC, 0x1D7, 0x1D8, 0x1DB, 0x1DC, 0x1D9, 
-				0x1DA, 0x1D5, 0x1D6, 0x170, 0x171, 0x168, 0x169, 0x1E78, 0x1E79, 
-				0x172, 0x173, 0x16A, 0x16B	
-		};
-		wildCardCharMap.put("U", UList);
-		
-		Character[] VList = {
-				0x1E7C, 0x1E7D, 0x1E7E, 0x1E7F, 0x1B2, 
-				0x28B, 0x1D20, 0xFF36, 0xFF56
-		};
-		wildCardCharMap.put("V", VList);
-		
-		//IN: Zz Źź  Ẑẑ  Žž  Żż  Ẓẓ  Ẕẕ  Ƶƶ  Ȥȥ  
-		//OUT: Ⱬⱬ  ᵶ  ᶎ  ʐ  ʑ  ɀ  ᴢ  Zz
-		Character[] ZList = {
-				0x5A, 0x7A, 0x179, 0x17A, 0x1E90, 0x1E91, 0x17D, 
-				0x17E, 0x17B, 0x17C, 0x1E92, 0x1E93, 0x1E94, 
-				0x1E95, 0x1B5, 0x1B6, 0x1D22, 0xFF3A, 0xFF5A
-		};
-		wildCardCharMap.put("Z", ZList);
+    /**
+     * Returns the given string normalized by the Arabic transliteration rules only.
+     * 
+     * @see <a href="https://it-dev.mpiwg-berlin.mpg.de/tracs/OpenMind3/wiki/normalize_arabic_translit">normalize_arabic_translit (OpenMind3 wiki)</a>
+     * 
+     * @param w the string to normalize; handed unchanged to ArabicTranslitNormalizer.normalize
+     * @return whatever ArabicTranslitNormalizer.normalize returns for w
+     */
+	public static String normalizeArabicTranslit(String w) {
+	    return ArabicTranslitNormalizer.normalize(w);		
 	}
 	
-	public static String normalize(String w) {
-	    return ArabicTranslitNormalizer.normalize(w);
-	}
-	
-	public static String old_normalize(String w){
-		if(StringUtils.isEmpty(w))
-			return w;
-		
-		w = w.toLowerCase();
-		/*
-		 * Replacing combination of vowels
-		 */
-		for(String key : wildCardStringMap.keySet()){
-			List<String> list = wildCardStringMap.get(key);
-			for(String term : list){
-				w = w.replace(term, key);
-			}
-		}
-		
-		for(String key : wildCardCharMap.keySet()){
-			Character[] list = wildCardCharMap.get(key);
-			for(int i=0; i< list.length; i++){
-				w = w.replace(list[i] + "", key);
-			}
-		}
-		return w.toLowerCase();
-	}
-	
-	public static String normalizedToCompare(String s1){
-    	s1 = s1.replace("#", "");
-    	s1 = s1.replace("-", "");
-    	s1 = s1.replace("(", "");
-    	s1 = s1.replace(")", "");
-    	s1 = s1.replace("[", "");
-    	s1 = s1.replace("]", "");
-    	s1 = s1.replace("_", "");
-        
-    	return s1;
-	}
-	
-	public static void main(String[] args){
-		String s = NormalizerUtils.normalize("ṯ");
-		System.out.println(s);
+	public static String normalizeArabic(String w) { // mark removal only: delegates to ArabicNormalizer.normalize
+		return ArabicNormalizer.normalize(w);
 	}
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/java/org/mpi/openmind/repository/utils/OldNormalizerUtils.java	Thu Feb 02 17:58:52 2017 +0100
@@ -0,0 +1,291 @@
+package org.mpi.openmind.repository.utils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+
+public class OldNormalizerUtils {
+
+	public static Map<String, List<String>> wildCardStringMap = new HashMap<String, List<String>>(); // replacement key -> terms that old_normalize() rewrites to that key
+	
+	static{
+		List<String> list;
+		StringBuilder sb = new StringBuilder(); // only used to build the first entry below
+		
+		list = new ArrayList<String>();
+		Character c = 0x1E6F;
+		sb.append(c);
+		list.add(sb.toString());//ṯ
+		list.add("th");
+		wildCardStringMap.put("T", list);
+		
+		list = new ArrayList<String>();
+		c = 0x1E2b;
+		list.add(c + "");//ḫ
+		list.add("kh");
+		wildCardStringMap.put("H", list);
+		
+		list = new ArrayList<String>();
+		c = 0x1E0f;
+		list.add(c + "");//ḏ
+		list.add("dh");
+		wildCardStringMap.put("D", list);
+		
+		list = new ArrayList<String>();
+		c = 0x0161;
+		list.add(c + "");//š
+		list.add("sh");
+		wildCardStringMap.put("S", list);
+		
+		list = new ArrayList<String>();
+		c = 0x0121;
+		list.add(c + "");//ġ
+		list.add("gh");
+		wildCardStringMap.put("G", list);
+		
+		list = new ArrayList<String>();
+		c = 0x1E97;
+		list.add("a" + c + " ");//aẗSPACE
+		list.add("at ");
+		list.add("ah ");
+		list.add("a ");
+		wildCardStringMap.put("A ", list); // key and terms all end in a space, so the space survives the replacement
+		
+		list = new ArrayList<String>();
+		c = 0x1ef3;
+		list.add(c + "");//ỳ
+		c = 0x00E1;
+		list.add(c + "");//á
+		c = 0x0101;
+		list.add(c + "");//ā
+		c = 0x00E0;
+		list.add(c + "");//à
+		/*
+		//Chantal list for A
+		c = 0x0065;
+		list.add(c + "");//e
+		c = 0x0101;
+		list.add(c + "");//ā
+		c = 0x00E2;
+		list.add(c + "");//â
+		*/
+		wildCardStringMap.put("A", list);
+		
+		/*
+		list = new ArrayList<String>();
+		c = 0x0062;
+		list.add(c + "");//b
+		c = 0x0070;
+		list.add(c + "");//p
+		wildCardStringMap.put("B", list);
+		*/
+	}
+	
+	public static Map<String, Character[]> wildCardCharMap = new HashMap<String, Character[]>(); // replacement key -> single characters that old_normalize() rewrites to that key
+
+	// " ` ′ ‘ ’ ‛ ' ʻ ʼ ʽ ˋ ʾ ʿ
+	public static Character[] apostrophes = { // also read by RomanizationLoC.apostrophesNormalization
+			0x22, 0x60, 0x2032, 0x2018, 0x2019, 0x201B, 0x27, 0x2BB, 0x2BC, 0x2BD, 0x2CB, 0x2BE, 0x2BF };
+	//IN (listed in AList): Aa Áá  Àà  Ââ  Ǎǎ  Ăă  Ãã  Ảả  Ȧȧ  Ạạ  Ää  Åå  Ḁḁ  Āā  Ąą  
+	//OUT (not listed): ᶏ  Ⱥⱥ  Ȁȁ  Ấấ  Ầầ  Ẫẫ  Ẩẩ  Ậậ  Ắắ  Ằằ  Ẵẵ  Ẳẳ  Ặặ  Ǻǻ  Ǡǡ  Ǟǟ  Ȁȁ  Ȃȃ
+	public static Character[] AList = { // also read by RomanizationLoC.aNormalization
+			0x41, 0x61, 0xC1, 0xE1, 0xC0, 0xE0, 0xC2, 0xE2, 0x1CD, 
+			0x1CE, 0x102, 0x103, 0xC3, 0xE3, 0x1EA2, 0x1EA3, 0x226, 
+			0x227, 0x1EA0, 0x1EA1, 0xC4, 0xE4, 0xC5, 0xE5, 0x1E00, 
+			0x1E01, 0x100, 0x101, 0x104, 0x105 };
+	
+	static{
+
+		wildCardCharMap.put("", apostrophes);		// empty key: old_normalize() deletes these characters
+		wildCardCharMap.put("A", AList);
+		
+		//IN: Bb Ḃḃ  Ḅḅ  Ḇḇ  Ɓɓ  ʙ  Bb 
+		//OUT: Ƃƃ  ᵬ  ᶀ  ʙ  Bb  ȸ Ƀƀ  
+		Character[] BList = {
+				0x42, 0x62, 0x1E02, 0x1E03, 0x1E04, 0x1E05, 0x1E06, 
+				0x1E07, 0x181, 0x253, 0x299, 0xFF22, 0xFF42,
+		};
+		wildCardCharMap.put("B", BList);
+		
+		//Ćć  Ĉĉ  Čč  Ċċ  C̄c̄  Ç(ç problem with this)  Ḉḉ  Ȼȼ  Ƈƈ  ɕ  ᴄ  Cc
+		Character[] CList = {
+				0x43, 0x63, 0x106, 0x107, 0x108, 0x109, 0x10C, 0x10D, 
+				0x10A, 0x10B, 0x43, 0xC7, 0xE7, 0x1E08, 0x1E09, 0x23B, 
+				0x23C, 0x187, 0x188, 0x255, 0x1D04, 0xFF23, 0xFF43
+		};
+		wildCardCharMap.put("C", CList);
+		
+		//IN: Dd Ďď  Ḋḋ  Ḑḑ  Ḍḍ  Ḓḓ  Ḏḏ  Dd  
+		//OUT: Đđ  D̦d̦  Ɖɖ  Ɗɗ  Ƌƌ  ᵭ  ᶁ  ᶑ  ȡ  ᴅ
+		Character[] DList = {
+				0x44, 0x64, 0x10E, 0x10F, 0x1E0A, 0x1E0B, 0x1E10, 
+				0x1E11, 0x1E0C, 0x1E0D, 0x1E12, 0x1E13, 0x1E0E, 
+				0x1E0F, 0xFF24, 0xFF44
+		};
+		wildCardCharMap.put("D", DList);
+
+		//IN: Ee Éé  Èè  Êê  Ḙḙ  Ěě  Ĕĕ  Ẽẽ  Ḛḛ  Ẻẻ  Ėė  Ëë  Ēē  Ȩȩ  Ęę  Ȅȅ  Ếế  Ềề  Ễễ  Ểể  Ḝḝ  Ḗḗ  Ḕḕ  Ȇȇ  Ẹẹ  Ệệ ᴇ  Ee  
+		//OUT: Ææ  Ǽǽ  Ǣǣ  Œœ ᶒ  Ɇɇ
+		Character[] EList = {
+				0x45, 0x65, 0xC9, 0xE9, 0xC8, 0xE8, 0xCA, 0xEA, 
+				0x1E18, 0x1E19, 0x11A, 0x11B, 0x114, 0x115, 
+				0x1EBC, 0x1EBD, 0x1E1A, 0x1E1B, 0x1EBA, 0x1EBB, 
+				0x116, 0x117, 0xCB, 0xEB, 0x112, 0x113, 0x228, 
+				0x229, 0x118, 0x119, 0x204, 0x205, 0x1EBE, 0x1EBF,
+				0x1EC0, 0x1EC1, 0x1EC4, 0x1EC5, 0x1EC2, 0x1EC3,
+				0x1E1C, 0x1E1D, 0x1E16, 0x1E17, 0x1E14, 0x1E15, 
+				0x206, 0x207, 0x1EB8, 0x1EB9, 0x1EC6, 0x1EC7, 
+				0x1D07, 0xFF25, 0xFF45
+		};
+		wildCardCharMap.put("E", EList);
+		
+		//Ii Íí  Ìì  Ĭĭ  Îî  Ǐǐ  Ïï  Ḯḯ  Ĩĩ  Įį  Īī  Ỉỉ  Ȉȉ  Ȋȋ  Ịị  Ḭḭ
+		Character[] IList = {
+				0x49, 0x69, 0xCD, 0xED, 0xCC, 0xEC, 0x12C, 0x12D, 0xCE, 
+				0xEE, 0x1CF, 0x1D0, 0xCF, 0xEF, 0x1E2E, 0x1E2F, 0x128, 
+				0x129, 0x12E, 0x12F, 0x12A, 0x12B, 0x1EC8, 0x1EC9, 0x208, 
+				0x209, 0x20A, 0x20B, 0x1ECA, 0x1ECB, 0x1E2C, 0x1E2D 
+		};
+		wildCardCharMap.put("I", IList);
+		
+		//IN: Gg Ǵǵ  Ğğ  Ĝĝ  Ǧǧ  Ġġ  Ģģ  Ḡḡ  Ǥǥ  Gg 
+		//OUT: Ɠɠ  ᶃ  ɢ 
+		Character[] GList = {
+				0x47, 0x67, 0x1F4, 0x1F5, 0x11E, 0x11F, 0x11C, 0x11D, 
+				0x1E6, 0x1E7, 0x120, 0x121, 0x122, 0x123, 0x1E20, 0x1E21, 
+				0x1E4, 0x1E5, 0xFF27, 0xFF47
+		};
+		wildCardCharMap.put("G", GList);
+		
+		//Nn Ńń  Ǹǹ  Ňň  Ññ  Ṅṅ  Ņņ  Ṇṇ  Ṋṋ  Ṉṉ
+		Character[] NList = {
+				0x4E, 0x6E, 0x143, 0x144, 0x1F8, 0x1F9, 0x147, 0x148, 
+				0xD1, 0xF1, 0x1E44, 0x1E45, 0x145, 0x146, 0x1E46, 
+				0x1E47, 0x1E4A, 0x1E4B, 0x1E48, 0x1E49
+		};
+		wildCardCharMap.put("N", NList);
+		
+		//H h Ĥ ĥ Ȟ ȟ Ḧ ḧ Ḣ ḣ Ḩ ḩ Ḥ ḥ Ḫ ḫ H ̱ ẖ Ħ ħ Ⱨ ⱨ
+		Character[] HList = {
+				0x48, 0x68, 0x124, 0x125, 0x21E, 0x21F, 0x1E26, 0x1E27, 
+				0x1E22, 0x1E23, 0x1E28, 0x1E29, 0x1E24, 0x1E25, 0x1E2A, 
+				0x1E2B, 0x48, 0x1E96, 0x126, 0x127, 0x2C67, 0x2C68
+		};
+		wildCardCharMap.put("H", HList);
+		
+		//Oo  Óó  Òò  Ŏŏ  Ôô  Ốố  Ồồ  Ỗỗ  Ổổ  Ǒǒ  Öö  Ȫȫ  Őő  Õõ  Ṍṍ  Ṏṏ  Ȭȭ  Ȯȯ  Ȱȱ  Øø  Ǿǿ  Ǫǫ  Ǭǭ  Ōō  Ṓṓ  Ṑṑ  Ỏỏ  Ȍȍ  Ȏȏ  Ơơ  Ớớ  Ờờ  Ỡỡ  Ởở  Ợợ  Ọọ  Ộộ
+		Character[] OLIST = {
+				0x4F, 0x6F, 0xD3, 0xF3, 0xD2, 0xF2, 0x14E, 0x14F, 0xD4, 
+				0xF4, 0x1ED0, 0x1ED1, 0x1ED2, 0x1ED3, 0x1ED6, 0x1ED7, 
+				0x1ED4, 0x1ED5, 0x1D1, 0x1D2, 0xD6, 0xF6, 0x22A, 0x22B, 
+				0x150, 0x151, 0xD5, 0xF5, 0x1E4C, 0x1E4D, 0x1E4E, 0x1E4F, 
+				0x22C, 0x22D, 0x22E, 0x22F, 0x230, 0x231, 0xD8, 0xF8, 0x1FE, 
+				0x1FF, 0x1EA, 0x1EB, 0x1EC, 0x1ED, 0x14C, 0x14D, 0x1E52, 
+				0x1E53, 0x1E50, 0x1E51, 0x1ECE, 0x1ECF, 0x20C, 0x20D, 
+				0x20E, 0x20F, 0x1A0, 0x1A1, 0x1EDA, 0x1EDB, 0x1EDC, 0x1EDD, 
+				0x1EE0, 0x1EE1, 0x1EDE, 0x1EDF, 0x1EE2, 0x1EE3, 0x1ECC, 
+				0x1ECD, 0x1ED8, 0x1ED9
+		};
+		wildCardCharMap.put("O", OLIST);
+		
+		Character[] RList = { // starts with plain R/r (0x52, 0x72), followed by variants
+				0x52, 0x72, 0x154, 0x155, 0x158, 0x159, 0x1E58, 0x1E59, 
+				0x156, 0x157, 0x210, 0x211, 0x212, 0x213, 0x1E5A, 0x1E5B, 
+				0x1E5C, 0x1E5D, 0x1E5E, 0x1E5F, 0x27C, 0x27E, 0x280, 0xFF32, 0xFF52
+		};
+		wildCardCharMap.put("R", RList);
+		
+		
+		//IN: Ss Śś  Ṥṥ  Ŝŝ  Šš  Ṧṧ  Ṡṡẛ  Şş  Ṣṣ  Ṩṩ  Șș  S̩̩  
+		//OUT: ᵴ  ᶊ  ʂ  ȿ  ꜱ  Ss s
+		Character[] SList = {
+				0x53, 0x73, 0x15A, 0x15B, 0x1E64, 0x1E65, 0x15C, 0x15D, 
+				0x160, 0x161, 0x1E66, 0x1E67, 0x1E60, 0x1E61, 0x15E, 0x15F, 
+				0x1E62, 0x1E63, 0x1E68, 0x1E69, 0x218, 0x219, 0x53
+		};
+		wildCardCharMap.put("S", SList);
+		
+		
+		//IN: Tt Ťť  Ṫṫ  Ţţ  Ṭṭ  Țț  Ṱṱ  Ṯṯ Tt
+		//OUT: Ŧŧ  Ⱦⱦ  Ƭƭ  Ʈʈ  T̈ẗ  ᵵ  ƫ  ȶ  ᶙ  ᴛ
+		Character[] TList = {
+				0x54, 0x74, 0x164, 0x165, 0x1E6A, 0x1E6B, 0x162, 0x163, 
+				0x1E6C, 0x1E6D, 0x21A, 0x21B, 0x1E70, 0x1E71, 0x1E6E, 
+				0x1E6F, 0xFF34, 0xFF54
+		};
+		wildCardCharMap.put("T", TList);
+		
+		//IN: Uu Úú  Ùù  Ŭŭ  Ûû  Ǔǔ  Ůů  Üü  Ǘǘ  Ǜǜ  Ǚǚ  Ǖǖ  Űű  Ũũ  Ṹṹ  Ųų  Ūū  
+		//OUT: Ṻṻ  Ủủ  Ȕȕ  Ȗȗ  Ưư  Ứứ  Ừừ  Ữữ  Ửử  Ựự  Ụụ  Ṳṳ  Ṷṷ  Ṵṵ  Ʉʉ  ᵾ  ᶙ  ᴜ  Uu
+		Character[] UList ={
+				0x55, 0x75, 0xDA, 0xFA, 0xD9, 0xF9, 0x16C, 0x16D, 0xDB, 0xFB, 0x1D3, 
+				0x1D4, 0x16E, 0x16F, 0xDC, 0xFC, 0x1D7, 0x1D8, 0x1DB, 0x1DC, 0x1D9, 
+				0x1DA, 0x1D5, 0x1D6, 0x170, 0x171, 0x168, 0x169, 0x1E78, 0x1E79, 
+				0x172, 0x173, 0x16A, 0x16B	
+		};
+		wildCardCharMap.put("U", UList);
+		
+		Character[] VList = { // NOTE(review): plain V/v (0x56, 0x76) are not in this list, unlike the other letters - confirm intent
+				0x1E7C, 0x1E7D, 0x1E7E, 0x1E7F, 0x1B2, 
+				0x28B, 0x1D20, 0xFF36, 0xFF56
+		};
+		wildCardCharMap.put("V", VList);
+		
+		//IN: Zz Źź  Ẑẑ  Žž  Żż  Ẓẓ  Ẕẕ  Ƶƶ  Ȥȥ  
+		//OUT: Ⱬⱬ  ᵶ  ᶎ  ʐ  ʑ  ɀ  ᴢ  Zz
+		Character[] ZList = {
+				0x5A, 0x7A, 0x179, 0x17A, 0x1E90, 0x1E91, 0x17D, 
+				0x17E, 0x17B, 0x17C, 0x1E92, 0x1E93, 0x1E94, 
+				0x1E95, 0x1B5, 0x1B6, 0x1D22, 0xFF3A, 0xFF5A
+		};
+		wildCardCharMap.put("Z", ZList);
+	}
+	
+	public static String normalize(String w) { // transliteration rules only; unlike NormalizerUtils.normalize there is no ArabicNormalizer pass
+	    return ArabicTranslitNormalizer.normalize(w);
+	}
+	
+	public static String old_normalize(String w){ // table-driven normalization using wildCardStringMap and wildCardCharMap
+		if(StringUtils.isEmpty(w))
+			return w;
+		
+		w = w.toLowerCase();
+		/*
+		 * Pass 1: replace every listed multi-character term (e.g. "th") by its map key (e.g. "T").
+		 */
+		for(String key : wildCardStringMap.keySet()){ // NOTE(review): HashMap key order is unspecified, so the order of replacements is not fixed
+			List<String> list = wildCardStringMap.get(key);
+			for(String term : list){
+				w = w.replace(term, key);
+			}
+		}
+		
+		for(String key : wildCardCharMap.keySet()){ // pass 2: replace every listed single character by its key ("" deletes it)
+			Character[] list = wildCardCharMap.get(key);
+			for(int i=0; i< list.length; i++){
+				w = w.replace(list[i] + "", key);
+			}
+		}
+		return w.toLowerCase(); // the keys are upper-case, so the result is lower-cased again
+	}
+	
+	public static String normalizedToCompare(String s1){ // removes every # - ( ) [ ] _ from s1; no null check, so null fails at the first replace
+    	s1 = s1.replace("#", "");
+    	s1 = s1.replace("-", "");
+    	s1 = s1.replace("(", "");
+    	s1 = s1.replace(")", "");
+    	s1 = s1.replace("[", "");
+    	s1 = s1.replace("]", "");
+    	s1 = s1.replace("_", "");
+        
+    	return s1;
+	}
+	
+	public static void main(String[] args){ // manual check: prints the normalized form of one sample character; args is unused
+		String s = OldNormalizerUtils.normalize("ṯ");
+		System.out.println(s);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/java/org/mpi/openmind/repository/utils/ReplacementPattern.java	Thu Feb 02 17:58:52 2017 +0100
@@ -0,0 +1,53 @@
+/**
+ * 
+ */
+package org.mpi.openmind.repository.utils;
+
+import java.util.regex.Pattern;
+
+/**
+ * Pairs a compiled regular expression with the text that replaces its matches.
+ * @author casties
+ */
+public class ReplacementPattern {
+	public Pattern pattern; // regex whose matches are to be replaced
+	public String replacement; // text put in place of each match
+	
+	/**
+	 * @param replacement the replacement text (note: first argument, before the pattern)
+	 * @param pattern the compiled regex whose matches are replaced
+	 */
+	public ReplacementPattern(String replacement, Pattern pattern) {
+		super();
+		this.pattern = pattern;
+		this.replacement = replacement;
+	}
+
+	/**
+	 * @return the compiled regex of this rule
+	 */
+	public Pattern getPattern() {
+		return pattern;
+	}
+
+	/**
+	 * @param pattern the compiled regex to use from now on
+	 */
+	public void setPattern(Pattern pattern) {
+		this.pattern = pattern;
+	}
+
+	/**
+	 * @return the replacement text; ArabicTranslitNormalizer passes it to Matcher.replaceAll
+	 */
+	public String getReplacement() {
+		return replacement;
+	}
+
+	/**
+	 * @param replacement the replacement text to use from now on
+	 */
+	public void setReplacement(String replacement) {
+		this.replacement = replacement;
+	}
+}
--- a/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/RomanizationLoC.java	Thu Feb 02 17:58:52 2017 +0100
@@ -58,7 +58,7 @@
 	public static char APOSTROPHE = 0x27;
 	public static String apostrophesNormalization(String text){
 		String result = text;
-		for(Character apostrophe : NormalizerUtils.apostrophes){
+		for(Character apostrophe : OldNormalizerUtils.apostrophes){
 			result = result.replace(apostrophe, APOSTROPHE);
 		}
 		return result;
@@ -67,7 +67,7 @@
 	public static char a = 0x61;
 	public static String aNormalization(String text){
 		String result = text;
-		for(Character item : NormalizerUtils.AList){
+		for(Character item : OldNormalizerUtils.AList){
 			result = result.replace(item, a);
 		}
 		return result;
--- a/src/main/java/org/mpi/openmind/scripts/NormalizeOW.java	Thu Feb 02 11:58:23 2017 +0100
+++ b/src/main/java/org/mpi/openmind/scripts/NormalizeOW.java	Thu Feb 02 17:58:52 2017 +0100
@@ -9,8 +9,7 @@
 import java.util.HashMap;
 import java.util.Map;
 
-import org.mpi.openmind.repository.utils.ArabicNormalizerUtils;
-import org.mpi.openmind.repository.utils.ArabicTranslitNormalizer;
+import org.mpi.openmind.repository.utils.NormalizerUtils;
 
 public class NormalizeOW {
 	public static void execute(String type, String dbUser, String dbPw, boolean modify) {
@@ -64,8 +63,8 @@
 	        String ow = ows[0];
 	        String oldNormalizedOW = ows[1];
 	        String oldNormalizedArabicOW = ows[2];
-	        String normalizedOW = ArabicTranslitNormalizer.normalize(ow);
-	        String normalizedArabicOW = ArabicNormalizerUtils.normalize(ow);
+	        String normalizedOW = NormalizerUtils.normalize(ow);
+	        String normalizedArabicOW = NormalizerUtils.normalizeArabic(ow);
 	        boolean changes = false;
 	        if (normalizedOW != null && !normalizedOW.equals(oldNormalizedOW)) {
 	            System.out.println("normOW changes (#"+cnt+" @"+id+"): old="+oldNormalizedOW+" new="+normalizedOW);