changeset 72:3490a2237118

107: translit normalization should use normalized form for sorting lists Task-Url: https://it-dev.mpiwg-berlin.mpg.de/tracs/ismi/ticket/107
author casties
date Thu, 02 Feb 2017 19:01:43 +0100
parents aeb29e362a67
children ab61cd3ad0e0
files src/main/java/org/mpi/openmind/repository/bo/utils/EntitySortByNormalizedOwnValue.java
diffstat 1 files changed, 26 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/bo/utils/EntitySortByNormalizedOwnValue.java	Thu Feb 02 17:58:52 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/bo/utils/EntitySortByNormalizedOwnValue.java	Thu Feb 02 19:01:43 2017 +0100
@@ -1,18 +1,31 @@
 package org.mpi.openmind.repository.bo.utils;
 
 import java.util.Comparator;
+import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringUtils;
 import org.mpi.openmind.repository.bo.Entity;
 import org.mpi.openmind.repository.utils.TransliterationUtil;
 
-public class EntitySortByNormalizedOwnValue implements Comparator<Entity>{
+public class EntitySortByNormalizedOwnValue implements Comparator<Entity> {
+	
+	protected static Pattern parensPattern = Pattern.compile("#|\\-|\\(|\\)|\\[|\\]|_");
 	
+    /**
+     * Compares two Entities by delegating to {@link #compare0(Entity, Entity)}; implements {@link java.util.Comparator#compare(Object, Object)}.
+     */
     public int compare(Entity o1, Entity o2) {
     	return EntitySortByNormalizedOwnValue.compare0(o1, o2);
-    }
-    
-    public static int compare0(Entity o1, Entity o2){
+    }
+
+    /**
+     * Compare normalized ownValue of two Entities.
+     * 
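+     * @param o1 the first Entity to compare, may be null
+     * @param o2 the second Entity to compare, may be null
+     * @return 0 if both Entities are null; in the general case the result of String#compareTo on the two normalized values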
+     */
+    public static int compare0(Entity o1, Entity o2) {
     	if(o1 == null && o2 == null){
     		return 0;
     	}else if(o1 == null){
@@ -32,26 +45,19 @@
         	}else if(StringUtils.isBlank(s2)){
         		return -1;
         	}else{
-            	s1 = s1.replace("#", "");
-            	s1 = s1.replace("-", "");
-            	s1 = s1.replace("(", "");
-            	s1 = s1.replace(")", "");
-            	s1 = s1.replace("[", "");
-            	s1 = s1.replace("]", "");
-            	s1 = s1.replace("_", "");
-            	
-            	
-            	s2 = s2.replace("#", "");
-                s2 = s2.replace("-", "");
-                s2 = s2.replace("(", "");
-                s2 = s2.replace(")", "");
-                s2 = s2.replace("[", "");
-                s2 = s2.replace("]", "");
-                s2 = s2.replace("_", "");
+        		// trim whitespace
+        		s1 = s1.trim();
+        		s2 = s2.trim();
+        		
+        		// remove '#', '-', parentheses, square brackets and '_'
+            	s1 = parensPattern.matcher(s1).replaceAll("");
+            	s2 = parensPattern.matcher(s2).replaceAll("");
                 
+                // TODO: does this make sense?
                 s1 = TransliterationUtil.getTransliteration(s1);
                 s2 = TransliterationUtil.getTransliteration(s2);
                 
+                // compare
                 int value = s1.compareTo(s2);
                 return value;
         	}