view src/main/java/org/mpi/openmind/scripts/NormalizeOW.java @ 69:bde6212babfd

106: translit normalization should ignore all '-equivalent chars Task-Url: https://it-dev.mpiwg-berlin.mpg.de/tracs/ismi/ticket/106
author casties
date Wed, 01 Feb 2017 19:54:12 +0100
parents ac466a164b61
children aeb29e362a67
line wrap: on
line source

package org.mpi.openmind.scripts;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;

import org.mpi.openmind.repository.utils.ArabicNormalizerUtils;
import org.mpi.openmind.repository.utils.ArabicTranslitNormalizer;

public class NormalizeOW {
	public static void execute(String type, String dbUser, String dbPw, boolean modify) {
		try {
		    if (modify) {
		        System.out.println("Fixing normalized own values for: " + type);
		    } else {
                System.out.println("Showing normalized own values for: " + type);
		    }
			System.out.println("INFO: only the CURRENT_VERSION of the nodes will be affected.");
			Connection conn;

			Class.forName("com.mysql.jdbc.Driver").newInstance();
			String url = "jdbc:mysql://localhost/openmind?characterEncoding=UTF-8";
			conn = DriverManager.getConnection(url, dbUser, dbPw);
			
			Map<Long, String[]> selectedMap = select(conn, type);
			
			System.out.println("Number of nodes=" + selectedMap.size());
			change(conn, selectedMap, modify);
			System.out.println("End");
			
			conn.close();
		} catch (ClassNotFoundException ex) {
			System.err.println(ex.getMessage());
		} catch (IllegalAccessException ex) {
			System.err.println(ex.getMessage());
		} catch (InstantiationException ex) {
			System.err.println(ex.getMessage());
		} catch (SQLException ex) {
			System.err.println(ex.getMessage());
		}

	}
	
	public static void change(Connection conn, Map<Long, String[]> map, boolean modify){
	    String s = new String();
	    int cnt = 0;
	    PreparedStatement st = null;
        if (modify) {
            try {
                st = conn.prepareStatement("UPDATE node SET normalized_own_value = ?, normalized_arabic_own_value = ? WHERE row_id = ?");
            } catch (SQLException e) {
                System.err.println(e);
                return;
            }
        }
	    for(Long id : map.keySet()){
	        cnt += 1;
	        String[] ows = map.get(id);
	        String ow = ows[0];
	        String oldNormalizedOW = ows[1];
	        String oldNormalizedArabicOW = ows[2];
	        String normalizedOW = ArabicTranslitNormalizer.normalize(ow);
	        String normalizedArabicOW = ArabicNormalizerUtils.normalize(ow);
	        boolean changes = false;
	        if (normalizedOW != null && !normalizedOW.equals(oldNormalizedOW)) {
	            System.out.println("normOW changes (#"+cnt+" @"+id+"): old="+oldNormalizedOW+" new="+normalizedOW);
	            changes = true;
	        }
	        if (normalizedArabicOW != null && !normalizedArabicOW.equals(oldNormalizedArabicOW)) {
	            System.out.println("normArabicOW changes (#"+cnt+" @"+id+"): old="+oldNormalizedArabicOW+" new="+normalizedArabicOW);
	            changes = true;
	        }
	        if (modify && changes) {
	            try {
	                st.setString(1, normalizedOW);
	                st.setString(2, normalizedArabicOW);
	                st.setString(3, id.toString());
	                st.executeUpdate();

	            } catch (SQLException e) {
	                System.err.println(s);
	                e.printStackTrace();
	            }
	        }
            //if (cnt > 100) break; // FIXME: testing
	    }
	}
	
	public static Map<Long, String[]> select(Connection conn, String type){
		Map<Long, String[]> map = new HashMap<Long, String[]>();
		String query = "select row_id, own_value, normalized_own_value, normalized_arabic_own_value " +
				"from node " +
				"where system_status = 'CURRENT_VERSION'";
				
		if (type.equals("ATTRIBUTE") || type.equals("ENTITY")) {
			query += " AND node_type = '"+ type +"'";
		}
				
		try {
	      Statement st = conn.createStatement();
	      System.out.println("  fetching rows...");
	      ResultSet rs = st.executeQuery(query);
	      while (rs.next()) {
	        String id = rs.getString("row_id");
	        String[] ows = new String[3];
	        ows[0] = rs.getString("own_value");
	        ows[1] = rs.getString("normalized_own_value");
	        ows[2] = rs.getString("normalized_arabic_own_value");
	        map.put(new Long(id), ows);
	      }
	    } catch (SQLException ex) {
	        ex.printStackTrace();
	        System.err.println(ex.getMessage());
	    }
        return map;
	}

	public static void main(String[] args) {
        int rc = 0;
        if (args.length > 1 && args.length < 4) {
            String user = args[1];
            String pw = (args.length == 3) ? args[2] : null;
            if (args[0].equalsIgnoreCase("fix")) {
                execute("all", user, pw, true);
            } else {
                execute("all", user, pw, false);
            }
        } else {
            System.out.println("Parameter/s not found! Should be: mode(SHOW/FIX), mysql_user, mysql_password");
            System.out.println("  got: "+args.toString() + "("+args.length+")");
            System.exit(1);
        }
        System.exit(rc);
	}

}