Mercurial > hg > openmind
view src/main/java/org/mpi/openmind/scripts/NormalizeOW.java @ 69:bde6212babfd
106: translit normalization should ignore all '-equivalent chars
Task-Url: https://it-dev.mpiwg-berlin.mpg.de/tracs/ismi/ticket/106
author | casties |
---|---|
date | Wed, 01 Feb 2017 19:54:12 +0100 |
parents | ac466a164b61 |
children | aeb29e362a67 |
line wrap: on
line source
package org.mpi.openmind.scripts; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.HashMap; import java.util.Map; import org.mpi.openmind.repository.utils.ArabicNormalizerUtils; import org.mpi.openmind.repository.utils.ArabicTranslitNormalizer; public class NormalizeOW { public static void execute(String type, String dbUser, String dbPw, boolean modify) { try { if (modify) { System.out.println("Fixing normalized own values for: " + type); } else { System.out.println("Showing normalized own values for: " + type); } System.out.println("INFO: only the CURRENT_VERSION of the nodes will be affected."); Connection conn; Class.forName("com.mysql.jdbc.Driver").newInstance(); String url = "jdbc:mysql://localhost/openmind?characterEncoding=UTF-8"; conn = DriverManager.getConnection(url, dbUser, dbPw); Map<Long, String[]> selectedMap = select(conn, type); System.out.println("Number of nodes=" + selectedMap.size()); change(conn, selectedMap, modify); System.out.println("End"); conn.close(); } catch (ClassNotFoundException ex) { System.err.println(ex.getMessage()); } catch (IllegalAccessException ex) { System.err.println(ex.getMessage()); } catch (InstantiationException ex) { System.err.println(ex.getMessage()); } catch (SQLException ex) { System.err.println(ex.getMessage()); } } public static void change(Connection conn, Map<Long, String[]> map, boolean modify){ String s = new String(); int cnt = 0; PreparedStatement st = null; if (modify) { try { st = conn.prepareStatement("UPDATE node SET normalized_own_value = ?, normalized_arabic_own_value = ? WHERE row_id = ?"); } catch (SQLException e) { System.err.println(e); return; } } for(Long id : map.keySet()){ cnt += 1; String[] ows = map.get(id); String ow = ows[0]; String oldNormalizedOW = ows[1]; String oldNormalizedArabicOW = ows[2]; String normalizedOW = ArabicTranslitNormalizer.normalize(ow); String normalizedArabicOW = ArabicNormalizerUtils.normalize(ow); boolean changes = false; if (normalizedOW != null && !normalizedOW.equals(oldNormalizedOW)) { System.out.println("normOW changes (#"+cnt+" @"+id+"): old="+oldNormalizedOW+" new="+normalizedOW); changes = true; } if (normalizedArabicOW != null && !normalizedArabicOW.equals(oldNormalizedArabicOW)) { System.out.println("normArabicOW changes (#"+cnt+" @"+id+"): old="+oldNormalizedArabicOW+" new="+normalizedArabicOW); changes = true; } if (modify && changes) { try { st.setString(1, normalizedOW); st.setString(2, normalizedArabicOW); st.setString(3, id.toString()); st.executeUpdate(); } catch (SQLException e) { System.err.println(s); e.printStackTrace(); } } //if (cnt > 100) break; // FIXME: testing } } public static Map<Long, String[]> select(Connection conn, String type){ Map<Long, String[]> map = new HashMap<Long, String[]>(); String query = "select row_id, own_value, normalized_own_value, normalized_arabic_own_value " + "from node " + "where system_status = 'CURRENT_VERSION'"; if (type.equals("ATTRIBUTE") || type.equals("ENTITY")) { query += " AND node_type = '"+ type +"'"; } try { Statement st = conn.createStatement(); System.out.println(" fetching rows..."); ResultSet rs = st.executeQuery(query); while (rs.next()) { String id = rs.getString("row_id"); String[] ows = new String[3]; ows[0] = rs.getString("own_value"); ows[1] = rs.getString("normalized_own_value"); ows[2] = rs.getString("normalized_arabic_own_value"); map.put(new Long(id), ows); } } catch (SQLException ex) { ex.printStackTrace(); System.err.println(ex.getMessage()); } return map; } public static void main(String[] args) { int rc = 0; if (args.length > 1 && args.length < 4) { String user = args[1]; String pw = (args.length == 3) ? args[2] : null; if (args[0].equalsIgnoreCase("fix")) { execute("all", user, pw, true); } else { execute("all", user, pw, false); } } else { System.out.println("Parameter/s not found! Should be: mode(SHOW/FIX), mysql_user, mysql_password"); System.out.println(" got: "+args.toString() + "("+args.length+")"); System.exit(1); } System.exit(rc); } }