view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/RegularizationManager.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.text.reg;

import java.util.ArrayList;
import java.util.Date;
import java.util.Hashtable;
import java.util.Locale;
import java.util.logging.Logger;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.general.Constants;
import de.mpg.mpiwg.berlin.mpdl.lucene.util.LuceneUtil;
import de.mpg.mpiwg.berlin.mpdl.util.Util;

/**
 * Lazily created singleton that looks up {@link Regularization} entries by their
 * original or normalized form and caches the results in memory.
 *
 * <p>Lookups are delegated to a {@link DBRegularizationHandler} opened on
 * {@code REGULARIZATION_DB_DIR}; every result (including "nothing found") is cached
 * under the key {@code language + "###" + lowerCasedForm}.
 *
 * <p>{@link #getInstance()} is not synchronized. After {@link #end()} the singleton
 * reference is kept, so a later {@link #getInstance()} returns the same manager and
 * does not reopen the databases.
 */
public class RegularizationManager {
  private static RegularizationManager instance;
  private static final String DATA_DIR = Constants.getInstance().getDataDir();
  private static final String REGULARIZATION_DB_DIR = DATA_DIR + "/dataBerkeleyDB/regularization";
  private static final Logger LOGGER = Logger.getLogger(RegularizationManager.class.getName());
  private DBRegularizationHandler dbRegHandler;
  // cache: language + "###" + lower-cased original form -> regularizations
  private Hashtable<String, ArrayList<Regularization>> regsOrig;
  // cache: language + "###" + lower-cased normalized form -> regularizations
  private Hashtable<String, ArrayList<Regularization>> regsNorm;
  private Date beginOfOperation;
  private Date endOfOperation;
  
  /**
   * Returns the shared manager, creating and initializing it on first use.
   *
   * @return the initialized singleton
   * @throws ApplicationException if initialization fails; in that case no instance is
   *         stored, so the next call attempts initialization again
   */
  public static RegularizationManager getInstance() throws ApplicationException {
    if (instance == null) {
      // Publish only after init() succeeded, so a failed init never leaves a
      // half-initialized singleton behind.
      RegularizationManager manager = new RegularizationManager();
      manager.init();
      instance = manager;
    }
    return instance;
  }

  /**
   * Manual smoke test: performs two lookups by normalized form, closes the databases
   * and prints the elapsed time.
   *
   * @param args ignored
   * @throws ApplicationException if initialization, a lookup or closing fails
   */
  public static void main(String[] args) throws ApplicationException {
    RegularizationManager manager = getInstance();
    manager.beginOperation();
    System.out.print("Start ...");
    try {
      // Results are not inspected: a lookup may legitimately find nothing.
      manager.findRegsByNorm("la", "Illiusque");
      manager.findRegsByNorm("la", "Itaque");
    } finally {
      // Close the databases even when a lookup fails.
      manager.end();
    }
    manager.endOperation();
    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(manager.beginOfOperation, manager.endOfOperation);
    System.out.println("End.");
    System.out.println("Needed time: " + elapsedTime + " seconds");
  }

  /**
   * Creates the empty caches, then starts the database handler and opens its databases.
   *
   * @throws ApplicationException if the handler cannot be created, started or opened
   */
  private void init() throws ApplicationException {
    regsOrig = new Hashtable<String, ArrayList<Regularization>>();
    regsNorm = new Hashtable<String, ArrayList<Regularization>>();
    dbRegHandler = new DBRegularizationHandler(REGULARIZATION_DB_DIR);
    dbRegHandler.start();
    dbRegHandler.openDatabases();
    LOGGER.info("Regularization db cache: opened");
  }

  /**
   * Finds the regularizations whose original form matches {@code orig}.
   *
   * @param language language identifier used as part of the lookup key
   * @param orig original form; lower-cased (locale-independently) before the lookup
   * @return the cached or freshly read regularizations; an empty list, never
   *         {@code null}, when nothing is found
   * @throws ApplicationException if the database read fails
   */
  public ArrayList<Regularization> findRegsByOrig(String language, String orig) throws ApplicationException {
    // Locale.ROOT keeps the key independent of the JVM default locale.
    String origLower = orig.toLowerCase(Locale.ROOT);
    String hashKey = language + "###" + origLower;
    ArrayList<Regularization> regs = regsOrig.get(hashKey);
    if (regs == null) {
      regs = dbRegHandler.readRegsByOrig(language, origLower);
      if (regs == null) {
        regs = new ArrayList<Regularization>();
      }
      // Misses are cached as an empty list so they are not read again.
      regsOrig.put(hashKey, regs);
    }
    return regs;
  }
  
  /**
   * Finds the regularizations whose normalized form matches {@code norm}.
   *
   * @param language language identifier used as part of the lookup key
   * @param norm normalized form; lower-cased (locale-independently) before the lookup
   * @return the cached or freshly read regularizations; an empty list, never
   *         {@code null}, when nothing is found
   * @throws ApplicationException if the database read fails
   */
  public ArrayList<Regularization> findRegsByNorm(String language, String norm) throws ApplicationException {
    // Locale.ROOT keeps the key independent of the JVM default locale.
    String normLower = norm.toLowerCase(Locale.ROOT);
    String hashKey = language + "###" + normLower;
    ArrayList<Regularization> regs = regsNorm.get(hashKey);
    // Only a missing entry is a cache miss; a cached empty list is a remembered miss.
    if (regs == null) {
      regs = dbRegHandler.readRegsByNorm(language, normLower);
      if (regs == null) {
        regs = new ArrayList<Regularization>();
      }
      regsNorm.put(hashKey, regs);
    }
    return regs;
  }
  
  /**
   * Collects the original forms of all regularizations whose normalized form is one
   * of the variants that {@link LuceneUtil#getVariantsFromLuceneQuery} extracts from
   * the given query string.
   *
   * @param language language identifier passed to {@link #findRegsByNorm}
   * @param luceneQueryString query string handed to {@link LuceneUtil}
   * @return the original forms in variant order; empty (never {@code null}) when there
   *         are no variants or no matching regularizations
   * @throws ApplicationException if obtaining {@link LuceneUtil}, extracting the
   *         variants or a lookup fails
   */
  public ArrayList<String> getRegOrigsByNormLuceneQueryString(String language, String luceneQueryString) throws ApplicationException {
    ArrayList<String> regForms = new ArrayList<String>();
    LuceneUtil luceneUtil = LuceneUtil.getInstance();
    ArrayList<String> variants = luceneUtil.getVariantsFromLuceneQuery(luceneQueryString);
    if (variants == null) {
      return regForms;
    }
    for (String variant : variants) {
      for (Regularization reg : findRegsByNorm(language, variant)) {
        regForms.add(reg.getOrig());
      }
    }
    return regForms;
  }
  
  /**
   * Closes the databases of the underlying handler. The singleton reference and the
   * in-memory caches are left as they are.
   *
   * @throws ApplicationException if closing fails
   */
  public void end() throws ApplicationException {
    dbRegHandler.closeDatabases();
    LOGGER.info("Regularization db cache: closed");
  }

  /** Records the current time as the start of the measured operation. */
  private void beginOperation() {
    beginOfOperation = new Date();
  }

  /** Records the current time as the end of the measured operation. */
  private void endOperation() {
    endOfOperation = new Date();
  }

}