view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children 7d6d969b10cf
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon;
import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry;
import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Token;
import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Tokenizer;
import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer;

/**
 * Manually run harness that exercises the tokenizer and lexicon lookup
 * classes of this project from a {@code main} method.
 *
 * <p>Exactly one scenario is active in {@link #main(String[])}; the other
 * scenarios are kept as commented-out calls and can be enabled by hand.
 * All results are written to standard output.
 */
public class TestLocal {
  /** Handler used for all lemma and lexicon lookups; set by {@link #init()}. */
  private LexHandler lexHandler;

  /**
   * Runs the currently enabled scenario. Any exception is caught and its
   * stack trace is printed, so this method never propagates a failure.
   *
   * @param args ignored
   * @throws ApplicationException declared for compatibility; not thrown in
   *         practice because every exception is caught here
   */
  public static void main(String[] args) throws ApplicationException {
    try {
      TestLocal test = new TestLocal();
      test.init();
      // Alternative scenarios; enable the one that should be run.
      // test.testCalls();
      // test.tokenizeString();
      // test.tokenizeXmlFragment();
      test.getLexEntriesByLexiconBeginningWith("ls", "a");
      // NOTE(review): end() is deliberately left disabled as in the original;
      // confirm whether LexHandler needs an explicit shutdown after a run.
      // test.end();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Obtains the {@link LexHandler} via {@code LexHandler.getInstance()}.
   *
   * @throws ApplicationException if the handler cannot be obtained
   */
  private void init() throws ApplicationException {
    lexHandler = LexHandler.getInstance();
  }

  /**
   * Calls {@code end()} on the handler obtained in {@link #init()}.
   *
   * @throws ApplicationException if the handler reports a failure
   */
  private void end() throws ApplicationException {
    lexHandler.end();
  }

  /**
   * Tokenizes a fixed Latin sample string with the "norm" normalization
   * function.
   *
   * @return the tokens delivered by the tokenizer
   * @throws ApplicationException if tokenizing fails; an {@link IOException}
   *         is wrapped
   */
  private ArrayList<Token> tokenizeString() throws ApplicationException {
    ArrayList<Token> tokens = new ArrayList<Token>();
    try {
      StringReader reader = new StringReader("edo philoſophi");
      // StringReader reader = new StringReader("扞盗則李兗州");
      Tokenizer tokenizer = new Tokenizer(reader);
      try {
        tokenizer.setLanguage("lat");
        // tokenizer.setLanguage("zho");
        tokenizer.setNormFunctions(new String[] {"norm"});
        tokens = tokenizer.getTokens();
        tokenizer.end();
      } finally {
        // Close the tokenizer even when getTokens() or end() fails.
        tokenizer.close();
      }
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
    return tokens;
  }

  /**
   * Downloads one page of a document as XML from the MPDL system, tokenizes
   * it as Latin with the "norm" normalization function and the stop element
   * "var", prints the tokenizer output and returns it.
   *
   * <p>Requires network access to the URL hard-coded below.
   *
   * @return the string produced by {@code XmlTokenizer.tokenize()}
   * @throws ApplicationException wrapping any exception raised while
   *         downloading or tokenizing
   */
  private String tokenizeXmlFragment() throws ApplicationException {
    String result = null;
    try {
      // The fragment is taken from the URL only. An earlier read of a file on
      // the author's machine was removed: its content was overwritten before
      // being used and the read failed wherever that file did not exist.
      String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13";
      URL srcUrl = new URL(srcUrlStr);
      String xmlFragment;
      InputStream in = new BufferedInputStream(srcUrl.openStream());
      try {
        xmlFragment = IOUtils.toString(in, "utf-8");
      } finally {
        // Release the connection even when reading fails.
        in.close();
      }

      XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment));
      xmlTokenizer.setLanguage("lat");
      xmlTokenizer.setNormFunctions(new String[] {"norm"});
      xmlTokenizer.setStopElements(new String[] {"var"});
      result = xmlTokenizer.tokenize();
      System.out.println(result);
    } catch (Exception e) {
      throw new ApplicationException(e);
    }
    return result;
  }

  /**
   * Runs {@link #getLexEntries} with a fixed Latin sample query
   * ("sum quibus", input type "form", normalization "norm").
   *
   * @throws ApplicationException if the lookup fails
   */
  private void testCalls() throws ApplicationException {
    String query = "sum quibus";
    String language = "lat";
    // String query = "ἱκανῶσ";
    // String language = "el";
    String inputType = "form";
    String outputType = null;
    String outputFormat = "html";
    String dictionaryName = null;
    String normalization = "norm";
    getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization);
  }

  /**
   * Looks up the lemmas for a query, fetches the lexicon entries for those
   * lemmas and prints them wrapped in a {@code <dictionaries>} element.
   *
   * @param query          the query passed to {@code LexHandler.getLemmas}
   * @param language       language passed to both handler calls
   * @param inputType      input type passed to {@code LexHandler.getLemmas}
   * @param outputType     currently unused
   * @param outputFormat   currently unused
   * @param dictionaryName dictionary name passed to
   *                       {@code LexHandler.getLexEntries}
   * @param normalization  normalization passed to {@code LexHandler.getLemmas}
   * @throws ApplicationException if one of the handler calls fails
   */
  private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, String normalization) throws ApplicationException {
    ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normalization);
    ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName);
    // String result = lexHandler.getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization);
    StringBuilder xml = new StringBuilder("<dictionaries>");
    for (Lexicon lexicon : dictionaries) {
      xml.append(lexicon.toXmlString());
    }
    xml.append("</dictionaries>");
    System.out.println(xml.toString());
  }

  /**
   * Prints what {@code LexHandler.getLexEntriesByLexiconBeginningWith}
   * returns for the given lexicon name and prefix; the third argument of
   * that call is fixed to 1.
   *
   * @param lexiconName name of the lexicon to query
   * @param prefix      prefix passed to the handler
   * @throws ApplicationException if the handler call fails
   */
  private void getLexEntriesByLexiconBeginningWith(String lexiconName, String prefix) throws ApplicationException {
    ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1);
    System.out.println(lexEntries);
  }

  /**
   * Prints what {@code LexHandler.getLexEntriesBeginningWith} returns for the
   * given language and prefix; the third argument of that call is fixed to 1.
   *
   * @param language language passed to the handler
   * @param prefix   prefix passed to the handler
   * @throws ApplicationException if the handler call fails
   */
  private void getLexEntriesBeginningWith(String language, String prefix) throws ApplicationException {
    ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1);
    System.out.println(lexEntries);
  }
}