Mercurial > hg > mpdl-group

diff software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java @ 19:4a3641ae14d2
Erstellung
author: Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date: Wed, 09 Nov 2011 15:32:05 +0100
children: 7d6d969b10cf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java	Wed Nov 09 15:32:05 2011 +0100
@@ -0,0 +1,131 @@
+package de.mpg.mpiwg.berlin.mpdl.test;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.ArrayList;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon;
+import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry;
+import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Token;
+import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Tokenizer;
+import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer;
+
+/** Manual, locally run driver for the lexicon and tokenizer classes; main() picks the scenario. */
+public class TestLocal {
+  private LexHandler lexHandler;
+
+  /** Runs the enabled scenario; any exception is only printed via printStackTrace(). */
+  public static void main(String[] args) throws ApplicationException {
+    try {
+      TestLocal test = new TestLocal();
+      test.init();
+      // Disabled scenarios: test.testCalls(); test.tokenizeString(); test.tokenizeXmlFragment();
+      test.getLexEntriesByLexiconBeginningWith("ls", "a");
+      // test.end();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+  /** Fetches the LexHandler through LexHandler.getInstance(). */
+  private void init() throws ApplicationException {
+    lexHandler = LexHandler.getInstance();
+  }
+
+  /** Calls end() on the LexHandler obtained in init(). */
+  private void end() throws ApplicationException {
+    lexHandler.end();
+  }
+
+  /**
+   * Tokenizes a fixed sample string as language "lat" with the "norm" function.
+   * @return whatever Tokenizer.getTokens() delivers for the sample
+   * @throws ApplicationException wrapping an IOException raised by the tokenizer
+   */
+  private ArrayList<Token> tokenizeString() throws ApplicationException {
+    try {
+      // Alternative sample kept from the original: "扞盗則李兗州" with language "zho".
+      Tokenizer tokenizer = new Tokenizer(new StringReader("edo philoſophi"));
+      tokenizer.setLanguage("lat");
+      tokenizer.setNormFunctions(new String[] { "norm" });
+      ArrayList<Token> tokens = tokenizer.getTokens();
+      tokenizer.end();
+      tokenizer.close();
+      return tokens;
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  /**
+   * Fetches XML from a fixed page-query URL, tokenizes it and prints the result.
+   * No developer-local file is read (its content used to be discarded anyway), and
+   * the URL stream is closed in a finally block even when reading fails.
+   * @return the string returned by XmlTokenizer.tokenize()
+   * @throws ApplicationException wrapping any exception from fetching or tokenizing
+   */
+  private String tokenizeXmlFragment() throws ApplicationException {
+    String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13";
+    try {
+      String xmlFragment;
+      InputStream in = new BufferedInputStream(new URL(srcUrlStr).openStream());
+      try {
+        xmlFragment = IOUtils.toString(in, "utf-8");
+      } finally {
+        in.close();
+      }
+      XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment));
+      xmlTokenizer.setLanguage("lat");
+      xmlTokenizer.setNormFunctions(new String[] { "norm" });
+      xmlTokenizer.setStopElements(new String[] { "var" });
+      String result = xmlTokenizer.tokenize();
+      System.out.println(result);
+      return result;
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  /** Runs getLexEntries() for the form query "sum quibus" in "lat" without a dictionary name. */
+  private void testCalls() throws ApplicationException {
+    // Alternative kept from the original: query "ἱκανῶσ" with language "el".
+    getLexEntries("sum quibus", "lat", "form", null, "html", null, "norm");
+  }
+
+  /**
+   * Prints the XML of every Lexicon found for the query's lemmas, wrapped in one
+   * dictionaries element. outputType and outputFormat are accepted but unused here.
+   */
+  private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, String normalization) throws ApplicationException {
+    ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normalization);
+    ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName);
+    StringBuilder result = new StringBuilder("<dictionaries>");
+    for (Lexicon lexicon : dictionaries) {
+      result.append(lexicon.toXmlString());
+    }
+    result.append("</dictionaries>");
+    System.out.println(result);
+  }
+
+  /** Prints LexHandler's result for a lexicon name and prefix (third argument fixed to 1). */
+  private void getLexEntriesByLexiconBeginningWith(String lexiconName, String prefix) throws ApplicationException {
+    ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1);
+    System.out.println(lexEntries);
+  }
+
+  /** Prints LexHandler's result for a language and prefix (third argument fixed to 1). */
+  private void getLexEntriesBeginningWith(String language, String prefix) throws ApplicationException {
+    ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1);
+    System.out.println(lexEntries);
+  }
+}
author	Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date	Wed, 09 Nov 2011 15:32:05 +0100
parents
children	7d6d969b10cf