Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children | 7d6d969b10cf |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java Wed Nov 09 15:32:05 2011 +0100 @@ -0,0 +1,131 @@ +package de.mpg.mpiwg.berlin.mpdl.test; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.net.URL; +import java.util.ArrayList; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Token; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Tokenizer; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; + +public class TestLocal { + private LexHandler lexHandler; + + public static void main(String[] args) throws ApplicationException { + try { + TestLocal test = new TestLocal(); + test.init(); + // test.testCalls(); + // test.tokenizeString(); + // test.tokenizeXmlFragment(); + test.getLexEntriesByLexiconBeginningWith("ls", "a"); + // test.end(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private void init() throws ApplicationException { + lexHandler = LexHandler.getInstance(); + } + + private void end() throws ApplicationException { + lexHandler.end(); + } + + private ArrayList<Token> tokenizeString() throws ApplicationException { + ArrayList<Token> tokens = new ArrayList<Token>(); + try { + StringReader reader = new StringReader("edo philoſophi"); + // StringReader reader = new StringReader("扞盗則李兗州"); + Tokenizer tokenizer = new Tokenizer(reader); + tokenizer.setLanguage("lat"); + // tokenizer.setLanguage("zho"); + String[] normFunctions = new String[1]; + normFunctions[0] = "norm"; + tokenizer.setNormFunctions(normFunctions); + tokens = tokenizer.getTokens(); + tokenizer.end(); + tokenizer.close(); + } catch (IOException e) { + throw new ApplicationException(e); + } + return tokens; + } + + private String tokenizeXmlFragment() throws ApplicationException { + String result = null; + try { + String xmlFragment = new String(FileUtils.readFileToByteArray(new File("/Users/jwillenborg/tmp/testFragment2.xml")), "utf-8"); + String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13"; + URL srcUrl = new URL(srcUrlStr); + InputStream inputStream = srcUrl.openStream(); + BufferedInputStream in = new BufferedInputStream(inputStream); + xmlFragment = IOUtils.toString(in, "utf-8"); + in.close(); + + XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment)); + xmlTokenizer.setLanguage("lat"); + String[] normFunctions = new String[1]; + normFunctions[0] = "norm"; + String[] stopElements = new String[1]; + stopElements[0] = "var"; + xmlTokenizer.setNormFunctions(normFunctions); + xmlTokenizer.setStopElements(stopElements); + result = xmlTokenizer.tokenize(); + System.out.println(result); + } catch (Exception e) { + throw new ApplicationException(e); + } + return result; + } + + private void testCalls() throws ApplicationException { + String query = "sum quibus"; + String language = "lat"; + // String query = "ἱκανῶσ"; + // String language = "el"; + String inputType = "form"; + String outputType = null; + String outputFormat = "html"; + String dictionaryName = null; + String normalization = "norm"; + getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization); + } + + private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, String normalization) throws ApplicationException { + ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normalization); + ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName); + // String result = lexHandler.getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization); + String result = ""; + result = result + "<dictionaries>"; + for (int i=0; i<dictionaries.size(); i++) { + Lexicon lexicon = dictionaries.get(i); + result = result + lexicon.toXmlString(); + } + result = result + "</dictionaries>"; + System.out.println(result); + } + + private void getLexEntriesByLexiconBeginningWith(String lexiconName, String prefix) throws ApplicationException { + ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1); + System.out.println(lexEntries); + } + + private void getLexEntriesBeginningWith(String language, String prefix) throws ApplicationException { + ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1); + System.out.println(lexEntries); + } +}