Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java @ 20:7d6d969b10cf
little corrections
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 14 Dec 2011 12:48:43 +0100 |
parents | 4a3641ae14d2 |
children | e845310098ba |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.test; import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.net.URL; import java.util.ArrayList; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon; import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry; import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Token; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Tokenizer; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; public class TestLocal { private LexHandler lexHandler; public static void main(String[] args) throws ApplicationException { try { TestLocal test = new TestLocal(); test.init(); test.testCalls(); // test.tokenizeString(); // test.tokenizeXmlFragment(); // test.getLexEntriesByLexiconBeginningWith("ls", "a"); // test.end(); } catch (Exception e) { e.printStackTrace(); } } private void init() throws ApplicationException { lexHandler = LexHandler.getInstance(); } private void end() throws ApplicationException { lexHandler.end(); } private ArrayList<Token> tokenizeString() throws ApplicationException { ArrayList<Token> tokens = new ArrayList<Token>(); try { StringReader reader = new StringReader("edo philoſophi"); // StringReader reader = new StringReader("扞盗則李兗州"); Tokenizer tokenizer = new Tokenizer(reader); tokenizer.setLanguage("lat"); // tokenizer.setLanguage("zho"); String[] normFunctions = new String[1]; normFunctions[0] = "norm"; tokenizer.setNormFunctions(normFunctions); tokens = tokenizer.getTokens(); tokenizer.end(); tokenizer.close(); } catch (IOException e) { throw new ApplicationException(e); } return tokens; } private String tokenizeXmlFragment() throws ApplicationException { String result = null; try { String xmlFragment = new String(FileUtils.readFileToByteArray(new File("/Users/jwillenborg/tmp/testFragment2.xml")), "utf-8"); String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13"; URL srcUrl = new URL(srcUrlStr); InputStream inputStream = srcUrl.openStream(); BufferedInputStream in = new BufferedInputStream(inputStream); xmlFragment = IOUtils.toString(in, "utf-8"); in.close(); XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment)); xmlTokenizer.setLanguage("lat"); String[] normFunctions = new String[1]; normFunctions[0] = "norm"; String[] stopElements = new String[1]; stopElements[0] = "var"; xmlTokenizer.setNormFunctions(normFunctions); xmlTokenizer.setStopElements(stopElements); result = xmlTokenizer.tokenize(); System.out.println(result); } catch (Exception e) { throw new ApplicationException(e); } return result; } private void testCalls() throws ApplicationException { String query = "vergewissernd"; String language = "deu"; // String query = "ἱκανῶσ"; // String language = "el"; String inputType = "form"; String outputType = null; String outputFormat = "html"; String dictionaryName = null; int normMode = Normalizer.DICTIONARY; getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normMode); } private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, int normMode) throws ApplicationException { ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normMode); ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName, query); // String result = lexHandler.getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization); String result = ""; result = result + "<dictionaries>"; for (int i=0; i<dictionaries.size(); i++) { Lexicon lexicon = dictionaries.get(i); result = result + lexicon.toXmlString(); } result = result + "</dictionaries>"; System.out.println(result); } private void getLexEntriesByLexiconBeginningWith(String lexiconName, String prefix) throws ApplicationException { ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1, 50); System.out.println(lexEntries); } private void getLexEntriesBeginningWith(String language, String prefix) throws ApplicationException { ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1, 50); System.out.println(lexEntries); } }