Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children | 2396a569e446 |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;

import com.sleepycat.je.Cursor;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;

import de.mpg.mpiwg.berlin.mpdl.util.Util;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;

/**
 * Lookup facade for the lexicon databases stored below
 * {@code MPDL_EXIST_DATA_DIR + "/dataBerkeleyDB/pollux"}.
 *
 * <p>The handler is obtained through {@link #getInstance()}, which opens one
 * database per lexicon known to {@link Lexica} via a {@link DbEnvLex} that is
 * initialised with {@code initReadOnly()}. Entries are looked up by word form;
 * keys and values are encoded and decoded as UTF-8.
 *
 * <p>NOTE(review): {@link #getInstance()} performs no synchronisation; confirm
 * whether first access can happen from several threads at once.
 */
public class LexHandler {
  private static LexHandler instance;
  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_EXIST_DATA_DIR;
  private static String DB_DIR_LEXICA = MPDL_DATA_DIR + "/dataBerkeleyDB/pollux";
  private DbEnvLex dbEnvLexica;
  private Date beginOfOperation;
  private Date endOfOperation;

  /**
   * Returns the shared handler, creating and initialising it on first use.
   *
   * <p>The static field is only assigned after initialisation succeeded, so a
   * failed attempt does not leave a half-initialised handler behind; the next
   * call simply tries again.
   *
   * @return the shared, initialised handler
   * @throws ApplicationException if opening the lexicon databases fails
   */
  public static LexHandler getInstance() throws ApplicationException {
    if (instance == null) {
      LexHandler handler = new LexHandler();
      handler.initReadOnly();
      instance = handler;
    }
    return instance;
  }

  /**
   * Collects the keys under which lexicon entries exist for a word form:
   * the form itself plus every lemma the morphology component reports for it.
   *
   * <p>Each candidate is checked individually with
   * {@link #hasLexEntryKey(String, String)}; a lemma is only returned when at
   * least one lexicon of the language really contains it. A lemma equal to the
   * form name is not added a second time.
   *
   * @param formName  the word form to look up
   * @param language  the language used to select lexicons and lemmas
   * @param normalize passed through to {@code MorphologyCache.getLemmasByFormName}
   * @return the keys that have a lexicon entry, or {@code null} if there is none
   * @throws ApplicationException if a lexicon or morphology lookup fails
   */
  public ArrayList<String> getLexEntryKeys(String formName, String language, boolean normalize) throws ApplicationException {
    ArrayList<String> lexEntryKeys = new ArrayList<String>();
    MorphologyCache morphologyCache = MorphologyCache.getInstance();
    ArrayList<Lemma> formLemmas = morphologyCache.getLemmasByFormName(language, formName, normalize);
    if (hasLexEntryKey(formName, language)) {
      lexEntryKeys.add(formName);
    }
    if (formLemmas != null) {
      for (Lemma lemma : formLemmas) {
        String lemmaName = lemma.getLemmaName();
        if (lemmaName.equals(formName)) {
          // the form itself was already handled above
          continue;
        }
        // check every lemma on its own: an earlier hit must not vouch for later lemmas
        if (hasLexEntryKey(lemmaName, language)) {
          lexEntryKeys.add(lemmaName);
        }
      }
    }
    // callers expect null (not an empty list) when nothing was found
    if (lexEntryKeys.isEmpty()) {
      return null;
    }
    return lexEntryKeys;
  }

  /**
   * Tells whether any lexicon registered for the language contains the key.
   *
   * @param formName the key to look up
   * @param language the language whose lexicons are searched
   * @return {@code true} as soon as one lexicon has an entry for the key;
   *         {@code false} if none has, or if no lexicons are registered for the language
   * @throws ApplicationException if a database lookup fails
   */
  public boolean hasLexEntryKey(String formName, String language) throws ApplicationException {
    ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLexicons(language);
    if (statLexicons == null) {
      return false;
    }
    for (Lexicon statLexicon : statLexicons) {
      Lexicon lexicon = statLexicon.clone(); // clone without lexicon entries
      if (readEntry(lexicon.getName(), formName) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Reads the entry stored under a key in one lexicon database.
   *
   * <p>The key is encoded as UTF-8, searched with a cursor, and the stored
   * bytes are decoded as UTF-8. The cursor is closed on every path, including
   * when the search or the decoding throws.
   *
   * @param lexiconName name of the lexicon database to search
   * @param formName    the key to look up
   * @return the entry, or {@code null} if the key is not present
   * @throws ApplicationException wrapping any {@link DatabaseException} or
   *         {@link UnsupportedEncodingException}
   */
  public LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException {
    LexiconEntry retLexEntry = null;
    try {
      String dbFoundValueStr = null;
      DatabaseEntry dbEntryKey = new DatabaseEntry(formName.getBytes("utf-8"));
      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
      Cursor cursor = lexDB.openCursor(null, null);
      try {
        DatabaseEntry foundValue = new DatabaseEntry();
        OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
        if (operationStatus == OperationStatus.SUCCESS) {
          dbFoundValueStr = new String(foundValue.getData(), "utf-8");
        }
      } finally {
        // release the cursor even if the lookup or decoding failed
        cursor.close();
      }
      if (dbFoundValueStr != null) {
        retLexEntry = new LexiconEntry(lexiconName, formName, dbFoundValueStr);
      }
    } catch (DatabaseException e) {
      throw new ApplicationException(e);
    } catch (UnsupportedEncodingException e) {
      throw new ApplicationException(e);
    }
    return retLexEntry;
  }

  /**
   * Converts a string between encodings using the {@link Transcoder}.
   *
   * <p>Supported pairs are {@code "buckwalter"} to {@code "unicode"} and
   * {@code "betacode"} to {@code "unicode"}. The comparisons are written
   * constant-first so a {@code null} encoding name is treated like any other
   * unsupported pair instead of raising a {@link NullPointerException}.
   *
   * @param fromEncoding name of the source encoding
   * @param toEncoding   name of the target encoding
   * @param inputStr     the text to convert
   * @return the converted text, or {@code null} for an unsupported encoding pair
   * @throws ApplicationException if the transcoder fails
   */
  public String transcode(String fromEncoding, String toEncoding, String inputStr) throws ApplicationException {
    String encodedStr = null;
    Transcoder transcoder = Transcoder.getInstance();
    boolean toUnicode = "unicode".equals(toEncoding);
    if (toUnicode && "buckwalter".equals(fromEncoding)) {
      encodedStr = transcoder.transcodeFromBuckwalter2Unicode(inputStr);
    } else if (toUnicode && "betacode".equals(fromEncoding)) {
      encodedStr = transcoder.transcodeFromBetaCode2Unicode(inputStr);
    }
    return encodedStr;
  }

  /**
   * Manual smoke test: reads two sample entries, closes the databases and
   * prints the elapsed time.
   *
   * @param args ignored
   * @throws ApplicationException if initialisation, lookup or shutdown fails
   */
  public static void main(String[] args) throws ApplicationException {
    LexHandler handler = getInstance();
    handler.beginOperation();
    System.out.print("Start ...");
    try {
      handler.readSampleData();
    } finally {
      // close databases and environment even when the sample lookup fails
      handler.end();
    }
    handler.endOperation();
    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(handler.beginOfOperation, handler.endOfOperation);
    System.out.println("End.");
    System.out.println("Needed time: " + elapsedTime + " seconds");
  }

  /**
   * Sets up the database environment on {@code DB_DIR_LEXICA} via
   * {@code DbEnvLex.initReadOnly()} and opens one database per lexicon
   * returned by {@code Lexica.getLexicons()}.
   */
  private void initReadOnly() throws ApplicationException {
    dbEnvLexica = new DbEnvLex();
    dbEnvLexica.setDataDir(DB_DIR_LEXICA);
    dbEnvLexica.initReadOnly();
    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
    for (Lexicon lexicon : lexicons) {
      dbEnvLexica.openDatabase(lexicon.getName());
    }
  }

  /**
   * Prints the content of one Greek and one Latin sample entry; a missing
   * entry is printed as {@code null} instead of failing.
   */
  private void readSampleData() throws ApplicationException {
    // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames();
    LexiconEntry greekEntry = readEntry("autenrieth", "au)to/s");  // greek: see also bonitz and lsj
    LexiconEntry latinEntry = readEntry("ls", "laudabilis");  // latin
    String l1 = (greekEntry == null) ? null : greekEntry.getContent();
    String l2 = (latinEntry == null) ? null : latinEntry.getContent();
    System.out.println("Autenrieth: autos: " + l1);
    System.out.println("Lewis & Short: Laudabilis: " + l2);
  }

  /**
   * Closes the database of every lexicon returned by
   * {@code Lexica.getLexicons()} and then the environment itself.
   */
  private void end() throws ApplicationException {
    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
    for (Lexicon lexicon : lexicons) {
      dbEnvLexica.closeDatabase(lexicon.getName());
    }
    dbEnvLexica.close();
  }

  /** Records the current time as the start of the measured operation. */
  private void beginOperation() {
    beginOfOperation = new Date();
  }

  /** Records the current time as the end of the measured operation. */
  private void endOperation() {
    endOfOperation = new Date();
  }
}