Mercurial > hg > mpdl-group
changeset 20:7d6d969b10cf
little corrections
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 14 Dec 2011 12:48:43 +0100 |
parents | 4a3641ae14d2 |
children | 4ea0f81a5d08 |
files | software/mpdl-services/mpiwg-mpdl-lt/build/classes/.DS_Store software/mpdl-services/mpiwg-mpdl-lt/build/classes/constants.properties software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/LexiconEntry.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/DBLexWriter.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/DbEnvLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexEntryContentHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexEntryErrorHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2UnicodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2UnicodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Constants.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Unicode2BetacodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Unicode2BuckwalterLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Lemma.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler$Element.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler$Element.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphSupWriter.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriter.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler$Element.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorph.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorphSup.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/MpdlNormalizer.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexAR.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexEL.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexEN.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexFR.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexIT.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexLA.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexNL.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexZH.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/DBRegularizationHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/DbEnvRegularization.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/Regularization.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/RegularizationManager.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/ChineseTokenizer.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Token.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Tokenizer.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizer.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler$1.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler$Element.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Betacode2UnicodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Buckwalter2UnicodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Transcoder.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2BetacodeLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2BuckwalterLex.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lucene/util/LuceneUtil.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/FileUtil.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/LuceneUtil.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/StringUtils.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/Util.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil$1.class software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.class software/mpdl-services/mpiwg-mpdl-lt/dist/mpiwg-mpdl-lt.jar software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.java software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java |
diffstat | 76 files changed, 147 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/software/mpdl-services/mpiwg-mpdl-lt/build/classes/constants.properties Wed Nov 09 15:32:05 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -dataDir=/Users/jwillenborg/mpdl/data/lt \ No newline at end of file
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/LexiconEntry.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/DBLexWriter.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/DbEnvLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexEntryContentHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexEntryErrorHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2UnicodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2UnicodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Constants.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Unicode2BetacodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/general/Unicode2BuckwalterLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Lemma.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler$Element.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler$Element.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphSupWriter.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriter.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler$Element.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorph.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorphSup.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/MpdlNormalizer.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexAR.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexDE.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexEL.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexEN.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexFR.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexIT.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexLA.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexNL.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/lang/MpdlNormalizerLexZH.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/DBRegularizationHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/DbEnvRegularization.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/Regularization.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/reg/RegularizationManager.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/ChineseTokenizer.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Token.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Tokenizer.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizer.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler$1.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler$Element.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Betacode2UnicodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Buckwalter2UnicodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Transcoder.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2BetacodeLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lt/text/transcode/Unicode2BuckwalterLex.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/lucene/util/LuceneUtil.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/FileUtil.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/LuceneUtil.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/StringUtils.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/Util.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil$1.class has changed
Binary file software/mpdl-services/mpiwg-mpdl-lt/build/classes/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.class has changed
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.java Wed Dec 14 12:48:43 2011 +0100 @@ -140,6 +140,14 @@ } } + public LexiconEntry getEntry(String lexEntryName) { + LexiconEntry retEntry = null; + if (entries == null) { + entries.get(lexEntryName); + } + return retEntry; + } + /* * without lexicon entries (non-Javadoc) * @see java.lang.Object#clone() @@ -157,6 +165,7 @@ String result = ""; result = result + "<dictionary>"; result = result + "<name>" + name + "</name>"; + result = result + "<language>" + sourceLang + "</language>"; result = result + "<description>" + description + "</description>"; result = result + "<entries>"; for (int i=0; i<entries.size(); i++) { @@ -195,4 +204,21 @@ return result; } + public String toXmlStringCompact() { + String result = ""; + result = result + "<dictionary>"; + result = result + "<name>" + name + "</name>"; + result = result + "<language>" + sourceLang + "</language>"; + result = result + "<description>" + description + "</description>"; + result = result + "<entries>"; + for (int i=0; i<entries.size(); i++) { + result = result + "<entry>"; + LexiconEntry entry = getEntries().get(i); + result = result + "<form>" + entry.getFormName() + "</form>"; + result = result + "</entry>"; + } + result = result + "</entries>"; + result = result + "</dictionary>"; + return result; + } }
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.java Wed Dec 14 12:48:43 2011 +0100 @@ -60,7 +60,7 @@ * @return lemmas * @throws ApplicationException */ - public ArrayList<Lemma> getLemmas(String query, String type, String language, String normalization) throws ApplicationException { + public ArrayList<Lemma> getLemmas(String query, String type, String language, int normMode) throws ApplicationException { ArrayList<Lemma> lexLemmas = new ArrayList<Lemma>(); // get lemmas of all forms in query MorphologyCache morphologyCache = MorphologyCache.getInstance(); @@ -69,21 +69,10 @@ String queryForm = queryForms[k]; ArrayList<Lemma> lemmas = null; if (type.equals("form")) { - if (normalization.equals("norm")) - lemmas = morphologyCache.getLemmasByFormName(language, queryForm, true); - else if (normalization.equals("none")) - lemmas = morphologyCache.getLemmasByFormName(language, queryForm, false); - else - lemmas = morphologyCache.getLemmasByFormName(language, queryForm, true); // TODO reg and reg+norm + lemmas = morphologyCache.getLemmasByFormName(language, queryForm, normMode); } else if (type.equals("lemma")) { lemmas = new ArrayList<Lemma>(); - Lemma l = null; - if (normalization.equals("norm")) - l = morphologyCache.getLemma(language, queryForm, true); - else if (normalization.equals("none")) - l = morphologyCache.getLemma(language, queryForm, false); - else - l = morphologyCache.getLemma(language, queryForm, true); + Lemma l = morphologyCache.getLemma(language, queryForm, normMode); if (l != null) lemmas.add(l); } @@ -101,7 +90,7 @@ return lexLemmas; } - public ArrayList<Lexicon> getLexEntries(ArrayList<Lemma> lexLemmas, String language, String lexiconName) throws ApplicationException { + public ArrayList<Lexicon> getLexEntries(ArrayList<Lemma> lexLemmas, String language, String lexiconName, String query) throws ApplicationException { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons(language); if (lexiconName != null) { @@ -115,19 +104,25 @@ Lexicon lexicon = lexicons.get(i).clone(); // clone without lexicon entries for (int j=0; j<lexLemmas.size(); j++) { String lemmaName = lexLemmas.get(j).getLemmaName(); - if (Language.getInstance().isGerman(language) && lemmaName.contains("ae")) - lemmaName = lemmaName.replaceAll("ae", "Š"); - if (Language.getInstance().isGerman(language) && lemmaName.contains("oe")) - lemmaName = lemmaName.replaceAll("oe", "š"); - if (Language.getInstance().isGerman(language) && lemmaName.contains("ue")) - lemmaName = lemmaName.replaceAll("ue", "Ÿ"); - if (Language.getInstance().isGerman(language) && lemmaName.contains("ss")) - lemmaName = lemmaName.replaceAll("ss", "§"); LexiconEntry lexEntry = getEntry(lexicon, lemmaName); if (lexEntry != null) { lexicon.addEntry(lexEntry); // add entries to the cloned lexicon } } + if (Language.getInstance().isGerman(language) && query != null) { + String[] lexFormNames = query.split(" "); + for (int j=0; j<lexFormNames.length; j++) { + String lexFormName = lexFormNames[j]; + LexiconEntry lexEntry = lexicon.getEntry(lexFormName); + if (lexEntry == null) { + LexiconEntry newLexEntry = new LexiconEntry(lexiconName, lexFormName, null); + String lexiconQueryUrl = lexicon.getQueryUrl(); + String remoteUrl = lexiconQueryUrl + lexFormName; + newLexEntry.setRemoteUrl(remoteUrl); + lexicon.addEntry(newLexEntry); + } + } + } if (! lexicon.isEmpty()) retLexicons.add(lexicon); } @@ -143,10 +138,10 @@ * @return delivers lexical entries by the help of the morphology component (lexical entry of the stem of the normalized word form) * @throws ApplicationException */ - public ArrayList<String> getLexEntryKeys(String formName, String language, boolean normalize) throws ApplicationException { + public ArrayList<String> getLexEntryKeys(String formName, String language, int normMode) throws ApplicationException { ArrayList<String> lexEntryKeys = new ArrayList<String>(); MorphologyCache morphologyCache = MorphologyCache.getInstance(); - ArrayList<Lemma> formLemmas = morphologyCache.getLemmasByFormName(language, formName, normalize); + ArrayList<Lemma> formLemmas = morphologyCache.getLemmasByFormName(language, formName, normMode); boolean hasLexEntry = false; hasLexEntry = hasLexEntryKey(formName, language); if (hasLexEntry) @@ -158,7 +153,7 @@ if (! hasLexEntry) { hasLexEntry = hasLexEntryKey(lName, language); } - if (language.equals("de") || language.equals("fr") || language.equals("nl")) // TODO Lexika fŸr diese Sprachen in BerkeleyDB einbringen (fŸr nl auch eine bessere Morph.) + if (language.equals("de") || language.equals("fr") || language.equals("nl")) // TODO Lexika für diese Sprachen in BerkeleyDB einbringen (für nl auch eine bessere Morph.) lexEntryKeys.add(lName); if (! lName.equals(formName) && hasLexEntry) { lexEntryKeys.add(lName); @@ -188,8 +183,7 @@ return hasLexEntry; } - public ArrayList<Lexicon> getLexEntriesBeginningWith(String language, String formPrefix, int pageNumber) throws ApplicationException { - int pageSize = 50; + public ArrayList<Lexicon> getLexEntriesBeginningWith(String language, String formPrefix, int pageNumber, int pageSize) throws ApplicationException { int from = (pageNumber * pageSize) - pageSize + 1; int to = pageNumber * pageSize; ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLocalLexicons(language); @@ -211,8 +205,7 @@ return retLexicons; } - public ArrayList<Lexicon> getLexEntriesByLexiconBeginningWith(String lexiconName, String formPrefix, int pageNumber) throws ApplicationException { - int pageSize = 50; + public ArrayList<Lexicon> getLexEntriesByLexiconBeginningWith(String lexiconName, String formPrefix, int pageNumber, int pageSize) throws ApplicationException { int from = (pageNumber * pageSize) - pageSize + 1; int to = pageNumber * pageSize; Lexicon lexicon = Lexica.getInstance().getLexicon(lexiconName).clone(); @@ -265,6 +258,7 @@ cursor.close(); if (dbFoundValueStr != null) { retLexEntry = new LexiconEntry(lexiconName, formName, dbFoundValueStr); + retLexEntry = correct(retLexEntry); // correct errors: e.g. in lsj some html entities are not correct } } catch (DatabaseException e) { throw new ApplicationException(e); @@ -292,6 +286,7 @@ byte[] foundKeyBytes = dbEntryKey.getData(); String dbFoundKeyStr = new String(foundKeyBytes, "utf-8"); LexiconEntry lexEntry = new LexiconEntry(lexiconName, dbFoundKeyStr, dbFoundValueStr); + lexEntry = correct(lexEntry); // correct errors: e.g. in lsj some html entities are not correct retLexEntries.add(lexEntry); } operationStatus = cursor.getNext(dbEntryKey, foundValue, LockMode.DEFAULT); @@ -309,6 +304,17 @@ return retLexEntries; } + private LexiconEntry correct(LexiconEntry lexEntry) { + String lexiconName = lexEntry.getLexiconName(); + String content = lexEntry.getContent(); + if (content != null && content.contains("&#") && lexiconName.equals("lsj")) { // errors in greek lexicon lsj + content = content.replaceAll("&#\u03C7", "&#x"); // html entity: replace greek Minuskel Chi by "x" + content = content.replaceAll("&#x[^0-9]{4};", ""); // html entity: remove entity if special greek not hex characters appear + lexEntry.setContent(content); + } + return lexEntry; + } + public static void main(String[] args) throws ApplicationException { getInstance(); instance.beginOperation();
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.java Wed Dec 14 12:48:43 2011 +0100 @@ -169,4 +169,46 @@ else return false; } + + public String getHtmlSelectBox(String language) { + String result = "<select name=\"language\">"; + if (isArabic(language)) + result = result + "<option value =\"ara\" selected=\"true\">Arabic</option>"; + else + result = result + "<option value =\"ara\">Arabic</option>"; + if (isChinese(language)) + result = result + "<option value =\"zho\" selected=\"true\">Chinese</option>"; + else + result = result + "<option value =\"zho\">Chinese</option>"; + if (isDutch(language)) + result = result + "<option value =\"nld\" selected=\"true\">Dutch</option>"; + else + result = result + "<option value =\"nld\">Dutch</option>"; + if (isEnglish(language)) + result = result + "<option value =\"eng\" selected=\"true\">English</option>"; + else + result = result + "<option value =\"eng\">English</option>"; + if (isFrench(language)) + result = result + "<option value =\"fra\" selected=\"true\">French</option>"; + else + result = result + "<option value =\"fra\">French</option>"; + if (isGerman(language)) + result = result + "<option value =\"ger\" selected=\"true\">German</option>"; + else + result = result + "<option value =\"ger\">German</option>"; + if (isGreek(language)) + result = result + "<option value =\"grc\" selected=\"true\">Greek</option>"; + else + result = result + "<option value =\"grc\">Greek</option>"; + if (isItalian(language)) + result = result + "<option value =\"ita\" selected=\"true\">Italian</option>"; + else + result = result + "<option value =\"ita\">Italian</option>"; + if (isLatin(language)) + result = result + "<option value =\"lat\" selected=\"true\">Latin</option>"; + else + result = result + "<option value =\"lat\">Latin</option>"; + result = result + "</select>"; + return result; + } } \ No newline at end of file
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.java Wed Dec 14 12:48:43 2011 +0100 @@ -64,14 +64,19 @@ LOGGER.info("Morphology db cache: closed"); } - public ArrayList<Lemma> getLemmasByFormName(String lang, String formNameArg, boolean normalize) throws ApplicationException { + /* + public ArrayList<Lemma> getLemmasByFormName(String lang, String formName, boolean normalize) throws ApplicationException { + return getLemmasByFormName(lang, formName, normalize, Normalizer.DISPLAY); + } + */ + + public ArrayList<Lemma> getLemmasByFormName(String lang, String formNameArg, int normMode) throws ApplicationException { String language = Language.getInstance().getLanguageId(lang); ArrayList<Lemma> retFormLemmas = null; String formName = formNameArg; - if (normalize) { - Normalizer normalizer = new Normalizer(language); - formName = normalizer.normalize(formNameArg); - } + Normalizer normalizer = new Normalizer(language); + normalizer.setNormMode(normMode); + formName = normalizer.normalize(formNameArg); // first look in local cache String key = language + "###" + formName; Hashtable<String, Lemma> formLemmasHashtable = forms.get(key); @@ -114,13 +119,12 @@ return retFormLemmas; } - public Lemma getLemma(String lang, String lemmaNameArg, boolean normalize) throws ApplicationException { + public Lemma getLemma(String lang, String lemmaNameArg, int normMode) throws ApplicationException { String language = Language.getInstance().getLanguageId(lang); String lemmaName = lemmaNameArg; - if (normalize) { - Normalizer normalizer = new Normalizer(language); - lemmaName = normalizer.normalize(lemmaNameArg); - } + Normalizer normalizer = new Normalizer(language); + normalizer.setNormMode(normMode); + lemmaName = normalizer.normalize(lemmaNameArg); // first look in local cache String key = language + "###" + lemmaName; Lemma lemma = lemmas.get(key); @@ -138,7 +142,7 @@ return lemma; } - public ArrayList<Form> getFormsByLuceneQuery(String lang, String luceneQueryString, boolean normalize) throws ApplicationException { + public ArrayList<Form> getFormsByLuceneQuery(String lang, String luceneQueryString, int normMode) throws ApplicationException { String language = Language.getInstance().getLanguageId(lang); ArrayList<Form> result = new ArrayList<Form>(); luceneQueryString = luceneQueryString.toLowerCase(); @@ -146,19 +150,18 @@ if (! (formsFromQuery == null || formsFromQuery.isEmpty())) { for (int i=0; i<formsFromQuery.size(); i++) { String formStr = formsFromQuery.get(i); - if (normalize) { - Normalizer normalizer = new Normalizer(language); - formStr = normalizer.normalize(formStr); - } + Normalizer normalizer = new Normalizer(language); + normalizer.setNormMode(normMode); + formStr = normalizer.normalize(formStr); ArrayList<Lemma> formLemmas = null; // lemma mode: if formName contains "lemmalemma" then the lemma itself is fetched if (formStr.startsWith("lemmalemma")) { formLemmas = new ArrayList<Lemma>(); String lemmaName = formStr.substring(10); - Lemma lemma = getLemma(language, lemmaName, false); + Lemma lemma = getLemma(language, lemmaName, Normalizer.NONE); formLemmas.add(lemma); } else { - formLemmas = getLemmasByFormName(language, formStr, false); + formLemmas = getLemmasByFormName(language, formStr, normMode); } if (formLemmas != null && ! formLemmas.isEmpty()) { for (int j=0; j<formLemmas.size(); j++) { @@ -172,7 +175,7 @@ return result; } - public ArrayList<Lemma> getLemmasByLuceneQuery(String lang, String luceneQueryString, boolean normalize) throws ApplicationException { + public ArrayList<Lemma> getLemmasByLuceneQuery(String lang, String luceneQueryString, int normMode) throws ApplicationException { String language = Language.getInstance().getLanguageId(lang); Hashtable<String, Lemma> lemmas = new Hashtable<String, Lemma>(); luceneQueryString = luceneQueryString.toLowerCase(); @@ -180,19 +183,18 @@ if (! (formsFromQuery == null || formsFromQuery.isEmpty())) { for (int i=0; i<formsFromQuery.size(); i++) { String formStr = formsFromQuery.get(i); - if (normalize) { - Normalizer normalizer = new Normalizer(language); - formStr = normalizer.normalize(formStr); - } + Normalizer normalizer = new Normalizer(language); + normalizer.setNormMode(normMode); + formStr = normalizer.normalize(formStr); ArrayList<Lemma> formLemmas = null; // lemma mode: if formName starts with "lemmalemma" then the lemma itself is fetched if (formStr.startsWith("lemmalemma")) { formLemmas = new ArrayList<Lemma>(); String lemmaName = formStr.substring(10); - Lemma lemma = getLemma(language, lemmaName, false); + Lemma lemma = getLemma(language, lemmaName, Normalizer.NONE); formLemmas.add(lemma); } else { - formLemmas = getLemmasByFormName(language, formStr, false); + formLemmas = getLemmasByFormName(language, formStr, normMode); } if (formLemmas != null) { for (int j=0; j<formLemmas.size(); j++) { @@ -223,13 +225,13 @@ Hashtable<String, String> indexKeys = new Hashtable<String, String>(); for (int j=0; j<lemmaNames.size(); j++) { String lemmaName = lemmaNames.get(j); - Lemma lemma = getLemma(language, lemmaName, false); + Lemma lemma = getLemma(language, lemmaName, Normalizer.NONE); indexKeys.put(lemmaName, lemmaName); if (lemma != null) { ArrayList<Form> lemmaForms = lemma.getFormsList(); for (int k=0; k<lemmaForms.size(); k++) { Form form = lemmaForms.get(k); - ArrayList<Lemma> fLemmas = getLemmasByFormName(language, form.getFormName(), false); + ArrayList<Lemma> fLemmas = getLemmasByFormName(language, form.getFormName(), Normalizer.NONE); if (fLemmas != null) { String indexKey = ""; if (fLemmas.size() == 1) {
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/norm/Normalizer.java Wed Dec 14 12:48:43 2011 +0100 @@ -19,6 +19,7 @@ import de.mpg.mpiwg.berlin.mpdl.lt.text.reg.RegularizationManager; public class Normalizer { + public static int NONE = -1; // no normalization public static int DISPLAY = 1; // normalization in DISPLAY mode public static int DICTIONARY = 2; // normalization in DICTIONARY mode public static int SEARCH = 3; // normalization in SEARCH mode; never used so far in indexing because it does not support the morph. lexicons such as CELEX (e.g. eingeschränkt would not be stemmed to eingeschraenkt)
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.java Wed Dec 14 12:48:43 2011 +0100 @@ -12,6 +12,7 @@ import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; public class XmlTokenizerContentHandler implements ContentHandler { @@ -329,7 +330,7 @@ ArrayList<Lemma> lemmas = null; if (withForms() || withLemmas()) { LexHandler lexHandler = LexHandler.getInstance(); - lemmas = lexHandler.getLemmas(wordForm, "form", language, "none"); + lemmas = lexHandler.getLemmas(wordForm, "form", language, Normalizer.NONE); } wordTag = insertWordTags(origWordFormDeresolved, wordForm, language, null, lemmas); return wordTag;
--- a/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java Wed Nov 09 15:32:05 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/test/TestLocal.java Wed Dec 14 12:48:43 2011 +0100 @@ -16,6 +16,7 @@ import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry; import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Token; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.Tokenizer; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; @@ -27,10 +28,10 @@ try { TestLocal test = new TestLocal(); test.init(); - // test.testCalls(); + test.testCalls(); // test.tokenizeString(); // test.tokenizeXmlFragment(); - test.getLexEntriesByLexiconBeginningWith("ls", "a"); + // test.getLexEntriesByLexiconBeginningWith("ls", "a"); // test.end(); } catch (Exception e) { e.printStackTrace(); @@ -93,21 +94,21 @@ } private void testCalls() throws ApplicationException { - String query = "sum quibus"; - String language = "lat"; + String query = "vergewissernd"; + String language = "deu"; // String query = "ἱκανῶσ"; // String language = "el"; String inputType = "form"; String outputType = null; String outputFormat = "html"; String dictionaryName = null; - String normalization = "norm"; - getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization); + int normMode = Normalizer.DICTIONARY; + getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normMode); } - private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, String normalization) throws ApplicationException { - ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normalization); - ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName); + private void getLexEntries(String query, String language, String inputType , String outputType, String outputFormat, String dictionaryName, int normMode) throws ApplicationException { + ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normMode); + ArrayList<Lexicon> dictionaries = lexHandler.getLexEntries(lemmas, language, dictionaryName, query); // String result = lexHandler.getLexEntries(query, language, inputType, outputType, outputFormat, dictionaryName, normalization); String result = ""; result = result + "<dictionaries>"; @@ -120,12 +121,12 @@ } private void getLexEntriesByLexiconBeginningWith(String lexiconName, String prefix) throws ApplicationException { - ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1); + ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesByLexiconBeginningWith(lexiconName, prefix, 1, 50); System.out.println(lexEntries); } private void getLexEntriesBeginningWith(String language, String prefix) throws ApplicationException { - ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1); + ArrayList<Lexicon> lexEntries = lexHandler.getLexEntriesBeginningWith(language, prefix, 1, 50); System.out.println(lexEntries); } }