comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/XmlTokenizerContentHandler.java @ 20:7d6d969b10cf

little corrections
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 14 Dec 2011 12:48:43 +0100
parents 4a3641ae14d2
children e845310098ba
comparison
equal deleted inserted replaced
19:4a3641ae14d2 20:7d6d969b10cf
10 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; 10 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
11 import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; 11 import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler;
12 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; 12 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
13 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; 13 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
14 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; 14 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
15 import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer;
15 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; 16 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils;
16 17
17 public class XmlTokenizerContentHandler implements ContentHandler { 18 public class XmlTokenizerContentHandler implements ContentHandler {
18 private static String COMPLEX_ELEMENT_MARK = new Character('\u2425').toString(); // word delimiting element 19 private static String COMPLEX_ELEMENT_MARK = new Character('\u2425').toString(); // word delimiting element
19 private static String COMPLEX_ELEMENT_NWD_MARK = new Character('\u2424').toString(); // not word delimiting element 20 private static String COMPLEX_ELEMENT_NWD_MARK = new Character('\u2424').toString(); // not word delimiting element
327 wordForm = wordForm.toLowerCase(); 328 wordForm = wordForm.toLowerCase();
328 String origWordFormDeresolved = StringUtils.deresolveXmlEntities(origWordForm); 329 String origWordFormDeresolved = StringUtils.deresolveXmlEntities(origWordForm);
329 ArrayList<Lemma> lemmas = null; 330 ArrayList<Lemma> lemmas = null;
330 if (withForms() || withLemmas()) { 331 if (withForms() || withLemmas()) {
331 LexHandler lexHandler = LexHandler.getInstance(); 332 LexHandler lexHandler = LexHandler.getInstance();
332 lemmas = lexHandler.getLemmas(wordForm, "form", language, "none"); 333 lemmas = lexHandler.getLemmas(wordForm, "form", language, Normalizer.NONE);
333 } 334 }
334 wordTag = insertWordTags(origWordFormDeresolved, wordForm, language, null, lemmas); 335 wordTag = insertWordTags(origWordFormDeresolved, wordForm, language, null, lemmas);
335 return wordTag; 336 return wordTag;
336 } 337 }
337 338