comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemmer.java @ 10:59ff47d1e237

TEI Unterstützung, Fehlerbehebungen, externe Objekte
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Fri, 11 Mar 2011 13:33:26 +0100
parents 408254cf2f1d
children
comparison
equal deleted inserted replaced
9:1ec29fdd0db8 10:59ff47d1e237
46 String lemmaName = lemma.getLemmaName(); 46 String lemmaName = lemma.getLemmaName();
47 stem = stem + "+++" + lemmaName; // e.g. "+++edo+++sum" 47 stem = stem + "+++" + lemmaName; // e.g. "+++edo+++sum"
48 } 48 }
49 } 49 }
50 } 50 }
51 // if not found in MorphologyCache use Snowball 51 // if not found then use the term itself as the stem
52 if (stem == null) { 52 if (stem == null) {
53 stem = term;
54 /* Snowball stemming: if not found in MorphologyCache use Snowball
53 stem = stemBySnowball(term, language); 55 stem = stemBySnowball(term, language);
54 // if term is not equal to the base form and also the stem is not too short (> 2 characters) then add this Snowball form to the dynamic morphology cache 56 // if term is not equal to the base form and also the stem is not too short (> 2 characters) then add this Snowball form to the dynamic morphology cache
55 if ((! stem.equals(term)) && stem.length() > 2) { 57 if ((! stem.equals(term)) && stem.length() > 2) {
56 try { 58 try {
57 MorphologyCache morphologyCache = MorphologyCache.getInstance(); 59 MorphologyCache morphologyCache = MorphologyCache.getInstance();
62 } 64 }
63 } catch (ApplicationException e) { 65 } catch (ApplicationException e) {
64 Logger.getLogger(MpdlStemmer.class).warn("MorphologyCache: an exception was caught while indexing a document: " + e.getMessage(), e); 66 Logger.getLogger(MpdlStemmer.class).warn("MorphologyCache: an exception was caught while indexing a document: " + e.getMessage(), e);
65 } 67 }
66 } 68 }
69 */
67 } 70 }
68 return stem; 71 return stem;
69 } 72 }
70 73
71 private String stemBySnowball(String term, String language) { 74 private String stemBySnowball(String term, String language) {