Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.java @ 23:e845310098ba
diverse Korrekturen
| field | value |
|---|---|
| author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
| date | Tue, 27 Nov 2012 12:35:19 +0100 |
| parents | 7d6d969b10cf |
| children | |
comparison legend: equal | deleted | inserted | replaced
22:6a45a982c333 | 23:e845310098ba |
---|---|
47 Lexicon lexicon = lexicons.get(i); | 47 Lexicon lexicon = lexicons.get(i); |
48 String lexiconName = lexicon.getName(); | 48 String lexiconName = lexicon.getName(); |
49 dbEnvLexica.closeDatabase(lexiconName); | 49 dbEnvLexica.closeDatabase(lexiconName); |
50 } | 50 } |
51 dbEnvLexica.close(); | 51 dbEnvLexica.close(); |
52 LOGGER.info("Lexicon db cache: closed"); | 52 LOGGER.info("Lexicon cache: db closed"); |
53 dbEnvLexica = null; | |
54 instance = null; | |
53 } | 55 } |
54 | 56 |
55 /** | 57 /** |
56 * @param query | 58 * @param query |
57 * @param type | 59 * @param type |
58 * @param language | 60 * @param language |
59 * @param normalization | 61 * @param normalization |
60 * @return lemmas | 62 * @return lemmas |
61 * @throws ApplicationException | 63 * @throws ApplicationException |
62 */ | 64 */ |
63 public ArrayList<Lemma> getLemmas(String query, String type, String language, int normMode) throws ApplicationException { | 65 public ArrayList<Lemma> getLemmas(String query, String type, String language, int normMode, boolean atLeastOneLemmaWithWordForm) throws ApplicationException { |
64 ArrayList<Lemma> lexLemmas = new ArrayList<Lemma>(); | 66 ArrayList<Lemma> lexLemmas = new ArrayList<Lemma>(); |
65 // get lemmas of all forms in query | 67 // get lemmas of all forms in query |
66 MorphologyCache morphologyCache = MorphologyCache.getInstance(); | 68 MorphologyCache morphologyCache = MorphologyCache.getInstance(); |
67 String[] queryForms = query.split(" "); | 69 String[] queryForms = query.split(" "); |
68 for (int k=0; k<queryForms.length; k++) { | 70 for (int k=0; k<queryForms.length; k++) { |
77 lemmas.add(l); | 79 lemmas.add(l); |
78 } | 80 } |
79 if (lemmas != null && ! lemmas.isEmpty()) { | 81 if (lemmas != null && ! lemmas.isEmpty()) { |
80 lexLemmas.addAll(lemmas); | 82 lexLemmas.addAll(lemmas); |
81 } else { | 83 } else { |
82 Lemma l = new Lemma("created dynamically cause no lemma is available", language, queryForm); // at least the word form is added for finding it in the lexicon | 84 if (atLeastOneLemmaWithWordForm) { |
83 lexLemmas.add(l); | 85 Lemma l = new Lemma("created dynamically cause no lemma is available", language, queryForm); // at least the word form is added for finding it in the lexicon |
86 lexLemmas.add(l); | |
87 } | |
84 } | 88 } |
85 } | 89 } |
86 Collections.sort(lexLemmas); | 90 Collections.sort(lexLemmas); |
87 if (lexLemmas.isEmpty()) | 91 if (lexLemmas.isEmpty()) |
88 return null; | 92 return null; |
168 | 172 |
169 public boolean hasLexEntryKey(String formName, String language) throws ApplicationException { | 173 public boolean hasLexEntryKey(String formName, String language) throws ApplicationException { |
170 boolean hasLexEntry = false; | 174 boolean hasLexEntry = false; |
171 if (language.equals("zh")) // each chinese character always has a lexicon entry | 175 if (language.equals("zh")) // each chinese character always has a lexicon entry |
172 return true; | 176 return true; |
173 ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLocalLexicons(language); | 177 String[] lexiconNames = Lexica.getInstance().getLocalLexiconNames(language); |
174 if (statLexicons != null) { | 178 if (lexiconNames != null) { |
175 for (int i=0; i<statLexicons.size(); i++) { | 179 for (int i=0; i<lexiconNames.length; i++) { |
176 Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries | 180 String lexiconName = lexiconNames[i]; |
177 LexiconEntry lexEntry = readEntry(lexicon.getName(), formName); | 181 hasLexEntry = hasKey(lexiconName, formName); |
178 if (lexEntry != null) { | 182 if (hasLexEntry) { |
179 return true; | 183 return true; |
180 } | 184 } |
181 } | 185 } |
182 } | 186 } |
183 return hasLexEntry; | 187 return hasLexEntry; |
239 lexEntry = lexicon.getDynamicEntry(formName); | 243 lexEntry = lexicon.getDynamicEntry(formName); |
240 } | 244 } |
241 return lexEntry; | 245 return lexEntry; |
242 } | 246 } |
243 | 247 |
248 private boolean hasKey(String lexiconName, String formName) throws ApplicationException { | |
249 boolean hasKey = false; | |
250 try { | |
251 String keyStr = formName; | |
252 DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8")); | |
253 Database lexDB = dbEnvLexica.getLexiconDB(lexiconName); | |
254 Cursor cursor = lexDB.openCursor(null, null); | |
255 DatabaseEntry foundValue = new DatabaseEntry(); | |
256 foundValue.setPartial(0, 0, true); // more performance: the value is not fetched: only the key is fetched | |
257 OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT); | |
258 if (operationStatus == OperationStatus.SUCCESS) { | |
259 hasKey = true; | |
260 } | |
261 cursor.close(); | |
262 } catch (DatabaseException e) { | |
263 throw new ApplicationException(e); | |
264 } catch (UnsupportedEncodingException e) { | |
265 throw new ApplicationException(e); | |
266 } | |
267 return hasKey; | |
268 } | |
269 | |
244 private LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException { | 270 private LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException { |
245 LexiconEntry retLexEntry = null; | 271 LexiconEntry retLexEntry = null; |
246 try { | 272 try { |
247 String dbFoundValueStr = null; | 273 String dbFoundValueStr = null; |
248 String keyStr = formName; | 274 String keyStr = formName; |
335 for (int i=0; i<lexicons.size(); i++) { | 361 for (int i=0; i<lexicons.size(); i++) { |
336 Lexicon lexicon = lexicons.get(i); | 362 Lexicon lexicon = lexicons.get(i); |
337 String lexiconName = lexicon.getName(); | 363 String lexiconName = lexicon.getName(); |
338 dbEnvLexica.openDatabase(lexiconName); | 364 dbEnvLexica.openDatabase(lexiconName); |
339 } | 365 } |
340 LOGGER.info("Lexicon db cache: opened"); | 366 LOGGER.info("Lexicon cache: db opened read only"); |
341 } | 367 } |
342 | 368 |
343 private void readSampleData() throws ApplicationException { | 369 private void readSampleData() throws ApplicationException { |
344 // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames(); | 370 // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames(); |
345 String l1 = readEntry("autenrieth", "au)to/s").getContent(); // greek: see also bonitz and lsj | 371 String l1 = readEntry("autenrieth", "au)to/s").getContent(); // greek: see also bonitz and lsj |