comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/db/LexHandler.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents 7d6d969b10cf
children
comparison
equal deleted inserted replaced
22:6a45a982c333 23:e845310098ba
47 Lexicon lexicon = lexicons.get(i); 47 Lexicon lexicon = lexicons.get(i);
48 String lexiconName = lexicon.getName(); 48 String lexiconName = lexicon.getName();
49 dbEnvLexica.closeDatabase(lexiconName); 49 dbEnvLexica.closeDatabase(lexiconName);
50 } 50 }
51 dbEnvLexica.close(); 51 dbEnvLexica.close();
52 LOGGER.info("Lexicon db cache: closed"); 52 LOGGER.info("Lexicon cache: db closed");
53 dbEnvLexica = null;
54 instance = null;
53 } 55 }
54 56
55 /** 57 /**
56 * @param query 58 * @param query
57 * @param type 59 * @param type
58 * @param language 60 * @param language
59 * @param normalization 61 * @param normalization
60 * @return lemmas 62 * @return lemmas
61 * @throws ApplicationException 63 * @throws ApplicationException
62 */ 64 */
63 public ArrayList<Lemma> getLemmas(String query, String type, String language, int normMode) throws ApplicationException { 65 public ArrayList<Lemma> getLemmas(String query, String type, String language, int normMode, boolean atLeastOneLemmaWithWordForm) throws ApplicationException {
64 ArrayList<Lemma> lexLemmas = new ArrayList<Lemma>(); 66 ArrayList<Lemma> lexLemmas = new ArrayList<Lemma>();
65 // get lemmas of all forms in query 67 // get lemmas of all forms in query
66 MorphologyCache morphologyCache = MorphologyCache.getInstance(); 68 MorphologyCache morphologyCache = MorphologyCache.getInstance();
67 String[] queryForms = query.split(" "); 69 String[] queryForms = query.split(" ");
68 for (int k=0; k<queryForms.length; k++) { 70 for (int k=0; k<queryForms.length; k++) {
77 lemmas.add(l); 79 lemmas.add(l);
78 } 80 }
79 if (lemmas != null && ! lemmas.isEmpty()) { 81 if (lemmas != null && ! lemmas.isEmpty()) {
80 lexLemmas.addAll(lemmas); 82 lexLemmas.addAll(lemmas);
81 } else { 83 } else {
82 Lemma l = new Lemma("created dynamically cause no lemma is available", language, queryForm); // at least the word form is added for finding it in the lexicon 84 if (atLeastOneLemmaWithWordForm) {
83 lexLemmas.add(l); 85 Lemma l = new Lemma("created dynamically cause no lemma is available", language, queryForm); // at least the word form is added for finding it in the lexicon
86 lexLemmas.add(l);
87 }
84 } 88 }
85 } 89 }
86 Collections.sort(lexLemmas); 90 Collections.sort(lexLemmas);
87 if (lexLemmas.isEmpty()) 91 if (lexLemmas.isEmpty())
88 return null; 92 return null;
168 172
169 public boolean hasLexEntryKey(String formName, String language) throws ApplicationException { 173 public boolean hasLexEntryKey(String formName, String language) throws ApplicationException {
170 boolean hasLexEntry = false; 174 boolean hasLexEntry = false;
171 if (language.equals("zh")) // each chinese character always has a lexicon entry 175 if (language.equals("zh")) // each chinese character always has a lexicon entry
172 return true; 176 return true;
173 ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLocalLexicons(language); 177 String[] lexiconNames = Lexica.getInstance().getLocalLexiconNames(language);
174 if (statLexicons != null) { 178 if (lexiconNames != null) {
175 for (int i=0; i<statLexicons.size(); i++) { 179 for (int i=0; i<lexiconNames.length; i++) {
176 Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries 180 String lexiconName = lexiconNames[i];
177 LexiconEntry lexEntry = readEntry(lexicon.getName(), formName); 181 hasLexEntry = hasKey(lexiconName, formName);
178 if (lexEntry != null) { 182 if (hasLexEntry) {
179 return true; 183 return true;
180 } 184 }
181 } 185 }
182 } 186 }
183 return hasLexEntry; 187 return hasLexEntry;
239 lexEntry = lexicon.getDynamicEntry(formName); 243 lexEntry = lexicon.getDynamicEntry(formName);
240 } 244 }
241 return lexEntry; 245 return lexEntry;
242 } 246 }
243 247
248 private boolean hasKey(String lexiconName, String formName) throws ApplicationException {
249 boolean hasKey = false;
250 try {
251 String keyStr = formName;
252 DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
253 Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
254 Cursor cursor = lexDB.openCursor(null, null);
255 DatabaseEntry foundValue = new DatabaseEntry();
256 foundValue.setPartial(0, 0, true); // more performance: the value is not fetched: only the key is fetched
257 OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
258 if (operationStatus == OperationStatus.SUCCESS) {
259 hasKey = true;
260 }
261 cursor.close();
262 } catch (DatabaseException e) {
263 throw new ApplicationException(e);
264 } catch (UnsupportedEncodingException e) {
265 throw new ApplicationException(e);
266 }
267 return hasKey;
268 }
269
244 private LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException { 270 private LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException {
245 LexiconEntry retLexEntry = null; 271 LexiconEntry retLexEntry = null;
246 try { 272 try {
247 String dbFoundValueStr = null; 273 String dbFoundValueStr = null;
248 String keyStr = formName; 274 String keyStr = formName;
335 for (int i=0; i<lexicons.size(); i++) { 361 for (int i=0; i<lexicons.size(); i++) {
336 Lexicon lexicon = lexicons.get(i); 362 Lexicon lexicon = lexicons.get(i);
337 String lexiconName = lexicon.getName(); 363 String lexiconName = lexicon.getName();
338 dbEnvLexica.openDatabase(lexiconName); 364 dbEnvLexica.openDatabase(lexiconName);
339 } 365 }
340 LOGGER.info("Lexicon db cache: opened"); 366 LOGGER.info("Lexicon cache: db opened read only");
341 } 367 }
342 368
343 private void readSampleData() throws ApplicationException { 369 private void readSampleData() throws ApplicationException {
344 // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames(); 370 // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames();
345 String l1 = readEntry("autenrieth", "au)to/s").getContent(); // greek: see also bonitz and lsj 371 String l1 = readEntry("autenrieth", "au)to/s").getContent(); // greek: see also bonitz and lsj