comparison src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.java @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dc7622afcfea
1 /* */ package de.mpiwg.dwinter.lucencetools.analyzer;
2 /* */
3 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer;
4 /* */ import de.mpiwg.dwinter.lucencetools.documents.MorphDocument;
5 /* */ import java.io.IOException;
6 /* */ import java.io.PrintStream;
7 /* */ import java.util.ArrayList;
8 /* */ import java.util.HashMap;
9 /* */ import java.util.Iterator;
10 /* */ import org.apache.lucene.document.Document;
11 /* */ import org.apache.lucene.index.CorruptIndexException;
12 /* */ import org.apache.lucene.index.IndexReader;
13 /* */ import org.apache.lucene.index.IndexWriter;
14 /* */ import org.apache.lucene.index.Term;
15 /* */ import org.apache.lucene.search.Collector;
16 /* */ import org.apache.lucene.search.IndexSearcher;
17 /* */ import org.apache.lucene.search.TermQuery;
18 /* */ import org.apache.lucene.search.TopScoreDocCollector;
19 /* */
20 /* */ public class LanguageAnalyzers extends ArrayList<LanguageAnalyzer>
21 /* */ {
22 /* */ private static final long serialVersionUID = 2L;
23 /* */ private static final int MAX_HITS_PER_PAGE = 10;
24 /* */
25 /* */ public void optimize()
26 /* */ throws CorruptIndexException, IOException
27 /* */ {
28 /* 43 */ Iterator indexWriter = iterator();
29 /* 44 */ while (indexWriter.hasNext())
30 /* */ {
31 /* 46 */ ((LanguageAnalyzer)indexWriter.next()).writer.optimize();
32 /* */ }
33 /* */ }
34 /* */
35 /* */ public void close()
36 /* */ throws CorruptIndexException, IOException
37 /* */ {
38 /* 57 */ Iterator indexWriter = iterator();
39 /* 58 */ while (indexWriter.hasNext())
40 /* */ {
41 /* 60 */ ((LanguageAnalyzer)indexWriter.next()).writer.close();
42 /* */ }
43 /* */ }
44 /* */
45 /* */ public void addDocument(Document document, String lang)
46 /* */ throws CorruptIndexException, IOException
47 /* */ {
48 /* 74 */ LanguageAnalyzer analyzer = getAnalyzer(lang);
49 /* */
50 /* 76 */ if (analyzer != null)
51 /* */ {
52 /* 78 */ analyzer.writer.addDocument(document);
53 /* */
54 /* 81 */ if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer))
55 /* */ return;
56 /* 83 */ StringBuffer mp = DonatusAnalyzer.morphFile;
57 /* */
58 /* 86 */ LanguageAnalyzer mpAnalyzer = getAnalyzer("morph");
59 /* */
60 /* 88 */ mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath")));
61 /* */ }
62 /* */ else
63 /* */ {
64
65 System.err.println("addDocument: cannot add the document language " + lang + " not known!");
66 /* */ }
67 /* */ }
68 /* */
69 /* */ public LanguageAnalyzer getAnalyzer(String lang)
70 /* */ {
71 /* 106 */ for (LanguageAnalyzer analyzer : this)
72 /* 107 */ if (analyzer.lang.equals(lang))
73 /* 108 */ return analyzer;
74 /* 109 */ return null;
75 /* */ }
76 /* */ public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
77 /* 112 */ for (LanguageAnalyzer analyzer : this)
78 /* 113 */ analyzer.writer.deleteDocuments(term);
79 /* */ }
80 /* */
81 /* */ public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException
82 /* */ {
83 /* 118 */ for (LanguageAnalyzer analyzer : this)
84 /* 119 */ analyzer.writer.deleteDocuments(query);
85 /* */ }
86 /* */
87 /* */ public void deleteDocument(int id) throws CorruptIndexException, IOException
88 /* */ {
89 /* 124 */ for (LanguageAnalyzer analyzer : this)
90 /* 125 */ analyzer.reader.deleteDocument(id);
91 /* */ }
92 /* */
93 /* */ public HashMap<String, Collector> search(TermQuery query) throws IOException {
94 /* 129 */ HashMap collectors = new HashMap();
95 /* 130 */ for (LanguageAnalyzer analyzer : this)
96 /* */ {
97 /* 132 */ if (analyzer.searcher == null)
98 /* */ continue;
99 /* 134 */ Collector col = TopScoreDocCollector.create(10, false);
100 /* 135 */ analyzer.searcher.search(query, col);
101 /* 136 */ collectors.put(analyzer.lang, col);
102 /* */ }
103 /* */
104 /* 140 */ return collectors;
105 /* */ }
106 /* */ }
107
108 /* Location: /private/tmp/fulltextIndexer.jar
109 * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
110 * JD-Core Version: 0.5.4
111 */