0
|
1 /* */ package de.mpiwg.dwinter.lucencetools.analyzer;
|
|
2 /* */
|
|
3 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer;
|
|
4 /* */ import de.mpiwg.dwinter.lucencetools.documents.MorphDocument;
|
|
5 /* */ import java.io.IOException;
|
|
6 /* */ import java.io.PrintStream;
|
|
7 /* */ import java.util.ArrayList;
|
|
8 /* */ import java.util.HashMap;
|
|
9 /* */ import java.util.Iterator;
|
|
10 /* */ import org.apache.lucene.document.Document;
|
|
11 /* */ import org.apache.lucene.index.CorruptIndexException;
|
|
12 /* */ import org.apache.lucene.index.IndexReader;
|
|
13 /* */ import org.apache.lucene.index.IndexWriter;
|
|
14 /* */ import org.apache.lucene.index.Term;
|
|
15 /* */ import org.apache.lucene.search.Collector;
|
|
16 /* */ import org.apache.lucene.search.IndexSearcher;
|
|
17 /* */ import org.apache.lucene.search.TermQuery;
|
|
18 /* */ import org.apache.lucene.search.TopScoreDocCollector;
|
|
19 /* */
|
|
20 /* */ public class LanguageAnalyzers extends ArrayList<LanguageAnalyzer>
|
|
21 /* */ {
|
|
22 /* */ private static final long serialVersionUID = 2L;
|
|
23 /* */ private static final int MAX_HITS_PER_PAGE = 10;
|
|
24 /* */
|
|
25 /* */ public void optimize()
|
|
26 /* */ throws CorruptIndexException, IOException
|
|
27 /* */ {
|
|
28 /* 43 */ Iterator indexWriter = iterator();
|
|
29 /* 44 */ while (indexWriter.hasNext())
|
|
30 /* */ {
|
|
31 /* 46 */ ((LanguageAnalyzer)indexWriter.next()).writer.optimize();
|
|
32 /* */ }
|
|
33 /* */ }
|
|
34 /* */
|
|
35 /* */ public void close()
|
|
36 /* */ throws CorruptIndexException, IOException
|
|
37 /* */ {
|
|
38 /* 57 */ Iterator indexWriter = iterator();
|
|
39 /* 58 */ while (indexWriter.hasNext())
|
|
40 /* */ {
|
|
41 /* 60 */ ((LanguageAnalyzer)indexWriter.next()).writer.close();
|
|
42 /* */ }
|
|
43 /* */ }
|
|
44 /* */
|
|
45 /* */ public void addDocument(Document document, String lang)
|
|
46 /* */ throws CorruptIndexException, IOException
|
|
47 /* */ {
|
|
48 /* 74 */ LanguageAnalyzer analyzer = getAnalyzer(lang);
|
|
49 /* */
|
|
50 /* 76 */ if (analyzer != null)
|
|
51 /* */ {
|
|
52 /* 78 */ analyzer.writer.addDocument(document);
|
|
53 /* */
|
|
54 /* 81 */ if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer))
|
|
55 /* */ return;
|
|
56 /* 83 */ StringBuffer mp = DonatusAnalyzer.morphFile;
|
|
57 /* */
|
|
58 /* 86 */ LanguageAnalyzer mpAnalyzer = getAnalyzer("morph");
|
|
59 /* */
|
|
60 /* 88 */ mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath")));
|
|
61 /* */ }
|
|
62 /* */ else
|
|
63 /* */ {
|
|
64
|
|
65 System.err.println("addDocument: cannot add the document language " + lang + " not known!");
|
|
66 /* */ }
|
|
67 /* */ }
|
|
68 /* */
|
|
69 /* */ public LanguageAnalyzer getAnalyzer(String lang)
|
|
70 /* */ {
|
|
71 /* 106 */ for (LanguageAnalyzer analyzer : this)
|
|
72 /* 107 */ if (analyzer.lang.equals(lang))
|
|
73 /* 108 */ return analyzer;
|
|
74 /* 109 */ return null;
|
|
75 /* */ }
|
|
76 /* */ public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
|
|
77 /* 112 */ for (LanguageAnalyzer analyzer : this)
|
|
78 /* 113 */ analyzer.writer.deleteDocuments(term);
|
|
79 /* */ }
|
|
80 /* */
|
|
81 /* */ public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException
|
|
82 /* */ {
|
|
83 /* 118 */ for (LanguageAnalyzer analyzer : this)
|
|
84 /* 119 */ analyzer.writer.deleteDocuments(query);
|
|
85 /* */ }
|
|
86 /* */
|
|
87 /* */ public void deleteDocument(int id) throws CorruptIndexException, IOException
|
|
88 /* */ {
|
|
89 /* 124 */ for (LanguageAnalyzer analyzer : this)
|
|
90 /* 125 */ analyzer.reader.deleteDocument(id);
|
|
91 /* */ }
|
|
92 /* */
|
|
93 /* */ public HashMap<String, Collector> search(TermQuery query) throws IOException {
|
|
94 /* 129 */ HashMap collectors = new HashMap();
|
|
95 /* 130 */ for (LanguageAnalyzer analyzer : this)
|
|
96 /* */ {
|
|
97 /* 132 */ if (analyzer.searcher == null)
|
|
98 /* */ continue;
|
|
99 /* 134 */ Collector col = TopScoreDocCollector.create(10, false);
|
|
100 /* 135 */ analyzer.searcher.search(query, col);
|
|
101 /* 136 */ collectors.put(analyzer.lang, col);
|
|
102 /* */ }
|
|
103 /* */
|
|
104 /* 140 */ return collectors;
|
|
105 /* */ }
|
|
106 /* */ }
|
|
/* Location: /private/tmp/fulltextIndexer.jar
 * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
 * JD-Core Version: 0.5.4
 */