Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.java @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 /* */ package de.mpiwg.dwinter.lucencetools.analyzer; | |
2 /* */ | |
3 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer; | |
4 /* */ import de.mpiwg.dwinter.lucencetools.documents.MorphDocument; | |
5 /* */ import java.io.IOException; | |
6 /* */ import java.io.PrintStream; | |
7 /* */ import java.util.ArrayList; | |
8 /* */ import java.util.HashMap; | |
9 /* */ import java.util.Iterator; | |
10 /* */ import org.apache.lucene.document.Document; | |
11 /* */ import org.apache.lucene.index.CorruptIndexException; | |
12 /* */ import org.apache.lucene.index.IndexReader; | |
13 /* */ import org.apache.lucene.index.IndexWriter; | |
14 /* */ import org.apache.lucene.index.Term; | |
15 /* */ import org.apache.lucene.search.Collector; | |
16 /* */ import org.apache.lucene.search.IndexSearcher; | |
17 /* */ import org.apache.lucene.search.TermQuery; | |
18 /* */ import org.apache.lucene.search.TopScoreDocCollector; | |
19 /* */ | |
20 /* */ public class LanguageAnalyzers extends ArrayList<LanguageAnalyzer> | |
21 /* */ { | |
22 /* */ private static final long serialVersionUID = 2L; | |
23 /* */ private static final int MAX_HITS_PER_PAGE = 10; | |
24 /* */ | |
25 /* */ public void optimize() | |
26 /* */ throws CorruptIndexException, IOException | |
27 /* */ { | |
28 /* 43 */ Iterator indexWriter = iterator(); | |
29 /* 44 */ while (indexWriter.hasNext()) | |
30 /* */ { | |
31 /* 46 */ ((LanguageAnalyzer)indexWriter.next()).writer.optimize(); | |
32 /* */ } | |
33 /* */ } | |
34 /* */ | |
35 /* */ public void close() | |
36 /* */ throws CorruptIndexException, IOException | |
37 /* */ { | |
38 /* 57 */ Iterator indexWriter = iterator(); | |
39 /* 58 */ while (indexWriter.hasNext()) | |
40 /* */ { | |
41 /* 60 */ ((LanguageAnalyzer)indexWriter.next()).writer.close(); | |
42 /* */ } | |
43 /* */ } | |
44 /* */ | |
45 /* */ public void addDocument(Document document, String lang) | |
46 /* */ throws CorruptIndexException, IOException | |
47 /* */ { | |
48 /* 74 */ LanguageAnalyzer analyzer = getAnalyzer(lang); | |
49 /* */ | |
50 /* 76 */ if (analyzer != null) | |
51 /* */ { | |
52 /* 78 */ analyzer.writer.addDocument(document); | |
53 /* */ | |
54 /* 81 */ if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer)) | |
55 /* */ return; | |
56 /* 83 */ StringBuffer mp = DonatusAnalyzer.morphFile; | |
57 /* */ | |
58 /* 86 */ LanguageAnalyzer mpAnalyzer = getAnalyzer("morph"); | |
59 /* */ | |
60 /* 88 */ mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath"))); | |
61 /* */ } | |
62 /* */ else | |
63 /* */ { | |
64 | |
65 System.err.println("addDocument: cannot add the document language " + lang + " not known!"); | |
66 /* */ } | |
67 /* */ } | |
68 /* */ | |
69 /* */ public LanguageAnalyzer getAnalyzer(String lang) | |
70 /* */ { | |
71 /* 106 */ for (LanguageAnalyzer analyzer : this) | |
72 /* 107 */ if (analyzer.lang.equals(lang)) | |
73 /* 108 */ return analyzer; | |
74 /* 109 */ return null; | |
75 /* */ } | |
76 /* */ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { | |
77 /* 112 */ for (LanguageAnalyzer analyzer : this) | |
78 /* 113 */ analyzer.writer.deleteDocuments(term); | |
79 /* */ } | |
80 /* */ | |
81 /* */ public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException | |
82 /* */ { | |
83 /* 118 */ for (LanguageAnalyzer analyzer : this) | |
84 /* 119 */ analyzer.writer.deleteDocuments(query); | |
85 /* */ } | |
86 /* */ | |
87 /* */ public void deleteDocument(int id) throws CorruptIndexException, IOException | |
88 /* */ { | |
89 /* 124 */ for (LanguageAnalyzer analyzer : this) | |
90 /* 125 */ analyzer.reader.deleteDocument(id); | |
91 /* */ } | |
92 /* */ | |
93 /* */ public HashMap<String, Collector> search(TermQuery query) throws IOException { | |
94 /* 129 */ HashMap collectors = new HashMap(); | |
95 /* 130 */ for (LanguageAnalyzer analyzer : this) | |
96 /* */ { | |
97 /* 132 */ if (analyzer.searcher == null) | |
98 /* */ continue; | |
99 /* 134 */ Collector col = TopScoreDocCollector.create(10, false); | |
100 /* 135 */ analyzer.searcher.search(query, col); | |
101 /* 136 */ collectors.put(analyzer.lang, col); | |
102 /* */ } | |
103 /* */ | |
104 /* 140 */ return collectors; | |
105 /* */ } | |
106 /* */ } | |
107 | |
108 /* Location: /private/tmp/fulltextIndexer.jar | |
109 * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers | |
110 * JD-Core Version: 0.5.4 | |
111 */ |