Mercurial > hg > fulltextIndexer
view src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzers.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.lucencetools.analyzer;

import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer;
import de.mpiwg.dwinter.lucencetools.documents.MorphDocument;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;

/**
 * A list of {@link LanguageAnalyzer} entries that fans index operations out to
 * the writer, reader or searcher held by each entry.
 *
 * <p>Entries are looked up by their {@code lang} field; see
 * {@link #getAnalyzer(String)}.
 */
public class LanguageAnalyzers extends ArrayList<LanguageAnalyzer>
{
    private static final long serialVersionUID = 2L;

    /** Number of top hits collected per language in {@link #search(TermQuery)}. */
    private static final int MAX_HITS_PER_PAGE = 10;

    /**
     * Calls {@code optimize()} on the writer of every entry in this list.
     *
     * @throws CorruptIndexException if a writer reports a corrupt index
     * @throws IOException if a writer fails with an I/O error
     */
    public void optimize()
        throws CorruptIndexException, IOException
    {
        for (LanguageAnalyzer analyzer : this) {
            analyzer.writer.optimize();
        }
    }

    /**
     * Closes the writer of every entry in this list.
     *
     * <p>Every writer is attempted even when an earlier one fails, so a single
     * broken index does not leave the remaining writers open. The first
     * failure is rethrown after all writers have been tried; later failures
     * are reported on {@code System.err}.
     *
     * @throws CorruptIndexException if the first failing writer reports a corrupt index
     * @throws IOException if the first failing writer fails with an I/O error
     */
    public void close()
        throws CorruptIndexException, IOException
    {
        IOException firstFailure = null;
        for (LanguageAnalyzer analyzer : this) {
            try {
                analyzer.writer.close();
            } catch (IOException e) {
                // CorruptIndexException is an IOException, so it is covered here too.
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    System.err.println("close: additional failure while closing writer for language "
                        + analyzer.lang + ": " + e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Adds {@code document} to the index of the analyzer registered for
     * {@code lang}.
     *
     * <p>When that entry's analyzer is a {@link DonatusAnalyzer}, a second
     * document built from {@code DonatusAnalyzer.morphFile} and the document's
     * {@code "cleanedPath"} field is added to the entry registered as
     * {@code "morph"}. If either entry is missing, a message is printed on
     * {@code System.err} and nothing (more) is written.
     *
     * @param document the document to index
     * @param lang language key used to select the target entry
     * @throws CorruptIndexException if a writer reports a corrupt index
     * @throws IOException if a writer fails with an I/O error
     */
    public void addDocument(Document document, String lang)
        throws CorruptIndexException, IOException
    {
        LanguageAnalyzer analyzer = getAnalyzer(lang);
        if (analyzer == null) {
            System.err.println("addDocument: cannot add the document language " + lang + " not known!");
            return;
        }

        analyzer.writer.addDocument(document);

        if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer)) {
            return;
        }

        // NOTE(review): morphFile is a static field shared by all DonatusAnalyzer
        // users; presumably it holds the morphology output of the document that
        // was just analyzed - confirm against DonatusAnalyzer.
        StringBuffer mp = DonatusAnalyzer.morphFile;

        LanguageAnalyzer mpAnalyzer = getAnalyzer("morph");
        if (mpAnalyzer == null) {
            // getAnalyzer returns null for unknown languages; without this guard
            // the call below failed with a NullPointerException.
            System.err.println("addDocument: cannot add the morph document, language morph not known!");
            return;
        }

        mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath")));
    }

    /**
     * Returns the first entry whose {@code lang} field equals {@code lang}.
     *
     * @param lang language key to look for
     * @return the matching entry, or {@code null} if there is none
     */
    public LanguageAnalyzer getAnalyzer(String lang)
    {
        for (LanguageAnalyzer analyzer : this) {
            if (analyzer.lang.equals(lang)) {
                return analyzer;
            }
        }
        return null;
    }

    /**
     * Calls {@code deleteDocuments(term)} on the writer of every entry.
     *
     * @param term term passed to each writer
     * @throws CorruptIndexException if a writer reports a corrupt index
     * @throws IOException if a writer fails with an I/O error
     */
    public void deleteDocuments(Term term) throws CorruptIndexException, IOException
    {
        for (LanguageAnalyzer analyzer : this) {
            analyzer.writer.deleteDocuments(term);
        }
    }

    /**
     * Calls {@code deleteDocuments(query)} on the writer of every entry.
     *
     * @param query query passed to each writer
     * @throws CorruptIndexException if a writer reports a corrupt index
     * @throws IOException if a writer fails with an I/O error
     */
    public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException
    {
        for (LanguageAnalyzer analyzer : this) {
            analyzer.writer.deleteDocuments(query);
        }
    }

    /**
     * Calls {@code deleteDocument(id)} on the reader of every entry.
     *
     * <p>NOTE(review): the same {@code id} is applied to every reader; verify
     * that callers really intend one document number to be deleted from each
     * language index.
     *
     * @param id document number passed to each reader
     * @throws CorruptIndexException if a reader reports a corrupt index
     * @throws IOException if a reader fails with an I/O error
     */
    public void deleteDocument(int id) throws CorruptIndexException, IOException
    {
        for (LanguageAnalyzer analyzer : this) {
            analyzer.reader.deleteDocument(id);
        }
    }

    /**
     * Runs {@code query} against every entry that has a searcher.
     *
     * @param query the query to execute
     * @return a map from each searched entry's {@code lang} to the collector
     *         holding its hits; entries without a searcher are omitted
     * @throws IOException if a searcher fails with an I/O error
     */
    public HashMap<String, Collector> search(TermQuery query) throws IOException
    {
        HashMap<String, Collector> collectors = new HashMap<String, Collector>();
        for (LanguageAnalyzer analyzer : this) {
            if (analyzer.searcher == null) {
                continue;
            }
            Collector col = TopScoreDocCollector.create(MAX_HITS_PER_PAGE, false);
            analyzer.searcher.search(query, col);
            collectors.put(analyzer.lang, col);
        }
        return collectors;
    }
}

/* Location: /private/tmp/fulltextIndexer.jar
 * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
 * JD-Core Version: 0.5.4
 */