Mercurial > hg > fulltextSearch
view src/de/mpiwg/dwinter/fulltext/searcher/LanguageSearcher.java @ 1:5c9c31510f0c
CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an.
https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:26:29 +0100 |
parents | |
children | 2b29b0b6db16 |
line wrap: on
line source
package de.mpiwg.dwinter.fulltext.searcher;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import com.sun.org.apache.xerces.internal.parsers.SAXParser;

import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;

/**
 * Parses query strings and runs them against the Lucene index of one language.
 *
 * <p>The index itself is opened by the {@link LanguageAnalyzer} superclass constructor; this class
 * wraps the inherited {@code reader} in an {@link IndexSearcher} and parses queries with
 * {@code "contents"} as the default field.
 */
public class LanguageSearcher extends LanguageAnalyzer {

    /** Maximum number of hits collected per search. */
    static final int MAX_RESULTS = 10000;

    /** Searcher over the index reader inherited from the superclass. */
    public Searcher searcher = null;

    /** Query parser using {@code "contents"} as default field and the analyzer given at construction. */
    public QueryParser parser = null;

    /**
     * Opens the index and prepares the searcher and the query parser.
     *
     * @param lang language identifier, passed on to the superclass
     * @param analyzer analyzer used by the superclass and for parsing query strings
     * @param index_dir directory of the Lucene index, passed on to the superclass
     * @throws CorruptIndexException declared by the superclass constructor / index access
     * @throws LockObtainFailedException declared by the superclass constructor / index access
     * @throws IOException on low-level I/O errors while opening the index
     */
    public LanguageSearcher(String lang, Analyzer analyzer, File index_dir)
            throws CorruptIndexException, LockObtainFailedException, IOException {
        // NOTE(review): the meaning of the trailing 'true' is defined by LanguageAnalyzer
        // (presumably a read-only flag) -- confirm against that class.
        super(lang, analyzer, index_dir, true);
        searcher = new IndexSearcher(this.reader);
        parser = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    }

    /**
     * Parses {@code searchString} and collects up to {@link #MAX_RESULTS} hits.
     *
     * @param searchString query in Lucene query-parser syntax; unqualified terms go to "contents"
     * @return the {@link TopScoreDocCollector} holding the hits
     * @throws ParseException if the query string cannot be parsed
     * @throws IOException if searching the index fails
     */
    public Collector parseAndSearch(String searchString) throws ParseException, IOException {
        return collectTopHits(searchString);
    }

    /**
     * Like {@link #parseAndSearch(String)}, but additionally requires the {@code dcMetaData}
     * field to match {@code mdString}.
     *
     * @param searchString query in Lucene query-parser syntax
     * @param mdString expression appended verbatim after {@code dcMetaData:}; only its first
     *        term/phrase is bound to that field unless the caller groups or quotes it
     * @return the {@link TopScoreDocCollector} holding the hits
     * @throws ParseException if the combined query string cannot be parsed
     * @throws IOException if searching the index fails
     */
    public Collector parseAndSearch(String searchString, String mdString)
            throws ParseException, IOException {
        // The user query is grouped so that AND applies to the query as a whole. Without the
        // parentheses the query parser attaches AND only to the last clause of searchString
        // ("a b AND dcMetaData:x" becomes "a +b +dcMetaData:x"), silently making that clause
        // mandatory.
        String restrictedQuery = "(" + searchString + ") AND dcMetaData:" + mdString;
        return collectTopHits(restrictedQuery);
    }

    /** Parses the query string, logs it, and runs it with a collector capped at MAX_RESULTS. */
    private TopScoreDocCollector collectTopHits(String queryString)
            throws ParseException, IOException {
        Query query = parser.parse(queryString);
        System.out.println("Parse and search:" + query);
        System.out.println("Parse and search lang:" + this.lang);

        TopScoreDocCollector col = TopScoreDocCollector.create(MAX_RESULTS, false);
        searcher.search(query, col);
        return col;
    }

    // TODO: implement the donatus bits
//    public HashMap<String,Collector> parseAndSearchAndAnalyse(String searchString) throws ParseException, IOException
//    {
//        HashMap<String, Collector> retMap = new HashMap<String,Collector>();
//        Query query= parser.parse(searchString);
//        System.out.println("Parse and search and analyse:"+query);
//
//        TopScoreDocCollector col = TopScoreDocCollector.create(MAX_RESULTS, false);
//        //searcher.searcher.search(query, col);
//        searcher.search(query,col);
//
//        String word;
//
//        try {
//            word = getNormalizedSearchString(((DonatusAnalyzer)analyzer).morphFile.toString());
//        } catch (RuntimeException e) {
//            word=searchString;
//        }
//
//        retMap.put(word, result);
//        return retMap;
//    }

//    private String getNormalizedSearchString(String string) throws IOException {
//
//        XMLReader XMLparser = new SAXParser();
//        ParseDonatusResultDocument ch = new ParseDonatusResultDocument();
//        XMLparser.setContentHandler(ch);
//
//        try {
//            System.out.println("CCCCCCCCCCC:"+string);
//            Reader reader = new StringReader(string);
//            InputSource input = new InputSource(reader);
//            XMLparser.parse(input);
//        } catch (SAXException e) {
//            // TODO Auto-generated catch block
//            e.printStackTrace();
//        }
//
//        String ret = "";
//
//        for (String lemma: ch.value.values())
//        {
//            ret+=lemma+" ";
//        }
//
//        return ret;
//    }
}