Mercurial > hg > fulltextSearch
view src/de/mpiwg/dwinter/fulltext/search/cli/Search.java @ 1:5c9c31510f0c
CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an.
https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:26:29 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.fulltext.search.cli; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; import java.util.HashMap; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.xml.sax.SAXException; import de.mpiwg.dwinter.fulltext.search.FulltextSearch; import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher; public class Search { /** * @param args * @throws IOException * @throws ParseException * @throws SAXException * @throws ParserConfigurationException * @throws XPathExpressionException */ public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException { String usage = "search <index_dir>"; if (args.length != 1) { System.err.println("Usage: " + usage); System.exit(1); } String queries = null; FulltextSearch fulltextSearcher = new FulltextSearch(new File(args[0])); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new FileReader(queries)); } else { in = new BufferedReader(new InputStreamReader(System.in)); } while (true) { if (queries == null) // prompt the user System.out.println("Enter query: "); String line = in.readLine(); if (line == null || line.length() == -1) break; line = line.trim(); if (line.length() == 0) break; System.out.println("line: "+line); HashMap<String, Collector> resultHash = fulltextSearcher.languageSearchers.parseAndsearchAllLanguages(line); for (String lang: resultHash.keySet()) { LanguageSearcher searcher = fulltextSearcher.languageSearchers.getSearcherByLanguage(lang); TopDocsCollector col = (TopDocsCollector<ScoreDoc>)resultHash.get(lang); System.out.println(lang+":"+col.getTotalHits() + " total matching documents"); TopDocs td= col.topDocs(); ScoreDoc[] docs = td.scoreDocs; final int HITS_PER_PAGE = 10; for (int start = 0; start < col.getTotalHits(); start += HITS_PER_PAGE) { int end = Math.min(col.getTotalHits(), start + HITS_PER_PAGE); System.out.println("Start:"+String.valueOf(start)); for(int i = start;i<(start+ HITS_PER_PAGE);i++){ //for (ScoreDoc scoreDoc:Adocs) { ScoreDoc scoreDoc = docs[i]; Document doc = searcher.searcher.doc(scoreDoc.doc); String path = doc.get("path"); String textId = doc.get("textId"); System.out.println("path:"+path); System.out.println("docnr:"+String.valueOf(scoreDoc.doc)); System.out.println("textId:"+textId); } if (queries != null) // non-interactive break; if (col.getTotalHits() > end) { System.out.println("more (y/n) ? "); line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') break; } } } } fulltextSearcher.languageSearchers.close(); } }