Mercurial > hg > fulltextSearch
diff src/de/mpiwg/dwinter/fulltext/search/cli/Search.java @ 1:5c9c31510f0c
CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an.
https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:26:29 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltext/search/cli/Search.java Wed Nov 03 12:26:29 2010 +0100 @@ -0,0 +1,113 @@ +package de.mpiwg.dwinter.fulltext.search.cli; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Date; +import java.util.HashMap; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.xml.sax.SAXException; + +import de.mpiwg.dwinter.fulltext.search.FulltextSearch; +import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher; + +public class Search { + + /** + * @param args + * @throws IOException + * @throws ParseException + * @throws SAXException + * @throws ParserConfigurationException + * @throws XPathExpressionException + */ + public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException { + + String usage = "search <index_dir>"; + if (args.length != 1) { + System.err.println("Usage: " + usage); + System.exit(1); + } + + String queries = null; + FulltextSearch fulltextSearcher = new FulltextSearch(new File(args[0])); + + BufferedReader in = null; + if (queries != null) { + in = new BufferedReader(new FileReader(queries)); + } else { + in = new BufferedReader(new InputStreamReader(System.in)); + } + + while (true) { + if (queries == null) // prompt the user + System.out.println("Enter query: "); + + String line = in.readLine(); + + if (line == null || line.length() == -1) + break; + + line = line.trim(); + if (line.length() == 0) + break; + + System.out.println("line: "+line); + + HashMap<String, Collector> resultHash = fulltextSearcher.languageSearchers.parseAndsearchAllLanguages(line); + + for (String lang: resultHash.keySet()) + { + LanguageSearcher searcher = fulltextSearcher.languageSearchers.getSearcherByLanguage(lang); + TopDocsCollector col = (TopDocsCollector<ScoreDoc>)resultHash.get(lang); + + System.out.println(lang+":"+col.getTotalHits() + " total matching documents"); + + TopDocs td= col.topDocs(); + ScoreDoc[] docs = td.scoreDocs; + final int HITS_PER_PAGE = 10; + for (int start = 0; start < col.getTotalHits(); start += HITS_PER_PAGE) { + int end = Math.min(col.getTotalHits(), start + HITS_PER_PAGE); + + System.out.println("Start:"+String.valueOf(start)); + for(int i = start;i<(start+ HITS_PER_PAGE);i++){ + //for (ScoreDoc scoreDoc:Adocs) { + ScoreDoc scoreDoc = docs[i]; + Document doc = searcher.searcher.doc(scoreDoc.doc); + + String path = doc.get("path"); + String textId = doc.get("textId"); + + System.out.println("path:"+path); + System.out.println("docnr:"+String.valueOf(scoreDoc.doc)); + System.out.println("textId:"+textId); + } + if (queries != null) // non-interactive + break; + + if (col.getTotalHits() > end) { + System.out.println("more (y/n) ? "); + line = in.readLine(); + if (line.length() == 0 || line.charAt(0) == 'n') + break; + } + } + } + } + fulltextSearcher.languageSearchers.close(); + } +} +