view src/de/mpiwg/dwinter/fulltext/search/cli/Search.java @ 1:5c9c31510f0c

CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an. https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author dwinter
date Wed, 03 Nov 2010 12:26:29 +0100
parents
children
line wrap: on
line source

package de.mpiwg.dwinter.fulltext.search.cli;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.HashMap;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.xml.sax.SAXException;

import de.mpiwg.dwinter.fulltext.search.FulltextSearch;
import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher;

/**
 * Interactive command-line front end for the full-text index.
 *
 * Reads queries line by line, runs each query against every language
 * searcher of a {@link FulltextSearch} and prints the hits page by page.
 */
public class Search {

	/** Number of hits printed before the user is asked whether to continue. */
	private static final int HITS_PER_PAGE = 10;

	/**
	 * Opens the index given as the single command-line argument and answers
	 * queries until end of input or an empty line is read.
	 *
	 * @param args exactly one element: the index directory
	 * @throws IOException if reading the input or the index fails
	 * @throws ParseException declared by the search calls used here
	 * @throws SAXException declared by the search calls used here
	 * @throws ParserConfigurationException declared by the search calls used here
	 * @throws XPathExpressionException declared by the search calls used here
	 */
	public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException {

		String usage = "search <index_dir>";
		if (args.length != 1) {
			System.err.println("Usage: " + usage);
			System.exit(1);
		}

		// Optional file with one query per line; always null at the moment,
		// so queries are read interactively from standard input.
		String queries = null;
		boolean interactive = (queries == null);

		FulltextSearch fulltextSearcher = new FulltextSearch(new File(args[0]));
		try {
			BufferedReader in;
			if (interactive) {
				in = new BufferedReader(new InputStreamReader(System.in));
			} else {
				in = new BufferedReader(new FileReader(queries));
			}

			try {
				while (true) {
					if (interactive) {
						// prompt the user
						System.out.println("Enter query: ");
					}

					String line = in.readLine();
					if (line == null) {
						// end of input
						break;
					}

					line = line.trim();
					if (line.length() == 0) {
						// an empty line terminates the session
						break;
					}

					System.out.println("line: "+line);

					HashMap<String, Collector> resultHash = fulltextSearcher.languageSearchers.parseAndsearchAllLanguages(line);

					for (String lang : resultHash.keySet()) {
						LanguageSearcher searcher = fulltextSearcher.languageSearchers.getSearcherByLanguage(lang);
						TopDocsCollector<?> col = (TopDocsCollector<?>) resultHash.get(lang);

						System.out.println(lang+":"+col.getTotalHits() + " total matching documents");

						printHits(searcher, col, in, interactive);
					}
				}
			} finally {
				// Only a reader opened on a query file is closed here;
				// standard input is left open.
				if (!interactive) {
					in.close();
				}
			}
		} finally {
			// Release the searchers even when a query or index read fails.
			fulltextSearcher.languageSearchers.close();
		}
	}

	/**
	 * Prints the hits held by a collector in pages of {@link #HITS_PER_PAGE}.
	 *
	 * Only the hits actually returned by {@code col.topDocs()} can be shown;
	 * their number may be smaller than {@code col.getTotalHits()}.
	 *
	 * @param searcher    searcher used to load the stored documents
	 * @param col         collector holding the hits for one language
	 * @param in          reader used to ask the user whether to show more hits
	 * @param interactive if false, only the first page is printed
	 * @throws IOException if a document cannot be loaded or the input cannot be read
	 */
	private static void printHits(LanguageSearcher searcher, TopDocsCollector<?> col, BufferedReader in, boolean interactive) throws IOException {
		TopDocs td = col.topDocs();
		ScoreDoc[] docs = td.scoreDocs;
		int available = docs.length;

		for (int start = 0; start < available; start += HITS_PER_PAGE) {
			// Clamp the page to the hits that exist so the last, partial page
			// does not index past the end of the array.
			int end = Math.min(available, start + HITS_PER_PAGE);

			System.out.println("Start:"+String.valueOf(start));
			for (int i = start; i < end; i++) {
				ScoreDoc scoreDoc = docs[i];
				Document doc = searcher.searcher.doc(scoreDoc.doc);

				String path = doc.get("path");
				String textId = doc.get("textId");

				System.out.println("path:"+path);
				System.out.println("docnr:"+String.valueOf(scoreDoc.doc));
				System.out.println("textId:"+textId);
			}

			if (!interactive) {
				// non-interactive: first page only
				break;
			}

			if (available > end) {
				System.out.println("more (y/n) ? ");
				String answer = in.readLine();
				// End of input counts as "no", as does an empty answer.
				if (answer == null || answer.length() == 0 || answer.charAt(0) == 'n') {
					break;
				}
			}
		}
	}
}