diff src/de/mpiwg/dwinter/fulltext/search/cli/Search.java @ 1:5c9c31510f0c

CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an. https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author dwinter
date Wed, 03 Nov 2010 12:26:29 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/search/cli/Search.java	Wed Nov 03 12:26:29 2010 +0100
@@ -0,0 +1,113 @@
+package de.mpiwg.dwinter.fulltext.search.cli;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Date;
+import java.util.HashMap;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.xml.sax.SAXException;
+
+import de.mpiwg.dwinter.fulltext.search.FulltextSearch;
+import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher;
+
+public class Search {
+
+	/**
+	 * @param args
+	 * @throws IOException 
+	 * @throws ParseException 
+	 * @throws SAXException 
+	 * @throws ParserConfigurationException 
+	 * @throws XPathExpressionException 
+	 */
+	public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException {
+
+		String usage = "search <index_dir>";
+		if (args.length != 1) {
+			System.err.println("Usage: " + usage);
+			System.exit(1);
+		}
+
+		String queries = null;
+		FulltextSearch fulltextSearcher = new FulltextSearch(new File(args[0]));
+
+		BufferedReader in = null;
+		if (queries != null) {
+			in = new BufferedReader(new FileReader(queries));
+		} else {
+			in = new BufferedReader(new InputStreamReader(System.in));
+		}
+
+		while (true) {
+			if (queries == null)                        // prompt the user
+			System.out.println("Enter query: ");
+
+			String line = in.readLine();
+
+			if (line == null || line.length() == -1)
+				break;
+
+			line = line.trim();
+			if (line.length() == 0)
+				break;
+
+			System.out.println("line: "+line);
+
+			HashMap<String, Collector> resultHash = fulltextSearcher.languageSearchers.parseAndsearchAllLanguages(line);
+
+			for (String lang: resultHash.keySet())
+			{
+				LanguageSearcher searcher = fulltextSearcher.languageSearchers.getSearcherByLanguage(lang);
+				TopDocsCollector col = (TopDocsCollector<ScoreDoc>)resultHash.get(lang); 
+				
+				System.out.println(lang+":"+col.getTotalHits() + " total matching documents");
+
+				TopDocs td= col.topDocs();
+				ScoreDoc[] docs = td.scoreDocs;
+				final int HITS_PER_PAGE = 10;
+				for (int start = 0; start < col.getTotalHits(); start += HITS_PER_PAGE) {
+					int end = Math.min(col.getTotalHits(), start + HITS_PER_PAGE);
+					
+					System.out.println("Start:"+String.valueOf(start));
+					for(int i = start;i<(start+ HITS_PER_PAGE);i++){
+					//for (ScoreDoc scoreDoc:Adocs) {
+						ScoreDoc scoreDoc = docs[i];
+						Document doc = searcher.searcher.doc(scoreDoc.doc);
+
+						String path = doc.get("path");
+						String textId = doc.get("textId");
+						
+						System.out.println("path:"+path);
+						System.out.println("docnr:"+String.valueOf(scoreDoc.doc));
+						System.out.println("textId:"+textId);
+					}
+					if (queries != null)                      // non-interactive
+						break;
+
+					if (col.getTotalHits() > end) {
+						System.out.println("more (y/n) ? ");
+						line = in.readLine();
+						if (line.length() == 0 || line.charAt(0) == 'n')
+							break;
+					}
+				}
+			}
+		}
+		fulltextSearcher.languageSearchers.close();
+	}
+}
+