comparison src/de/mpiwg/dwinter/fulltext/search/cli/Search.java @ 1:5c9c31510f0c

CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an. https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author dwinter
date Wed, 03 Nov 2010 12:26:29 +0100
parents
children
comparison
equal deleted inserted replaced
0:72a015318a6d 1:5c9c31510f0c
1 package de.mpiwg.dwinter.fulltext.search.cli;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.io.InputStreamReader;
8 import java.util.Date;
9 import java.util.HashMap;
10
11 import javax.xml.parsers.ParserConfigurationException;
12 import javax.xml.xpath.XPathExpressionException;
13
14 import org.apache.lucene.document.Document;
15 import org.apache.lucene.queryParser.ParseException;
16 import org.apache.lucene.queryParser.QueryParser;
17 import org.apache.lucene.search.Collector;
18 import org.apache.lucene.search.Query;
19 import org.apache.lucene.search.ScoreDoc;
20 import org.apache.lucene.search.TopDocs;
21 import org.apache.lucene.search.TopDocsCollector;
22 import org.xml.sax.SAXException;
23
24 import de.mpiwg.dwinter.fulltext.search.FulltextSearch;
25 import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher;
26
27 public class Search {
28
29 /**
30 * @param args
31 * @throws IOException
32 * @throws ParseException
33 * @throws SAXException
34 * @throws ParserConfigurationException
35 * @throws XPathExpressionException
36 */
37 public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException {
38
39 String usage = "search <index_dir>";
40 if (args.length != 1) {
41 System.err.println("Usage: " + usage);
42 System.exit(1);
43 }
44
45 String queries = null;
46 FulltextSearch fulltextSearcher = new FulltextSearch(new File(args[0]));
47
48 BufferedReader in = null;
49 if (queries != null) {
50 in = new BufferedReader(new FileReader(queries));
51 } else {
52 in = new BufferedReader(new InputStreamReader(System.in));
53 }
54
55 while (true) {
56 if (queries == null) // prompt the user
57 System.out.println("Enter query: ");
58
59 String line = in.readLine();
60
61 if (line == null || line.length() == -1)
62 break;
63
64 line = line.trim();
65 if (line.length() == 0)
66 break;
67
68 System.out.println("line: "+line);
69
70 HashMap<String, Collector> resultHash = fulltextSearcher.languageSearchers.parseAndsearchAllLanguages(line);
71
72 for (String lang: resultHash.keySet())
73 {
74 LanguageSearcher searcher = fulltextSearcher.languageSearchers.getSearcherByLanguage(lang);
75 TopDocsCollector col = (TopDocsCollector<ScoreDoc>)resultHash.get(lang);
76
77 System.out.println(lang+":"+col.getTotalHits() + " total matching documents");
78
79 TopDocs td= col.topDocs();
80 ScoreDoc[] docs = td.scoreDocs;
81 final int HITS_PER_PAGE = 10;
82 for (int start = 0; start < col.getTotalHits(); start += HITS_PER_PAGE) {
83 int end = Math.min(col.getTotalHits(), start + HITS_PER_PAGE);
84
85 System.out.println("Start:"+String.valueOf(start));
86 for(int i = start;i<(start+ HITS_PER_PAGE);i++){
87 //for (ScoreDoc scoreDoc:Adocs) {
88 ScoreDoc scoreDoc = docs[i];
89 Document doc = searcher.searcher.doc(scoreDoc.doc);
90
91 String path = doc.get("path");
92 String textId = doc.get("textId");
93
94 System.out.println("path:"+path);
95 System.out.println("docnr:"+String.valueOf(scoreDoc.doc));
96 System.out.println("textId:"+textId);
97 }
98 if (queries != null) // non-interactive
99 break;
100
101 if (col.getTotalHits() > end) {
102 System.out.println("more (y/n) ? ");
103 line = in.readLine();
104 if (line.length() == 0 || line.charAt(0) == 'n')
105 break;
106 }
107 }
108 }
109 }
110 fulltextSearcher.languageSearchers.close();
111 }
112 }
113