Mercurial > hg > fulltextSearch
diff src/de/mpiwg/dwinter/fulltext/tests/TestSearchInLinesToDir.java @ 1:5c9c31510f0c
CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an.
https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:26:29 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltext/tests/TestSearchInLinesToDir.java Wed Nov 03 12:26:29 2010 +0100 @@ -0,0 +1,85 @@ +package de.mpiwg.dwinter.fulltext.tests; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Date; +import java.util.HashMap; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.util.Version; +import org.xml.sax.SAXException; + +import de.mpiwg.dwinter.fulltext.search.FulltextSearch; +import de.mpiwg.dwinter.fulltext.search.FulltextSearchDocsLines; +import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher; + +public class TestSearchInLinesToDir { + + protected static Logger logger = Logger.getRootLogger(); + + public static void main(String[] args) throws IOException, ParseException, XPathExpressionException, ParserConfigurationException, SAXException { + + + BasicConfigurator.configure(); + logger.setLevel(Level.ALL); + + String usage = "search <index_dir> <line_index_dir> <language>"; + if (args.length != 3) { + System.err.println("Usage: " + usage); + System.exit(1); + } + + String queries = null; + FulltextSearchDocsLines fulltextSearcher = new FulltextSearchDocsLines(new File(args[0]),new File(args[1])); + + BufferedReader in = null; + if (queries != null) { + in = new BufferedReader(new FileReader(queries)); + } else { + in = new BufferedReader(new InputStreamReader(System.in)); + } + + while (true) { + if (queries == null) // prompt the user + System.out.println("Enter query: "); + + String line = in.readLine(); + + if (line == null || line.length() == -1) + break; + + line = line.trim(); + if (line.length() == 0) + break; + + System.out.println("line: "+line); + + //Query query= new TermQuery(new Term("contents",line)); + Analyzer analyzer = fulltextSearcher.languageSearchers.getSearcherByLanguage(args[2]).analyzer; + QueryParser parser = new QueryParser(Version.LUCENE_30,"contents",analyzer); + Query query= parser.parse(line); + fulltextSearcher.searchInLinesToDir(query,args[2],"1"); + + } + } +} +