view src/de/mpiwg/dwinter/fulltext/searcher/LanguageSearcher.java @ 2:2b29b0b6db16 default tip

Version mit integrierter Suche über XML-Volltexte
author dwinter
date Wed, 26 Jan 2011 14:41:09 +0100
parents 5c9c31510f0c
children
line wrap: on
line source

package de.mpiwg.dwinter.fulltext.searcher;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;



import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import com.sun.org.apache.xerces.internal.parsers.SAXParser;

import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;

/**
 * Full-text searcher for one language-specific Lucene index.
 *
 * Builds an {@link IndexSearcher} on the reader provided by the
 * {@link LanguageAnalyzer} superclass and a {@link QueryParser} whose default
 * field is {@code contents}. Queries are given in Lucene query syntax.
 */
public class LanguageSearcher extends LanguageAnalyzer implements ILanguageSearcher {

	/** Maximum number of hits collected by a single search. */
	static final int MAX_RESULTS = 10000;

	/** Searcher over the index reader; assigned in the constructor. */
	public Searcher searcher=null;

	/** Parser for user queries; its default field is {@code contents}. */
	public QueryParser parser=null;

	/**
	 * Creates the searcher and query parser for the given index.
	 *
	 * @param lang      language identifier, passed on to the superclass
	 * @param analyzer  analyzer passed to the superclass and used by the query parser
	 * @param index_dir index directory, passed on to the superclass
	 * @throws CorruptIndexException     propagated from index access
	 * @throws LockObtainFailedException propagated from index access
	 * @throws IOException               propagated from index access
	 */
	public LanguageSearcher(String lang, Analyzer analyzer, File index_dir)
			throws CorruptIndexException, LockObtainFailedException,
			IOException {
		// NOTE(review): the meaning of the trailing 'true' is defined by
		// LanguageAnalyzer, which is not visible here -- confirm.
		super(lang, analyzer, index_dir,true);
		searcher = new IndexSearcher(this.reader);
		parser = new QueryParser(Version.LUCENE_30,"contents",analyzer);
	}

	/**
	 * Parses the search string and runs it against the index.
	 *
	 * @param searchString query in Lucene query syntax
	 * @return collector holding at most {@link #MAX_RESULTS} hits
	 * @throws ParseException if the search string cannot be parsed
	 * @throws IOException    if searching the index fails
	 * @see de.mpiwg.dwinter.fulltext.searcher.ILanguageSearcher#parseAndSearch(java.lang.String)
	 */
	public Collector parseAndSearch(String searchString) throws ParseException, IOException
	{
		return collectHits(parser.parse(searchString));
	}

	/**
	 * Parses the search string, restricts it to documents whose
	 * {@code dcMetaData} field matches {@code mdString}, and runs the
	 * combined query against the index.
	 *
	 * @param searchString query in Lucene query syntax
	 * @param mdString     expression appended after {@code dcMetaData:}
	 * @return collector holding at most {@link #MAX_RESULTS} hits
	 * @throws ParseException if the combined query cannot be parsed
	 * @throws IOException    if searching the index fails
	 * @see de.mpiwg.dwinter.fulltext.searcher.ILanguageSearcher#parseAndSearch(java.lang.String, java.lang.String)
	 */
	public Collector parseAndSearch(String searchString, String mdString) throws ParseException, IOException {
		// The user query is parenthesized so that the appended AND applies to
		// the query as a whole. Without the grouping the parser attaches AND
		// only to the last clause of searchString (e.g. "a OR b" would turn
		// into "a +b +dcMetaData:..."), changing what the user asked for.
		Query query= parser.parse("(" + searchString + ") AND dcMetaData:" + mdString);
		return collectHits(query);
	}

	/**
	 * Prints the query and language for debugging, then collects the top
	 * hits for the query.
	 */
	private Collector collectHits(Query query) throws IOException {
		System.out.println("Parse and search:"+query);
		System.out.println("Parse and search lang:"+this.lang);

		// Second argument is Lucene's docsScoredInOrder flag.
		TopScoreDocCollector hits = TopScoreDocCollector.create(MAX_RESULTS, false);
		searcher.search(query,hits);
		return hits;
	}

//	TODO: implement the donatus bits
//	public HashMap<String,Collector> parseAndSearchAndAnalyse(String searchString) throws ParseException, IOException
//	{
//		HashMap<String, Collector> retMap = new HashMap<String,Collector>();
//		Query query= parser.parse(searchString);
//		System.out.println("Parse and search and analyse:"+query);
//		
//		TopScoreDocCollector col = TopScoreDocCollector.create(MAX_RESULTS, false);
//		//searcher.searcher.search(query, col);	
//		searcher.search(query,col);
//		
//		String word;
//	
//		try {
//			
//			word = getNormalizedSearchString(((DonatusAnalyzer)analyzer).morphFile.toString());
//			
//		} catch (RuntimeException e) {
//			
//			word=searchString;
//		}
//		
//		retMap.put(word, result);
//		return retMap;
//		
//	}

//	private String getNormalizedSearchString(String string) throws IOException {
//		
//		
//		XMLReader XMLparser = new SAXParser();
//		ParseDonatusResultDocument ch = new ParseDonatusResultDocument();
//		XMLparser.setContentHandler(ch);
//		
//		try {
//			System.out.println("CCCCCCCCCCC:"+string);
//			Reader reader = new StringReader(string);
//			InputSource input = new InputSource(reader);
//			XMLparser.parse(input);
//			
//				} catch (SAXException e) {
//					// TODO Auto-generated catch block
//				e.printStackTrace();
//			}	
//				
//				
//		String ret = "";
//		
//	    for (String lemma: ch.value.values())
//	    {
//	    	ret+=lemma+" ";
//	    }
//	    	
//		return ret;
//	}


}