Mercurial > hg > fulltextSearch
view src/de/mpiwg/dwinter/fulltext/ticket/LanguageWriter.java @ 0:72a015318a6d
CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an.
https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:26:20 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.fulltext.ticket; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.util.HashSet; import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import sun.security.krb5.internal.PAEncTSEnc; import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc; import de.mpiwg.dwinter.fulltext.search.utils.OCRLine; import de.mpiwg.dwinter.lucencetools.documents.FileDocument; public class LanguageWriter extends TicketWriter{ private File languageFile; private String language; public LanguageWriter(String ticket, String language) throws IOException{ super(ticket); languageFile = new File(ticketFile.getAbsolutePath()+PATHSEPARATOR+language); if(!languageFile.exists()){ logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath()); if(!languageFile.mkdirs()) throw new IOException(); } this.language=language; } /** Schreibt das Ergebnis einer Suche in eine Datei im Ticket. * @param docs * @param reader * @return Anzahl der tatsaechlich geschriebenen Treffer * @throws CorruptIndexException * @throws IOException */ public Integer writeResults(TopDocs docs, IndexReader reader) throws CorruptIndexException, IOException { File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"result"); FileOutputStream fs = new FileOutputStream(resultFile); OutputStreamWriter rw = new OutputStreamWriter(fs,"utf-8"); Set<String> idsAlreadyDone = new HashSet<String>(); // TODO: aus irgendwelche gruenden gibt es ein Dokument mehrfach in den Fundstellen //FileWriter rw = new FileWriter(resultFile); if(!languageFile.exists()){ logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath()); if(!languageFile.mkdirs()) throw new IOException(); } Integer counter =0; for (ScoreDoc sd:docs.scoreDocs) { Document d = reader.document(sd.doc); String id = d.get("textId"); if (!idsAlreadyDone.contains(id)){ // stelle sicher das alle treffer nur einmal in die date geschrieben werden. idsAlreadyDone.add(id); rw.write(FileDocument.toXML(d)+"\n"); counter ++; } } rw.close(); return counter; } public void writeResultInfo(TopDocs docs, Integer counter) throws IOException { File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"resultInfo"); FileOutputStream fs = new FileOutputStream(resultFile); OutputStreamWriter rw = new OutputStreamWriter(fs,"utf-8"); String ret = "<resultInfo>"; int hits = docs.totalHits; ret +="<lang>"+language+"</lang>"; ret +="<hits>"+counter+"</hits>"; ret +="<totalHits>"+hits+"</totalHits>"; ret+= "</resultInfo>"; rw.write(ret); rw.close(); } public void writeDoc(OCRDoc ocrDoc) throws IOException { // erzeuge fuer jedes document einen ordner File docFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+ocrDoc.textId.replace(PATHSEPARATORCHAR, ':')); if(!docFile.exists()){ logger.debug("Create Docfolder:"+docFile.getCanonicalPath()); if(!docFile.mkdirs()) throw new IOException(); } // jetzt fuer jese seite ein file for (String page:ocrDoc.linesInPage.keySet()){ File pageFile = new File(docFile.getAbsolutePath()+PATHSEPARATOR+page.replace(PATHSEPARATORCHAR, ':')); FileWriter pageFileWriter = new FileWriter(pageFile); for (OCRLine line: ocrDoc.linesInPage.get(page)){ pageFileWriter.write(line.toString()+"\n"); } pageFileWriter.close(); } } public void saveQuery(Query query) throws IOException { File qf = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"query"); FileWriter fw = new FileWriter(qf); fw.write(query.toString()); fw.close(); } }