Mercurial > hg > fulltextIndexer
view src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.fulltextIndexer.harvester;

import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread;
import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread;
import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;
import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Threaded harvester that hands every file it processes to an
 * {@link OCRProcessFileThread}.
 *
 * <p>The working state used here (thread slots, language maps, analyzers,
 * directories) lives in fields inherited from {@code HarvesterThreaded};
 * this class only fills them in and starts the worker threads.
 */
public class OCRHarvesterThreaded extends HarvesterThreaded
{
    /** Language value passed through unchanged to every worker thread. */
    private String preferedLanguage;

    /**
     * Creates an instance without touching any inherited state.
     * NOTE(review): unlike the other constructors this one does not register
     * any language mappings or analyzers - confirm callers set them up.
     */
    public OCRHarvesterThreaded()
    {
    }

    /**
     * Creates a harvester, clears the thread slots and registers the
     * language mappings and analyzers.
     *
     * @param docDir           stored in the inherited {@code docDir} field
     * @param index_dir        directory handed to every {@link LanguageAnalyzer}
     * @param languageFileName stored and later passed to each worker thread; may be {@code null}
     * @param mdProviderUrl    stored and later passed to each worker thread
     * @param lang             preferred language, passed to each worker thread
     * @throws CorruptIndexException     if creating a language analyzer fails with it
     * @throws LockObtainFailedException if creating a language analyzer fails with it
     * @throws IOException               if creating a language analyzer fails with it
     */
    public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang)
        throws CorruptIndexException, LockObtainFailedException, IOException
    {
        this.index_dir = index_dir;
        this.languageFileName = languageFileName;
        this.docDir = docDir;
        this.preferedLanguage = lang;

        this.mdProviderUrl = mdProviderUrl;
        // Start with every thread slot empty.
        for (int i = 0; i < maxThread; ++i)
        {
            this.mythreads[i] = null;
        }

        init_languages();
    }

    /**
     * Fills the language-name and folder-name lookup tables and registers one
     * {@link LanguageAnalyzer} per supported language key.
     *
     * <p>Failures are propagated to the constructor (which already declares
     * them) instead of terminating the JVM, so the caller decides how to
     * react.
     * NOTE(review): analyzers created before a failure are not released here;
     * confirm whether {@code LanguageAnalyzer} holds resources that need
     * closing on that path.
     *
     * @throws CorruptIndexException     if creating a language analyzer fails with it
     * @throws LockObtainFailedException if creating a language analyzer fails with it
     * @throws IOException               if creating a language analyzer fails with it
     */
    private void init_languages()
        throws CorruptIndexException, LockObtainFailedException, IOException
    {
        // Language names -> language keys used for the analyzers below.
        this.languageToISO.put("German", "de");
        this.languageToISO.put("French", "fr");
        this.languageToISO.put("English", "en");
        this.languageToISO.put("German-f", "de-f");

        // Folder names -> language keys. Note "deu-f" maps to plain "de".
        this.supportedLanguageFolder.put("deu", "de");
        this.supportedLanguageFolder.put("deu-f", "de");
        this.supportedLanguageFolder.put("fra", "fr");
        this.supportedLanguageFolder.put("eng", "en");
        // NOTE(review): "lic" is kept as found; confirm it is the intended folder name for "la".
        this.supportedLanguageFolder.put("lic", "la");

        this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
        this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
        this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
        this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir));
        this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));

        this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
        this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
    }

    /**
     * Same as the five-argument constructor with {@code languageFileName}
     * set to {@code null}.
     *
     * @param docDir           stored in the inherited {@code docDir} field
     * @param index_dir        directory handed to every {@link LanguageAnalyzer}
     * @param mdProviderUrl    stored and later passed to each worker thread
     * @param preferedLanguage preferred language, passed to each worker thread
     * @throws CorruptIndexException     if creating a language analyzer fails with it
     * @throws LockObtainFailedException if creating a language analyzer fails with it
     * @throws IOException               if creating a language analyzer fails with it
     */
    public OCRHarvesterThreaded(File docDir, File index_dir, String mdProviderUrl, String preferedLanguage)
        throws CorruptIndexException, LockObtainFailedException, IOException
    {
        this(docDir, index_dir, null, mdProviderUrl, preferedLanguage);
    }

    /**
     * Starts a new {@link OCRProcessFileThread} for {@code file} in a free
     * thread slot.
     *
     * <p>Calls {@code waitForFreeThread()} repeatedly until it returns a slot
     * index other than -1, loads {@code textLanguage} via
     * {@code loadLanguages()} if it is still {@code null}, then creates and
     * starts the worker and prints the slot index to standard output.
     *
     * @param file the file handed to the worker thread
     * @throws CorruptIndexException     declared by the original signature
     * @throws LockObtainFailedException declared by the original signature
     * @throws IOException               declared by the original signature
     */
    protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException
    {
        int freeThread = -1;
        while (freeThread == -1)
        {
            freeThread = waitForFreeThread();
        }

        // Load the text-language data once, on first use.
        if (this.textLanguage == null)
        {
            this.textLanguage = loadLanguages();
        }
        this.mythreads[freeThread] = new OCRProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder);
        this.mythreads[freeThread].start();
        System.out.println("New process started:" + freeThread);
    }
}

/* Location: /private/tmp/fulltextIndexer.jar
 * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded
 * JD-Core Version: 0.5.4
 */