view src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
line wrap: on
line source


/*     */ package de.mpiwg.dwinter.fulltextIndexer.harvester;
/*     */ 
/*     */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread;
/*     */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread;
/*     */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;
/*     */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers;
/*     */ import java.io.File;
/*     */ import java.io.IOException;
/*     */ import java.io.PrintStream;
/*     */ import java.util.HashMap;
/*     */ import org.apache.lucene.analysis.de.GermanAnalyzer;
/*     */ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
/*     */ import org.apache.lucene.analysis.standard.StandardAnalyzer;
/*     */ import org.apache.lucene.index.CorruptIndexException;
/*     */ import org.apache.lucene.store.LockObtainFailedException;
/*     */ import org.apache.lucene.util.Version;
/*     */ 
/**
 * Harvester that processes OCR files on worker threads.
 *
 * <p>Every file passed to {@link #processFile(File)} is handed to a new
 * {@code OCRProcessFileThread} that is stored in a slot of the inherited
 * {@code mythreads} array and started immediately.
 *
 * <p>The parameterised constructors fill the inherited language lookup tables
 * and register one {@code LanguageAnalyzer} per supported language code.
 */
public class OCRHarvesterThreaded extends HarvesterThreaded
{
  /** Language handed unchanged to every worker thread as its preferred language. */
  private String preferedLanguage;

  /**
   * Creates an unconfigured harvester.
   *
   * <p>Unlike the other constructors this one neither assigns any field nor
   * fills the language tables or analyzers.
   */
  public OCRHarvesterThreaded()
  {
  }

  /**
   * Creates a harvester and registers the language analyzers.
   *
   * @param docDir           document directory; only stored here for use by inherited code
   * @param index_dir        index directory passed to every {@code LanguageAnalyzer}
   * @param languageFileName language file name forwarded to every worker thread;
   *                         the four-argument constructor passes {@code null}
   * @param mdProviderUrl    metadata provider URL forwarded to every worker thread
   * @param lang             preferred language forwarded to every worker thread
   * @throws CorruptIndexException     if creating a language analyzer reports a corrupt index
   * @throws LockObtainFailedException if creating a language analyzer cannot obtain its lock
   * @throws IOException               if creating a language analyzer fails for another I/O reason
   */
  public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang)
    throws CorruptIndexException, LockObtainFailedException, IOException
  {
    this.index_dir = index_dir;
    this.languageFileName = languageFileName;
    this.docDir = docDir;
    this.preferedLanguage = lang;
    this.mdProviderUrl = mdProviderUrl;

    // Mark every worker slot as unused.
    for (int slot = 0; slot < maxThread; ++slot)
    {
      this.mythreads[slot] = null;
    }

    // Must run after index_dir is assigned: the analyzers are created with it.
    init_languages();
  }

  /**
   * Fills the language lookup tables and registers the language analyzers.
   *
   * <p>Failures are propagated to the caller instead of terminating the JVM;
   * the constructors that call this method already declare these exceptions.
   *
   * @throws CorruptIndexException     if creating a language analyzer reports a corrupt index
   * @throws LockObtainFailedException if creating a language analyzer cannot obtain its lock
   * @throws IOException               if creating a language analyzer fails for another I/O reason
   */
  private void init_languages()
    throws CorruptIndexException, LockObtainFailedException, IOException
  {
    // Language name -> language code.
    this.languageToISO.put("German", "de");
    this.languageToISO.put("French", "fr");
    this.languageToISO.put("English", "en");
    this.languageToISO.put("German-f", "de-f");

    // Supported folder key -> language code ("deu" and "deu-f" both map to "de").
    this.supportedLanguageFolder.put("deu", "de");
    this.supportedLanguageFolder.put("deu-f", "de");
    this.supportedLanguageFolder.put("fra", "fr");
    this.supportedLanguageFolder.put("eng", "en");
    // NOTE(review): the key "lic" maps to "la"; confirm this key is intended.
    this.supportedLanguageFolder.put("lic", "la");

    // One analyzer per language code; German and French get their dedicated
    // Lucene analyzers, everything else uses the StandardAnalyzer.
    this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
    this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
    this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
    this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir));
    this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));

    // Additional analyzers registered under the codes "all" and "morph".
    this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
    this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
  }

  /**
   * Creates a harvester without a language file name.
   *
   * <p>Delegates to the five-argument constructor with {@code null} as
   * {@code languageFileName}.
   *
   * @param docDir           document directory; only stored here for use by inherited code
   * @param index_dir        index directory passed to every {@code LanguageAnalyzer}
   * @param mdProviderUrl    metadata provider URL forwarded to every worker thread
   * @param preferedLanguage preferred language forwarded to every worker thread
   * @throws CorruptIndexException     if creating a language analyzer reports a corrupt index
   * @throws LockObtainFailedException if creating a language analyzer cannot obtain its lock
   * @throws IOException               if creating a language analyzer fails for another I/O reason
   */
  public OCRHarvesterThreaded(File docDir, File index_dir, String mdProviderUrl, String preferedLanguage)
    throws CorruptIndexException, LockObtainFailedException, IOException
  {
    this(docDir, index_dir, null, mdProviderUrl, preferedLanguage);
  }

  /**
   * Starts a new {@code OCRProcessFileThread} for the given file.
   *
   * <p>Calls the inherited {@code waitForFreeThread()} until it returns a slot
   * index other than {@code -1}, loads the text languages on first use, then
   * stores the new worker in that slot and starts it.
   *
   * @param file the file to hand to the worker thread
   */
  protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException
  {
    // -1 means "no slot yet"; keep asking until a slot index is returned.
    int slot;
    do
    {
      slot = waitForFreeThread();
    }
    while (slot == -1);

    // Load the text languages once, on the first processed file.
    if (this.textLanguage == null)
    {
      this.textLanguage = loadLanguages();
    }

    this.mythreads[slot] = new OCRProcessFileThread(
        this.languageAnalyzers,
        file,
        this.languageFileName,
        this.textLanguage,
        this.mdProviderUrl,
        this.preferedLanguage,
        this.languageToISO,
        this.supportedLanguageFolder);
    this.mythreads[slot].start();
    System.out.println("New process started:" + slot);
  }
}

/* Location:           /private/tmp/fulltextIndexer.jar
 * Qualified Name:     de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded
 * JD-Core Version:    0.5.4
 */