comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dc7622afcfea
1
2 /* */ package de.mpiwg.dwinter.fulltextIndexer.harvester;
3 /* */
4 /* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread;
5 /* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread;
6 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;
7 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers;
8 /* */ import java.io.File;
9 /* */ import java.io.IOException;
10 /* */ import java.io.PrintStream;
11 /* */ import java.util.HashMap;
12 /* */ import org.apache.lucene.analysis.de.GermanAnalyzer;
13 /* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
14 /* */ import org.apache.lucene.analysis.standard.StandardAnalyzer;
15 /* */ import org.apache.lucene.index.CorruptIndexException;
16 /* */ import org.apache.lucene.store.LockObtainFailedException;
17 /* */ import org.apache.lucene.util.Version;
18 /* */
19 /* */ public class OCRHarvesterThreaded extends HarvesterThreaded
20 /* */ {
21 /* */ private String preferedLanguage;
22 /* */
23 /* */ public OCRHarvesterThreaded()
24 /* */ {
25 /* */ }
26 /* */
27 /* */ public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang)
28 /* */ throws CorruptIndexException, LockObtainFailedException, IOException
29 /* */ {
30 /* 41 */ this.index_dir = index_dir;
31 /* 42 */ this.languageFileName = languageFileName;
32 /* 43 */ this.docDir = docDir;
33 /* 44 */ this.preferedLanguage = lang;
34 /* */
35 /* 46 */ this.mdProviderUrl = mdProviderUrl;
36 /* 47 */ for (int i = 0; i < maxThread; ++i)
37 /* */ {
38 /* 49 */ this.mythreads[i] = null;
39 /* */ }
40 /* */
41 /* 52 */ init_languages();
42 /* */ }
43 /* */
44 /* */ private void init_languages() {
45 /* 56 */ this.languageToISO.put("German", "de");
46 /* 57 */ this.languageToISO.put("French", "fr");
47 /* 58 */ this.languageToISO.put("English", "en");
48 /* 59 */ this.languageToISO.put("German-f", "de-f");
49 /* */
50 /* 61 */ this.supportedLanguageFolder.put("deu", "de");
51 /* 62 */ this.supportedLanguageFolder.put("deu-f", "de");
52 /* 63 */ this.supportedLanguageFolder.put("fra", "fr");
53 /* 64 */ this.supportedLanguageFolder.put("eng", "en");
54 /* 65 */ this.supportedLanguageFolder.put("lic", "la");
55 /* */ try
56 /* */ {
57 /* 68 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
58 /* 69 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir));
59 /* 70 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
60 /* 71 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir));
61 /* 72 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
62 /* */
63 /* 74 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
64 /* 75 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir));
65 /* */ } catch (CorruptIndexException e) {
66 /* 77 */ e.printStackTrace();
67 /* 78 */ System.exit(1);
68 /* */ } catch (LockObtainFailedException e) {
69 /* 80 */ e.printStackTrace();
70 /* 81 */ System.exit(1);
71 /* */ } catch (IOException e) {
72 /* 83 */ e.printStackTrace();
73 /* 84 */ System.exit(1);
74 /* */ }
75 /* */ }
76 /* */
77 /* */ public OCRHarvesterThreaded(File docDir, File index_dir, String mdProviderUrl, String preferedLanguage)
78 /* */ throws CorruptIndexException, LockObtainFailedException, IOException
79 /* */ {
80 /* 92 */ this(docDir, index_dir, null, mdProviderUrl, preferedLanguage);
81 /* */ }
82 /* */
83 /* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException
84 /* */ {
85 /* 97 */ int freeThread = -1;
86 /* 98 */ while (freeThread == -1)
87 /* */ {
88 /* 100 */ freeThread = waitForFreeThread();
89 /* */ }
90 /* */
91 /* 104 */ if (this.textLanguage == null)
92 /* 105 */ this.textLanguage = loadLanguages();
93 /* 106 */ this.mythreads[freeThread] = new OCRProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder);
94 /* 107 */ this.mythreads[freeThread].start();
95 /* 108 */ System.out.println("New process started:" + freeThread);
96 /* */ }
97 /* */ }
98
99 /* Location: /private/tmp/fulltextIndexer.jar
100 * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded
101 * JD-Core Version: 0.5.4
102 */