Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 | |
2 /* */ package de.mpiwg.dwinter.fulltextIndexer.harvester; | |
3 /* */ | |
4 /* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread; | |
5 /* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; | |
6 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; | |
7 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; | |
8 /* */ import java.io.File; | |
9 /* */ import java.io.IOException; | |
10 /* */ import java.io.PrintStream; | |
11 /* */ import java.util.HashMap; | |
12 /* */ import org.apache.lucene.analysis.de.GermanAnalyzer; | |
13 /* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer; | |
14 /* */ import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
15 /* */ import org.apache.lucene.index.CorruptIndexException; | |
16 /* */ import org.apache.lucene.store.LockObtainFailedException; | |
17 /* */ import org.apache.lucene.util.Version; | |
18 /* */ | |
19 /* */ public class OCRHarvesterThreaded extends HarvesterThreaded | |
20 /* */ { | |
21 /* */ private String preferedLanguage; | |
22 /* */ | |
23 /* */ public OCRHarvesterThreaded() | |
24 /* */ { | |
25 /* */ } | |
26 /* */ | |
27 /* */ public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang) | |
28 /* */ throws CorruptIndexException, LockObtainFailedException, IOException | |
29 /* */ { | |
30 /* 41 */ this.index_dir = index_dir; | |
31 /* 42 */ this.languageFileName = languageFileName; | |
32 /* 43 */ this.docDir = docDir; | |
33 /* 44 */ this.preferedLanguage = lang; | |
34 /* */ | |
35 /* 46 */ this.mdProviderUrl = mdProviderUrl; | |
36 /* 47 */ for (int i = 0; i < maxThread; ++i) | |
37 /* */ { | |
38 /* 49 */ this.mythreads[i] = null; | |
39 /* */ } | |
40 /* */ | |
41 /* 52 */ init_languages(); | |
42 /* */ } | |
43 /* */ | |
44 /* */ private void init_languages() { | |
45 /* 56 */ this.languageToISO.put("German", "de"); | |
46 /* 57 */ this.languageToISO.put("French", "fr"); | |
47 /* 58 */ this.languageToISO.put("English", "en"); | |
48 /* 59 */ this.languageToISO.put("German-f", "de-f"); | |
49 /* */ | |
50 /* 61 */ this.supportedLanguageFolder.put("deu", "de"); | |
51 /* 62 */ this.supportedLanguageFolder.put("deu-f", "de"); | |
52 /* 63 */ this.supportedLanguageFolder.put("fra", "fr"); | |
53 /* 64 */ this.supportedLanguageFolder.put("eng", "en"); | |
54 /* 65 */ this.supportedLanguageFolder.put("lic", "la"); | |
55 /* */ try | |
56 /* */ { | |
57 /* 68 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); | |
58 /* 69 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); | |
59 /* 70 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); | |
60 /* 71 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); | |
61 /* 72 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); | |
62 /* */ | |
63 /* 74 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); | |
64 /* 75 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); | |
65 /* */ } catch (CorruptIndexException e) { | |
66 /* 77 */ e.printStackTrace(); | |
67 /* 78 */ System.exit(1); | |
68 /* */ } catch (LockObtainFailedException e) { | |
69 /* 80 */ e.printStackTrace(); | |
70 /* 81 */ System.exit(1); | |
71 /* */ } catch (IOException e) { | |
72 /* 83 */ e.printStackTrace(); | |
73 /* 84 */ System.exit(1); | |
74 /* */ } | |
75 /* */ } | |
76 /* */ | |
77 /* */ public OCRHarvesterThreaded(File docDir, File index_dir, String mdProviderUrl, String preferedLanguage) | |
78 /* */ throws CorruptIndexException, LockObtainFailedException, IOException | |
79 /* */ { | |
80 /* 92 */ this(docDir, index_dir, null, mdProviderUrl, preferedLanguage); | |
81 /* */ } | |
82 /* */ | |
83 /* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException | |
84 /* */ { | |
85 /* 97 */ int freeThread = -1; | |
86 /* 98 */ while (freeThread == -1) | |
87 /* */ { | |
88 /* 100 */ freeThread = waitForFreeThread(); | |
89 /* */ } | |
90 /* */ | |
91 /* 104 */ if (this.textLanguage == null) | |
92 /* 105 */ this.textLanguage = loadLanguages(); | |
93 /* 106 */ this.mythreads[freeThread] = new OCRProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder); | |
94 /* 107 */ this.mythreads[freeThread].start(); | |
95 /* 108 */ System.out.println("New process started:" + freeThread); | |
96 /* */ } | |
97 /* */ } | |
98 | |
99 /* Location: /private/tmp/fulltextIndexer.jar | |
100 * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded | |
101 * JD-Core Version: 0.5.4 | |
102 */ |