annotate src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIMD.java.svn-base @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dc7622afcfea initial
dwinter
parents:
diff changeset
1 /* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
dc7622afcfea initial
dwinter
parents:
diff changeset
2 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
3 /* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;
dc7622afcfea initial
dwinter
parents:
diff changeset
4 /* */ import java.io.File;
dc7622afcfea initial
dwinter
parents:
diff changeset
5 /* */ import java.io.IOException;
dc7622afcfea initial
dwinter
parents:
diff changeset
6 /* */ import java.io.PrintStream;
dc7622afcfea initial
dwinter
parents:
diff changeset
7 /* */ import org.apache.lucene.index.CorruptIndexException;
dc7622afcfea initial
dwinter
parents:
diff changeset
8 /* */ import org.apache.lucene.store.LockObtainFailedException;
dc7622afcfea initial
dwinter
parents:
diff changeset
9 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
10 /* */ public class OCRHarvesterCLIMD
dc7622afcfea initial
dwinter
parents:
diff changeset
11 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
12 /* */ public static void main(String[] args)
dc7622afcfea initial
dwinter
parents:
diff changeset
13 /* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
dc7622afcfea initial
dwinter
parents:
diff changeset
14 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
15 /* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir> <mdproviderUrl> --lang=<language>\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir> <mdproviderUrl>--langfile=<languageFile>\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir> <mdproviderUrl>";
dc7622afcfea initial
dwinter
parents:
diff changeset
16 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
17 /* 23 */ if ((args.length != 4) && (args.length != 3)) {
dc7622afcfea initial
dwinter
parents:
diff changeset
18 /* 24 */ System.err.println("Usage: " + usage);
dc7622afcfea initial
dwinter
parents:
diff changeset
19 /* 25 */ System.exit(1);
dc7622afcfea initial
dwinter
parents:
diff changeset
20 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
21 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
22 /* 33 */ File docDir = new File(args[0]);
dc7622afcfea initial
dwinter
parents:
diff changeset
23 /* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) {
dc7622afcfea initial
dwinter
parents:
diff changeset
24 /* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
dc7622afcfea initial
dwinter
parents:
diff changeset
25 /* 36 */ System.exit(1);
dc7622afcfea initial
dwinter
parents:
diff changeset
26 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
27 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
28 /* 39 */ File index_dir = new File(args[1]);
dc7622afcfea initial
dwinter
parents:
diff changeset
29 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
30 /* 41 */ if (args.length == 3) {
dc7622afcfea initial
dwinter
parents:
diff changeset
31 /* 42 */ doTheHarvest(docDir, index_dir, args[2], null);
dc7622afcfea initial
dwinter
parents:
diff changeset
32 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
33 /* */ else {
dc7622afcfea initial
dwinter
parents:
diff changeset
34 /* 45 */ String[] options = args[3].split("=");
dc7622afcfea initial
dwinter
parents:
diff changeset
35 /* 46 */ if (options.length != 3) {
dc7622afcfea initial
dwinter
parents:
diff changeset
36 /* 47 */ System.err.println("wrong options:" + args[3]);
dc7622afcfea initial
dwinter
parents:
diff changeset
37 /* 48 */ System.exit(1);
dc7622afcfea initial
dwinter
parents:
diff changeset
38 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
39 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
40 /* 51 */ if (options[0].equals("--lang")) {
dc7622afcfea initial
dwinter
parents:
diff changeset
41 /* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]);
dc7622afcfea initial
dwinter
parents:
diff changeset
42 /* 53 */ } else if (options[0].equals("--langfile")) {
dc7622afcfea initial
dwinter
parents:
diff changeset
43 /* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]);
dc7622afcfea initial
dwinter
parents:
diff changeset
44 /* */ } else {
dc7622afcfea initial
dwinter
parents:
diff changeset
45 /* 56 */ System.err.println("wrong options:" + options[0]);
dc7622afcfea initial
dwinter
parents:
diff changeset
46 /* 57 */ System.exit(1);
dc7622afcfea initial
dwinter
parents:
diff changeset
47 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
48 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
49 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
50 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
51 /* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang)
dc7622afcfea initial
dwinter
parents:
diff changeset
52 /* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
dc7622afcfea initial
dwinter
parents:
diff changeset
53 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
54 /* 65 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang);
dc7622afcfea initial
dwinter
parents:
diff changeset
55 /* 66 */ harvester.harvestFolder();
dc7622afcfea initial
dwinter
parents:
diff changeset
56 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
57 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
58 /* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile)
dc7622afcfea initial
dwinter
parents:
diff changeset
59 /* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
dc7622afcfea initial
dwinter
parents:
diff changeset
60 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
61 /* 72 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null);
dc7622afcfea initial
dwinter
parents:
diff changeset
62 /* 73 */ harvester.harvestFolder();
dc7622afcfea initial
dwinter
parents:
diff changeset
63 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
64 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
65
dc7622afcfea initial
dwinter
parents:
diff changeset
66 /* Location: /private/tmp/fulltextIndexer.jar
dc7622afcfea initial
dwinter
parents:
diff changeset
67 * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
dc7622afcfea initial
dwinter
parents:
diff changeset
68 * JD-Core Version: 0.5.4
dc7622afcfea initial
dwinter
parents:
diff changeset
69 */