diff src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.java @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.java	Wed Nov 03 12:33:16 2010 +0100
@@ -0,0 +1,69 @@
+/*    */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
+/*    */ 
+/*    */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;
+/*    */ import java.io.File;
+/*    */ import java.io.IOException;
+/*    */ import java.io.PrintStream;
+/*    */ import org.apache.lucene.index.CorruptIndexException;
+/*    */ import org.apache.lucene.store.LockObtainFailedException;
+/*    */ 
+/*    */ public class OCRHarvesterCLIMD
+/*    */ {
+/*    */   public static void main(String[] args)
+/*    */     throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
+/*    */   {
+/* 20 */     String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir> <mdproviderUrl> --lang=<language>\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir> <mdproviderUrl>--langfile=<languageFile>\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD <root_directory> <index_dir>  <mdproviderUrl>";
+/*    */ 
+/* 23 */     if ((args.length != 4) && (args.length != 3)) {
+/* 24 */       System.err.println("Usage: " + usage);
+/* 25 */       System.exit(1);
+/*    */     }
+/*    */ 
+/* 33 */     File docDir = new File(args[0]);
+/* 34 */     if ((!docDir.exists()) || (!docDir.canRead())) {
+/* 35 */       System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
+/* 36 */       System.exit(1);
+/*    */     }
+/*    */ 
+/* 39 */     File index_dir = new File(args[1]);
+/*    */ 
+/* 41 */     if (args.length == 3) {
+/* 42 */       doTheHarvest(docDir, index_dir, args[2], null);
+/*    */     }
+/*    */     else {
+/* 45 */       String[] options = args[3].split("=");
+/* 46 */       if (options.length != 3) {
+/* 47 */         System.err.println("wrong options:" + args[3]);
+/* 48 */         System.exit(1);
+/*    */       }
+/*    */ 
+/* 51 */       if (options[0].equals("--lang")) {
+/* 52 */         doTheHarvestLanguage(docDir, index_dir, args[2], options[1]);
+/* 53 */       } else if (options[0].equals("--langfile")) {
+/* 54 */         doTheHarvest(docDir, index_dir, args[2], options[1]);
+/*    */       } else {
+/* 56 */         System.err.println("wrong options:" + options[0]);
+/* 57 */         System.exit(1);
+/*    */       }
+/*    */     }
+/*    */   }
+/*    */ 
+/*    */   protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang)
+/*    */     throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
+/*    */   {
+/* 65 */     OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang);
+/* 66 */     harvester.harvestFolder();
+/*    */   }
+/*    */ 
+/*    */   protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile)
+/*    */     throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException
+/*    */   {
+/* 72 */     OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null);
+/* 73 */     harvester.harvestFolder();
+/*    */   }
+/*    */ }
+
+/* Location:           /private/tmp/fulltextIndexer.jar
+ * Qualified Name:     de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
+ * JD-Core Version:    0.5.4
+ */
\ No newline at end of file