/*
 * view src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.java @ 0:dc7622afcfea default tip
 *
 * initial
 * author dwinter
 * date Wed, 03 Nov 2010 12:33:16 +0100
 * parents
 * children
 * line wrap: on
 * line source
 */

package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.LockObtainFailedException;
import org.jdom.JDOMException;

import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded;

 

/**
 * Command-line front end that starts a {@link DocHarvesterThreaded} run driven by an RDF file.
 *
 * <p>Expected arguments, in order:
 * <ol>
 *   <li>path to the RDF file,</li>
 *   <li>root directory of the documents,</li>
 *   <li>index directory,</li>
 *   <li>URL of the metadata provider,</li>
 *   <li>(optional) either {@code --lang=<language>} or {@code --langfile=<languageFile>}.</li>
 * </ol>
 */
public class DocHarvesterCLIRDFMD {

	/**
	 * Validates the command line and starts the harvest.
	 *
	 * <p>The process is terminated with exit status 1 when the number of arguments is wrong,
	 * when the document directory or the RDF file does not exist or is not readable, or when
	 * the optional fifth argument is not a well-formed {@code --lang=...} / {@code --langfile=...}
	 * option.
	 *
	 * @param args command-line arguments as described in the class comment
	 * @throws CorruptIndexException declared by the harvester; propagated unchanged
	 * @throws LockObtainFailedException declared by the harvester; propagated unchanged
	 * @throws IOException declared by the harvester; propagated unchanged
	 * @throws InterruptedException declared by the harvester; propagated unchanged
	 */
	public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
		String usage = "java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" +
				"java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD <pathtoRDF>  <root_directory> <index_dir>  <mdProviderURL> --langfile=<languageFile>\n";

		// Four positional arguments, optionally followed by exactly one language option.
		if (args.length != 4 && args.length != 5) {
			System.err.println("Usage: " + usage);
			System.exit(1);
		}

		final File docDir = new File(args[1]);
		if (!docDir.exists() || !docDir.canRead()) {
			System.err.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
			System.exit(1);
		}

		final File rdfFile = new File(args[0]);
		if (!rdfFile.exists() || !rdfFile.canRead()) {
			System.err.println("RDFFile directory '" + rdfFile.getAbsolutePath() + "' does not exist or is not readable, please check the path");
			System.exit(1);
		}

		final File index_dir = new File(args[2]);
		final String mdProviderUrl = args[3];

		if (args.length == 4) {
			// No language option given: run without a language and without a language file.
			doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, null);
			return;
		}

		// Split "--name=value" at the FIRST '=' only, so that a value which itself
		// contains '=' (for example a file path) is kept intact. A well-formed option
		// therefore always yields exactly two parts.
		final String[] options = args[4].split("=", 2);
		if (options.length != 2 || options[1].length() == 0) {
			System.err.println("wrong options:" + args[4]);
			System.exit(1);
		}

		if (options[0].equals("--lang")) {
			doTheHarvestLanguage(rdfFile, docDir, index_dir, mdProviderUrl, options[1]);
		} else if (options[0].equals("--langfile")) {
			doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, options[1]);
		} else {
			System.err.println("wrong options:" + options[0]);
			System.exit(1);
		}
	}

	/**
	 * Runs the harvest with an optional language file and no explicit language.
	 *
	 * @param rdfFile path to the RDF file (ECHO RDF format)
	 * @param docDir root directory of the documents
	 * @param index_dir index directory
	 * @param mdProviderUrl URL of the metadata provider
	 * @param languageFile value of the {@code --langfile} option, or {@code null} if none was given
	 * @throws CorruptIndexException declared by the harvester; propagated unchanged
	 * @throws LockObtainFailedException declared by the harvester; propagated unchanged
	 * @throws IOException declared by the harvester; propagated unchanged
	 * @throws InterruptedException declared by the harvester; propagated unchanged
	 */
	private static void doTheHarvest(File rdfFile, File docDir, File index_dir,
			String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
		runHarvest(rdfFile, docDir, index_dir, mdProviderUrl, languageFile, null);
	}

	/**
	 * Runs the harvest with an explicit language and no language file.
	 *
	 * @param rdfFile path to the RDF file (ECHO RDF format)
	 * @param docDir root directory of the documents
	 * @param index_dir index directory
	 * @param mdProviderUrl URL of the metadata provider
	 * @param lang value of the {@code --lang} option
	 * @throws CorruptIndexException declared by the harvester; propagated unchanged
	 * @throws LockObtainFailedException declared by the harvester; propagated unchanged
	 * @throws IOException declared by the harvester; propagated unchanged
	 * @throws InterruptedException declared by the harvester; propagated unchanged
	 */
	private static void doTheHarvestLanguage(File rdfFile, File docDir, File index_dir,
			String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
		runHarvest(rdfFile, docDir, index_dir, mdProviderUrl, null, lang);
	}

	/**
	 * Shared implementation of both harvest variants: builds the harvester and lets it
	 * harvest from the absolute path of the RDF file.
	 *
	 * <p>A {@link JDOMException} raised by the harvester is reported on standard error
	 * (message plus stack trace) and not rethrown.
	 *
	 * @param languageFile language file, or {@code null}
	 * @param lang language, or {@code null}
	 */
	private static void runHarvest(File rdfFile, File docDir, File index_dir,
			String mdProviderUrl, String languageFile, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
		DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, lang);
		try {
			harvester.harvestFromRDF(rdfFile.getAbsolutePath());
		} catch (JDOMException e) {
			// Name the offending file so the stack trace can be related to the input.
			System.err.println("Harvest from RDF file '" + rdfFile.getAbsolutePath() + "' failed: " + e.getMessage());
			e.printStackTrace();
		}
	}

}