Mercurial > hg > fulltextIndexer
view src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.java @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.LockObtainFailedException;
import org.jdom.JDOMException;

import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;

/**
 * Command-line front end that builds an {@link OCRHarvesterThreaded} from the
 * program arguments and asks it to harvest from an RDF file.
 */
public class OCRHarvesterCLIRDFMD {

    /** Usage text printed when the argument count is wrong. */
    private static final String USAGE =
            "java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD"
            + " <pathToRDF> <root_directory> <index_dir> <mdProviderURL> [--lang=<language>]\n"
            + "java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD"
            + " <pathToRDF> <root_directory> <index_dir> <mdProviderURL> [--langfile=<languageFile>]\n";

    /**
     * Validates the command line and starts the harvest.
     *
     * <p>Expected arguments, in order: path to the RDF file, document root
     * directory, index directory, metadata provider URL, and optionally one of
     * {@code --lang=<language>} or {@code --langfile=<languageFile>}. The
     * process exits with status 1 when the argument count is wrong, when the
     * document directory or the RDF file is missing or unreadable, or when
     * the option is malformed or unknown.
     *
     * @param args the command-line arguments described above
     * @throws CorruptIndexException propagated from the harvester
     * @throws LockObtainFailedException propagated from the harvester
     * @throws IOException propagated from the harvester, or raised when the
     *         RDF file cannot be parsed (the cause is then a JDOMException)
     * @throws InterruptedException propagated from the harvester
     */
    public static void main(String[] args) throws CorruptIndexException,
            LockObtainFailedException, IOException, InterruptedException {

        if (args.length != 4 && args.length != 5) {
            System.err.println("Usage: " + USAGE);
            System.exit(1);
        }

        File docDir = new File(args[1]);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        final File rdfFile = new File(args[0]);
        if (!rdfFile.exists() || !rdfFile.canRead()) {
            System.out.println("RDFFile directory '" + rdfFile.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        final File index_dir = new File(args[2]);
        final String mdProviderUrl = args[3];

        if (args.length == 4) {
            // No language option given: harvest without language file or language.
            doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, null);
            return;
        }

        // Split on the first '=' only, so "--lang=en" yields exactly
        // {"--lang", "en"} and the value itself may contain '='.
        String[] options = args[4].split("=", 2);
        if (options.length != 2 || options[1].length() == 0) {
            System.err.println("wrong options:" + args[4]);
            System.exit(1);
        }

        if (options[0].equals("--lang")) {
            doTheHarvestLanguage(rdfFile, docDir, index_dir, mdProviderUrl, options[1]);
        } else if (options[0].equals("--langfile")) {
            doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, options[1]);
        } else {
            System.err.println("wrong options:" + options[0]);
            System.exit(1);
        }
    }

    /**
     * Harvests with an optional language file.
     *
     * @param rdfFile RDF file whose absolute path is handed to the harvester
     * @param docDir document root directory passed to the harvester
     * @param index_dir index directory passed to the harvester
     * @param mdProviderUrl metadata provider URL passed to the harvester
     * @param languageFile value of {@code --langfile}, or {@code null} when no
     *        option was given; passed through to the harvester unchanged
     * @throws IOException if the harvester fails or the RDF cannot be parsed
     */
    private static void doTheHarvest(File rdfFile, File docDir, File index_dir,
            String mdProviderUrl, String languageFile) throws CorruptIndexException,
            LockObtainFailedException, IOException, InterruptedException {
        OCRHarvesterThreaded harvester =
                new OCRHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null);
        runHarvest(harvester, rdfFile);
    }

    /**
     * Harvests with an explicitly given language.
     *
     * @param rdfFile RDF file whose absolute path is handed to the harvester
     * @param docDir document root directory passed to the harvester
     * @param index_dir index directory passed to the harvester
     * @param mdProviderUrl metadata provider URL passed to the harvester
     * @param lang value of {@code --lang}; passed through to the harvester unchanged
     * @throws IOException if the harvester fails or the RDF cannot be parsed
     */
    private static void doTheHarvestLanguage(File rdfFile, File docDir, File index_dir,
            String mdProviderUrl, String lang) throws CorruptIndexException,
            LockObtainFailedException, IOException, InterruptedException {
        OCRHarvesterThreaded harvester =
                new OCRHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang);
        runHarvest(harvester, rdfFile);
    }

    /**
     * Runs the harvest and reports an RDF parse failure as an IOException
     * instead of swallowing it, so the process ends with a non-zero status.
     */
    private static void runHarvest(OCRHarvesterThreaded harvester, File rdfFile)
            throws CorruptIndexException, LockObtainFailedException, IOException,
            InterruptedException {
        try {
            harvester.harvestFromRDF(rdfFile.getAbsolutePath());
        } catch (JDOMException e) {
            // initCause keeps the original stack trace and works on every
            // Java version; the two-argument IOException constructor needs Java 6.
            IOException wrapped = new IOException(
                    "Cannot parse RDF file '" + rdfFile.getAbsolutePath() + "': " + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }
}