Mercurial > hg > fulltextIndexer
diff src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,98 @@
package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.LockObtainFailedException;
import org.jdom.JDOMException;

import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded;
import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;

/**
 * Command-line front end that validates its arguments and then hands an RDF
 * file, a document directory, an index directory and a metadata-provider URL
 * to a {@link HarvesterThreaded}.
 *
 * <p>Accepted argument lists:
 * <pre>
 *   &lt;pathToRDF&gt; &lt;root_directory&gt; &lt;index_dir&gt; &lt;mdProviderURL&gt;
 *   &lt;pathToRDF&gt; &lt;root_directory&gt; &lt;index_dir&gt; &lt;mdProviderURL&gt; --lang=&lt;language&gt;
 *   &lt;pathToRDF&gt; &lt;root_directory&gt; &lt;index_dir&gt; &lt;mdProviderURL&gt; --langfile=&lt;languageFile&gt;
 * </pre>
 */
public class HarvesterCLIRDFMD {

    /** Usage text printed when the argument count is wrong. */
    private static final String USAGE =
            "java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n"
            + "java de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n";

    /**
     * Index all text files under a directory.
     *
     * <p>Validates the command line, checks that the RDF file and the document
     * directory exist and are readable, then starts the harvest. Any validation
     * failure prints a message to {@code System.err} and terminates the JVM
     * with exit status 1.
     *
     * @param args {@code <pathToRDF> <root_directory> <index_dir> <mdProviderURL>},
     *             optionally followed by {@code --lang=<language>} or
     *             {@code --langfile=<languageFile>}
     * @throws IOException propagated from {@link HarvesterThreaded}
     * @throws LockObtainFailedException propagated from {@link HarvesterThreaded}
     * @throws CorruptIndexException propagated from {@link HarvesterThreaded}
     * @throws InterruptedException propagated from {@link HarvesterThreaded}
     */
    public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
        if (args.length != 4 && args.length != 5) {
            System.err.println("Usage: " + USAGE);
            System.exit(1);
        }

        final File docDir = new File(args[1]);
        if (!docDir.exists() || !docDir.canRead()) {
            System.err.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        final File rdfFile = new File(args[0]);
        if (!rdfFile.exists() || !rdfFile.canRead()) {
            System.err.println("RDFFile directory '" + rdfFile.getAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        final File index_dir = new File(args[2]);
        final String mdProviderUrl = args[3];

        if (args.length == 4) {
            // No language option given: neither a language file nor a language is passed on.
            doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, null);
            return;
        }

        // "--name=value" splits into exactly two parts. The limit of 2 keeps any
        // further '=' characters inside the value (e.g. a file name containing '=').
        final String[] options = args[4].split("=", 2);
        if (options.length != 2 || options[1].length() == 0) {
            System.err.println("wrong options:" + args[4]);
            System.exit(1);
        }

        final String optionName = options[0];
        final String optionValue = options[1];
        if (optionName.equals("--lang")) {
            doTheHarvestLanguage(rdfFile, docDir, index_dir, mdProviderUrl, optionValue);
        } else if (optionName.equals("--langfile")) {
            doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl, optionValue);
        } else {
            System.err.println("wrong options:" + optionName);
            System.exit(1);
        }
    }

    /**
     * Runs the harvest with an optional language file and no explicit language.
     *
     * @param rdfFile       RDF file whose absolute path is given to the harvester
     * @param docDir        document root directory
     * @param index_dir     index directory
     * @param mdProviderUrl metadata provider URL
     * @param languageFile  language file argument, or {@code null} when none was given
     */
    private static void doTheHarvest(File rdfFile, File docDir, File index_dir,
            String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
        runHarvest(rdfFile, docDir, index_dir, mdProviderUrl, languageFile, null);
    }

    /**
     * Runs the harvest with an explicit language and no language file.
     *
     * @param rdfFile       RDF file whose absolute path is given to the harvester
     * @param docDir        document root directory
     * @param index_dir     index directory
     * @param mdProviderUrl metadata provider URL
     * @param lang          language argument taken from {@code --lang=<language>}
     */
    private static void doTheHarvestLanguage(File rdfFile, File docDir, File index_dir,
            String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
        runHarvest(rdfFile, docDir, index_dir, mdProviderUrl, null, lang);
    }

    /**
     * Shared implementation: builds a {@link HarvesterThreaded} and calls
     * {@code harvestFromRDF} with the absolute path of the RDF file.
     */
    private static void runHarvest(File rdfFile, File docDir, File index_dir,
            String mdProviderUrl, String languageFile, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
        HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, lang);
        try {
            harvester.harvestFromRDF(rdfFile.getAbsolutePath());
        } catch (JDOMException e) {
            // NOTE(review): an RDF parse failure is only reported here; the process
            // still exits with status 0. Confirm whether callers rely on that before
            // turning this into a non-zero exit.
            e.printStackTrace();
        }
    }
}