comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dc7622afcfea
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
2
3 import java.io.File;
4 import java.io.IOException;
5
6 import org.apache.lucene.index.CorruptIndexException;
7 import org.apache.lucene.store.LockObtainFailedException;
8 import org.jdom.JDOMException;
9
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded;
11 import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;
12
13
14
15 public class HarvesterCLIRDFMD {
16
17 /** Index all text files under a directory.
18 * @throws IOException
19 * @throws LockObtainFailedException
20 * @throws CorruptIndexException
21 * @throws InterruptedException */
22 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
23 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" +
24 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n";
25
26
27 if (args.length != 4 & args.length != 5) {
28 System.err.println("Usage: " + usage);
29 System.exit(1);
30 }
31
32 // if (INDEX_DIR.exists()) {
33 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first");
34 // System.exit(1);
35 // }
36
37 File docDir = new File(args[1]);
38 if ((!docDir.exists()) || (!docDir.canRead())) {
39 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
40 System.exit(1);
41 }
42
43 final File rdfFile = new File(args[0]);
44 if (!rdfFile.exists() || !rdfFile.canRead()) {
45 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
46 System.exit(1);
47 }
48
49 final File index_dir= new File(args[2]);
50 final String mdProviderUrl= args[3];
51
52 if (args.length == 4){
53 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null);
54 }
55 else {
56 String[] options = args[4].split("=");
57 if (options.length != 3) {
58 System.err.println("wrong options:" + args[4]);
59 System.exit(1);
60 }
61
62 if (options[0].equals("--lang")) {
63 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]);
64 } else if (options[0].equals("--langfile")) {
65 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]);
66 } else {
67 System.err.println("wrong options:" + options[0]);
68 System.exit(1);
69 }
70 }
71 }
72
73 private static void doTheHarvest(File rdfFile, File docDir,File index_dir,
74 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
75 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null);
76 try {
77 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
78 } catch (JDOMException e) {
79 e.printStackTrace();
80 }
81
82 }
83
84 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir,
85 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
86 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang);
87 try {
88 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
89 } catch (JDOMException e) {
90 // TODO Auto-generated catch block
91 e.printStackTrace();
92 }
93
94 }
95
96
97
98 }