0
|
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.IOException;
|
|
5
|
|
6 import org.apache.lucene.index.CorruptIndexException;
|
|
7 import org.apache.lucene.store.LockObtainFailedException;
|
|
8 import org.jdom.JDOMException;
|
|
9
|
|
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded;
|
|
11 import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded;
|
|
12
|
|
13
|
|
14
|
|
15 public class HarvesterCLIRDFMD {
|
|
16
|
|
17 /** Index all text files under a directory.
|
|
18 * @throws IOException
|
|
19 * @throws LockObtainFailedException
|
|
20 * @throws CorruptIndexException
|
|
21 * @throws InterruptedException */
|
|
22 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
23 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" +
|
|
24 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n";
|
|
25
|
|
26
|
|
27 if (args.length != 4 & args.length != 5) {
|
|
28 System.err.println("Usage: " + usage);
|
|
29 System.exit(1);
|
|
30 }
|
|
31
|
|
32 // if (INDEX_DIR.exists()) {
|
|
33 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first");
|
|
34 // System.exit(1);
|
|
35 // }
|
|
36
|
|
37 File docDir = new File(args[1]);
|
|
38 if ((!docDir.exists()) || (!docDir.canRead())) {
|
|
39 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
|
|
40 System.exit(1);
|
|
41 }
|
|
42
|
|
43 final File rdfFile = new File(args[0]);
|
|
44 if (!rdfFile.exists() || !rdfFile.canRead()) {
|
|
45 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
|
|
46 System.exit(1);
|
|
47 }
|
|
48
|
|
49 final File index_dir= new File(args[2]);
|
|
50 final String mdProviderUrl= args[3];
|
|
51
|
|
52 if (args.length == 4){
|
|
53 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null);
|
|
54 }
|
|
55 else {
|
|
56 String[] options = args[4].split("=");
|
|
57 if (options.length != 3) {
|
|
58 System.err.println("wrong options:" + args[4]);
|
|
59 System.exit(1);
|
|
60 }
|
|
61
|
|
62 if (options[0].equals("--lang")) {
|
|
63 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]);
|
|
64 } else if (options[0].equals("--langfile")) {
|
|
65 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]);
|
|
66 } else {
|
|
67 System.err.println("wrong options:" + options[0]);
|
|
68 System.exit(1);
|
|
69 }
|
|
70 }
|
|
71 }
|
|
72
|
|
73 private static void doTheHarvest(File rdfFile, File docDir,File index_dir,
|
|
74 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
75 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null);
|
|
76 try {
|
|
77 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
|
|
78 } catch (JDOMException e) {
|
|
79 e.printStackTrace();
|
|
80 }
|
|
81
|
|
82 }
|
|
83
|
|
84 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir,
|
|
85 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
86 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang);
|
|
87 try {
|
|
88 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
|
|
89 } catch (JDOMException e) {
|
|
90 // TODO Auto-generated catch block
|
|
91 e.printStackTrace();
|
|
92 }
|
|
93
|
|
94 }
|
|
95
|
|
96
|
|
97
|
|
98 }
|