0
|
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.IOException;
|
|
5
|
|
6 import org.apache.lucene.index.CorruptIndexException;
|
|
7 import org.apache.lucene.store.LockObtainFailedException;
|
|
8 import org.jdom.JDOMException;
|
|
9
|
|
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded;
|
|
11
|
|
12
|
|
13
|
|
14 public class DocHarvesterCLIRDFMD {
|
|
15
|
|
16 /** Index all text files under a directory.
|
|
17 * @throws IOException
|
|
18 * @throws LockObtainFailedException
|
|
19 * @throws CorruptIndexException
|
|
20 * @throws InterruptedException */
|
|
21 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
22 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" +
|
|
23 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n";
|
|
24
|
|
25
|
|
26 if (args.length != 4 & args.length != 5) {
|
|
27 System.err.println("Usage: " + usage);
|
|
28 System.exit(1);
|
|
29 }
|
|
30
|
|
31 // if (INDEX_DIR.exists()) {
|
|
32 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first");
|
|
33 // System.exit(1);
|
|
34 // }
|
|
35
|
|
36 File docDir = new File(args[1]);
|
|
37 if ((!docDir.exists()) || (!docDir.canRead())) {
|
|
38 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
|
|
39 System.exit(1);
|
|
40 }
|
|
41
|
|
42 final File rdfFile = new File(args[0]);
|
|
43 if (!rdfFile.exists() || !rdfFile.canRead()) {
|
|
44 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
|
|
45 System.exit(1);
|
|
46 }
|
|
47
|
|
48 final File index_dir= new File(args[2]);
|
|
49 final String mdProviderUrl= args[3];
|
|
50
|
|
51 if (args.length == 4){
|
|
52 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null);
|
|
53 }
|
|
54 else {
|
|
55 String[] options = args[4].split("=");
|
|
56 if (options.length != 3) {
|
|
57 System.err.println("wrong options:" + args[4]);
|
|
58 System.exit(1);
|
|
59 }
|
|
60
|
|
61 if (options[0].equals("--lang")) {
|
|
62 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]);
|
|
63 } else if (options[0].equals("--langfile")) {
|
|
64 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]);
|
|
65 } else {
|
|
66 System.err.println("wrong options:" + options[0]);
|
|
67 System.exit(1);
|
|
68 }
|
|
69 }
|
|
70 }
|
|
71
|
|
72 /**
|
|
73 * @param rdfFile Pfad to the RDF file (ECHO-rdf-Format)
|
|
74 * @param docDir
|
|
75 * @param index_dir
|
|
76 * @param mdProviderUrl
|
|
77 * @param languageFile
|
|
78 * @throws CorruptIndexException
|
|
79 * @throws LockObtainFailedException
|
|
80 * @throws IOException
|
|
81 * @throws InterruptedException
|
|
82 */
|
|
83 private static void doTheHarvest(File rdfFile, File docDir,File index_dir,
|
|
84 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
85 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null);
|
|
86 try {
|
|
87 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
|
|
88 } catch (JDOMException e) {
|
|
89 e.printStackTrace();
|
|
90 }
|
|
91
|
|
92 }
|
|
93
|
|
94 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir,
|
|
95 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
|
|
96 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang);
|
|
97 try {
|
|
98 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
|
|
99 } catch (JDOMException e) {
|
|
100 // TODO Auto-generated catch block
|
|
101 e.printStackTrace();
|
|
102 }
|
|
103
|
|
104 }
|
|
105
|
|
106
|
|
107
|
|
108 }
|