comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.java.svn-base @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dc7622afcfea
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI;
2
3 import java.io.File;
4 import java.io.IOException;
5
6 import org.apache.lucene.index.CorruptIndexException;
7 import org.apache.lucene.store.LockObtainFailedException;
8 import org.jdom.JDOMException;
9
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded;
11
12
13
14 public class DocHarvesterCLIRDFMD {
15
16 /** Index all text files under a directory.
17 * @throws IOException
18 * @throws LockObtainFailedException
19 * @throws CorruptIndexException
20 * @throws InterruptedException */
21 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
22 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" +
23 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n";
24
25
26 if (args.length != 4 & args.length != 5) {
27 System.err.println("Usage: " + usage);
28 System.exit(1);
29 }
30
31 // if (INDEX_DIR.exists()) {
32 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first");
33 // System.exit(1);
34 // }
35
36 File docDir = new File(args[1]);
37 if ((!docDir.exists()) || (!docDir.canRead())) {
38 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
39 System.exit(1);
40 }
41
42 final File rdfFile = new File(args[0]);
43 if (!rdfFile.exists() || !rdfFile.canRead()) {
44 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
45 System.exit(1);
46 }
47
48 final File index_dir= new File(args[2]);
49 final String mdProviderUrl= args[3];
50
51 if (args.length == 4){
52 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null);
53 }
54 else {
55 String[] options = args[4].split("=");
56 if (options.length != 3) {
57 System.err.println("wrong options:" + args[4]);
58 System.exit(1);
59 }
60
61 if (options[0].equals("--lang")) {
62 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]);
63 } else if (options[0].equals("--langfile")) {
64 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]);
65 } else {
66 System.err.println("wrong options:" + options[0]);
67 System.exit(1);
68 }
69 }
70 }
71
72 /**
73 * @param rdfFile Pfad to the RDF file (ECHO-rdf-Format)
74 * @param docDir
75 * @param index_dir
76 * @param mdProviderUrl
77 * @param languageFile
78 * @throws CorruptIndexException
79 * @throws LockObtainFailedException
80 * @throws IOException
81 * @throws InterruptedException
82 */
83 private static void doTheHarvest(File rdfFile, File docDir,File index_dir,
84 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
85 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null);
86 try {
87 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
88 } catch (JDOMException e) {
89 e.printStackTrace();
90 }
91
92 }
93
94 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir,
95 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
96 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang);
97 try {
98 harvester.harvestFromRDF(rdfFile.getAbsolutePath());
99 } catch (JDOMException e) {
100 // TODO Auto-generated catch block
101 e.printStackTrace();
102 }
103
104 }
105
106
107
108 }