Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 | |
6 import org.apache.lucene.index.CorruptIndexException; | |
7 import org.apache.lucene.store.LockObtainFailedException; | |
8 import org.jdom.JDOMException; | |
9 | |
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; | |
11 import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; | |
12 | |
13 | |
14 | |
15 public class HarvesterCLIRDFMD { | |
16 | |
17 /** Index all text files under a directory. | |
18 * @throws IOException | |
19 * @throws LockObtainFailedException | |
20 * @throws CorruptIndexException | |
21 * @throws InterruptedException */ | |
22 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
23 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" + | |
24 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n"; | |
25 | |
26 | |
27 if (args.length != 4 & args.length != 5) { | |
28 System.err.println("Usage: " + usage); | |
29 System.exit(1); | |
30 } | |
31 | |
32 // if (INDEX_DIR.exists()) { | |
33 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); | |
34 // System.exit(1); | |
35 // } | |
36 | |
37 File docDir = new File(args[1]); | |
38 if ((!docDir.exists()) || (!docDir.canRead())) { | |
39 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); | |
40 System.exit(1); | |
41 } | |
42 | |
43 final File rdfFile = new File(args[0]); | |
44 if (!rdfFile.exists() || !rdfFile.canRead()) { | |
45 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); | |
46 System.exit(1); | |
47 } | |
48 | |
49 final File index_dir= new File(args[2]); | |
50 final String mdProviderUrl= args[3]; | |
51 | |
52 if (args.length == 4){ | |
53 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); | |
54 } | |
55 else { | |
56 String[] options = args[4].split("="); | |
57 if (options.length != 3) { | |
58 System.err.println("wrong options:" + args[4]); | |
59 System.exit(1); | |
60 } | |
61 | |
62 if (options[0].equals("--lang")) { | |
63 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); | |
64 } else if (options[0].equals("--langfile")) { | |
65 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); | |
66 } else { | |
67 System.err.println("wrong options:" + options[0]); | |
68 System.exit(1); | |
69 } | |
70 } | |
71 } | |
72 | |
73 private static void doTheHarvest(File rdfFile, File docDir,File index_dir, | |
74 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
75 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); | |
76 try { | |
77 harvester.harvestFromRDF(rdfFile.getAbsolutePath()); | |
78 } catch (JDOMException e) { | |
79 e.printStackTrace(); | |
80 } | |
81 | |
82 } | |
83 | |
84 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, | |
85 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
86 HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); | |
87 try { | |
88 harvester.harvestFromRDF(rdfFile.getAbsolutePath()); | |
89 } catch (JDOMException e) { | |
90 // TODO Auto-generated catch block | |
91 e.printStackTrace(); | |
92 } | |
93 | |
94 } | |
95 | |
96 | |
97 | |
98 } |