Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 | |
6 import org.apache.lucene.index.CorruptIndexException; | |
7 import org.apache.lucene.store.LockObtainFailedException; | |
8 import org.jdom.JDOMException; | |
9 | |
10 import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded; | |
11 | |
12 | |
13 | |
14 public class DocHarvesterCLIRDFMD { | |
15 | |
16 /** Index all text files under a directory. | |
17 * @throws IOException | |
18 * @throws LockObtainFailedException | |
19 * @throws CorruptIndexException | |
20 * @throws InterruptedException */ | |
21 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
22 String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathToRDF> <root_directory> <index_dir> <mdProviderURL> --lang=<language>\n" + | |
23 "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF <pathtoRDF> <root_directory> <index_dir> <mdProviderURL> --langfile=<languageFile>\n"; | |
24 | |
25 | |
26 if (args.length != 4 & args.length != 5) { | |
27 System.err.println("Usage: " + usage); | |
28 System.exit(1); | |
29 } | |
30 | |
31 // if (INDEX_DIR.exists()) { | |
32 // System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); | |
33 // System.exit(1); | |
34 // } | |
35 | |
36 File docDir = new File(args[1]); | |
37 if ((!docDir.exists()) || (!docDir.canRead())) { | |
38 System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); | |
39 System.exit(1); | |
40 } | |
41 | |
42 final File rdfFile = new File(args[0]); | |
43 if (!rdfFile.exists() || !rdfFile.canRead()) { | |
44 System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); | |
45 System.exit(1); | |
46 } | |
47 | |
48 final File index_dir= new File(args[2]); | |
49 final String mdProviderUrl= args[3]; | |
50 | |
51 if (args.length == 4){ | |
52 doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); | |
53 } | |
54 else { | |
55 String[] options = args[4].split("="); | |
56 if (options.length != 3) { | |
57 System.err.println("wrong options:" + args[4]); | |
58 System.exit(1); | |
59 } | |
60 | |
61 if (options[0].equals("--lang")) { | |
62 doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); | |
63 } else if (options[0].equals("--langfile")) { | |
64 doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); | |
65 } else { | |
66 System.err.println("wrong options:" + options[0]); | |
67 System.exit(1); | |
68 } | |
69 } | |
70 } | |
71 | |
72 /** | |
73 * @param rdfFile Pfad to the RDF file (ECHO-rdf-Format) | |
74 * @param docDir | |
75 * @param index_dir | |
76 * @param mdProviderUrl | |
77 * @param languageFile | |
78 * @throws CorruptIndexException | |
79 * @throws LockObtainFailedException | |
80 * @throws IOException | |
81 * @throws InterruptedException | |
82 */ | |
83 private static void doTheHarvest(File rdfFile, File docDir,File index_dir, | |
84 String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
85 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); | |
86 try { | |
87 harvester.harvestFromRDF(rdfFile.getAbsolutePath()); | |
88 } catch (JDOMException e) { | |
89 e.printStackTrace(); | |
90 } | |
91 | |
92 } | |
93 | |
94 private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, | |
95 String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { | |
96 DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); | |
97 try { | |
98 harvester.harvestFromRDF(rdfFile.getAbsolutePath()); | |
99 } catch (JDOMException e) { | |
100 // TODO Auto-generated catch block | |
101 e.printStackTrace(); | |
102 } | |
103 | |
104 } | |
105 | |
106 | |
107 | |
108 } |