0
|
1 /* */ package de.mpiwg.dwinter.lucencetools.documents;
|
|
2 /* */
|
|
3 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader;
|
|
4 /* */ import java.io.File;
|
|
5 /* */ import java.io.FileInputStream;
|
|
6 /* */ import java.io.IOException;
|
|
7 /* */ import java.io.Reader;
|
|
8 /* */ import org.apache.lucene.document.DateTools;
|
|
9 /* */ import org.apache.lucene.document.DateTools.Resolution;
|
|
10 /* */ import org.apache.lucene.document.Document;
|
|
11 /* */ import org.apache.lucene.document.Field;
|
|
12 /* */ import org.apache.lucene.document.Field.Index;
|
|
13 /* */ import org.apache.lucene.document.Field.Store;
|
|
14 /* */
|
|
15 /* */ public class FileDocument
|
|
16 /* */ {
|
|
17
|
|
18 public static String toXML(Document doc){
|
|
19 //String path = doc.get("path");
|
|
20 String cleanedPath = doc.get("cleanedPath");
|
|
21 String textId = doc.get("textId");
|
|
22 String md = doc.get("dcMetaData");
|
|
23 String ret = "<result>";
|
|
24 ret+= "<cleanedPath>"+cleanedPath+"</cleanedPath>";
|
|
25 ret+= "<textId>"+textId.replace("/",":")+"</textId>";
|
|
26 ret+= "<textIdCleaned>"+textId.replace("/","_")+"</textIdCleaned>";
|
|
27 ret+= "<md>"+md+"</md>";
|
|
28 ret+="</result>";
|
|
29 return ret;
|
|
30
|
|
31 }
|
|
32 /* */ public static Document Document(File f, String cleanedPath,String language, String textId)
|
|
33 /* */ throws IOException
|
|
34 /* */ {
|
|
35 /* 63 */ return Document(f, cleanedPath,language, null, textId);
|
|
36 /* */ }
|
|
37 /* */
|
|
38 /* */ public static Document Document(File f, String cleanedPath,String language, String dcMetaData, String textId)
|
|
39 /* */ throws IOException
|
|
40 /* */ {
|
|
41 /* 70 */ Document doc = new Document();
|
|
42 /* */
|
|
43 /* 74 */ doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
44 /* 74 */ doc.add(new Field("cleanedPath", cleanedPath, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
45 /* 75 */ if (dcMetaData == null)
|
|
46 /* 76 */ dcMetaData = "";
|
|
47 /* 77 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED));
|
|
48 /* */
|
|
49 /* 79 */ if (textId == null)
|
|
50 /* 80 */ textId = "";
|
|
51 /* 81 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
52 /* */
|
|
53 /* 87 */ doc.add(
|
|
54 /* 89 */ new Field("modified",
|
|
55 /* 88 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
|
|
56 /* 89 */ Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
57 /* */
|
|
58 /* 95 */ Reader in = new XMLFilteredReader(new FileInputStream(f), "UTF-8");
|
|
59 /* */
|
|
60 /* 98 */ doc.add(new Field("contents", in));
|
|
61 /* */
|
|
62 /* 105 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
63 /* */
|
|
64 /* 107 */ return doc;
|
|
65 /* */ }
|
|
66
|
|
67
|
|
68 /* */ }
|
|
69
|
|
/* Location: /private/tmp/fulltextIndexer.jar
 * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.FileDocument
 * JD-Core Version: 0.5.4
 */