Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/lucencetools/documents/FileDocument.java @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 /* */ package de.mpiwg.dwinter.lucencetools.documents; | |
2 /* */ | |
3 /* */ import de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader; | |
4 /* */ import java.io.File; | |
5 /* */ import java.io.FileInputStream; | |
6 /* */ import java.io.IOException; | |
7 /* */ import java.io.Reader; | |
8 /* */ import org.apache.lucene.document.DateTools; | |
9 /* */ import org.apache.lucene.document.DateTools.Resolution; | |
10 /* */ import org.apache.lucene.document.Document; | |
11 /* */ import org.apache.lucene.document.Field; | |
12 /* */ import org.apache.lucene.document.Field.Index; | |
13 /* */ import org.apache.lucene.document.Field.Store; | |
14 /* */ | |
15 /* */ public class FileDocument | |
16 /* */ { | |
17 | |
18 public static String toXML(Document doc){ | |
19 //String path = doc.get("path"); | |
20 String cleanedPath = doc.get("cleanedPath"); | |
21 String textId = doc.get("textId"); | |
22 String md = doc.get("dcMetaData"); | |
23 String ret = "<result>"; | |
24 ret+= "<cleanedPath>"+cleanedPath+"</cleanedPath>"; | |
25 ret+= "<textId>"+textId.replace("/",":")+"</textId>"; | |
26 ret+= "<textIdCleaned>"+textId.replace("/","_")+"</textIdCleaned>"; | |
27 ret+= "<md>"+md+"</md>"; | |
28 ret+="</result>"; | |
29 return ret; | |
30 | |
31 } | |
32 /* */ public static Document Document(File f, String cleanedPath,String language, String textId) | |
33 /* */ throws IOException | |
34 /* */ { | |
35 /* 63 */ return Document(f, cleanedPath,language, null, textId); | |
36 /* */ } | |
37 /* */ | |
38 /* */ public static Document Document(File f, String cleanedPath,String language, String dcMetaData, String textId) | |
39 /* */ throws IOException | |
40 /* */ { | |
41 /* 70 */ Document doc = new Document(); | |
42 /* */ | |
43 /* 74 */ doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
44 /* 74 */ doc.add(new Field("cleanedPath", cleanedPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
45 /* 75 */ if (dcMetaData == null) | |
46 /* 76 */ dcMetaData = ""; | |
47 /* 77 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); | |
48 /* */ | |
49 /* 79 */ if (textId == null) | |
50 /* 80 */ textId = ""; | |
51 /* 81 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
52 /* */ | |
53 /* 87 */ doc.add( | |
54 /* 89 */ new Field("modified", | |
55 /* 88 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), | |
56 /* 89 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
57 /* */ | |
58 /* 95 */ Reader in = new XMLFilteredReader(new FileInputStream(f), "UTF-8"); | |
59 /* */ | |
60 /* 98 */ doc.add(new Field("contents", in)); | |
61 /* */ | |
62 /* 105 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
63 /* */ | |
64 /* 107 */ return doc; | |
65 /* */ } | |
66 | |
67 | |
68 /* */ } | |
69 | |
70 /* Location: /private/tmp/fulltextIndexer.jar | |
71 * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.FileDocument | |
72 * JD-Core Version: 0.5.4 | |
73 */ |