Mercurial > hg > fulltextIndexer
view src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/FileDocument.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.dwinter.lucencetools.documents;

import de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

/**
 * Static helpers that turn a file on disk into a Lucene {@link Document} and
 * render a {@link Document} produced that way as a small XML fragment.
 */
public class FileDocument
{
  /**
   * Renders selected stored fields of {@code doc} as a {@code <result>} element.
   *
   * <p>The fragment contains, in order, {@code <cleanedPath>}, {@code <textId>}
   * (every "/" replaced by ":"), {@code <textIdCleaned>} (every "/" replaced by
   * "_") and {@code <md>} (the "dcMetaData" field).
   *
   * <p>Field values are inserted verbatim; no XML escaping is applied.
   * NOTE(review): "dcMetaData" presumably already carries XML markup, which is
   * why it is not escaped here -- confirm before adding escaping.
   *
   * @param doc the document to read the "cleanedPath", "textId" and
   *            "dcMetaData" fields from
   * @return the XML fragment as a string
   */
  public static String toXML(Document doc) {
    String cleanedPath = doc.get("cleanedPath");
    String textId = doc.get("textId");
    String md = doc.get("dcMetaData");

    // A document without a "textId" field yields null here; treat it like the
    // empty id that Document(...) stores for a null textId instead of failing
    // with a NullPointerException on replace().
    if (textId == null) {
      textId = "";
    }

    // cleanedPath and md are appended as-is; a null value is rendered as the
    // text "null", exactly as plain string concatenation would do.
    StringBuilder xml = new StringBuilder("<result>");
    xml.append("<cleanedPath>").append(cleanedPath).append("</cleanedPath>");
    xml.append("<textId>").append(textId.replace("/", ":")).append("</textId>");
    xml.append("<textIdCleaned>").append(textId.replace("/", "_")).append("</textIdCleaned>");
    xml.append("<md>").append(md).append("</md>");
    xml.append("</result>");
    return xml.toString();
  }

  /**
   * Builds a document for {@code f} without Dublin Core metadata.
   *
   * <p>Delegates to
   * {@link #Document(File, String, String, String, String)} with a
   * {@code null} {@code dcMetaData}.
   *
   * @param f           the file to index
   * @param cleanedPath value for the "cleanedPath" field
   * @param language    value for the "language" field
   * @param textId      value for the "textId" field; {@code null} is stored as ""
   * @return the populated document
   * @throws IOException if the canonical path cannot be resolved or the file
   *                     cannot be opened
   */
  public static Document Document(File f, String cleanedPath, String language, String textId)
    throws IOException
  {
    return Document(f, cleanedPath, language, null, textId);
  }

  /**
   * Builds a document for {@code f}.
   *
   * <p>Fields are added in this order: "path" (canonical path of {@code f}),
   * "cleanedPath", "dcMetaData", "textId", "modified" (last-modified time at
   * minute resolution), "contents" (a reader over the file, filtered through
   * {@link XMLFilteredReader} with encoding name "UTF-8") and "language".
   * All fields except "contents" are stored; "dcMetaData" is analyzed, the
   * other stored fields are indexed without analysis.
   *
   * <p>On success the file stream behind "contents" is deliberately left open:
   * Lucene consumes a reader-valued field only when the document is added to
   * an index. If building the document fails after the file has been opened,
   * the stream is closed before the exception propagates.
   *
   * @param f           the file to index
   * @param cleanedPath value for the "cleanedPath" field
   * @param language    value for the "language" field
   * @param dcMetaData  value for the "dcMetaData" field; {@code null} is stored as ""
   * @param textId      value for the "textId" field; {@code null} is stored as ""
   * @return the populated document
   * @throws IOException if the canonical path cannot be resolved or the file
   *                     cannot be opened
   */
  public static Document Document(File f, String cleanedPath, String language, String dcMetaData, String textId)
    throws IOException
  {
    Document doc = new Document();

    doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("cleanedPath", cleanedPath, Field.Store.YES, Field.Index.NOT_ANALYZED));

    if (dcMetaData == null) {
      dcMetaData = "";
    }
    doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED));

    if (textId == null) {
      textId = "";
    }
    doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED));

    doc.add(
      new Field("modified",
        DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
        Field.Store.YES, Field.Index.NOT_ANALYZED));

    FileInputStream stream = new FileInputStream(f);
    boolean handedOver = false;
    try {
      Reader in = new XMLFilteredReader(stream, "UTF-8");
      doc.add(new Field("contents", in));
      doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED));
      handedOver = true;
    } finally {
      // Only release the file handle when the document could not be completed;
      // otherwise the indexer still needs to read from it.
      if (!handedOver) {
        closeQuietly(stream);
      }
    }

    return doc;
  }

  /** Closes {@code stream}, suppressing any failure so the original exception is not masked. */
  private static void closeQuietly(FileInputStream stream) {
    try {
      stream.close();
    } catch (IOException ignored) {
      // Already on a failure path; the exception that got us here is the one to report.
    }
  }
}

/* Location: /private/tmp/fulltextIndexer.jar
 * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.FileDocument
 * JD-Core Version: 0.5.4
 */