Mercurial > hg > fulltextIndexer
view src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
line wrap: on
line source
/* */ package de.mpiwg.dwinter.lucencetools.documents; /* */ /* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; /* */ import java.io.File; /* */ import java.io.FileNotFoundException; /* */ import java.io.PrintStream; /* */ import java.io.UnsupportedEncodingException; /* */ import org.apache.lucene.document.DateTools; /* */ import org.apache.lucene.document.DateTools.Resolution; /* */ import org.apache.lucene.document.Document; /* */ import org.apache.lucene.document.Field; /* */ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; /* */ /* */ public class OcropusLineDocument /* */ { /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String textId) /* */ throws FileNotFoundException, UnsupportedEncodingException /* */ { /* 65 */ return Document(f, cleanPath,language, ocrline, pageDimension, null, textId); /* */ } /* */ /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String dcMetaData, String textId) /* */ throws FileNotFoundException, UnsupportedEncodingException /* */ { /* 72 */ Document doc = new Document(); /* */ /* 76 */ doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("cleanedPath", cleanPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* 77 */ doc.add(new Field("pageDimension", pageDimension, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* 78 */ if (dcMetaData == null) /* 79 */ dcMetaData = ""; /* 80 */ System.out.println("++++++++++++++++++DCMD:" + dcMetaData); /* 81 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); /* */ /* 90 */ doc.add( /* 92 */ new Field("modified", /* 91 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), /* 92 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); /* */ /* 94 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* */ /* 106 */ doc.add(new Field("contents", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); /* 107 */ doc.add(new Field("contentsNormalized", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); /* */ /* 111 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* */ /* 114 */ doc.add(new Field("bbox", ocrline.bbox, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* */ /* 116 */ doc.add(new Field("lineNumber", ocrline.lineNumber, Field.Store.YES, Field.Index.NOT_ANALYZED)); /* 117 */ return doc; /* */ } /* */ } /* Location: /private/tmp/fulltextIndexer.jar * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument * JD-Core Version: 0.5.4 */