Mercurial > hg > fulltextIndexer
comparison src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.java.svn-base @ 0:dc7622afcfea default tip
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:33:16 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc7622afcfea |
---|---|
1 /* */ package de.mpiwg.dwinter.lucencetools.documents; | |
2 /* */ | |
3 /* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; | |
4 import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; | |
5 /* */ import java.io.File; | |
6 /* */ import java.io.FileNotFoundException; | |
7 /* */ import java.io.PrintStream; | |
8 /* */ import java.io.UnsupportedEncodingException; | |
9 /* */ import org.apache.lucene.document.DateTools; | |
10 /* */ import org.apache.lucene.document.DateTools.Resolution; | |
11 /* */ import org.apache.lucene.document.Document; | |
12 /* */ import org.apache.lucene.document.Field; | |
13 /* */ import org.apache.lucene.document.Field.Index; | |
14 import org.apache.lucene.document.Field.Store; | |
15 /* */ | |
16 /* */ public class OcropusLineDocument | |
17 /* */ { | |
18 /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String textId) | |
19 /* */ throws FileNotFoundException, UnsupportedEncodingException | |
20 /* */ { | |
21 /* 65 */ return Document(f, cleanPath,language, ocrline, pageDimension, null, textId); | |
22 /* */ } | |
23 /* */ | |
24 /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String dcMetaData, String textId) | |
25 /* */ throws FileNotFoundException, UnsupportedEncodingException | |
26 /* */ { | |
27 /* 72 */ Document doc = new Document(); | |
28 /* */ | |
29 /* 76 */ doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
30 doc.add(new Field("cleanedPath", cleanPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
31 /* 77 */ doc.add(new Field("pageDimension", pageDimension, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
32 /* 78 */ if (dcMetaData == null) | |
33 /* 79 */ dcMetaData = ""; | |
34 /* 80 */ System.out.println("++++++++++++++++++DCMD:" + dcMetaData); | |
35 /* 81 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); | |
36 /* */ | |
37 /* 90 */ doc.add( | |
38 /* 92 */ new Field("modified", | |
39 /* 91 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), | |
40 /* 92 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
41 /* */ | |
42 /* 94 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
43 /* */ | |
44 /* 106 */ doc.add(new Field("contents", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); | |
45 /* 107 */ doc.add(new Field("contentsNormalized", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); | |
46 /* */ | |
47 /* 111 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
48 /* */ | |
49 /* 114 */ doc.add(new Field("bbox", ocrline.bbox, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
50 /* */ | |
51 /* 116 */ doc.add(new Field("lineNumber", ocrline.lineNumber, Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
52 /* 117 */ return doc; | |
53 /* */ } | |
54 /* */ } | |
55 | |
56 /* Location: /private/tmp/fulltextIndexer.jar | |
57 * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument | |
58 * JD-Core Version: 0.5.4 | |
59 */ |