annotate src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.java.svn-base @ 0:dc7622afcfea default tip

initial
author dwinter
date Wed, 03 Nov 2010 12:33:16 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dc7622afcfea initial
dwinter
parents:
diff changeset
1 /* */ package de.mpiwg.dwinter.lucencetools.documents;
dc7622afcfea initial
dwinter
parents:
diff changeset
2 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
3 /* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument;
dc7622afcfea initial
dwinter
parents:
diff changeset
4 import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine;
dc7622afcfea initial
dwinter
parents:
diff changeset
5 /* */ import java.io.File;
dc7622afcfea initial
dwinter
parents:
diff changeset
6 /* */ import java.io.FileNotFoundException;
dc7622afcfea initial
dwinter
parents:
diff changeset
7 /* */ import java.io.PrintStream;
dc7622afcfea initial
dwinter
parents:
diff changeset
8 /* */ import java.io.UnsupportedEncodingException;
dc7622afcfea initial
dwinter
parents:
diff changeset
9 /* */ import org.apache.lucene.document.DateTools;
dc7622afcfea initial
dwinter
parents:
diff changeset
10 /* */ import org.apache.lucene.document.DateTools.Resolution;
dc7622afcfea initial
dwinter
parents:
diff changeset
11 /* */ import org.apache.lucene.document.Document;
dc7622afcfea initial
dwinter
parents:
diff changeset
12 /* */ import org.apache.lucene.document.Field;
dc7622afcfea initial
dwinter
parents:
diff changeset
13 /* */ import org.apache.lucene.document.Field.Index;
dc7622afcfea initial
dwinter
parents:
diff changeset
14 import org.apache.lucene.document.Field.Store;
dc7622afcfea initial
dwinter
parents:
diff changeset
15 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
16 /* */ public class OcropusLineDocument
dc7622afcfea initial
dwinter
parents:
diff changeset
17 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
18 /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String textId)
dc7622afcfea initial
dwinter
parents:
diff changeset
19 /* */ throws FileNotFoundException, UnsupportedEncodingException
dc7622afcfea initial
dwinter
parents:
diff changeset
20 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
21 /* 65 */ return Document(f, cleanPath,language, ocrline, pageDimension, null, textId);
dc7622afcfea initial
dwinter
parents:
diff changeset
22 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
23 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
24 /* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String dcMetaData, String textId)
dc7622afcfea initial
dwinter
parents:
diff changeset
25 /* */ throws FileNotFoundException, UnsupportedEncodingException
dc7622afcfea initial
dwinter
parents:
diff changeset
26 /* */ {
dc7622afcfea initial
dwinter
parents:
diff changeset
27 /* 72 */ Document doc = new Document();
dc7622afcfea initial
dwinter
parents:
diff changeset
28 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
29 /* 76 */ doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
30 doc.add(new Field("cleanedPath", cleanPath, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
31 /* 77 */ doc.add(new Field("pageDimension", pageDimension, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
32 /* 78 */ if (dcMetaData == null)
dc7622afcfea initial
dwinter
parents:
diff changeset
33 /* 79 */ dcMetaData = "";
dc7622afcfea initial
dwinter
parents:
diff changeset
34 /* 80 */ System.out.println("++++++++++++++++++DCMD:" + dcMetaData);
dc7622afcfea initial
dwinter
parents:
diff changeset
35 /* 81 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
36 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
37 /* 90 */ doc.add(
dc7622afcfea initial
dwinter
parents:
diff changeset
38 /* 92 */ new Field("modified",
dc7622afcfea initial
dwinter
parents:
diff changeset
39 /* 91 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
dc7622afcfea initial
dwinter
parents:
diff changeset
40 /* 92 */ Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
41 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
42 /* 94 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
43 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
44 /* 106 */ doc.add(new Field("contents", ocrline.content, Field.Store.NO, Field.Index.ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
45 /* 107 */ doc.add(new Field("contentsNormalized", ocrline.content, Field.Store.NO, Field.Index.ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
46 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
47 /* 111 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
48 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
49 /* 114 */ doc.add(new Field("bbox", ocrline.bbox, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
50 /* */
dc7622afcfea initial
dwinter
parents:
diff changeset
51 /* 116 */ doc.add(new Field("lineNumber", ocrline.lineNumber, Field.Store.YES, Field.Index.NOT_ANALYZED));
dc7622afcfea initial
dwinter
parents:
diff changeset
52 /* 117 */ return doc;
dc7622afcfea initial
dwinter
parents:
diff changeset
53 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
54 /* */ }
dc7622afcfea initial
dwinter
parents:
diff changeset
55
dc7622afcfea initial
dwinter
parents:
diff changeset
56 /* Location: /private/tmp/fulltextIndexer.jar
dc7622afcfea initial
dwinter
parents:
diff changeset
57 * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument
dc7622afcfea initial
dwinter
parents:
diff changeset
58 * JD-Core Version: 0.5.4
dc7622afcfea initial
dwinter
parents:
diff changeset
59 */