annotate conf/schema.xml @ 0:3b37d71af924 default tip

iniitial
author dwinter
date Tue, 26 Feb 2013 15:50:30 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3b37d71af924 iniitial
dwinter
parents:
diff changeset
1 <?xml version="1.0" encoding="UTF-8" ?>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
2 <!--
3b37d71af924 iniitial
dwinter
parents:
diff changeset
3 Licensed to the Apache Software Foundation (ASF) under one or
3b37d71af924 iniitial
dwinter
parents:
diff changeset
4 more contributor license agreements. See the NOTICE file
3b37d71af924 iniitial
dwinter
parents:
diff changeset
5 distributed with this work for additional information regarding
3b37d71af924 iniitial
dwinter
parents:
diff changeset
6 copyright ownership. The ASF licenses this file to You under the
3b37d71af924 iniitial
dwinter
parents:
diff changeset
7 Apache License, Version 2.0 (the "License"); you may not use
3b37d71af924 iniitial
dwinter
parents:
diff changeset
8 this file except in compliance with the License. You may obtain
3b37d71af924 iniitial
dwinter
parents:
diff changeset
9 a copy of the License at
3b37d71af924 iniitial
dwinter
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0 Unless required by
3b37d71af924 iniitial
dwinter
parents:
diff changeset
11 applicable law or agreed to in writing, software distributed
3b37d71af924 iniitial
dwinter
parents:
diff changeset
12 under the License is distributed on an "AS IS" BASIS, WITHOUT
3b37d71af924 iniitial
dwinter
parents:
diff changeset
13 WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3b37d71af924 iniitial
dwinter
parents:
diff changeset
14 See the License for the specific language governing permissions
3b37d71af924 iniitial
dwinter
parents:
diff changeset
15 and limitations under the License.
3b37d71af924 iniitial
dwinter
parents:
diff changeset
16 -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
17 <!--
3b37d71af924 iniitial
dwinter
parents:
diff changeset
18 Description: This document contains the Solr 3.1 schema definition to
3b37d71af924 iniitial
dwinter
parents:
diff changeset
19 be used with the Solr integration currently built into Nutch. See
3b37d71af924 iniitial
dwinter
parents:
diff changeset
20 https://issues.apache.org/jira/browse/NUTCH-442
3b37d71af924 iniitial
dwinter
parents:
diff changeset
21 https://issues.apache.org/jira/browse/NUTCH-699
3b37d71af924 iniitial
dwinter
parents:
diff changeset
22 https://issues.apache.org/jira/browse/NUTCH-994
3b37d71af924 iniitial
dwinter
parents:
diff changeset
23 https://issues.apache.org/jira/browse/NUTCH-997
3b37d71af924 iniitial
dwinter
parents:
diff changeset
24 https://issues.apache.org/jira/browse/NUTCH-1058
3b37d71af924 iniitial
dwinter
parents:
diff changeset
25 https://issues.apache.org/jira/browse/NUTCH-1232
3b37d71af924 iniitial
dwinter
parents:
diff changeset
26 and
3b37d71af924 iniitial
dwinter
parents:
diff changeset
27 http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/
3b37d71af924 iniitial
dwinter
parents:
diff changeset
28 example/solr/conf/schema.xml?view=markup
3b37d71af924 iniitial
dwinter
parents:
diff changeset
29 for more info.
3b37d71af924 iniitial
dwinter
parents:
diff changeset
30 -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
31 <schema name="nutch" version="1.5">
3b37d71af924 iniitial
dwinter
parents:
diff changeset
32 <types>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
33 <fieldType name="string" class="solr.StrField" sortMissingLast="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
34 omitNorms="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
35 <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
36 omitNorms="true" positionIncrementGap="0"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
37 <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
38 omitNorms="true" positionIncrementGap="0"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
39 <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
40 omitNorms="true" positionIncrementGap="0"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
41
3b37d71af924 iniitial
dwinter
parents:
diff changeset
42 <fieldType name="text" class="solr.TextField"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
43 positionIncrementGap="100">
3b37d71af924 iniitial
dwinter
parents:
diff changeset
44 <analyzer>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
45 <tokenizer class="solr.WhitespaceTokenizerFactory"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
46 <filter class="solr.StopFilterFactory"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
47 ignoreCase="true" words="stopwords.txt"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
48 <filter class="solr.WordDelimiterFilterFactory"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
49 generateWordParts="1" generateNumberParts="1"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
50 catenateWords="1" catenateNumbers="1" catenateAll="0"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
51 splitOnCaseChange="1"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
52 <filter class="solr.LowerCaseFilterFactory"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
53 <filter class="solr.EnglishPorterFilterFactory"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
54 protected="protwords.txt"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
55 <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
56 </analyzer>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
57 </fieldType>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
58 <fieldType name="url" class="solr.TextField"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
59 positionIncrementGap="100">
3b37d71af924 iniitial
dwinter
parents:
diff changeset
60 <analyzer>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
61 <tokenizer class="solr.StandardTokenizerFactory"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
62 <filter class="solr.LowerCaseFilterFactory"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
63 <filter class="solr.WordDelimiterFilterFactory"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
64 generateWordParts="1" generateNumberParts="1"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
65 </analyzer>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
66 </fieldType>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
67 </types>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
68 <fields>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
69 <field name="id" type="string" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
70
3b37d71af924 iniitial
dwinter
parents:
diff changeset
71 <!-- core fields -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
72 <field name="segment" type="string" stored="true" indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
73 <field name="digest" type="string" stored="true" indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
74 <field name="boost" type="float" stored="true" indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
75
3b37d71af924 iniitial
dwinter
parents:
diff changeset
76 <!-- fields for index-basic plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
77 <field name="host" type="string" stored="false" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
78 <field name="url" type="url" stored="true" indexed="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
79 required="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
80 <field name="content" type="text" stored="false" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
81 <field name="title" type="text" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
82 <field name="cache" type="string" stored="true" indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
83 <field name="tstamp" type="date" stored="true" indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
84
3b37d71af924 iniitial
dwinter
parents:
diff changeset
85 <!-- fields for index-anchor plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
86 <field name="anchor" type="string" stored="true" indexed="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
87 multiValued="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
88
3b37d71af924 iniitial
dwinter
parents:
diff changeset
89 <!-- fields for index-more plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
90 <field name="type" type="string" stored="true" indexed="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
91 multiValued="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
92 <field name="contentLength" type="long" stored="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
93 indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
94 <field name="lastModified" type="date" stored="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
95 indexed="false"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
96 <field name="date" type="date" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
97
3b37d71af924 iniitial
dwinter
parents:
diff changeset
98 <!-- fields for languageidentifier plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
99 <field name="lang" type="string" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
100
3b37d71af924 iniitial
dwinter
parents:
diff changeset
101 <!-- fields for subcollection plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
102 <field name="subcollection" type="string" stored="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
103 indexed="true" multiValued="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
104
3b37d71af924 iniitial
dwinter
parents:
diff changeset
105 <!-- fields for feed plugin (tag is also used by microformats-reltag)-->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
106 <field name="author" type="string" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
107 <field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
108 <field name="feed" type="string" stored="true" indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
109 <field name="publishedDate" type="date" stored="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
110 indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
111 <field name="updatedDate" type="date" stored="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
112 indexed="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
113
3b37d71af924 iniitial
dwinter
parents:
diff changeset
114 <!-- fields for creativecommons plugin -->
3b37d71af924 iniitial
dwinter
parents:
diff changeset
115 <field name="cc" type="string" stored="true" indexed="true"
3b37d71af924 iniitial
dwinter
parents:
diff changeset
116 multiValued="true"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
117 </fields>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
118 <uniqueKey>id</uniqueKey>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
119 <defaultSearchField>content</defaultSearchField>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
120 <solrQueryParser defaultOperator="OR"/>
3b37d71af924 iniitial
dwinter
parents:
diff changeset
121 </schema>