comparison libs/commons-math-2.1/docs/apidocs/src-html/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.html @ 13:cbf34dd4d7e6

commons-math-2.1 added
author dwinter
date Tue, 04 Jan 2011 10:02:07 +0100
parents
children
comparison
equal deleted inserted replaced
12:970d26a94fb7 13:cbf34dd4d7e6
1 <HTML>
2 <BODY BGCOLOR="white">
3 <PRE>
4 <FONT color="green">001</FONT> /*<a name="line.1"></a>
5 <FONT color="green">002</FONT> * Licensed to the Apache Software Foundation (ASF) under one or more<a name="line.2"></a>
6 <FONT color="green">003</FONT> * contributor license agreements. See the NOTICE file distributed with<a name="line.3"></a>
7 <FONT color="green">004</FONT> * this work for additional information regarding copyright ownership.<a name="line.4"></a>
8 <FONT color="green">005</FONT> * The ASF licenses this file to You under the Apache License, Version 2.0<a name="line.5"></a>
9 <FONT color="green">006</FONT> * (the "License"); you may not use this file except in compliance with<a name="line.6"></a>
10 <FONT color="green">007</FONT> * the License. You may obtain a copy of the License at<a name="line.7"></a>
11 <FONT color="green">008</FONT> *<a name="line.8"></a>
12 <FONT color="green">009</FONT> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.9"></a>
13 <FONT color="green">010</FONT> *<a name="line.10"></a>
14 <FONT color="green">011</FONT> * Unless required by applicable law or agreed to in writing, software<a name="line.11"></a>
15 <FONT color="green">012</FONT> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.12"></a>
16 <FONT color="green">013</FONT> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.13"></a>
17 <FONT color="green">014</FONT> * See the License for the specific language governing permissions and<a name="line.14"></a>
18 <FONT color="green">015</FONT> * limitations under the License.<a name="line.15"></a>
19 <FONT color="green">016</FONT> */<a name="line.16"></a>
20 <FONT color="green">017</FONT> package org.apache.commons.math.stat.inference;<a name="line.17"></a>
21 <FONT color="green">018</FONT> <a name="line.18"></a>
22 <FONT color="green">019</FONT> import org.apache.commons.math.MathException;<a name="line.19"></a>
23 <FONT color="green">020</FONT> <a name="line.20"></a>
24 <FONT color="green">021</FONT> /**<a name="line.21"></a>
25 <FONT color="green">022</FONT> * An interface for Chi-Square tests for unknown distributions.<a name="line.22"></a>
26 <FONT color="green">023</FONT> * &lt;p&gt;Two-sample tests are used when the distribution is unknown &lt;i&gt;a priori&lt;/i&gt;<a name="line.23"></a>
27 <FONT color="green">024</FONT> * but provided by one sample. We compare the second sample against the first.&lt;/p&gt;<a name="line.24"></a>
28 <FONT color="green">025</FONT> *<a name="line.25"></a>
29 <FONT color="green">026</FONT> * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $<a name="line.26"></a>
30 <FONT color="green">027</FONT> * @since 1.2<a name="line.27"></a>
31 <FONT color="green">028</FONT> */<a name="line.28"></a>
32 <FONT color="green">029</FONT> public interface UnknownDistributionChiSquareTest extends ChiSquareTest {<a name="line.29"></a>
33 <FONT color="green">030</FONT> <a name="line.30"></a>
34 <FONT color="green">031</FONT> /**<a name="line.31"></a>
35 <FONT color="green">032</FONT> * &lt;p&gt;Computes a<a name="line.32"></a>
36 <FONT color="green">033</FONT> * &lt;a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm"&gt;<a name="line.33"></a>
37 <FONT color="green">034</FONT> * Chi-Square two sample test statistic&lt;/a&gt; comparing bin frequency counts<a name="line.34"></a>
38 <FONT color="green">035</FONT> * in &lt;code&gt;observed1&lt;/code&gt; and &lt;code&gt;observed2&lt;/code&gt;. The<a name="line.35"></a>
39 <FONT color="green">036</FONT> * sums of frequency counts in the two samples are not required to be the<a name="line.36"></a>
40 <FONT color="green">037</FONT> * same. The formula used to compute the test statistic is&lt;/p&gt;<a name="line.37"></a>
41 <FONT color="green">038</FONT> * &lt;code&gt;<a name="line.38"></a>
42 <FONT color="green">039</FONT> * &amp;sum;[(K * observed1[i] - observed2[i]/K)&lt;sup&gt;2&lt;/sup&gt; / (observed1[i] + observed2[i])]<a name="line.39"></a>
43 <FONT color="green">040</FONT> * &lt;/code&gt; where<a name="line.40"></a>
44 <FONT color="green">041</FONT> * &lt;br/&gt;&lt;code&gt;K = &amp;radic;[&amp;sum;(observed2) / &amp;sum;(observed1)]&lt;/code&gt;<a name="line.41"></a>
45 <FONT color="green">042</FONT> * &lt;/p&gt;<a name="line.42"></a>
46 <FONT color="green">043</FONT> * &lt;p&gt;This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that<a name="line.43"></a>
47 <FONT color="green">044</FONT> * both observed counts follow the same distribution.&lt;/p&gt;<a name="line.44"></a>
48 <FONT color="green">045</FONT> * &lt;p&gt;<a name="line.45"></a>
49 <FONT color="green">046</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.46"></a>
50 <FONT color="green">047</FONT> * &lt;li&gt;Observed counts must be non-negative.<a name="line.47"></a>
51 <FONT color="green">048</FONT> * &lt;/li&gt;<a name="line.48"></a>
52 <FONT color="green">049</FONT> * &lt;li&gt;Observed counts for a specific bin must not both be zero.<a name="line.49"></a>
53 <FONT color="green">050</FONT> * &lt;/li&gt;<a name="line.50"></a>
54 <FONT color="green">051</FONT> * &lt;li&gt;Observed counts for a specific sample must not all be 0.<a name="line.51"></a>
55 <FONT color="green">052</FONT> * &lt;/li&gt;<a name="line.52"></a>
56 <FONT color="green">053</FONT> * &lt;li&gt;The arrays &lt;code&gt;observed1&lt;/code&gt; and &lt;code&gt;observed2&lt;/code&gt; must have the same length and<a name="line.53"></a>
57 <FONT color="green">054</FONT> * their common length must be at least 2.<a name="line.54"></a>
58 <FONT color="green">055</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;&lt;p&gt;<a name="line.55"></a>
59 <FONT color="green">056</FONT> * If any of the preconditions are not met, an<a name="line.56"></a>
60 <FONT color="green">057</FONT> * &lt;code&gt;IllegalArgumentException&lt;/code&gt; is thrown.&lt;/p&gt;<a name="line.57"></a>
61 <FONT color="green">058</FONT> *<a name="line.58"></a>
62 <FONT color="green">059</FONT> * @param observed1 array of observed frequency counts of the first data set<a name="line.59"></a>
63 <FONT color="green">060</FONT> * @param observed2 array of observed frequency counts of the second data set<a name="line.60"></a>
64 <FONT color="green">061</FONT> * @return chiSquare statistic<a name="line.61"></a>
65 <FONT color="green">062</FONT> * @throws IllegalArgumentException if preconditions are not met<a name="line.62"></a>
66 <FONT color="green">063</FONT> */<a name="line.63"></a>
67 <FONT color="green">064</FONT> double chiSquareDataSetsComparison(long[] observed1, long[] observed2)<a name="line.64"></a>
68 <FONT color="green">065</FONT> throws IllegalArgumentException;<a name="line.65"></a>
69 <FONT color="green">066</FONT> <a name="line.66"></a>
70 <FONT color="green">067</FONT> /**<a name="line.67"></a>
71 <FONT color="green">068</FONT> * &lt;p&gt;Returns the &lt;i&gt;observed significance level&lt;/i&gt;, or &lt;a href=<a name="line.68"></a>
72 <FONT color="green">069</FONT> * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"&gt;<a name="line.69"></a>
73 <FONT color="green">070</FONT> * p-value&lt;/a&gt;, associated with a Chi-Square two sample test comparing<a name="line.70"></a>
74 <FONT color="green">071</FONT> * bin frequency counts in &lt;code&gt;observed1&lt;/code&gt; and<a name="line.71"></a>
75 <FONT color="green">072</FONT> * &lt;code&gt;observed2&lt;/code&gt;.<a name="line.72"></a>
76 <FONT color="green">073</FONT> * &lt;/p&gt;<a name="line.73"></a>
77 <FONT color="green">074</FONT> * &lt;p&gt;The number returned is the smallest significance level at which one<a name="line.74"></a>
78 <FONT color="green">075</FONT> * can reject the null hypothesis that the observed counts conform to the<a name="line.75"></a>
79 <FONT color="green">076</FONT> * same distribution.<a name="line.76"></a>
80 <FONT color="green">077</FONT> * &lt;/p&gt;<a name="line.77"></a>
81 <FONT color="green">078</FONT> * &lt;p&gt;See {@link #chiSquareDataSetsComparison(long[], long[])} for details<a name="line.78"></a>
82 <FONT color="green">079</FONT> * on the formula used to compute the test statistic. The degrees of<a name="line.79"></a>
83 <FONT color="green">080</FONT> * freedom used to perform the test is one less than the common length<a name="line.80"></a>
84 <FONT color="green">081</FONT> * of the input observed count arrays.<a name="line.81"></a>
85 <FONT color="green">082</FONT> * &lt;/p&gt;<a name="line.82"></a>
86 <FONT color="green">083</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.83"></a>
87 <FONT color="green">084</FONT> * &lt;li&gt;Observed counts must be non-negative.<a name="line.84"></a>
88 <FONT color="green">085</FONT> * &lt;/li&gt;<a name="line.85"></a>
89 <FONT color="green">086</FONT> * &lt;li&gt;Observed counts for a specific bin must not both be zero.<a name="line.86"></a>
90 <FONT color="green">087</FONT> * &lt;/li&gt;<a name="line.87"></a>
91 <FONT color="green">088</FONT> * &lt;li&gt;Observed counts for a specific sample must not all be 0.<a name="line.88"></a>
92 <FONT color="green">089</FONT> * &lt;/li&gt;<a name="line.89"></a>
93 <FONT color="green">090</FONT> * &lt;li&gt;The arrays &lt;code&gt;observed1&lt;/code&gt; and &lt;code&gt;observed2&lt;/code&gt; must<a name="line.90"></a>
94 <FONT color="green">091</FONT> * have the same length and<a name="line.91"></a>
95 <FONT color="green">092</FONT> * their common length must be at least 2.<a name="line.92"></a>
96 <FONT color="green">093</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;p&gt;<a name="line.93"></a>
97 <FONT color="green">094</FONT> * If any of the preconditions are not met, an<a name="line.94"></a>
98 <FONT color="green">095</FONT> * &lt;code&gt;IllegalArgumentException&lt;/code&gt; is thrown.&lt;/p&gt;<a name="line.95"></a>
99 <FONT color="green">096</FONT> *<a name="line.96"></a>
100 <FONT color="green">097</FONT> * @param observed1 array of observed frequency counts of the first data set<a name="line.97"></a>
101 <FONT color="green">098</FONT> * @param observed2 array of observed frequency counts of the second data set<a name="line.98"></a>
102 <FONT color="green">099</FONT> * @return p-value<a name="line.99"></a>
103 <FONT color="green">100</FONT> * @throws IllegalArgumentException if preconditions are not met<a name="line.100"></a>
104 <FONT color="green">101</FONT> * @throws MathException if an error occurs computing the p-value<a name="line.101"></a>
105 <FONT color="green">102</FONT> */<a name="line.102"></a>
106 <FONT color="green">103</FONT> double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)<a name="line.103"></a>
107 <FONT color="green">104</FONT> throws IllegalArgumentException, MathException;<a name="line.104"></a>
108 <FONT color="green">105</FONT> <a name="line.105"></a>
109 <FONT color="green">106</FONT> /**<a name="line.106"></a>
110 <FONT color="green">107</FONT> * &lt;p&gt;Performs a Chi-Square two sample test comparing two binned data<a name="line.107"></a>
111 <FONT color="green">108</FONT> * sets. The test evaluates the null hypothesis that the two lists of<a name="line.108"></a>
112 <FONT color="green">109</FONT> * observed counts conform to the same frequency distribution, with<a name="line.109"></a>
113 <FONT color="green">110</FONT> * significance level &lt;code&gt;alpha&lt;/code&gt;. Returns true iff the null<a name="line.110"></a>
114 <FONT color="green">111</FONT> * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.<a name="line.111"></a>
115 <FONT color="green">112</FONT> * &lt;/p&gt;<a name="line.112"></a>
116 <FONT color="green">113</FONT> * &lt;p&gt;See {@link #chiSquareDataSetsComparison(long[], long[])} for<a name="line.113"></a>
117 <FONT color="green">114</FONT> * details on the formula used to compute the Chi-Square statistic used<a name="line.114"></a>
118 <FONT color="green">115</FONT> * in the test. The degrees of freedom used to perform the test is<a name="line.115"></a>
119 <FONT color="green">116</FONT> * one less than the common length of the input observed count arrays.<a name="line.116"></a>
120 <FONT color="green">117</FONT> * &lt;/p&gt;<a name="line.117"></a>
121 <FONT color="green">118</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.118"></a>
122 <FONT color="green">119</FONT> * &lt;li&gt;Observed counts must be non-negative.<a name="line.119"></a>
123 <FONT color="green">120</FONT> * &lt;/li&gt;<a name="line.120"></a>
124 <FONT color="green">121</FONT> * &lt;li&gt;Observed counts for a specific bin must not both be zero.<a name="line.121"></a>
125 <FONT color="green">122</FONT> * &lt;/li&gt;<a name="line.122"></a>
126 <FONT color="green">123</FONT> * &lt;li&gt;Observed counts for a specific sample must not all be 0.<a name="line.123"></a>
127 <FONT color="green">124</FONT> * &lt;/li&gt;<a name="line.124"></a>
128 <FONT color="green">125</FONT> * &lt;li&gt;The arrays &lt;code&gt;observed1&lt;/code&gt; and &lt;code&gt;observed2&lt;/code&gt; must<a name="line.125"></a>
129 <FONT color="green">126</FONT> * have the same length and their common length must be at least 2.<a name="line.126"></a>
130 <FONT color="green">127</FONT> * &lt;/li&gt;<a name="line.127"></a>
131 <FONT color="green">128</FONT> * &lt;li&gt; &lt;code&gt; 0 &amp;lt; alpha &amp;lt; 0.5 &lt;/code&gt;<a name="line.128"></a>
132 <FONT color="green">129</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;p&gt;<a name="line.129"></a>
133 <FONT color="green">130</FONT> * If any of the preconditions are not met, an<a name="line.130"></a>
134 <FONT color="green">131</FONT> * &lt;code&gt;IllegalArgumentException&lt;/code&gt; is thrown.&lt;/p&gt;<a name="line.131"></a>
135 <FONT color="green">132</FONT> *<a name="line.132"></a>
136 <FONT color="green">133</FONT> * @param observed1 array of observed frequency counts of the first data set<a name="line.133"></a>
137 <FONT color="green">134</FONT> * @param observed2 array of observed frequency counts of the second data set<a name="line.134"></a>
138 <FONT color="green">135</FONT> * @param alpha significance level of the test<a name="line.135"></a>
139 <FONT color="green">136</FONT> * @return true iff null hypothesis can be rejected with confidence<a name="line.136"></a>
140 <FONT color="green">137</FONT> * 1 - alpha<a name="line.137"></a>
141 <FONT color="green">138</FONT> * @throws IllegalArgumentException if preconditions are not met<a name="line.138"></a>
142 <FONT color="green">139</FONT> * @throws MathException if an error occurs performing the test<a name="line.139"></a>
143 <FONT color="green">140</FONT> */<a name="line.140"></a>
144 <FONT color="green">141</FONT> boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)<a name="line.141"></a>
145 <FONT color="green">142</FONT> throws IllegalArgumentException, MathException;<a name="line.142"></a>
146 <FONT color="green">143</FONT> <a name="line.143"></a>
147 <FONT color="green">144</FONT> }<a name="line.144"></a>
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208 </PRE>
209 </BODY>
210 </HTML>