comparison libs/commons-math-2.1/docs/apidocs/src-html/org/apache/commons/math/stat/regression/SimpleRegression.html @ 13:cbf34dd4d7e6

commons-math-2.1 added
author dwinter
date Tue, 04 Jan 2011 10:02:07 +0100
parents
children
comparison
equal deleted inserted replaced
12:970d26a94fb7 13:cbf34dd4d7e6
1 <HTML>
2 <BODY BGCOLOR="white">
3 <PRE>
4 <FONT color="green">001</FONT> /*<a name="line.1"></a>
5 <FONT color="green">002</FONT> * Licensed to the Apache Software Foundation (ASF) under one or more<a name="line.2"></a>
6 <FONT color="green">003</FONT> * contributor license agreements. See the NOTICE file distributed with<a name="line.3"></a>
7 <FONT color="green">004</FONT> * this work for additional information regarding copyright ownership.<a name="line.4"></a>
8 <FONT color="green">005</FONT> * The ASF licenses this file to You under the Apache License, Version 2.0<a name="line.5"></a>
9 <FONT color="green">006</FONT> * (the "License"); you may not use this file except in compliance with<a name="line.6"></a>
10 <FONT color="green">007</FONT> * the License. You may obtain a copy of the License at<a name="line.7"></a>
11 <FONT color="green">008</FONT> *<a name="line.8"></a>
12 <FONT color="green">009</FONT> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.9"></a>
13 <FONT color="green">010</FONT> *<a name="line.10"></a>
14 <FONT color="green">011</FONT> * Unless required by applicable law or agreed to in writing, software<a name="line.11"></a>
15 <FONT color="green">012</FONT> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.12"></a>
16 <FONT color="green">013</FONT> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.13"></a>
17 <FONT color="green">014</FONT> * See the License for the specific language governing permissions and<a name="line.14"></a>
18 <FONT color="green">015</FONT> * limitations under the License.<a name="line.15"></a>
19 <FONT color="green">016</FONT> */<a name="line.16"></a>
20 <FONT color="green">017</FONT> <a name="line.17"></a>
21 <FONT color="green">018</FONT> package org.apache.commons.math.stat.regression;<a name="line.18"></a>
22 <FONT color="green">019</FONT> import java.io.Serializable;<a name="line.19"></a>
23 <FONT color="green">020</FONT> <a name="line.20"></a>
24 <FONT color="green">021</FONT> import org.apache.commons.math.MathException;<a name="line.21"></a>
25 <FONT color="green">022</FONT> import org.apache.commons.math.MathRuntimeException;<a name="line.22"></a>
26 <FONT color="green">023</FONT> import org.apache.commons.math.distribution.TDistribution;<a name="line.23"></a>
27 <FONT color="green">024</FONT> import org.apache.commons.math.distribution.TDistributionImpl;<a name="line.24"></a>
28 <FONT color="green">025</FONT> <a name="line.25"></a>
29 <FONT color="green">026</FONT> /**<a name="line.26"></a>
30 <FONT color="green">027</FONT> * Estimates an ordinary least squares regression model<a name="line.27"></a>
31 <FONT color="green">028</FONT> * with one independent variable.<a name="line.28"></a>
32 <FONT color="green">029</FONT> * &lt;p&gt;<a name="line.29"></a>
33 <FONT color="green">030</FONT> * &lt;code&gt; y = intercept + slope * x &lt;/code&gt;&lt;/p&gt;<a name="line.30"></a>
34 <FONT color="green">031</FONT> * &lt;p&gt;<a name="line.31"></a>
35 <FONT color="green">032</FONT> * Standard errors for &lt;code&gt;intercept&lt;/code&gt; and &lt;code&gt;slope&lt;/code&gt; are<a name="line.32"></a>
36 <FONT color="green">033</FONT> * available as well as ANOVA, r-square and Pearson's r statistics.&lt;/p&gt;<a name="line.33"></a>
37 <FONT color="green">034</FONT> * &lt;p&gt;<a name="line.34"></a>
38 <FONT color="green">035</FONT> * Observations (x,y pairs) can be added to the model one at a time or they<a name="line.35"></a>
39 <FONT color="green">036</FONT> * can be provided in a 2-dimensional array. The observations are not stored<a name="line.36"></a>
40 <FONT color="green">037</FONT> * in memory, so there is no limit to the number of observations that can be<a name="line.37"></a>
41 <FONT color="green">038</FONT> * added to the model.&lt;/p&gt;<a name="line.38"></a>
42 <FONT color="green">039</FONT> * &lt;p&gt;<a name="line.39"></a>
43 <FONT color="green">040</FONT> * &lt;strong&gt;Usage Notes&lt;/strong&gt;: &lt;ul&gt;<a name="line.40"></a>
44 <FONT color="green">041</FONT> * &lt;li&gt; When there are fewer than two observations in the model, or when<a name="line.41"></a>
45 <FONT color="green">042</FONT> * there is no variation in the x values (i.e. all x values are the same)<a name="line.42"></a>
46 <FONT color="green">043</FONT> * all statistics return &lt;code&gt;NaN&lt;/code&gt;. At least two observations with<a name="line.43"></a>
47 <FONT color="green">044</FONT> * different x coordinates are required to estimate a bivariate regression<a name="line.44"></a>
48 <FONT color="green">045</FONT> * model.<a name="line.45"></a>
49 <FONT color="green">046</FONT> * &lt;/li&gt;<a name="line.46"></a>
50 <FONT color="green">047</FONT> * &lt;li&gt; getters for the statistics always compute values based on the current<a name="line.47"></a>
51 <FONT color="green">048</FONT> * set of observations -- i.e., you can get statistics, then add more data<a name="line.48"></a>
52 <FONT color="green">049</FONT> * and get updated statistics without using a new instance. There is no<a name="line.49"></a>
53 <FONT color="green">050</FONT> * "compute" method that updates all statistics. Each of the getters performs<a name="line.50"></a>
54 <FONT color="green">051</FONT> * the necessary computations to return the requested statistic.&lt;/li&gt;<a name="line.51"></a>
55 <FONT color="green">052</FONT> * &lt;/ul&gt;&lt;/p&gt;<a name="line.52"></a>
56 <FONT color="green">053</FONT> *<a name="line.53"></a>
57 <FONT color="green">054</FONT> * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $<a name="line.54"></a>
58 <FONT color="green">055</FONT> */<a name="line.55"></a>
59 <FONT color="green">056</FONT> public class SimpleRegression implements Serializable {<a name="line.56"></a>
60 <FONT color="green">057</FONT> <a name="line.57"></a>
61 <FONT color="green">058</FONT> /** Serializable version identifier */<a name="line.58"></a>
62 <FONT color="green">059</FONT> private static final long serialVersionUID = -3004689053607543335L;<a name="line.59"></a>
63 <FONT color="green">060</FONT> <a name="line.60"></a>
64 <FONT color="green">061</FONT> /** the distribution used to compute inference statistics. */<a name="line.61"></a>
65 <FONT color="green">062</FONT> private TDistribution distribution;<a name="line.62"></a>
66 <FONT color="green">063</FONT> <a name="line.63"></a>
67 <FONT color="green">064</FONT> /** sum of x values */<a name="line.64"></a>
68 <FONT color="green">065</FONT> private double sumX = 0d;<a name="line.65"></a>
69 <FONT color="green">066</FONT> <a name="line.66"></a>
70 <FONT color="green">067</FONT> /** total variation in x (sum of squared deviations from xbar) */<a name="line.67"></a>
71 <FONT color="green">068</FONT> private double sumXX = 0d;<a name="line.68"></a>
72 <FONT color="green">069</FONT> <a name="line.69"></a>
73 <FONT color="green">070</FONT> /** sum of y values */<a name="line.70"></a>
74 <FONT color="green">071</FONT> private double sumY = 0d;<a name="line.71"></a>
75 <FONT color="green">072</FONT> <a name="line.72"></a>
76 <FONT color="green">073</FONT> /** total variation in y (sum of squared deviations from ybar) */<a name="line.73"></a>
77 <FONT color="green">074</FONT> private double sumYY = 0d;<a name="line.74"></a>
78 <FONT color="green">075</FONT> <a name="line.75"></a>
79 <FONT color="green">076</FONT> /** sum of products */<a name="line.76"></a>
80 <FONT color="green">077</FONT> private double sumXY = 0d;<a name="line.77"></a>
81 <FONT color="green">078</FONT> <a name="line.78"></a>
82 <FONT color="green">079</FONT> /** number of observations */<a name="line.79"></a>
83 <FONT color="green">080</FONT> private long n = 0;<a name="line.80"></a>
84 <FONT color="green">081</FONT> <a name="line.81"></a>
85 <FONT color="green">082</FONT> /** mean of accumulated x values, used in updating formulas */<a name="line.82"></a>
86 <FONT color="green">083</FONT> private double xbar = 0;<a name="line.83"></a>
87 <FONT color="green">084</FONT> <a name="line.84"></a>
88 <FONT color="green">085</FONT> /** mean of accumulated y values, used in updating formulas */<a name="line.85"></a>
89 <FONT color="green">086</FONT> private double ybar = 0;<a name="line.86"></a>
90 <FONT color="green">087</FONT> <a name="line.87"></a>
91 <FONT color="green">088</FONT> // ---------------------Public methods--------------------------------------<a name="line.88"></a>
92 <FONT color="green">089</FONT> <a name="line.89"></a>
93 <FONT color="green">090</FONT> /**<a name="line.90"></a>
94 <FONT color="green">091</FONT> * Create an empty SimpleRegression instance<a name="line.91"></a>
95 <FONT color="green">092</FONT> */<a name="line.92"></a>
96 <FONT color="green">093</FONT> public SimpleRegression() {<a name="line.93"></a>
97 <FONT color="green">094</FONT> this(new TDistributionImpl(1.0));<a name="line.94"></a>
98 <FONT color="green">095</FONT> }<a name="line.95"></a>
99 <FONT color="green">096</FONT> <a name="line.96"></a>
100 <FONT color="green">097</FONT> /**<a name="line.97"></a>
101 <FONT color="green">098</FONT> * Create an empty SimpleRegression using the given distribution object to<a name="line.98"></a>
102 <FONT color="green">099</FONT> * compute inference statistics.<a name="line.99"></a>
103 <FONT color="green">100</FONT> * @param t the distribution used to compute inference statistics.<a name="line.100"></a>
104 <FONT color="green">101</FONT> * @since 1.2<a name="line.101"></a>
105 <FONT color="green">102</FONT> */<a name="line.102"></a>
106 <FONT color="green">103</FONT> public SimpleRegression(TDistribution t) {<a name="line.103"></a>
107 <FONT color="green">104</FONT> super();<a name="line.104"></a>
108 <FONT color="green">105</FONT> setDistribution(t);<a name="line.105"></a>
109 <FONT color="green">106</FONT> }<a name="line.106"></a>
110 <FONT color="green">107</FONT> <a name="line.107"></a>
111 <FONT color="green">108</FONT> /**<a name="line.108"></a>
112 <FONT color="green">109</FONT> * Adds the observation (x,y) to the regression data set.<a name="line.109"></a>
113 <FONT color="green">110</FONT> * &lt;p&gt;<a name="line.110"></a>
114 <FONT color="green">111</FONT> * Uses updating formulas for means and sums of squares defined in<a name="line.111"></a>
115 <FONT color="green">112</FONT> * "Algorithms for Computing the Sample Variance: Analysis and<a name="line.112"></a>
116 <FONT color="green">113</FONT> * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.<a name="line.113"></a>
117 <FONT color="green">114</FONT> * 1983, American Statistician, vol. 37, pp. 242-247, referenced in<a name="line.114"></a>
118 <FONT color="green">115</FONT> * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.&lt;/p&gt;<a name="line.115"></a>
119 <FONT color="green">116</FONT> *<a name="line.116"></a>
120 <FONT color="green">117</FONT> *<a name="line.117"></a>
121 <FONT color="green">118</FONT> * @param x independent variable value<a name="line.118"></a>
122 <FONT color="green">119</FONT> * @param y dependent variable value<a name="line.119"></a>
123 <FONT color="green">120</FONT> */<a name="line.120"></a>
124 <FONT color="green">121</FONT> public void addData(double x, double y) {<a name="line.121"></a>
125 <FONT color="green">122</FONT> if (n == 0) {<a name="line.122"></a>
126 <FONT color="green">123</FONT> xbar = x;<a name="line.123"></a>
127 <FONT color="green">124</FONT> ybar = y;<a name="line.124"></a>
128 <FONT color="green">125</FONT> } else {<a name="line.125"></a>
129 <FONT color="green">126</FONT> double dx = x - xbar;<a name="line.126"></a>
130 <FONT color="green">127</FONT> double dy = y - ybar;<a name="line.127"></a>
131 <FONT color="green">128</FONT> sumXX += dx * dx * (double) n / (n + 1d);<a name="line.128"></a>
132 <FONT color="green">129</FONT> sumYY += dy * dy * (double) n / (n + 1d);<a name="line.129"></a>
133 <FONT color="green">130</FONT> sumXY += dx * dy * (double) n / (n + 1d);<a name="line.130"></a>
134 <FONT color="green">131</FONT> xbar += dx / (n + 1.0);<a name="line.131"></a>
135 <FONT color="green">132</FONT> ybar += dy / (n + 1.0);<a name="line.132"></a>
136 <FONT color="green">133</FONT> }<a name="line.133"></a>
137 <FONT color="green">134</FONT> sumX += x;<a name="line.134"></a>
138 <FONT color="green">135</FONT> sumY += y;<a name="line.135"></a>
139 <FONT color="green">136</FONT> n++;<a name="line.136"></a>
140 <FONT color="green">137</FONT> <a name="line.137"></a>
141 <FONT color="green">138</FONT> if (n &gt; 2) {<a name="line.138"></a>
142 <FONT color="green">139</FONT> distribution.setDegreesOfFreedom(n - 2);<a name="line.139"></a>
143 <FONT color="green">140</FONT> }<a name="line.140"></a>
144 <FONT color="green">141</FONT> }<a name="line.141"></a>
145 <FONT color="green">142</FONT> <a name="line.142"></a>
146 <FONT color="green">143</FONT> <a name="line.143"></a>
147 <FONT color="green">144</FONT> /**<a name="line.144"></a>
148 <FONT color="green">145</FONT> * Removes the observation (x,y) from the regression data set.<a name="line.145"></a>
149 <FONT color="green">146</FONT> * &lt;p&gt;<a name="line.146"></a>
150 <FONT color="green">147</FONT> * Mirrors the addData method. This method permits the use of<a name="line.147"></a>
151 <FONT color="green">148</FONT> * SimpleRegression instances in streaming mode where the regression<a name="line.148"></a>
152 <FONT color="green">149</FONT> * is applied to a sliding "window" of observations, however the caller is<a name="line.149"></a>
153 <FONT color="green">150</FONT> * responsible for maintaining the set of observations in the window.&lt;/p&gt;<a name="line.150"></a>
154 <FONT color="green">151</FONT> *<a name="line.151"></a>
155 <FONT color="green">152</FONT> * The method has no effect if there are no points of data (i.e. n=0)<a name="line.152"></a>
156 <FONT color="green">153</FONT> *<a name="line.153"></a>
157 <FONT color="green">154</FONT> * @param x independent variable value<a name="line.154"></a>
158 <FONT color="green">155</FONT> * @param y dependent variable value<a name="line.155"></a>
159 <FONT color="green">156</FONT> */<a name="line.156"></a>
160 <FONT color="green">157</FONT> public void removeData(double x, double y) {<a name="line.157"></a>
161 <FONT color="green">158</FONT> if (n &gt; 0) {<a name="line.158"></a>
162 <FONT color="green">159</FONT> double dx = x - xbar;<a name="line.159"></a>
163 <FONT color="green">160</FONT> double dy = y - ybar;<a name="line.160"></a>
164 <FONT color="green">161</FONT> sumXX -= dx * dx * (double) n / (n - 1d);<a name="line.161"></a>
165 <FONT color="green">162</FONT> sumYY -= dy * dy * (double) n / (n - 1d);<a name="line.162"></a>
166 <FONT color="green">163</FONT> sumXY -= dx * dy * (double) n / (n - 1d);<a name="line.163"></a>
167 <FONT color="green">164</FONT> xbar -= dx / (n - 1.0);<a name="line.164"></a>
168 <FONT color="green">165</FONT> ybar -= dy / (n - 1.0);<a name="line.165"></a>
169 <FONT color="green">166</FONT> sumX -= x;<a name="line.166"></a>
170 <FONT color="green">167</FONT> sumY -= y;<a name="line.167"></a>
171 <FONT color="green">168</FONT> n--;<a name="line.168"></a>
172 <FONT color="green">169</FONT> <a name="line.169"></a>
173 <FONT color="green">170</FONT> if (n &gt; 2) {<a name="line.170"></a>
174 <FONT color="green">171</FONT> distribution.setDegreesOfFreedom(n - 2);<a name="line.171"></a>
175 <FONT color="green">172</FONT> }<a name="line.172"></a>
176 <FONT color="green">173</FONT> }<a name="line.173"></a>
177 <FONT color="green">174</FONT> }<a name="line.174"></a>
178 <FONT color="green">175</FONT> <a name="line.175"></a>
179 <FONT color="green">176</FONT> /**<a name="line.176"></a>
180 <FONT color="green">177</FONT> * Adds the observations represented by the elements in<a name="line.177"></a>
181 <FONT color="green">178</FONT> * &lt;code&gt;data&lt;/code&gt;.<a name="line.178"></a>
182 <FONT color="green">179</FONT> * &lt;p&gt;<a name="line.179"></a>
183 <FONT color="green">180</FONT> * &lt;code&gt;(data[0][0],data[0][1])&lt;/code&gt; will be the first observation, then<a name="line.180"></a>
184 <FONT color="green">181</FONT> * &lt;code&gt;(data[1][0],data[1][1])&lt;/code&gt;, etc.&lt;/p&gt;<a name="line.181"></a>
185 <FONT color="green">182</FONT> * &lt;p&gt;<a name="line.182"></a>
186 <FONT color="green">183</FONT> * This method does not replace data that has already been added. The<a name="line.183"></a>
187 <FONT color="green">184</FONT> * observations represented by &lt;code&gt;data&lt;/code&gt; are added to the existing<a name="line.184"></a>
188 <FONT color="green">185</FONT> * dataset.&lt;/p&gt;<a name="line.185"></a>
189 <FONT color="green">186</FONT> * &lt;p&gt;<a name="line.186"></a>
190 <FONT color="green">187</FONT> * To replace all data, use &lt;code&gt;clear()&lt;/code&gt; before adding the new<a name="line.187"></a>
191 <FONT color="green">188</FONT> * data.&lt;/p&gt;<a name="line.188"></a>
192 <FONT color="green">189</FONT> *<a name="line.189"></a>
193 <FONT color="green">190</FONT> * @param data array of observations to be added<a name="line.190"></a>
194 <FONT color="green">191</FONT> */<a name="line.191"></a>
195 <FONT color="green">192</FONT> public void addData(double[][] data) {<a name="line.192"></a>
196 <FONT color="green">193</FONT> for (int i = 0; i &lt; data.length; i++) {<a name="line.193"></a>
197 <FONT color="green">194</FONT> addData(data[i][0], data[i][1]);<a name="line.194"></a>
198 <FONT color="green">195</FONT> }<a name="line.195"></a>
199 <FONT color="green">196</FONT> }<a name="line.196"></a>
200 <FONT color="green">197</FONT> <a name="line.197"></a>
201 <FONT color="green">198</FONT> <a name="line.198"></a>
202 <FONT color="green">199</FONT> /**<a name="line.199"></a>
203 <FONT color="green">200</FONT> * Removes observations represented by the elements in &lt;code&gt;data&lt;/code&gt;.<a name="line.200"></a>
204 <FONT color="green">201</FONT> * &lt;p&gt;<a name="line.201"></a>
205 <FONT color="green">202</FONT> * If the array is larger than the current n, only the first n elements are<a name="line.202"></a>
206 <FONT color="green">203</FONT> * processed. This method permits the use of SimpleRegression instances in<a name="line.203"></a>
207 <FONT color="green">204</FONT> * streaming mode where the regression is applied to a sliding "window" of<a name="line.204"></a>
208 <FONT color="green">205</FONT> * observations, however the caller is responsible for maintaining the set<a name="line.205"></a>
209 <FONT color="green">206</FONT> * of observations in the window.&lt;/p&gt;<a name="line.206"></a>
210 <FONT color="green">207</FONT> * &lt;p&gt;<a name="line.207"></a>
211 <FONT color="green">208</FONT> * To remove all data, use &lt;code&gt;clear()&lt;/code&gt;.&lt;/p&gt;<a name="line.208"></a>
212 <FONT color="green">209</FONT> *<a name="line.209"></a>
213 <FONT color="green">210</FONT> * @param data array of observations to be removed<a name="line.210"></a>
214 <FONT color="green">211</FONT> */<a name="line.211"></a>
215 <FONT color="green">212</FONT> public void removeData(double[][] data) {<a name="line.212"></a>
216 <FONT color="green">213</FONT> for (int i = 0; i &lt; data.length &amp;&amp; n &gt; 0; i++) {<a name="line.213"></a>
217 <FONT color="green">214</FONT> removeData(data[i][0], data[i][1]);<a name="line.214"></a>
218 <FONT color="green">215</FONT> }<a name="line.215"></a>
219 <FONT color="green">216</FONT> }<a name="line.216"></a>
220 <FONT color="green">217</FONT> <a name="line.217"></a>
221 <FONT color="green">218</FONT> /**<a name="line.218"></a>
222 <FONT color="green">219</FONT> * Clears all data from the model.<a name="line.219"></a>
223 <FONT color="green">220</FONT> */<a name="line.220"></a>
224 <FONT color="green">221</FONT> public void clear() {<a name="line.221"></a>
225 <FONT color="green">222</FONT> sumX = 0d;<a name="line.222"></a>
226 <FONT color="green">223</FONT> sumXX = 0d;<a name="line.223"></a>
227 <FONT color="green">224</FONT> sumY = 0d;<a name="line.224"></a>
228 <FONT color="green">225</FONT> sumYY = 0d;<a name="line.225"></a>
229 <FONT color="green">226</FONT> sumXY = 0d;<a name="line.226"></a>
230 <FONT color="green">227</FONT> n = 0;<a name="line.227"></a>
231 <FONT color="green">228</FONT> }<a name="line.228"></a>
232 <FONT color="green">229</FONT> <a name="line.229"></a>
233 <FONT color="green">230</FONT> /**<a name="line.230"></a>
234 <FONT color="green">231</FONT> * Returns the number of observations that have been added to the model.<a name="line.231"></a>
235 <FONT color="green">232</FONT> *<a name="line.232"></a>
236 <FONT color="green">233</FONT> * @return n number of observations that have been added.<a name="line.233"></a>
237 <FONT color="green">234</FONT> */<a name="line.234"></a>
238 <FONT color="green">235</FONT> public long getN() {<a name="line.235"></a>
239 <FONT color="green">236</FONT> return n;<a name="line.236"></a>
240 <FONT color="green">237</FONT> }<a name="line.237"></a>
241 <FONT color="green">238</FONT> <a name="line.238"></a>
242 <FONT color="green">239</FONT> /**<a name="line.239"></a>
243 <FONT color="green">240</FONT> * Returns the "predicted" &lt;code&gt;y&lt;/code&gt; value associated with the<a name="line.240"></a>
244 <FONT color="green">241</FONT> * supplied &lt;code&gt;x&lt;/code&gt; value, based on the data that has been<a name="line.241"></a>
245 <FONT color="green">242</FONT> * added to the model when this method is invoked.<a name="line.242"></a>
246 <FONT color="green">243</FONT> * &lt;p&gt;<a name="line.243"></a>
247 <FONT color="green">244</FONT> * &lt;code&gt; predict(x) = intercept + slope * x &lt;/code&gt;&lt;/p&gt;<a name="line.244"></a>
248 <FONT color="green">245</FONT> * &lt;p&gt;<a name="line.245"></a>
249 <FONT color="green">246</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.246"></a>
250 <FONT color="green">247</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.247"></a>
251 <FONT color="green">248</FONT> * must have been added before invoking this method. If this method is<a name="line.248"></a>
252 <FONT color="green">249</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.249"></a>
253 <FONT color="green">250</FONT> * returned.<a name="line.250"></a>
254 <FONT color="green">251</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.251"></a>
255 <FONT color="green">252</FONT> *<a name="line.252"></a>
256 <FONT color="green">253</FONT> * @param x input &lt;code&gt;x&lt;/code&gt; value<a name="line.253"></a>
257 <FONT color="green">254</FONT> * @return predicted &lt;code&gt;y&lt;/code&gt; value<a name="line.254"></a>
258 <FONT color="green">255</FONT> */<a name="line.255"></a>
259 <FONT color="green">256</FONT> public double predict(double x) {<a name="line.256"></a>
260 <FONT color="green">257</FONT> double b1 = getSlope();<a name="line.257"></a>
261 <FONT color="green">258</FONT> return getIntercept(b1) + b1 * x;<a name="line.258"></a>
262 <FONT color="green">259</FONT> }<a name="line.259"></a>
263 <FONT color="green">260</FONT> <a name="line.260"></a>
264 <FONT color="green">261</FONT> /**<a name="line.261"></a>
265 <FONT color="green">262</FONT> * Returns the intercept of the estimated regression line.<a name="line.262"></a>
266 <FONT color="green">263</FONT> * &lt;p&gt;<a name="line.263"></a>
267 <FONT color="green">264</FONT> * The least squares estimate of the intercept is computed using the<a name="line.264"></a>
268 <FONT color="green">265</FONT> * &lt;a href="http://www.xycoon.com/estimation4.htm"&gt;normal equations&lt;/a&gt;.<a name="line.265"></a>
269 <FONT color="green">266</FONT> * The intercept is sometimes denoted b0.&lt;/p&gt;<a name="line.266"></a>
270 <FONT color="green">267</FONT> * &lt;p&gt;<a name="line.267"></a>
271 <FONT color="green">268</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.268"></a>
272 <FONT color="green">269</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.269"></a>
273 <FONT color="green">270</FONT> * must have been added before invoking this method. If this method is<a name="line.270"></a>
274 <FONT color="green">271</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.271"></a>
275 <FONT color="green">272</FONT> * returned.<a name="line.272"></a>
276 <FONT color="green">273</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.273"></a>
277 <FONT color="green">274</FONT> *<a name="line.274"></a>
278 <FONT color="green">275</FONT> * @return the intercept of the regression line<a name="line.275"></a>
279 <FONT color="green">276</FONT> */<a name="line.276"></a>
280 <FONT color="green">277</FONT> public double getIntercept() {<a name="line.277"></a>
281 <FONT color="green">278</FONT> return getIntercept(getSlope());<a name="line.278"></a>
282 <FONT color="green">279</FONT> }<a name="line.279"></a>
283 <FONT color="green">280</FONT> <a name="line.280"></a>
284 <FONT color="green">281</FONT> /**<a name="line.281"></a>
285 <FONT color="green">282</FONT> * Returns the slope of the estimated regression line.<a name="line.282"></a>
286 <FONT color="green">283</FONT> * &lt;p&gt;<a name="line.283"></a>
287 <FONT color="green">284</FONT> * The least squares estimate of the slope is computed using the<a name="line.284"></a>
288 <FONT color="green">285</FONT> * &lt;a href="http://www.xycoon.com/estimation4.htm"&gt;normal equations&lt;/a&gt;.<a name="line.285"></a>
289 <FONT color="green">286</FONT> * The slope is sometimes denoted b1.&lt;/p&gt;<a name="line.286"></a>
290 <FONT color="green">287</FONT> * &lt;p&gt;<a name="line.287"></a>
291 <FONT color="green">288</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.288"></a>
292 <FONT color="green">289</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.289"></a>
293 <FONT color="green">290</FONT> * must have been added before invoking this method. If this method is<a name="line.290"></a>
294 <FONT color="green">291</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.291"></a>
295 <FONT color="green">292</FONT> * returned.<a name="line.292"></a>
296 <FONT color="green">293</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.293"></a>
297 <FONT color="green">294</FONT> *<a name="line.294"></a>
298 <FONT color="green">295</FONT> * @return the slope of the regression line<a name="line.295"></a>
299 <FONT color="green">296</FONT> */<a name="line.296"></a>
300 <FONT color="green">297</FONT> public double getSlope() {<a name="line.297"></a>
301 <FONT color="green">298</FONT> if (n &lt; 2) {<a name="line.298"></a>
302 <FONT color="green">299</FONT> return Double.NaN; //not enough data<a name="line.299"></a>
303 <FONT color="green">300</FONT> }<a name="line.300"></a>
304 <FONT color="green">301</FONT> if (Math.abs(sumXX) &lt; 10 * Double.MIN_VALUE) {<a name="line.301"></a>
305 <FONT color="green">302</FONT> return Double.NaN; //not enough variation in x<a name="line.302"></a>
306 <FONT color="green">303</FONT> }<a name="line.303"></a>
307 <FONT color="green">304</FONT> return sumXY / sumXX;<a name="line.304"></a>
308 <FONT color="green">305</FONT> }<a name="line.305"></a>
309 <FONT color="green">306</FONT> <a name="line.306"></a>
310 <FONT color="green">307</FONT> /**<a name="line.307"></a>
311 <FONT color="green">308</FONT> * Returns the &lt;a href="http://www.xycoon.com/SumOfSquares.htm"&gt;<a name="line.308"></a>
312 <FONT color="green">309</FONT> * sum of squared errors&lt;/a&gt; (SSE) associated with the regression<a name="line.309"></a>
313 <FONT color="green">310</FONT> * model.<a name="line.310"></a>
314 <FONT color="green">311</FONT> * &lt;p&gt;<a name="line.311"></a>
315 <FONT color="green">312</FONT> * The sum is computed using the computational formula&lt;/p&gt;<a name="line.312"></a>
316 <FONT color="green">313</FONT> * &lt;p&gt;<a name="line.313"></a>
317 <FONT color="green">314</FONT> * &lt;code&gt;SSE = SYY - (SXY * SXY / SXX)&lt;/code&gt;&lt;/p&gt;<a name="line.314"></a>
318 <FONT color="green">315</FONT> * &lt;p&gt;<a name="line.315"></a>
319 <FONT color="green">316</FONT> * where &lt;code&gt;SYY&lt;/code&gt; is the sum of the squared deviations of the y<a name="line.316"></a>
320 <FONT color="green">317</FONT> * values about their mean, &lt;code&gt;SXX&lt;/code&gt; is similarly defined and<a name="line.317"></a>
321 <FONT color="green">318</FONT> * &lt;code&gt;SXY&lt;/code&gt; is the sum of the products of x and y mean deviations.<a name="line.318"></a>
322 <FONT color="green">319</FONT> * &lt;/p&gt;&lt;p&gt;<a name="line.319"></a>
323 <FONT color="green">320</FONT> * The sums are accumulated using the updating algorithm referenced in<a name="line.320"></a>
324 <FONT color="green">321</FONT> * {@link #addData}.&lt;/p&gt;<a name="line.321"></a>
325 <FONT color="green">322</FONT> * &lt;p&gt;<a name="line.322"></a>
326 <FONT color="green">323</FONT> * The return value is constrained to be non-negative - i.e., if due to<a name="line.323"></a>
327 <FONT color="green">324</FONT> * rounding errors the computational formula returns a negative result,<a name="line.324"></a>
328 <FONT color="green">325</FONT> * 0 is returned.&lt;/p&gt;<a name="line.325"></a>
329 <FONT color="green">326</FONT> * &lt;p&gt;<a name="line.326"></a>
330 <FONT color="green">327</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.327"></a>
331 <FONT color="green">328</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.328"></a>
332 <FONT color="green">329</FONT> * must have been added before invoking this method. If this method is<a name="line.329"></a>
333 <FONT color="green">330</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.330"></a>
334 <FONT color="green">331</FONT> * returned.<a name="line.331"></a>
335 <FONT color="green">332</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.332"></a>
336 <FONT color="green">333</FONT> *<a name="line.333"></a>
337 <FONT color="green">334</FONT> * @return sum of squared errors associated with the regression model<a name="line.334"></a>
338 <FONT color="green">335</FONT> */<a name="line.335"></a>
339 <FONT color="green">336</FONT> public double getSumSquaredErrors() {<a name="line.336"></a>
340 <FONT color="green">337</FONT> return Math.max(0d, sumYY - sumXY * sumXY / sumXX);<a name="line.337"></a>
341 <FONT color="green">338</FONT> }<a name="line.338"></a>
342 <FONT color="green">339</FONT> <a name="line.339"></a>
343 <FONT color="green">340</FONT> /**<a name="line.340"></a>
344 <FONT color="green">341</FONT> * Returns the sum of squared deviations of the y values about their mean.<a name="line.341"></a>
345 <FONT color="green">342</FONT> * &lt;p&gt;<a name="line.342"></a>
346 <FONT color="green">343</FONT> * This is defined as SSTO<a name="line.343"></a>
347 <FONT color="green">344</FONT> * &lt;a href="http://www.xycoon.com/SumOfSquares.htm"&gt;here&lt;/a&gt;.&lt;/p&gt;<a name="line.344"></a>
348 <FONT color="green">345</FONT> * &lt;p&gt;<a name="line.345"></a>
349 <FONT color="green">346</FONT> * If &lt;code&gt;n &lt; 2&lt;/code&gt;, this returns &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.346"></a>
350 <FONT color="green">347</FONT> *<a name="line.347"></a>
351 <FONT color="green">348</FONT> * @return sum of squared deviations of y values<a name="line.348"></a>
352 <FONT color="green">349</FONT> */<a name="line.349"></a>
353 <FONT color="green">350</FONT> public double getTotalSumSquares() {<a name="line.350"></a>
354 <FONT color="green">351</FONT> if (n &lt; 2) {<a name="line.351"></a>
355 <FONT color="green">352</FONT> return Double.NaN;<a name="line.352"></a>
356 <FONT color="green">353</FONT> }<a name="line.353"></a>
357 <FONT color="green">354</FONT> return sumYY;<a name="line.354"></a>
358 <FONT color="green">355</FONT> }<a name="line.355"></a>
359 <FONT color="green">356</FONT> <a name="line.356"></a>
360 <FONT color="green">357</FONT> /**<a name="line.357"></a>
361 <FONT color="green">358</FONT> * Returns the sum of squared deviations of the x values about their mean.<a name="line.358"></a>
362 <FONT color="green">359</FONT> * &lt;p&gt;<a name="line.359"></a>
363 <FONT color="green">360</FONT> * If &lt;code&gt;n &lt; 2&lt;/code&gt;, this returns &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.360"></a>
364 <FONT color="green">361</FONT> *<a name="line.361"></a>
365 <FONT color="green">362</FONT> * @return sum of squared deviations of x values<a name="line.362"></a>
366 <FONT color="green">363</FONT> */<a name="line.363"></a>
367 <FONT color="green">364</FONT> public double getXSumSquares() {<a name="line.364"></a>
368 <FONT color="green">365</FONT> if (n &lt; 2) {<a name="line.365"></a>
369 <FONT color="green">366</FONT> return Double.NaN;<a name="line.366"></a>
370 <FONT color="green">367</FONT> }<a name="line.367"></a>
371 <FONT color="green">368</FONT> return sumXX;<a name="line.368"></a>
372 <FONT color="green">369</FONT> }<a name="line.369"></a>
373 <FONT color="green">370</FONT> <a name="line.370"></a>
374 <FONT color="green">371</FONT> /**<a name="line.371"></a>
375 <FONT color="green">372</FONT> * Returns the sum of crossproducts, x&lt;sub&gt;i&lt;/sub&gt;*y&lt;sub&gt;i&lt;/sub&gt;.<a name="line.372"></a>
376 <FONT color="green">373</FONT> *<a name="line.373"></a>
377 <FONT color="green">374</FONT> * @return sum of cross products<a name="line.374"></a>
378 <FONT color="green">375</FONT> */<a name="line.375"></a>
379 <FONT color="green">376</FONT> public double getSumOfCrossProducts() {<a name="line.376"></a>
380 <FONT color="green">377</FONT> return sumXY;<a name="line.377"></a>
381 <FONT color="green">378</FONT> }<a name="line.378"></a>
382 <FONT color="green">379</FONT> <a name="line.379"></a>
383 <FONT color="green">380</FONT> /**<a name="line.380"></a>
384 <FONT color="green">381</FONT> * Returns the sum of squared deviations of the predicted y values about<a name="line.381"></a>
385 <FONT color="green">382</FONT> * their mean (which equals the mean of y).<a name="line.382"></a>
386 <FONT color="green">383</FONT> * &lt;p&gt;<a name="line.383"></a>
387 <FONT color="green">384</FONT> * This is usually abbreviated SSR or SSM. It is defined as SSM<a name="line.384"></a>
388 <FONT color="green">385</FONT> * &lt;a href="http://www.xycoon.com/SumOfSquares.htm"&gt;here&lt;/a&gt;.&lt;/p&gt;<a name="line.385"></a>
389 <FONT color="green">386</FONT> * &lt;p&gt;<a name="line.386"></a>
390 <FONT color="green">387</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.387"></a>
391 <FONT color="green">388</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.388"></a>
392 <FONT color="green">389</FONT> * must have been added before invoking this method. If this method is<a name="line.389"></a>
393 <FONT color="green">390</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.390"></a>
394 <FONT color="green">391</FONT> * returned.<a name="line.391"></a>
395 <FONT color="green">392</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.392"></a>
396 <FONT color="green">393</FONT> *<a name="line.393"></a>
397 <FONT color="green">394</FONT> * @return sum of squared deviations of predicted y values<a name="line.394"></a>
398 <FONT color="green">395</FONT> */<a name="line.395"></a>
399 <FONT color="green">396</FONT> public double getRegressionSumSquares() {<a name="line.396"></a>
400 <FONT color="green">397</FONT> return getRegressionSumSquares(getSlope());<a name="line.397"></a>
401 <FONT color="green">398</FONT> }<a name="line.398"></a>
402 <FONT color="green">399</FONT> <a name="line.399"></a>
403 <FONT color="green">400</FONT> /**<a name="line.400"></a>
404 <FONT color="green">401</FONT> * Returns the sum of squared errors divided by the degrees of freedom,<a name="line.401"></a>
405 <FONT color="green">402</FONT> * usually abbreviated MSE.<a name="line.402"></a>
406 <FONT color="green">403</FONT> * &lt;p&gt;<a name="line.403"></a>
407 <FONT color="green">404</FONT> * If there are fewer than &lt;strong&gt;three&lt;/strong&gt; data pairs in the model,<a name="line.404"></a>
408 <FONT color="green">405</FONT> * or if there is no variation in &lt;code&gt;x&lt;/code&gt;, this returns<a name="line.405"></a>
409 <FONT color="green">406</FONT> * &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.406"></a>
410 <FONT color="green">407</FONT> *<a name="line.407"></a>
411 <FONT color="green">408</FONT> * @return mean square error (sum of squared errors divided by the degrees of freedom)<a name="line.408"></a>
412 <FONT color="green">409</FONT> */<a name="line.409"></a>
413 <FONT color="green">410</FONT> public double getMeanSquareError() {<a name="line.410"></a>
414 <FONT color="green">411</FONT> if (n &lt; 3) {<a name="line.411"></a>
415 <FONT color="green">412</FONT> return Double.NaN;<a name="line.412"></a>
416 <FONT color="green">413</FONT> }<a name="line.413"></a>
417 <FONT color="green">414</FONT> return getSumSquaredErrors() / (n - 2);<a name="line.414"></a>
418 <FONT color="green">415</FONT> }<a name="line.415"></a>
419 <FONT color="green">416</FONT> <a name="line.416"></a>
420 <FONT color="green">417</FONT> /**<a name="line.417"></a>
421 <FONT color="green">418</FONT> * Returns &lt;a href="http://mathworld.wolfram.com/CorrelationCoefficient.html"&gt;<a name="line.418"></a>
422 <FONT color="green">419</FONT> * Pearson's product moment correlation coefficient&lt;/a&gt;,<a name="line.419"></a>
423 <FONT color="green">420</FONT> * usually denoted r.<a name="line.420"></a>
424 <FONT color="green">421</FONT> * &lt;p&gt;<a name="line.421"></a>
425 <FONT color="green">422</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.422"></a>
426 <FONT color="green">423</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.423"></a>
427 <FONT color="green">424</FONT> * must have been added before invoking this method. If this method is<a name="line.424"></a>
428 <FONT color="green">425</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.425"></a>
429 <FONT color="green">426</FONT> * returned.<a name="line.426"></a>
430 <FONT color="green">427</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.427"></a>
431 <FONT color="green">428</FONT> *<a name="line.428"></a>
432 <FONT color="green">429</FONT> * @return Pearson's r<a name="line.429"></a>
433 <FONT color="green">430</FONT> */<a name="line.430"></a>
434 <FONT color="green">431</FONT> public double getR() {<a name="line.431"></a>
435 <FONT color="green">432</FONT> double b1 = getSlope();<a name="line.432"></a>
436 <FONT color="green">433</FONT> double result = Math.sqrt(getRSquare());<a name="line.433"></a>
437 <FONT color="green">434</FONT> if (b1 &lt; 0) {<a name="line.434"></a>
438 <FONT color="green">435</FONT> result = -result;<a name="line.435"></a>
439 <FONT color="green">436</FONT> }<a name="line.436"></a>
440 <FONT color="green">437</FONT> return result;<a name="line.437"></a>
441 <FONT color="green">438</FONT> }<a name="line.438"></a>
442 <FONT color="green">439</FONT> <a name="line.439"></a>
443 <FONT color="green">440</FONT> /**<a name="line.440"></a>
444 <FONT color="green">441</FONT> * Returns the &lt;a href="http://www.xycoon.com/coefficient1.htm"&gt;<a name="line.441"></a>
445 <FONT color="green">442</FONT> * coefficient of determination&lt;/a&gt;,<a name="line.442"></a>
446 <FONT color="green">443</FONT> * usually denoted r-square.<a name="line.443"></a>
447 <FONT color="green">444</FONT> * &lt;p&gt;<a name="line.444"></a>
448 <FONT color="green">445</FONT> * &lt;strong&gt;Preconditions&lt;/strong&gt;: &lt;ul&gt;<a name="line.445"></a>
449 <FONT color="green">446</FONT> * &lt;li&gt;At least two observations (with at least two different x values)<a name="line.446"></a>
450 <FONT color="green">447</FONT> * must have been added before invoking this method. If this method is<a name="line.447"></a>
451 <FONT color="green">448</FONT> * invoked before a model can be estimated, &lt;code&gt;Double.NaN&lt;/code&gt; is<a name="line.448"></a>
452 <FONT color="green">449</FONT> * returned.<a name="line.449"></a>
453 <FONT color="green">450</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.450"></a>
454 <FONT color="green">451</FONT> *<a name="line.451"></a>
455 <FONT color="green">452</FONT> * @return r-square<a name="line.452"></a>
456 <FONT color="green">453</FONT> */<a name="line.453"></a>
457 <FONT color="green">454</FONT> public double getRSquare() {<a name="line.454"></a>
458 <FONT color="green">455</FONT> double ssto = getTotalSumSquares();<a name="line.455"></a>
459 <FONT color="green">456</FONT> return (ssto - getSumSquaredErrors()) / ssto;<a name="line.456"></a>
460 <FONT color="green">457</FONT> }<a name="line.457"></a>
461 <FONT color="green">458</FONT> <a name="line.458"></a>
462 <FONT color="green">459</FONT> /**<a name="line.459"></a>
463 <FONT color="green">460</FONT> * Returns the &lt;a href="http://www.xycoon.com/standarderrorb0.htm"&gt;<a name="line.460"></a>
464 <FONT color="green">461</FONT> * standard error of the intercept estimate&lt;/a&gt;,<a name="line.461"></a>
465 <FONT color="green">462</FONT> * usually denoted s(b0).<a name="line.462"></a>
466 <FONT color="green">463</FONT> * &lt;p&gt;<a name="line.463"></a>
467 <FONT color="green">464</FONT> * If there are fewer than &lt;strong&gt;three&lt;/strong&gt; observations in the<a name="line.464"></a>
468 <FONT color="green">465</FONT> * model, or if there is no variation in x, this returns<a name="line.465"></a>
469 <FONT color="green">466</FONT> * &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.466"></a>
470 <FONT color="green">467</FONT> *<a name="line.467"></a>
471 <FONT color="green">468</FONT> * @return standard error associated with intercept estimate<a name="line.468"></a>
472 <FONT color="green">469</FONT> */<a name="line.469"></a>
473 <FONT color="green">470</FONT> public double getInterceptStdErr() {<a name="line.470"></a>
474 <FONT color="green">471</FONT> return Math.sqrt(<a name="line.471"></a>
475 <FONT color="green">472</FONT> getMeanSquareError() * ((1d / (double) n) + (xbar * xbar) / sumXX));<a name="line.472"></a>
476 <FONT color="green">473</FONT> }<a name="line.473"></a>
477 <FONT color="green">474</FONT> <a name="line.474"></a>
478 <FONT color="green">475</FONT> /**<a name="line.475"></a>
479 <FONT color="green">476</FONT> * Returns the &lt;a href="http://www.xycoon.com/standerrorb(1).htm"&gt;standard<a name="line.476"></a>
480 <FONT color="green">477</FONT> * error of the slope estimate&lt;/a&gt;,<a name="line.477"></a>
481 <FONT color="green">478</FONT> * usually denoted s(b1).<a name="line.478"></a>
482 <FONT color="green">479</FONT> * &lt;p&gt;<a name="line.479"></a>
483 <FONT color="green">480</FONT> * If there are fewer than &lt;strong&gt;three&lt;/strong&gt; data pairs in the model,<a name="line.480"></a>
484 <FONT color="green">481</FONT> * or if there is no variation in x, this returns &lt;code&gt;Double.NaN&lt;/code&gt;.<a name="line.481"></a>
485 <FONT color="green">482</FONT> * &lt;/p&gt;<a name="line.482"></a>
486 <FONT color="green">483</FONT> *<a name="line.483"></a>
487 <FONT color="green">484</FONT> * @return standard error associated with slope estimate<a name="line.484"></a>
488 <FONT color="green">485</FONT> */<a name="line.485"></a>
489 <FONT color="green">486</FONT> public double getSlopeStdErr() {<a name="line.486"></a>
490 <FONT color="green">487</FONT> return Math.sqrt(getMeanSquareError() / sumXX);<a name="line.487"></a>
491 <FONT color="green">488</FONT> }<a name="line.488"></a>
492 <FONT color="green">489</FONT> <a name="line.489"></a>
493 <FONT color="green">490</FONT> /**<a name="line.490"></a>
494 <FONT color="green">491</FONT> * Returns the half-width of a 95% confidence interval for the slope<a name="line.491"></a>
495 <FONT color="green">492</FONT> * estimate.<a name="line.492"></a>
496 <FONT color="green">493</FONT> * &lt;p&gt;<a name="line.493"></a>
497 <FONT color="green">494</FONT> * The 95% confidence interval is&lt;/p&gt;<a name="line.494"></a>
498 <FONT color="green">495</FONT> * &lt;p&gt;<a name="line.495"></a>
499 <FONT color="green">496</FONT> * &lt;code&gt;(getSlope() - getSlopeConfidenceInterval(),<a name="line.496"></a>
500 <FONT color="green">497</FONT> * getSlope() + getSlopeConfidenceInterval())&lt;/code&gt;&lt;/p&gt;<a name="line.497"></a>
501 <FONT color="green">498</FONT> * &lt;p&gt;<a name="line.498"></a>
502 <FONT color="green">499</FONT> * If there are fewer than &lt;strong&gt;three&lt;/strong&gt; observations in the<a name="line.499"></a>
503 <FONT color="green">500</FONT> * model, or if there is no variation in x, this returns<a name="line.500"></a>
504 <FONT color="green">501</FONT> * &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.501"></a>
505 <FONT color="green">502</FONT> * &lt;p&gt;<a name="line.502"></a>
506 <FONT color="green">503</FONT> * &lt;strong&gt;Usage Note&lt;/strong&gt;:&lt;br&gt;<a name="line.503"></a>
507 <FONT color="green">504</FONT> * The validity of this statistic depends on the assumption that the<a name="line.504"></a>
508 <FONT color="green">505</FONT> * observations included in the model are drawn from a<a name="line.505"></a>
509 <FONT color="green">506</FONT> * &lt;a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"&gt;<a name="line.506"></a>
510 <FONT color="green">507</FONT> * Bivariate Normal Distribution&lt;/a&gt;.&lt;/p&gt;<a name="line.507"></a>
511 <FONT color="green">508</FONT> *<a name="line.508"></a>
512 <FONT color="green">509</FONT> * @return half-width of 95% confidence interval for the slope estimate<a name="line.509"></a>
513 <FONT color="green">510</FONT> * @throws MathException if the confidence interval can not be computed.<a name="line.510"></a>
514 <FONT color="green">511</FONT> */<a name="line.511"></a>
515 <FONT color="green">512</FONT> public double getSlopeConfidenceInterval() throws MathException {<a name="line.512"></a>
516 <FONT color="green">513</FONT> return getSlopeConfidenceInterval(0.05d);<a name="line.513"></a>
517 <FONT color="green">514</FONT> }<a name="line.514"></a>
518 <FONT color="green">515</FONT> <a name="line.515"></a>
519 <FONT color="green">516</FONT> /**<a name="line.516"></a>
520 <FONT color="green">517</FONT> * Returns the half-width of a (100-100*alpha)% confidence interval for<a name="line.517"></a>
521 <FONT color="green">518</FONT> * the slope estimate.<a name="line.518"></a>
522 <FONT color="green">519</FONT> * &lt;p&gt;<a name="line.519"></a>
523 <FONT color="green">520</FONT> * The (100-100*alpha)% confidence interval is &lt;/p&gt;<a name="line.520"></a>
524 <FONT color="green">521</FONT> * &lt;p&gt;<a name="line.521"></a>
525 <FONT color="green">522</FONT> * &lt;code&gt;(getSlope() - getSlopeConfidenceInterval(),<a name="line.522"></a>
526 <FONT color="green">523</FONT> * getSlope() + getSlopeConfidenceInterval())&lt;/code&gt;&lt;/p&gt;<a name="line.523"></a>
527 <FONT color="green">524</FONT> * &lt;p&gt;<a name="line.524"></a>
528 <FONT color="green">525</FONT> * To request, for example, a 99% confidence interval, use<a name="line.525"></a>
529 <FONT color="green">526</FONT> * &lt;code&gt;alpha = .01&lt;/code&gt;&lt;/p&gt;<a name="line.526"></a>
530 <FONT color="green">527</FONT> * &lt;p&gt;<a name="line.527"></a>
531 <FONT color="green">528</FONT> * &lt;strong&gt;Usage Note&lt;/strong&gt;:&lt;br&gt;<a name="line.528"></a>
532 <FONT color="green">529</FONT> * The validity of this statistic depends on the assumption that the<a name="line.529"></a>
533 <FONT color="green">530</FONT> * observations included in the model are drawn from a<a name="line.530"></a>
534 <FONT color="green">531</FONT> * &lt;a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"&gt;<a name="line.531"></a>
535 <FONT color="green">532</FONT> * Bivariate Normal Distribution&lt;/a&gt;.&lt;/p&gt;<a name="line.532"></a>
536 <FONT color="green">533</FONT> * &lt;p&gt;<a name="line.533"></a>
537 <FONT color="green">534</FONT> * &lt;strong&gt; Preconditions:&lt;/strong&gt;&lt;ul&gt;<a name="line.534"></a>
538 <FONT color="green">535</FONT> * &lt;li&gt;If there are fewer than &lt;strong&gt;three&lt;/strong&gt; observations in the<a name="line.535"></a>
539 <FONT color="green">536</FONT> * model, or if there is no variation in x, this returns<a name="line.536"></a>
540 <FONT color="green">537</FONT> * &lt;code&gt;Double.NaN&lt;/code&gt;.<a name="line.537"></a>
541 <FONT color="green">538</FONT> * &lt;/li&gt;<a name="line.538"></a>
542 <FONT color="green">539</FONT> * &lt;li&gt;&lt;code&gt;(0 &lt; alpha &lt; 1)&lt;/code&gt;; otherwise an<a name="line.539"></a>
543 <FONT color="green">540</FONT> * &lt;code&gt;IllegalArgumentException&lt;/code&gt; is thrown.<a name="line.540"></a>
544 <FONT color="green">541</FONT> * &lt;/li&gt;&lt;/ul&gt;&lt;/p&gt;<a name="line.541"></a>
545 <FONT color="green">542</FONT> *<a name="line.542"></a>
546 <FONT color="green">543</FONT> * @param alpha the desired significance level<a name="line.543"></a>
547 <FONT color="green">544</FONT> * @return half-width of (100-100*alpha)% confidence interval for the slope estimate<a name="line.544"></a>
548 <FONT color="green">545</FONT> * @throws MathException if the confidence interval can not be computed.<a name="line.545"></a>
549 <FONT color="green">546</FONT> */<a name="line.546"></a>
550 <FONT color="green">547</FONT> public double getSlopeConfidenceInterval(double alpha)<a name="line.547"></a>
551 <FONT color="green">548</FONT> throws MathException {<a name="line.548"></a>
552 <FONT color="green">549</FONT> if (alpha &gt;= 1 || alpha &lt;= 0) {<a name="line.549"></a>
553 <FONT color="green">550</FONT> throw MathRuntimeException.createIllegalArgumentException(<a name="line.550"></a>
554 <FONT color="green">551</FONT> "out of bounds significance level {0}, must be between {1} and {2}",<a name="line.551"></a>
555 <FONT color="green">552</FONT> alpha, 0.0, 1.0);<a name="line.552"></a>
556 <FONT color="green">553</FONT> }<a name="line.553"></a>
557 <FONT color="green">554</FONT> return getSlopeStdErr() *<a name="line.554"></a>
558 <FONT color="green">555</FONT> distribution.inverseCumulativeProbability(1d - alpha / 2d);<a name="line.555"></a>
559 <FONT color="green">556</FONT> }<a name="line.556"></a>
560 <FONT color="green">557</FONT> <a name="line.557"></a>
561 <FONT color="green">558</FONT> /**<a name="line.558"></a>
562 <FONT color="green">559</FONT> * Returns the significance level of the slope (equivalently, of the correlation).<a name="line.559"></a>
563 <FONT color="green">560</FONT> * &lt;p&gt;<a name="line.560"></a>
564 <FONT color="green">561</FONT> * Specifically, the returned value is the smallest &lt;code&gt;alpha&lt;/code&gt;<a name="line.561"></a>
565 <FONT color="green">562</FONT> * such that the slope confidence interval with significance level<a name="line.562"></a>
566 <FONT color="green">563</FONT> * equal to &lt;code&gt;alpha&lt;/code&gt; does not include &lt;code&gt;0&lt;/code&gt;.<a name="line.563"></a>
567 <FONT color="green">564</FONT> * On regression output, this is often denoted &lt;code&gt;Prob(|t| &gt; 0)&lt;/code&gt;<a name="line.564"></a>
568 <FONT color="green">565</FONT> * &lt;/p&gt;&lt;p&gt;<a name="line.565"></a>
569 <FONT color="green">566</FONT> * &lt;strong&gt;Usage Note&lt;/strong&gt;:&lt;br&gt;<a name="line.566"></a>
570 <FONT color="green">567</FONT> * The validity of this statistic depends on the assumption that the<a name="line.567"></a>
571 <FONT color="green">568</FONT> * observations included in the model are drawn from a<a name="line.568"></a>
572 <FONT color="green">569</FONT> * &lt;a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"&gt;<a name="line.569"></a>
573 <FONT color="green">570</FONT> * Bivariate Normal Distribution&lt;/a&gt;.&lt;/p&gt;<a name="line.570"></a>
574 <FONT color="green">571</FONT> * &lt;p&gt;<a name="line.571"></a>
575 <FONT color="green">572</FONT> * If there are fewer than &lt;strong&gt;three&lt;/strong&gt; observations in the<a name="line.572"></a>
576 <FONT color="green">573</FONT> * model, or if there is no variation in x, this returns<a name="line.573"></a>
577 <FONT color="green">574</FONT> * &lt;code&gt;Double.NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.574"></a>
578 <FONT color="green">575</FONT> *<a name="line.575"></a>
579 <FONT color="green">576</FONT> * @return significance level for slope/correlation<a name="line.576"></a>
580 <FONT color="green">577</FONT> * @throws MathException if the significance level can not be computed.<a name="line.577"></a>
581 <FONT color="green">578</FONT> */<a name="line.578"></a>
582 <FONT color="green">579</FONT> public double getSignificance() throws MathException {<a name="line.579"></a>
583 <FONT color="green">580</FONT> return 2d * (1.0 - distribution.cumulativeProbability(<a name="line.580"></a>
584 <FONT color="green">581</FONT> Math.abs(getSlope()) / getSlopeStdErr()));<a name="line.581"></a>
585 <FONT color="green">582</FONT> }<a name="line.582"></a>
586 <FONT color="green">583</FONT> <a name="line.583"></a>
587 <FONT color="green">584</FONT> // ---------------------Private methods-----------------------------------<a name="line.584"></a>
588 <FONT color="green">585</FONT> <a name="line.585"></a>
589 <FONT color="green">586</FONT> /**<a name="line.586"></a>
590 <FONT color="green">587</FONT> * Returns the intercept of the estimated regression line, given the slope.<a name="line.587"></a>
591 <FONT color="green">588</FONT> * &lt;p&gt;<a name="line.588"></a>
592 <FONT color="green">589</FONT> * Will return &lt;code&gt;NaN&lt;/code&gt; if slope is &lt;code&gt;NaN&lt;/code&gt;.&lt;/p&gt;<a name="line.589"></a>
593 <FONT color="green">590</FONT> *<a name="line.590"></a>
594 <FONT color="green">591</FONT> * @param slope current slope<a name="line.591"></a>
595 <FONT color="green">592</FONT> * @return the intercept of the regression line<a name="line.592"></a>
596 <FONT color="green">593</FONT> */<a name="line.593"></a>
597 <FONT color="green">594</FONT> private double getIntercept(double slope) {<a name="line.594"></a>
598 <FONT color="green">595</FONT> return (sumY - slope * sumX) / n;<a name="line.595"></a>
599 <FONT color="green">596</FONT> }<a name="line.596"></a>
600 <FONT color="green">597</FONT> <a name="line.597"></a>
601 <FONT color="green">598</FONT> /**<a name="line.598"></a>
602 <FONT color="green">599</FONT> * Computes SSR from b1.<a name="line.599"></a>
603 <FONT color="green">600</FONT> *<a name="line.600"></a>
604 <FONT color="green">601</FONT> * @param slope regression slope estimate<a name="line.601"></a>
605 <FONT color="green">602</FONT> * @return sum of squared deviations of predicted y values<a name="line.602"></a>
606 <FONT color="green">603</FONT> */<a name="line.603"></a>
607 <FONT color="green">604</FONT> private double getRegressionSumSquares(double slope) {<a name="line.604"></a>
608 <FONT color="green">605</FONT> return slope * slope * sumXX;<a name="line.605"></a>
609 <FONT color="green">606</FONT> }<a name="line.606"></a>
610 <FONT color="green">607</FONT> <a name="line.607"></a>
611 <FONT color="green">608</FONT> /**<a name="line.608"></a>
612 <FONT color="green">609</FONT> * Modify the distribution used to compute inference statistics.<a name="line.609"></a>
613 <FONT color="green">610</FONT> * @param value the new distribution<a name="line.610"></a>
614 <FONT color="green">611</FONT> * @since 1.2<a name="line.611"></a>
615 <FONT color="green">612</FONT> */<a name="line.612"></a>
616 <FONT color="green">613</FONT> public void setDistribution(TDistribution value) {<a name="line.613"></a>
617 <FONT color="green">614</FONT> distribution = value;<a name="line.614"></a>
618 <FONT color="green">615</FONT> <a name="line.615"></a>
619 <FONT color="green">616</FONT> // modify degrees of freedom<a name="line.616"></a>
620 <FONT color="green">617</FONT> if (n &gt; 2) {<a name="line.617"></a>
621 <FONT color="green">618</FONT> distribution.setDegreesOfFreedom(n - 2);<a name="line.618"></a>
622 <FONT color="green">619</FONT> }<a name="line.619"></a>
623 <FONT color="green">620</FONT> }<a name="line.620"></a>
624 <FONT color="green">621</FONT> }<a name="line.621"></a>
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685 </PRE>
686 </BODY>
687 </HTML>