# HG changeset patch # User dwinter # Date 1294131653 -3600 # Node ID 5f2c5fb36e9314ed187b1a5ef351b2abbb6e28ac # Parent e63a64652f4d999d031452a5e09007895483c47e commons-math-2.1 added diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/TrajectoryDeterminationProblem.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/TrajectoryDeterminationProblem.java Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.commons.math.optimization.general.EstimationException; +import org.apache.commons.math.optimization.general.EstimatedParameter; +import org.apache.commons.math.optimization.general.EstimationProblem; +import org.apache.commons.math.optimization.general.LevenbergMarquardtEstimator; +import org.apache.commons.math.optimization.general.SimpleEstimationProblem; +import org.apache.commons.math.optimization.general.WeightedMeasurement; + +public class TrajectoryDeterminationProblem extends SimpleEstimationProblem { + + public static void main(String[] args) { + try { + TrajectoryDeterminationProblem problem = + new TrajectoryDeterminationProblem(0.0, 100.0, 800.0, 1.0, 0.0); + + double[][] distances = { + { 0.0, 806.5849 }, { 20.0, 796.8148 }, { 40.0, 791.0833 }, { 60.0, 789.6712 }, + { 80.0, 793.1334 }, { 100.0, 797.7248 }, { 120.0, 803.2785 }, { 140.0, 813.4939 }, + { 160.0, 826.9295 }, { 180.0, 844.0640 }, { 200.0, 863.3829 }, { 220.0, 883.3143 }, + { 240.0, 908.6867 }, { 260.0, 934.8561 }, { 280.0, 964.0730 }, { 300.0, 992.1033 }, + { 320.0, 1023.998 }, { 340.0, 1057.439 }, { 360.0, 1091.912 }, { 380.0, 1125.968 }, + { 400.0, 1162.789 }, { 420.0, 1201.517 }, { 440.0, 1239.176 }, { 460.0, 1279.347 } }; + for (int i = 0; i < distances.length; ++i) { + problem.addDistanceMeasurement(1.0, distances[i][0], distances[i][1]); + }; + + double[][] angles = { + { 10.0, 1.415423 }, { 30.0, 1.352643 }, { 50.0, 1.289290 }, { 70.0, 1.225249 }, + { 90.0, 1.161203 }, {110.0, 1.098538 }, {130.0, 1.036263 }, {150.0, 0.976052 }, + {170.0, 0.917921 }, {190.0, 0.861830 }, {210.0, 0.808237 }, {230.0, 0.757043 }, + {250.0, 0.708650 }, {270.0, 0.662949 }, {290.0, 0.619903 }, {310.0, 0.579160 }, + {330.0, 0.541033 }, {350.0, 0.505590 }, {370.0, 0.471746 }, {390.0, 0.440155 }, + {410.0, 0.410522 }, {430.0, 0.382701 }, {450.0, 0.356957 }, {470.0, 0.332400 } }; + for (int i = 0; i < angles.length; ++i) { + problem.addAngularMeasurement(3.0e7, angles[i][0], angles[i][1]); + }; + + LevenbergMarquardtEstimator estimator = new LevenbergMarquardtEstimator(); + estimator.estimate(problem); + System.out.println("initial position: " + problem.getX0() + " " + problem.getY0()); + System.out.println("velocity: " + problem.getVx0() + " " + problem.getVy0()); + + } catch (EstimationException ee) { + System.err.println(ee.getMessage()); + } + } + + public TrajectoryDeterminationProblem(double t0, + double x0Guess, double y0Guess, + double vx0Guess, double vy0Guess) { + this.t0 = t0; + x0 = new EstimatedParameter( "x0", x0Guess); + y0 = new EstimatedParameter( "y0", y0Guess); + vx0 = new EstimatedParameter("vx0", vx0Guess); + vy0 = new EstimatedParameter("vy0", vy0Guess); + + // inform the base class about the parameters + addParameter(x0); + addParameter(y0); + addParameter(vx0); + addParameter(vy0); + + } + + public double getX0() { + return x0.getEstimate(); + } + + public double getY0() { + return y0.getEstimate(); + } + + public double getVx0() { + return vx0.getEstimate(); + } + + public double getVy0() { + return vy0.getEstimate(); + } + + public void addAngularMeasurement(double wi, double ti, double ai) { + // let the base class handle the measurement + addMeasurement(new AngularMeasurement(wi, ti, ai)); + } + + public void addDistanceMeasurement(double wi, double ti, double di) { + // let the base class handle the measurement + addMeasurement(new DistanceMeasurement(wi, ti, di)); + } + + public double x(double t) { + return x0.getEstimate() + (t - t0) * vx0.getEstimate(); + } + + public double y(double t) { + return y0.getEstimate() + (t - t0) * vy0.getEstimate(); + } + + private class AngularMeasurement extends WeightedMeasurement { + + public AngularMeasurement(double weight, double t, double angle) { + super(weight, angle); + this.t = t; + } + + public double getTheoreticalValue() { + return Math.atan2(y(t), x(t)); + } + + public double getPartial(EstimatedParameter parameter) { + double xt = x(t); + double yt = y(t); + double r = Math.sqrt(xt * xt + yt * yt); + double u = yt / (r + xt); + double c = 2 * u / (1 + u * u); + if (parameter == x0) { + return -c; + } else if (parameter == vx0) { + return -c * t; + } else if (parameter == y0) { + return c * xt / yt; + } else { + return c * t * xt / yt; + } + } + + private final double t; + private static final long serialVersionUID = -5990040582592763282L; + + } + + private class DistanceMeasurement extends WeightedMeasurement { + + public DistanceMeasurement(double weight, double t, double angle) { + super(weight, angle); + this.t = t; + } + + public double getTheoreticalValue() { + double xt = x(t); + double yt = y(t); + return Math.sqrt(xt * xt + yt * yt); + } + + public double getPartial(EstimatedParameter parameter) { + double xt = x(t); + double yt = y(t); + double r = Math.sqrt(xt * xt + yt * yt); + if (parameter == x0) { + return xt / r; + } else if (parameter == vx0) { + return xt * t / r; + } else if (parameter == y0) { + return yt / r; + } else { + return yt * t / r; + } + } + + private final double t; + private static final long serialVersionUID = 3257286197740459503L; + + } + + private double t0; + private EstimatedParameter x0; + private EstimatedParameter y0; + private EstimatedParameter vx0; + private EstimatedParameter vy0; + +} diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/analysis.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/analysis.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,519 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Numerical Analysis + + + + + + + +
+ +
+
+
+

4 Numerical Analysis

+

4.1 Overview

+

+ The analysis package is the parent package for algorithms dealing with + real-valued functions of one real variable. It contains dedicated sub-packages + providing numerical root-finding, integration, and interpolation. It also + contains a polynomials sub-package that considers polynomials with real + coefficients as differentiable real functions. +

+

+ Functions interfaces are intended to be implemented by user code to represent + their domain problems. The algorithms provided by the library will then operate + on these function to find their roots, or integrate them, or ... Functions can + be multivariate or univariate, real vectorial or matrix valued, and they can be + differentiable or not. +

+

+ Possible future additions may include numerical differentiation. +

+
+

4.2 Root-finding

+

+ A + org.apache.commons.math.analysis.solvers.UnivariateRealSolver. + provides the means to find roots of univariate real-valued functions. + A root is the value where the function takes the value 0. Commons-Math + includes implementations of the following root-finding algorithms:

+

+

+ There are numerous non-obvious traps and pitfalls in root finding. + First, the usual disclaimers due to the way real world computers + calculate values apply. If the computation of the function provides + numerical instabilities, for example due to bit cancellation, the root + finding algorithms may behave badly and fail to converge or even + return bogus values. There will not necessarily be an indication that + the computed root is way off the true value. Secondly, the root finding + problem itself may be inherently ill-conditioned. There is a + "domain of indeterminacy", the interval for which the function has + near zero absolute values around the true root, which may be large. + Even worse, small problems like roundoff error may cause the function + value to "numerically oscillate" between negative and positive values. + This may again result in roots way off the true value, without + indication. There is not much a generic algorithm can do if + ill-conditioned problems are met. A way around this is to transform + the problem in order to get a better conditioned function. Proper + selection of a root-finding algorithm and its configuration parameters + requires knowledge of the analytical properties of the function under + analysis and numerical analysis techniques. Users are encouraged + to consult a numerical analysis text (or a numerical analyst) when + selecting and configuring a solver. +

+

+ In order to use the root-finding features, first a solver object must + be created. It is encouraged that all solver object creation occurs + via the org.apache.commons.math.analysis.solvers.UnivariateRealSolverFactory + class. UnivariateRealSolverFactory is a simple factory + used to create all of the solver objects supported by Commons-Math. + The typical usage of UnivariateRealSolverFactory + to create a solver object would be:

+
UnivariateRealSolverFactory factory = UnivariateRealSolverFactory.newInstance();
+UnivariateRealSolver solver = factory.newDefaultSolver();
+
+

+ The solvers that can be instantiated via the + UnivariateRealSolverFactory are detailed below: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SolverFactory MethodNotes on Use
BisectionnewBisectionSolver
Root must be bracketted.
Linear, guaranteed convergence
BrentnewBrentSolver
Root must be bracketted.
Super-linear, guaranteed convergence
NewtonnewNewtonSolver
Uses single value for initialization.
Super-linear, non-guaranteed convergence
Function must be differentiable
SecantnewSecantSolver
Root must be bracketted.
Super-linear, non-guaranteed convergence
MullernewMullerSolver
Root must be bracketted.
We restrict ourselves to real valued functions, not complex ones
LaguerrenewLaguerreSolver
Root must be bracketted.
Function must be a polynomial
RiddernewRidderSolver
Root must be bracketted.
+

+

+ Using a solver object, roots of functions are easily found using the solve + methods. For a function f, and two domain values, min and + max, solve computes a value c such that: +

  • f(c) = 0.0 (see "function value accuracy")
  • +
  • min <= c <= max
  • +
+

+

+ Typical usage: +

+
UnivariateRealFunction function = // some user defined function object
+UnivariateRealSolverFactory factory = UnivariateRealSolverFactory.newInstance();
+UnivariateRealSolver solver = factory.newBisectionSolver();
+double c = solver.solve(function, 1.0, 5.0);
+
+

+ The BrentSolve uses the Brent-Dekker algorithm which is + fast and robust. This algorithm is recommended for most users and the + BrentSolver is the default solver provided by the + UnivariateRealSolverFactory. If there are multiple roots + in the interval, or there is a large domain of indeterminacy, the + algorithm will converge to a random root in the interval without + indication that there are problems. Interestingly, the examined text + book implementations all disagree in details of the convergence + criteria. Also each implementation had problems for one of the test + cases, so the expressions had to be fudged further. Don't expect to + get exactly the same root values as for other implementations of this + algorithm. +

+

+ The SecantSolver uses a variant of the well known secant + algorithm. It may be a bit faster than the Brent solver for a class + of well-behaved functions. +

+

+ The BisectionSolver is included for completeness and for + establishing a fall back in cases of emergency. The algorithm is + simple, most likely bug free and guaranteed to converge even in very + adverse circumstances which might cause other algorithms to + malfunction. The drawback is of course that it is also guaranteed + to be slow. +

+

+ The UnivariateRealSolver interface exposes many + properties to control the convergence of a solver. For the most part, + these properties should not have to change from their default values + to produce good results. In the circumstances where changing these + property values is needed, it is easily done through getter and setter + methods on UnivariateRealSolver: + + + + + + + + + + + + + + + + + + + + +
PropertyMethodsPurpose
Absolute accuracy
getAbsoluteAccuracy
resetAbsoluteAccuracy
setAbsoluteAccuracy
+ The Absolute Accuracy is (estimated) maximal difference between + the computed root and the true root of the function. This is + what most people think of as "accuracy" intuitively. The default + value is chosen as a sane value for most real world problems, + for roots in the range from -100 to +100. For accurate + computation of roots near zero, in the range form -0.0001 to + +0.0001, the value may be decreased. For computing roots + much larger in absolute value than 100, the default absolute + accuracy may never be reached because the given relative + accuracy is reached first. +
Relative accuracy
getRelativeAccuracy
resetRelativeAccuracy
setRelativeAccuracy
+ The Relative Accuracy is the maximal difference between the + computed root and the true root, divided by the maximum of the + absolute values of the numbers. This accuracy measurement is + better suited for numerical calculations with computers, due to + the way floating point numbers are represented. The default + value is chosen so that algorithms will get a result even for + roots with large absolute values, even while it may be + impossible to reach the given absolute accuracy. +
Function value accuracy
getFunctionValueAccuracy
resetFunctionValueAccuracy
setFunctionValueAccuracy
+ This value is used by some algorithms in order to prevent + numerical instabilities. If the function is evaluated to an + absolute value smaller than the Function Value Accuracy, the + algorithms assume they hit a root and return the value + immediately. The default value is a "very small value". If the + goal is to get a near zero function value rather than an accurate + root, computation may be sped up by setting this value + appropriately. +
Maximum iteration count
getMaximumIterationCount
resetMaximumIterationCount
setMaximumIterationCount
+ This is the maximal number of iterations the algorithm will try. + If this number is exceeded, non-convergence is assumed and a + ConvergenceException exception is thrown. The + default value is 100, which should be plenty, given that a + bisection algorithm can't get any more accurate after 52 + iterations because of the number of mantissa bits in a double + precision floating point number. If a number of ill-conditioned + problems is to be solved, this number can be decreased in order + to avoid wasting time. +
+

+
+

4.3 Interpolation

+

+ A + org.apache.commons.math.analysis.interpolation.UnivariateRealInterpolator + is used to find a univariate real-valued function f which + for a given set of ordered pairs + (xi,yi) yields + f(xi)=yi to the best accuracy possible. The result + is provided as an object implementing the + org.apache.commons.math.analysis.UnivariateRealFunction interface. It can therefore + be evaluated at any point, including point not belonging to the original set. + Currently, only an interpolator for generating natural cubic splines and a polynomial + interpolator are available. There is no interpolator factory, mainly because the + interpolation algorithm is more determined by the kind of the interpolated function + rather than the set of points to interpolate. + There aren't currently any accuracy controls either, as interpolation + accuracy is in general determined by the algorithm. +

+

Typical usage:

+
double x[] = { 0.0, 1.0, 2.0 };
+double y[] = { 1.0, -1.0, 2.0);
+UnivariateRealInterpolator interpolator = new SplineInterpolator();
+UnivariateRealFunction function = interpolator.interpolate(x, y);
+double interpolationX = 0.5;
+double interpolatedY = function.evaluate(x);
+System.out println("f(" + interpolationX + ") = " + interpolatedY);
+
+

+ A natural cubic spline is a function consisting of a polynomial of + third degree for each subinterval determined by the x-coordinates of the + interpolated points. A function interpolating N + value pairs consists of N-1 polynomials. The function + is continuous, smooth and can be differentiated twice. The second + derivative is continuous but not smooth. The x values passed to the + interpolator must be ordered in ascending order. It is not valid to + evaluate the function for values outside the range + x0..xN. +

+

+ The polynomial function returned by the Neville's algorithm is a single + polynomial guaranteed to pass exactly through the interpolation points. + The degree of the polynomial is the number of points minus 1 (i.e. the + interpolation polynomial for a three points set will be a quadratic + polynomial). Despite the fact the interpolating polynomials is a perfect + approximation of a function at interpolation points, it may be a loose + approximation between the points. Due to Runge's phenomenom + the error can get worse as the degree of the polynomial increases, so + adding more points does not always lead to a better interpolation. +

+

+ Loess (or Lowess) interpolation is a robust interpolation useful for + smoothing univariate scaterplots. It has been described by William + Cleveland in his 1979 seminal paper Robust + Locally Weighted Regression and Smoothing Scatterplots. This kind of + interpolation is computationally intensive but robust. +

+

+ Microsphere interpolation is a robust multidimensional interpolation algorithm. + It has been described in William Dudziak's MS thesis. +

+

+ A + org.apache.commons.math.analysis.interpolation.BivariateRealGridInterpolator + is used to find a bivariate real-valued function f which + for a given set of tuples + (xi,yj,zij) + yields f(xi,yj)=zij to the best accuracy + possible. The result is provided as an object implementing the + + org.apache.commons.math.analysis.BivariateRealFunction interface. It can therefore + be evaluated at any point, including a point not belonging to the original set. + The array xi and yj must be sorted in + increasing order in order to define a two-dimensional regular grid. +

+

+ In + bicubic interpolation, the interpolation function is a 3rd-degree polynomial of two + variables. The coefficients are computed from the function values sampled on a regular grid, + as well as the values of the partial derivatives of the function at those grid points. +

+

+ From two-dimensional data sampled on a regular grid, the + + org.apache.commons.math.analysis.interpolation.SmoothingBicubicSplineInterpolator + computes a + bicubic interpolating function. The data is first smoothed, along each grid dimension, + using one-dimensional splines. +

+
+

4.4 Integration

+

+ A + org.apache.commons.math.analysis.integration.UnivariateRealIntegrator. + provides the means to numerically integrate univariate real-valued functions. + Commons-Math includes implementations of the following integration algorithms:

+

+
+

4.5 Polynomials

+

+ The + org.apache.commons.math.analysis.polynomials package provides real coefficients + polynomials. +

+

+ The + org.apache.commons.math.analysis.polynomials.PolynomialFunction class is the most general + one, using traditional coefficients arrays. The + org.apache.commons.math.analysis.polynomials.PolynomialsUtils utility class provides static + factory methods to build Chebyshev, Hermite, Lagrange and Legendre polynomials. Coefficients + are computed using exact fractions so these factory methods can build polynomials up to any degree. +

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/complex.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/complex.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Complex Numbers + + + + + + + +
+ +
+
+
+

7 Complex Numbers

+

7.1 Overview

+

+ The complex packages provides a complex number type as well as complex + versions of common transcendental functions and complex number + formatting. +

+
+

7.2 Complex Numbers

+

+ org.apache.commons.math.complex.Complex provides a complex number + type that forms the basis for the complex functionality found in + commons-math. +

+

+ Complex functions and arithmetic operations are implemented in + commons-math by applying standard computational formulas and + following the rules for java.lang.Double arithmetic in + handling infinite and NaN values. No attempt is made + to comply with ANSII/IEC C99x Annex G or any other standard for + Complex arithmetic. See the class and method javadocs for the + + Complex and + + ComplexUtils classes for details on computing formulas. +

+

+ To create a complex number, simply call the constructor passing in two + floating-point arguments, the first being the real part of the + complex number and the second being the imaginary part: +

Complex c = new Complex(1.0, 3.0); // 1 + 3i
+
+

+

+ Complex numbers may also be created from polar representations + using the polar2Complex method in + ComplexUtils. +

+

+ The Complex class provides basic unary and binary + complex number operations. These operations provide the means to add, + subtract, multiply and divide complex numbers along with other + complex number functions similar to the real number functions found in + java.math.BigDecimal: +

Complex lhs = new Complex(1.0, 3.0);
+Complex rhs = new Complex(2.0, 5.0);
+
+Complex answer = lhs.add(rhs);       // add two complex numbers
+        answer = lhs.subtract(rhs);  // subtract two complex numbers
+        answer = lhs.abs();          // absolute value
+        answer = lhs.conjugate(rhs); // complex conjugate
+
+

+
+

7.3 Complex Transcendental Functions

+

+ org.apache.commons.math.complex.Complex also provides + implementations of serveral transcendental functions involving complex + number arguments. Prior to version 1.2, these functions were provided + by + org.apache.commons.math.complex.ComplexUtils in a way similar to the real + number functions found in java.lang.Math, but this has been + deprecated. These operations provide the means to compute the log, sine, + tangent, and other complex values : +

Complex first  = new Complex(1.0, 3.0);
+Complex second = new Complex(2.0, 5.0);
+
+Complex answer = first.log();        // natural logarithm.
+        answer = first.cos();        // cosine
+        answer = first.pow(second);  // first raised to the power of second
+
+

+
+

7.4 Complex Formatting and Parsing

+

Complex instances can be converted to and from strings + using the + org.apache.commons.math.complex.ComplexFormat class. + ComplexFormat is a java.text.Format + extension and, as such, is used like other formatting objects (e.g. + java.text.SimpleDateFormat): +

ComplexFormat format = new ComplexFormat(); // default format
+Complex c = new Complex(1.1111, 2.2222);
+String s = format.format(c); // s contains "1.11 + 2.22i"
+
+

+

+ To customize the formatting output, one or two + java.text.NumberFormat instances can be used to construct + a ComplexFormat. These number formats control the + formatting of the real and imaginary values of the complex number: +

NumberFormat nf = NumberFormat.getInstance();
+nf.setMinimumFractionDigits(3);
+nf.setMaximumFractionDigits(3);
+
+// create complex format with custom number format
+// when one number format is used, both real and
+// imaginary parts are formatted the same
+ComplexFormat cf = new ComplexFormat(nf);
+Complex c = new Complex(1.11, 2.2222);
+String s = format.format(c); // s contains "1.110 + 2.222i"
+
+NumberFormat nf2 = NumberFormat.getInstance();
+nf.setMinimumFractionDigits(1);
+nf.setMaximumFractionDigits(1);
+
+// create complex format with custom number formats
+cf = new ComplexFormat(nf, nf2);
+s = format.format(c); // s contains "1.110 + 2.2i"
+
+

+

+ Another formatting customization provided by + ComplexFormat is the text used for the imaginary + designation. By default, the imaginary notation is "i" but, it can be + manipulated using the setImaginaryCharacter method. +

+

+ Formatting inverse operation, parsing, can also be performed by + ComplexFormat. Parse a complex number from a string, + simply call the parse method: +

ComplexFormat cf = new ComplexFormat();
+Complex c = cf.parse("1.110 + 2.222i");
+
+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/distribution.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/distribution.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,232 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Statistics + + + + + + + +
+ +
+
+
+

8 Probability Distributions

+

8.1 Overview

+

+ The distributions package provide a framework for some commonly used + probability distributions. +

+
+

8.2 Distribution Framework

+

+ The distribution framework provides the means to compute probability density + function (PDF) probabilities and cumulative distribution function (CDF) + probabilities for common probability distributions. Along with the direct + computation of PDF and CDF probabilities, the framework also allows for the + computation of inverse PDF and inverse CDF values. +

+

+ Using a distribution object, PDF and CDF probabilities are easily computed + using the cumulativeProbability methods. For a distribution X, + and a domain value, x, cumulativeProbability computes + P(X <= x) (i.e. the lower tail probability of X). +

+
TDistribution t = new TDistributionImpl(29);
+double lowerTail = t.cumulativeProbability(-2.656);     // P(T <= -2.656)
+double upperTail = 1.0 - t.cumulativeProbability(2.75); // P(T >= 2.75)
+
+

+ The inverse PDF and CDF values are just as easily computed using the + inverseCumulativeProbability methods. For a distribution X, + and a probability, p, inverseCumulativeProbability + computes the domain value x, such that: +

  • P(X <= x) = p, for continuous distributions
  • +
  • P(X <= x) <= p, for discrete distributions
  • +
+ + Notice the different cases for continuous and discrete distributions. This is the result + of PDFs not being invertible functions. As such, for discrete distributions, an exact + domain value can not be returned. Only the "best" domain value. For Commons-Math, the "best" + domain value is determined by the largest domain value whose cumulative probability is + less-than or equal to the given probability. +

+
+

8.3 User Defined Distributions

+

+ Since there are numerous distributions and Commons-Math only directly supports a handful, + it may be necessary to extend the distribution framework to satisfy individual needs. It + is recommended that the Distribution, ContinuousDistribution, + DiscreteDistribution, and IntegerDistribution interfaces serve as + base types for any extension. These serve as the basis for all the distributions directly + supported by Commons-Math and using those interfaces for implementation purposes will + insure any extension is compatible with the remainder of Commons-Math. To aid in + implementing a distribution extension, the AbstractDistribution, + AbstractContinuousDistribution, and AbstractIntegerDistribution + provide implementation building blocks and offer a lot of default distribution + functionality. By extending these abstract classes directly, a good portion of the + repetitive distribution implementation is already developed and should save time and effort + in developing user defined distributions. +

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/estimation-class-diagram.png Binary file libs/commons-math-2.1/docs/userguide/estimation-class-diagram.png has changed diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/estimation-sequence-diagram.png Binary file libs/commons-math-2.1/docs/userguide/estimation-sequence-diagram.png has changed diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/fraction.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/fraction.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,260 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Fractions + + + + + + + +
+ +
+
+
+

9 Fractions

+

9.1 Overview

+

+ The fraction packages provides a fraction number type as well as + fraction number formatting. +

+
+

9.2 Fraction Numbers

+

+ org.apache.commons.math.fraction.Fraction and + + org.apache.commons.math.fraction.BigFraction provide fraction number + type that forms the basis for the fraction functionality found in + commons-math. The former one can be used for fractions whose numerators + and denominators are small enough to fit in an int (taking care of intermediate + values) while the second class should be used when there is a risk the numerator + and denominator grow very large. +

+

+ A fraction number, can be built from two integer arguments representing numerator + and denominator or from a double which will be approximated: +

Fraction f = new Fraction(1, 3); // 1 / 3
+Fraction g = new Fraction(0.25); // 1 / 4
+
+

+

+ Of special note with fraction construction, when a fraction is created it is always reduced to lowest terms. +

+

+ The Fraction class provides many unary and binary + fraction operations. These operations provide the means to add, + subtract, multiple and, divide fractions along with other functions similar to the real number functions found in + java.math.BigDecimal: +

Fraction lhs = new Fraction(1, 3);
+Fraction rhs = new Fraction(2, 5);
+
+Fraction answer = lhs.add(rhs);     // add two fractions
+        answer = lhs.subtract(rhs); // subtract two fractions
+        answer = lhs.abs();         // absolute value
+        answer = lhs.reciprocal();  // reciprocal of lhs
+
+

+

+ Like fraction construction, for each of the fraction functions, the resulting fraction is reduced to lowest terms. +

+
+

9.3 Fraction Formatting and Parsing

+

Fraction instances can be converted to and from strings + using the + org.apache.commons.math.fraction.FractionFormat class. + FractionFormat is a java.text.Format + extension and, as such, is used like other formatting objects (e.g. + java.text.SimpleDateFormat): +

FractionFormat format = new FractionFormat(); // default format
+Fraction f = new Fraction(2, 4);
+String s = format.format(f); // s contains "1 / 2", note the reduced fraction
+
+

+

+ To customize the formatting output, one or two + java.text.NumberFormat instances can be used to construct + a FractionFormat. These number formats control the + formatting of the numerator and denominator of the fraction: +

NumberFormat nf = NumberFormat.getInstance(Locale.FRANCE);
+// create fraction format with custom number format
+// when one number format is used, both numerator and
+// denominator are formatted the same
+FractionFormat format = new FractionFormat(nf);
+Fraction f = new Fraction(2000, 3333);
+String s = format.format(c); // s contains "2.000 / 3.333"
+
+NumberFormat nf2 = NumberFormat.getInstance(Locale.US);
+// create fraction format with custom number formats
+format = new FractionFormat(nf, nf2);
+s = format.format(f); // s contains "2.000 / 3,333"
+
+

+

+ Formatting's inverse operation, parsing, can also be performed by + FractionFormat. To parse a fraction from a string, + simply call the parse method: +

FractionFormat ff = new FractionFormat();
+Fraction f = ff.parse("-10 / 21");
+
+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/genetics.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/genetics.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,292 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Genetic Algorithms + + + + + + + +
+ +
+
+
+

14 Genetic Algorithms

+

14.1 Overview

+

+ The genetics package provides a framework and implementations for + genetic algorithms. +

+
+

14.2 GA Framework

+

+ org.apache.commons.math.genetic.GeneticAlgorithm provides an + execution framework for Genetic Algorithms (GA). + Populations, consisting + of + Chromosomes are evolved by the GeneticAlgorithm until a + StoppingCondition + is reached. Evolution is determined by + SelectionPolicies, + MutationPolicies + and Fitness. +

+

+ The GA itself is implemented by the evolve method of the GeneticAlgorithm class, + which looks like this: +

+public Population evolve(Population initial, StoppingCondition condition) {
+    Population current = initial;
+    while (!condition.isSatisfied(current)) {
+        current = nextGeneration(current);
+    }
+    return current;
+}
+          
+
+ + The nextGeneration method implements the following algorithm: +
  1. Get nextGeneration population to fill from current + generation, using its nextGeneration method
  2. +
  3. Loop until new generation is filled:
  4. +
    • Apply configured SelectionPolicy to select a pair of parents + from current
    • +
    • With probability = + + getCrossoverRate(), apply configured CrossoverPolicy to parents
    • +
    • With probability = + + getMutationRate(), + apply configured MutationPolicy to each of the offspring
    • +
    • Add offspring individually to nextGeneration, + space permitting
    • +
    +
  5. Return nextGeneration
  6. +
+

+
+

14.3 Implementation

+

+ Here is an example GA execution: +

+// initialize a new genetic algorithm
+GeneticAlgorithm ga = new GeneticAlgorithm(
+    new OnePointCrossover<Integer>(),
+    1,
+    new RandomKeyMutation(),
+    0.10,
+    new TournamentSelection(TOURNAMENT_ARITY)
+);
+        
+// initial population
+Population initial = getInitialPopulation();
+        
+// stopping condition
+StoppingCondition stopCond = new FixedGenerationCount(NUM_GENERATIONS);
+        
+// run the algorithm
+Population finalPopulation = ga.evolve(initial, stopCond);
+        
+// best chromosome from the final population
+Chromosome bestFinal = finalPopulation.getFittestChromosome();
+        
+
+ + The arguments to the GeneticAlgorithm constructor above are:
+ + + + + + + + + + + + + + + + + + + + + + + + +
Parametervalue in examplemeaning
crossoverPolicyOnePointCrossoverA random crossover point is selected and the first part from each parent is copied to the corresponding + child, and the second parts are copied crosswise.
crossoverRate1Always apply crossover
mutationPolicyRandomKeyMutationChanges a randomly chosen element of the array representation to a random value uniformly distributed in [0,1].
mutationRate.1Apply mutation with probability 0.1 - that is, 10% of the time.
selectionPolicyTournamentSelectionEach of the two selected chromosomes is selected based on an n-ary tournament -- this is done by drawing + n random chromosomes without replacement from the population, and then selecting the fittest chromosome among them.
+
+ + The algorithm starts with an initial population of Chromosomes. and executes until + the specified StoppingCondition + is reached. In the example above, a + FixedGenerationCount + stopping condition is used, which means the algorithm proceeds through a fixed number of generations. +

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/geometry.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/geometry.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,282 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Geometry + + + + + + + +
+ +
+
+
+

11 Geometry

+

11.1 Overview

+

+ The geometry package provides classes useful for many physical simulations + in the real 3D space, namely vectors and rotations. +

+
+

11.2 Vectors

+

+ org.apache.commons.math.geometry.Vector3D provides a simple vector + type. One important feature is that instances of this class are guaranteed + to be immutable, this greatly simplifies modelling dynamical systems + with changing states: once a vector has been computed, a reference to it + is known to preserve its state as long as the reference itself is preserved. +

+

+ Numerous constructors are available to create vectors. In addition to the + straightforward cartesian coordinates constructor, a constructor using + azimuthal coordinates can build normalized vectors and linear constructors + from one, two, three or four base vectors are also available. Constants have + been defined for the most commons vectors (plus and minus canonical axes, + null vector, and special vectors with infinite or NaN coordinates). +

+

+ The generic vectorial space operations are available including dot product, + normalization, orthogonal vector finding and angular separation computation + which have a specific meaning in 3D. The 3D geometry specific cross product + is of course also implemented. +

+

+ org.apache.commons.math.geometry.Vector3DFormat is a specialized format + for formatting output or parsing input with text representation of 3D vectors. +

+
+

11.3 Rotations

+

+ org.apache.commons.math.geometry.Rotation represents 3D rotations. + Rotation instances are also immutable objects, as Vector3D instances. +

+

+ Rotations can be represented by several different mathematical + entities (matrices, axe and angle, Cardan or Euler angles, + quaternions). This class presents a higher level abstraction, more + user-oriented and hiding implementation details. Well, for the + curious, we use quaternions for the internal representation. The user + can build a rotation from any of these representations, and any of + these representations can be retrieved from a Rotation + instance (see the various constructors and getters). In addition, a + rotation can also be built implicitely from a set of vectors and their + image. +

+

+ This implies that this class can be used to convert from one + representation to another one. For example, converting a rotation + matrix into a set of Cardan angles can be done using the + following single line of code: +

+
double[] angles = new Rotation(matrix, 1.0e-10).getAngles(RotationOrder.XYZ);
+
+

+ Focus is oriented on what a rotation does rather than on its + underlying representation. Once it has been built, and regardless of + its internal representation, a rotation is an operator which + basically transforms three dimensional vectors into other three + dimensional vectors. Depending on the application, the meaning of + these vectors may vary as well as the semantics of the rotation. +

+

+ For example in a spacecraft attitude simulation tool, users will + often consider the vectors are fixed (say the Earth direction for + example) and the rotation transforms the coordinates coordinates of + this vector in inertial frame into the coordinates of the same vector + in satellite frame. In this case, the rotation implicitly defines the + relation between the two frames (we have fixed vectors and moving frame). + Another example could be a telescope control application, where the + rotation would transform the sighting direction at rest into the desired + observing direction when the telescope is pointed towards an object of + interest. In this case the rotation transforms the direction at rest in + a topocentric frame into the sighting direction in the same topocentric + frame (we have moving vectors in fixed frame). In many case, both + approaches will be combined, in our telescope example, we will probably + also need to transform the observing direction in the topocentric frame + into the observing direction in inertial frame taking into account the + observatory location and the Earth rotation. +

+

+ These examples show that a rotation means what the user wants it to + mean, so this class does not push the user towards one specific + definition and hence does not provide methods like + projectVectorIntoDestinationFrame or + computeTransformedDirection. It provides simpler and more + generic methods: applyTo(Vector3D) and + applyInverseTo(Vector3D). +

+

+ Since a rotation is basically a vectorial operator, several + rotations can be composed together and the composite operation + r = r1 o r2 (which means that for each + vector u, r(u) = r1(r2(u))) + is also a rotation. Hence we can consider that in addition to vectors, a + rotation can be applied to other rotations as well (or to itself). With our + previous notations, we would say we can apply r1 to + r2 and the result we get is r = + r1 o r2. For this purpose, the class + provides the methods: applyTo(Rotation) and + applyInverseTo(Rotation). +

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/index.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Table of Contents + + + + + + + +
+ +
+
+
+

Table of Contents

+ +
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/linear.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/linear.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,362 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Linear Algebra + + + + + + + +
+ +
+
+
+

3 Linear Algebra

+

3.1 Overview

+

+ Linear algebra support in commons-math provides operations on real matrices + (both dense and sparse matrices are supported) and vectors. It features basic + operations (addition, subtraction ...) and decomposition algorithms that can + be used to solve linear systems either in exact sense and in least squares sense. +

+
+

3.2 Real matrices

+

+ The + RealMatrix interface represents a matrix with real numbers as + entries. The following basic matrix operations are supported: +

  • Matrix addition, subtraction, multiplication
  • +
  • Scalar addition and multiplication
  • +
  • transpose
  • +
  • Norm and Trace
  • +
  • Operation on a vector
  • +
+

+

+ Example: +

+// Create a real matrix with two rows and three columns
+double[][] matrixData = { {1d,2d,3d}, {2d,5d,3d}};
+RealMatrix m = new Array2DRowRealMatrix(matrixData);
+
+// One more with three rows, two columns
+double[][] matrixData2 = { {1d,2d}, {2d,5d}, {1d, 7d}};
+RealMatrix n = new Array2DRowRealMatrix(matrixData2);
+
+// Note: The constructor copies  the input double[][] array.
+
+// Now multiply m by n
+RealMatrix p = m.multiply(n);
+System.out.println(p.getRowDimension());    // 2
+System.out.println(p.getColumnDimension()); // 2
+
+// Invert p, using LU decomposition
+RealMatrix pInverse = new LUDecompositionImpl(p).getSolver().getInverse();
+         
+
+

+

+ The three main implementations of the interface are + Array2DRowRealMatrix and + BlockRealMatrix for dense matrices (the second one being more suited to + dimensions above 50 or 100) and + SparseRealMatrix for sparse matrices. +

+
+

3.3 Real vectors

+

+ The + RealVector interface represents a vector with real numbers as + entries. The following basic matrix operations are supported: +

  • Vector addition, subtraction
  • +
  • Element by element multiplication, division
  • +
  • Scalar addition, subtraction, multiplication, division and power
  • +
  • Mapping of mathematical functions (cos, sin ...)
  • +
  • Dot product, outer product
  • +
  • Distance and norm according to norms L1, L2 and Linf
  • +
+

+

+ The + RealVectorFormat class handles input/output of vectors in a customizable + textual format. +

+
+

3.4 Solving linear systems

+

+ The solve() methods of the DecompositionSolver + interface support solving linear systems of equations of the form AX=B, either + in linear sense or in least square sense. A RealMatrix instance is + used to represent the coefficient matrix of the system. Solving the system is a + two phases process: first the coefficient matrix is decomposed in some way and + then a solver built from the decomposition solves the system. This allows to + compute the decomposition and build the solver only once if several systems have + to be solved with the same coefficient matrix. +

+

+ For example, to solve the linear system +

+           2x + 3y - 2z = 1
+           -x + 7y + 6x = -2
+           4x - 3y - 5z = 1
+          
+ Start by decomposing the coefficient matrix A (in this case using LU decomposition) + and build a solver +
+RealMatrix coefficients =
+    new Array2DRowRealMatrix(new double[][] { { 2, 3, -2 }, { -1, 7, 6 }, { 4, -3, -5 } },
+                       false);
+DecompositionSolver solver = new LUDecompositionImpl(coefficients).getSolver();
+          
+
+ + Next create a RealVector array to represent the constant + vector B and use solve(RealVector) to solve the system +
+RealVector constants = new RealVectorImpl(new double[] { 1, -2, 1 }, false);
+RealVector solution = solver.solve(constants);
+          
+
+ + The solution vector will contain values for x + (solution.getEntry(0)), y (solution.getEntry(1)), + and z (solution.getEntry(2)) that solve the system. +

+

+ Each type of decomposition has its specific semantics and constraints on + the coefficient matrix as shown in the following table. For algorithms that + solve AX=B in least squares sense the value returned for X is such that the + residual AX-B has minimal norm. If an exact solution exist (i.e. if for some + X the residual AX-B is exactly 0), then this exact solution is also the solution + in least square sense. This implies that algorithms suited for least squares + problems can also be used to solve exact problems, but the reverse is not true. +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Decomposition algorithms
Namecoefficients matrixproblem type
LUsquareexact solution only
Choleskysymmetric positive definiteexact solution only
QRanyleast squares solution
eigen decompositionsquareexact solution only
SVDanyleast squares solution
+

+

+ It is possible to use a simple array of double instead of a RealVector. + In this case, the solution will be provided also as an array of double. +

+

+ It is possible to solve multiple systems with the same coefficient matrix + in one method call. To do this, create a matrix whose column vectors correspond + to the constant vectors for the systems to be solved and use solve(RealMatrix), + which returns a matrix with column vectors representing the solutions. +

+
+

3.5 Eigenvalues/eigenvectors and singular values/singular vectors

+

+ Decomposition algorithms may be used for themselves and not only for linear system solving. + This is of prime interest with eigen decomposition and singular value decomposition. +

+

+ The getEigenvalue(), getEigenvalues(), getEigenVector(), + getV(), getD() and getVT() methods of the + EigenDecomposition interface support solving eigenproblems of the form + AX = lambda X where lambda is a real scalar. +

+

The getSingularValues(), getU(), getS() and + getV() methods of the SingularValueDecomposition interface + allow to solve singular values problems of the form AXi = lambda Yi where lambda is a + real scalar, and where the Xi and Yi vectors form orthogonal bases of their respective + vector spaces (which may have different dimensions). +

+
+

3.6 Non-real fields (complex, fractions ...)

+

+ In addition to the real field, matrices and vectors using non-real field elements can be used. + The fields already supported by the library are: +

+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/ode.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/ode.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,609 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Ordinary Differential Equations Integration + + + + + + + +
+ +
+
+
+

13 Ordinary Differential Equations Integration

+

13.1 Overview

+

+ The ode package provides classes to solve Ordinary Differential Equations problems. +

+

+ This package solves Initial Value Problems of the form y'=f(t,y) with t0 + and y(t0)=y0 known. The provided integrators compute an estimate + of y(t) from t=t0 to t=t1. +

+

+ All integrators provide dense output. This means that besides computing the state vector + at discrete times, they also provide a cheap mean to get both the state and its derivative + between the time steps. They do so through classes extending the + StepInterpolator + abstract class, which are made available to the user at the end of each step. +

+

+ All integrators handle multiple discrete events detection based on switching + functions. This means that the integrator can be driven by user specified discrete events + (occurring when the sign of user-supplied switching function changes). The steps are + shortened as needed to ensure the events occur at step boundaries (even if the integrator + is a fixed-step integrator). When the events are triggered, integration can + be stopped (this is called a G-stop facility), the state vector can be changed, or integration + can simply go on. The latter case is useful to handle discontinuities in the differential + equations gracefully and get accurate dense output even close to the discontinuity. +

+

+ All integrators support setting a maximal number of evaluations of differential + equations function. If this number is exceeded, an exception will be thrown during + integration. This can be used to prevent infinite loops if for example error control or + discrete events create a really large number of extremely small steps. By default, the + maximal number of evaluation is set to Integer.MAX_VALUE (i.e. 231-1 + or 2147483647). It is recommended to set this maximal number to a value suited to the ODE + problem, integration range, and step size or error control settings. +

+

+ The user should describe his problem in his own classes which should implement the + FirstOrderDifferentialEquations + interface. Then he should pass it to the integrator he prefers among all the classes that implement + the FirstOrderIntegrator + interface. The following example shows how to implement the simple two-dimensional problem: +

  • y'0(t) = ? × (c1 - y1(t))
  • +
  • y'1(t) = ? × (y0(t) - c0)
  • +
+ + with some initial state y(t0) = (y0(t0), y1(t0)). + In fact, the exact solution of this problem is that y(t) moves along a circle + centered at c = (c0, c1) with constant angular rate ?. +

+
+private static class CircleODE implements FirstOrderDifferentialEquations {
+
+    private double[] c;
+    private double omega;
+
+    public CircleODE(double[] c, double omega) {
+        this.c     = c;
+        this.omega = omega;
+    }
+
+    public int getDimension() {
+        return 2;
+    }
+
+    public void computeDerivatives(double t, double[] y, double[] yDot) {
+        yDot[0] = omega * (c[1] - y[1]);
+        yDot[1] = omega * (y[0] - c[0]);
+    }
+
+}
+        
+
+

+ Computing the state y(16.0) starting from y(0.0) = (0.0, 1.0) and integrating the ODE + is done as follows (using Dormand-Prince 8(5,3) integrator as an example): +

+
+FirstOrderIntegrator dp853 = new DormandPrince853Integrator(1.0e-8, 100.0, 1.0e-10, 1.0e-10);
+FirstOrderDifferentialEquations ode = new CircleODE(new double[] { 1.0, 1.0 }, 0.1);
+double[] y = new double[] { 0.0, 1.0 }; // initial state
+dp853.integrate(ode, 0.0, y, 16.0, y); // now y contains final state at time t=16.0
+        
+
+
+

13.2 Continuous Output

+

+ The solution of the integration problem is provided by two means. The first one is aimed towards + simple use: the state vector at the end of the integration process is copied in the y array of the + FirstOrderIntegrator.integrate method, as shown by previous example. The second one + should be used when more in-depth information is needed throughout the integration process. The user + can register an object implementing the + StepHandler interface or a + StepNormalizer object wrapping + a user-specified object implementing the + FixedStepHandler interface + into the integrator before calling the FirstOrderIntegrator.integrate method. The user object + will be called appropriately during the integration process, allowing the user to process intermediate + results. The default step handler does nothing. Considering again the previous example, we want to print the + trajectory of the point to check it really is a circle arc. We simply add the following before the call + to integrator.integrate: +

+
+StepHandler stepHandler = new StepHandler() {
+    public void reset() {}
+            
+    public boolean requiresDenseOutput() { return false; }
+            
+    public void handleStep(StepInterpolator interpolator, boolean isLast) throws DerivativeException {
+        double   t = interpolator.getCurrentTime();
+        double[] y = interpolator.getInterpolatedY();
+        System.out.println(t + " " + y[0] + " " + y[1]);
+    }
+};
+integrator.addStepHandler(stepHandler);
+        
+
+

ContinuousOutputModel + is a special-purpose step handler that is able to store all steps and to provide transparent access to + any intermediate result once the integration is over. An important feature of this class is that it + implements the Serializable interface. This means that a complete continuous model of the + integrated function throughout the integration range can be serialized and reused later (if stored into + a persistent medium like a file system or a database) or elsewhere (if sent to another application). + Only the result of the integration is stored, there is no reference to the integrated problem by itself. +

+

+ Other default implementations of the StepHandler + interface are available for general needs + (DummyStepHandler, + StepNormalizer) and custom + implementations can be developed for specific needs. As an example, if an application is to be + completely driven by the integration process, then most of the application code will be run inside a + step handler specific to this application. +

+

+ Some integrators (the simple ones) use fixed steps that are set at creation time. The more efficient + integrators use variable steps that are handled internally in order to control the integration error + with respect to a specified accuracy (these integrators extend the + AdaptiveStepsizeIntegrator + abstract class). In this case, the step handler which is called after each successful step shows up + the variable stepsize. The StepNormalizer + class can be used to convert the variable stepsize into a fixed stepsize that can be handled by classes + implementing the FixedStepHandler + interface. Adaptive stepsize integrators can automatically compute the initial stepsize by themselves, + however the user can specify it if he prefers to retain full control over the integration or if the + automatic guess is wrong. +

+
+

13.3 Discrete Events Handling

+

+ ODE problems are continuous ones. However, sometimes discrete events must be + taken into account. The most frequent case is the stop condition of the integrator + is not defined by the time t but by a target condition on state y (say y[0] = 1.0 + for example). +

+

+ Discrete events detection is based on switching functions. The user provides + a simple g(t, y) + function depending on the current time and state. The integrator will monitor + the value of the function throughout integration range and will trigger the + event when its sign changes. The magnitude of the value is almost irrelevant, + it should however be continuous (but not necessarily smooth) for the sake of + root finding. The steps are shortened as needed to ensure the events occur + at step boundaries (even if the integrator is a fixed-step integrator). +

+

+ When an event is triggered, the event time, current state and an indicator + whether the switching function was increasing or decreasing at event time + are provided to the user. Several different options are available to him: +

+
  • integration can be stopped (this is called a G-stop facility),
  • +
  • the state vector or the derivatives can be changed,
  • +
  • or integration can simply go on.
  • +
+

+ The first case, G-stop, is the most common one. A typical use case is when an + ODE must be solved up to some target state is reached, with a known value of + the state but an unknown occurrence time. As an example, if we want to monitor + a chemical reaction up to some predefined concentration for the first substance, + we can use the following switching function setting: +

+
+public double g(double t, double[] y) {
+  return y[0] - targetConcentration;
+}
+
+public int eventOccurred(double t, double[] y, boolean increasing) {
+  return STOP;
+}
+       
+
+

+ The second case, change state vector or derivatives is encountered when dealing + with discontinuous dynamical models. A typical case would be the motion of a + spacecraft when thrusters are fired for orbital maneuvers. The acceleration is + smooth as long as no maneuvers are performed, depending only on gravity, drag, + third body attraction, radiation pressure. Firing a thruster introduces a + discontinuity that must be handled appropriately by the integrator. In such a case, + we would use a switching function setting similar to this: +

+
+public double g(double t, double[] y) {
+  return (t - tManeuverStart) * (t - tManeuverStop);
+}
+
+public int eventOccurred(double t, double[] y, boolean increasing) {
+  return RESET_DERIVATIVES;
+}
+        
+
+

+ The third case is useful mainly for monitoring purposes, a simple example is: +

+
+public double g(double t, double[] y) {
+  return y[0] - y[1];
+}
+
+public int eventOccurred(double t, double[] y, boolean increasing) {
+  logger.log("y0(t) and y1(t) curves cross at t = " + t);
+  return CONTINUE;
+}
+        
+
+
+

13.4 Available Integrators

+

+ The tables below show the various integrators available for non-stiff problems. Note that the + implementations of Adams-Bashforth and Adams-Moulton are adaptive stepsize, not fixed stepsize + as is usual for these multi-step integrators. This is due to the fact the implementation relies + on the Nordsieck vector representation of the state. +

+

+ + + + + + + + + + + + + + + + + + + +
Fixed Step Integrators
NameOrder
Euler1
Midpoint2
Classical Runge-Kutta4
Gill4
3/84
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Adaptive Stepsize Integrators
NameIntegration OrderError Estimation Order
Higham and Hall54
Dormand-Prince 5(4)54
Dormand-Prince 8(5,3)85 and 3
Gragg-Bulirsch-Stoervariable (up to 18 by default)variable
Adams-Bashforthvariablevariable
Adams-Moultonvariablevariable
+

+
+

13.5 Derivatives

+

+ If in addition to state y(t) the user needs to compute the sensitivity of the state to + the initial state or some parameter of the ODE, he will use the classes and interfaces + from the org.apache.commons.ode.jacobians + package instead of the top level ode package. These classes compute the jacobians + dy(t)/dy0 and dy(t0)/dp where y0 is the initial state + and p is some ODE parameter. +

+

+ The classes and interfaces in this package mimic the behavior of the classes and + interfaces of the top level ode package, only adding parameters arrays for the jacobians. + The behavior of these classes is to create a compound state vector z containing both + the state y(t) and its derivatives dy(t)/dy0 and dy(t0)/dp and + to set up an extended problem by adding the equations for the jacobians automatically. + These extended state and problems are then provided to a classical underlying integrator + chosen by user. +

+

+ This behavior imply there will be a top level integrator knowing about state and jacobians + and a low level integrator knowing only about compound state (which may be big). If the user + wants to deal with the top level only, he will use the specialized step handler and event + handler classes registered at top level. He can also register classical step handlers and + event handlers, but in this case will see the big compound state. This state is guaranteed + to contain the original state in the first elements, followed by the jacobian with respect + to initial state (in row order), followed by the jacobian with respect to parameters (in + row order). If for example the original state dimension is 6 and there are 3 parameters, + the compound state will be a 60 elements array. The first 6 elements will be the original + state, the next 36 elements will be the jacobian with respect to initial state, and the + remaining 18 will be the jacobian with respect to parameters. Dealing with low level + step handlers and event handlers is cumbersome if one really needs the jacobians in these + methods, but it also prevents many data being copied back and forth between state and + jacobians on one side and compound state on the other side. +

+

+ In order to compute dy(t)/dy0 and dy(t0)/dp for any t, the algorithm + needs not only the ODE function f such that y'=f(t,y) but also its local jacobians + df(t, y, p)/dy and df(t, y, p)/dp. +

+

+ If the function f is too complex, the user can simply rely on internal differentiation + using finite differences to compute these local jacobians. So rather than the FirstOrderDifferentialEquations + interface he will implement the ParameterizedODE + interface. Considering again our example where only ? is considered a parameter, we get: +

+
+public class BasicCircleODE implements ParameterizedODE {
+
+    private double[] c;
+    private double omega;
+
+    public BasicCircleODE(double[] c, double omega) {
+        this.c     = c;
+        this.omega = omega;
+    }
+
+    public int getDimension() {
+        return 2;
+    }
+
+    public void computeDerivatives(double t, double[] y, double[] yDot) {
+        yDot[0] = omega * (c[1] - y[1]);
+        yDot[1] = omega * (y[0] - c[0]);
+    }
+
+    public int getParametersDimension() {
+        // we are only interested in the omega parameter
+        return 1;
+    }
+
+    public void setParameter(int i, double value) {
+        omega = value;
+    }
+
+}
+        
+
+

+ This ODE is provided to the specialized integrator with two arrays specifying the + step sizes to use for finite differences (one array for derivation with respect to + state y, one array for derivation with respect to parameters p): +

+
+double[] hY = new double[] { 0.001, 0.001 };
+double[] hP = new double[] { 1.0e-6 };
+FirstOrderIntegratorWithJacobians integrator = new FirstOrderIntegratorWithJacobians(dp853, ode, hY, hP);
+integrator.integrate(t0, y0, dy0dp, t, y, dydy0, dydp);
+        
+
+

+ If the function f is simple, the user can simply provide the local jacobians + by himself. So rather than the FirstOrderDifferentialEquations + interface he will implement the ODEWithJacobians + interface. Considering again our example where only ? is considered a parameter, we get: +

+
+public class EnhancedCircleODE implements ODEWithJacobians {
+
+    private double[] c;
+    private double omega;
+
+    public EnhancedCircleODE(double[] c, double omega) {
+        this.c     = c;
+        this.omega = omega;
+    }
+
+    public int getDimension() {
+        return 2;
+    }
+
+    public void computeDerivatives(double t, double[] y, double[] yDot) {
+        yDot[0] = omega * (c[1] - y[1]);
+        yDot[1] = omega * (y[0] - c[0]);
+    }
+
+    public int getParametersDimension() {
+        // we are only interested in the omega parameter
+        return 1;
+    }
+
+    public void computeJacobians(double t, double[] y, double[] yDot, double[][] dFdY, double[][] dFdP) {
+
+        dFdY[0][0] = 0;
+        dFdY[0][1] = -omega;
+        dFdY[1][0] = omega;
+        dFdY[1][1] = 0;
+
+        dFdP[0][0] = 0;
+        dFdP[0][1] = omega;
+        dFdP[0][2] = c[1] - y[1];
+        dFdP[1][0] = -omega;
+        dFdP[1][1] = 0;
+        dFdP[1][2] = y[0] - c[0];
+ 
+    }
+
+}
+        
+
+

+ This ODE is provided to the specialized integrator as is: +

+
+FirstOrderIntegratorWithJacobians integrator = new FirstOrderIntegratorWithJacobians(dp853, ode);
+integrator.integrate(t0, y0, dy0dp, t, y, dydy0, dydp);
+        
+
+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/optimization.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/optimization.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,390 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Optimization + + + + + + + +
+ +
+
+
+

12 Optimization

+

12.1 Overview

+

+ The optimization package provides algorithms to optimize (i.e. either minimize + or maximize) some objective or cost function. The package is split in several + sub-packages dedicated to different kind of functions or algorithms. +

  • the univariate package handles univariate scalar functions,
  • +
  • the linear package handles multivariate vector linear functions + with linear constraints,
  • +
  • the direct package handles multivariate scalar functions + using direct search methods (i.e. not using derivatives),
  • +
  • the general package handles multivariate scalar or vector functions + using derivatives.
  • +
  • the fitting package handles curve fitting by univariate real functions
  • +
+

+

+ The top level optimization package provides common interfaces for the optimization + algorithms provided in sub-packages. The main interfaces defines defines optimizers + and convergence checkers. The functions that are optimized by the algorithms provided + by this package and its sub-packages are a subset of the one defined in the + analysis package, namely the real and vector valued functions. These + functions are called objective function here. When the goal is to minimize, the + functions are often called cost function, this name is not used in this package. +

+

+ The type of goal, i.e. minimization or maximization, is defined by the enumerated + + GoalType which has only two values: MAXIMIZE and MINIMIZE. +

+

+ Optimizers are the algorithms that will either minimize or maximize, the objective + function by changing its input variables set until an optimal set is found. There + are only four interfaces defining the common behavior of optimizers, one for each + supported type of objective function: +

+

+

+ Despite there are only four types of supported optimizers, it is possible to optimize + a transform a + non-differentiable multivariate vectorial function by converting it to a + non-differentiable multivariate real function thanks to the + LeastSquaresConverter helper class. The transformed function can be optimized using + any implementation of the + MultivariateRealOptimizer interface. +

+

+ For each of the four types of supported optimizers, there is a special implementation + which wraps a classical optimizer in order to add it a multi-start feature. This feature + call the underlying optimizer several times in sequence with different starting points + and returns the best optimum found or all optima if desired. This is a classical way to + prevent being trapped into a local extremum when looking for a global one. +

+
+

12.2 Univariate Functions

+

+ A + UnivariateRealOptimizer is used to find the minimal values of a univariate real-valued + function f. +

+

+ These algorithms usage is very similar to root-finding algorithms usage explained + in the analysis package. The main difference is that the solve methods in root + finding algorithms is replaced by optimize methods. +

+
+

12.3 Linear Programming

+

+ This package provides an implementation of George Dantzig's simplex algorithm + for solving linear optimization problems with linear equality and inequality + constraints. +

+
+

12.4 Direct Methods

+

+ Direct search methods only use cost function values, they don't + need derivatives and don't either try to compute approximation of + the derivatives. According to a 1996 paper by Margaret H. Wright + (Direct + Search Methods: Once Scorned, Now Respectable), they are used + when either the computation of the derivative is impossible (noisy + functions, unpredictable discontinuities) or difficult (complexity, + computation cost). In the first cases, rather than an optimum, a + not too bad point is desired. In the latter cases, an + optimum is desired but cannot be reasonably found. In all cases + direct search methods can be useful. +

+

+ Simplex-based direct search methods are based on comparison of + the cost function values at the vertices of a simplex (which is a + set of n+1 points in dimension n) that is updated by the algorithms + steps. +

+

+ The instances can be built either in single-start or in + multi-start mode. Multi-start is a traditional way to try to avoid + being trapped in a local minimum and miss the global minimum of a + function. It can also be used to verify the convergence of an + algorithm. In multi-start mode, the minimizesmethod + returns the best minimum found after all starts, and the etMinima + method can be used to retrieve all minima from all starts (including the one + already provided by the minimizes method). +

+

+ The direct package provides two solvers. The first one is the classical + + Nelder-Mead method. The second one is Virginia Torczon's + + multi-directional method. +

+
+

12.5 General Case

+

+ The general package deals with non-linear vectorial optimization problems when + the partial derivatives of the objective function are available. +

+

+ One important class of estimation problems is weighted least + squares problems. They basically consist in finding the values + for some parameters pk such that a cost function + J = sum(wi(mesi - modi)2) is + minimized. The various (targeti - modeli(pk)) + terms are called residuals. They represent the deviation between a set of + target values targeti and theoretical values computed from + models modeli depending on free parameters pk. + The wi factors are weights. One classical use case is when the + target values are experimental observations or measurements. +

+

+ Solving a least-squares problem is finding the free parameters pk + of the theoretical models such that they are close to the target values, i.e. + when the residual are small. +

+

+ Two optimizers are available in the general package, both devoted to least-squares + problems. The first one is based on the + Gauss-Newton method. The second one is the + Levenberg-Marquardt method. +

+

+ In order to solve a vectorial optimization problem, the user must provide it as + an object implementing the + DifferentiableMultivariateVectorialFunction interface. The object will be provided to + the estimate method of the optimizer, along with the target and weight arrays, + thus allowing the optimizer to compute the residuals at will. The last parameter to the + estimate method is the point from which the optimizer will start its + search for the optimal point. +

+

+ In addition to least squares solving, the + NonLinearConjugateGradientOptimizer class provides a non-linear conjugate gradient algorithm + to optimize + DifferentiableMultivariateRealFunction. Both the Fletcher-Reeves and the Polak-Ribière + search direction update methods are supported. It is also possible to set up a preconditioner + or to change the line-search algorithm of the inner loop if desired (the default one is a Brent + solver). +

+
+

12.6 Curve Fitting

+

+ The fitting package deals with curve fitting for univariate real functions. + When a univariate real function y = f(x) does depend on some unknown parameters + p0, p1 ... pn-1, curve fitting can be used to + find these parameters. It does this by fitting the curve so it remains + very close to a set of observed points (x0, y0), + (x1, y1) ... (xk-1, yk-1). This + fitting is done by finding the parameters values that minimizes the objective + function sum(yi-f(xi))2. This is really a least + squares problem. +

+

+ For all provided curve fitters, the operating principle is the same. Users must first + create an instance of the fitter, then add the observed points and once the complete + sample of observed points has been added they must call the fit method + which will compute the parameters that best fit the sample. A weight is associated + with each observed point, this allows to take into account uncertainty on some points + when they come from loosy measurements for example. If no such information exist and + all points should be treated the same, it is safe to put 1.0 as the weight for all points. +

+

+ The + CurveFitter class provides curve fitting for general curves. Users must + provide their own implementation of the curve template as a class implementing + the + ParametricRealFunction interface and they must provide the initial guess of the + parameters. The more specialized + PolynomialFitter and + HarmonicFitter classes require neither an implementation of the parametric real function + not an initial guess as they are able to compute them by themselves. +

+

+ An example of fitting a polynomial is given here: +

+
PolynomialFitter fitter = new PolynomialFitter(degree, new LevenbergMarquardtOptimizer());
+fitter.addObservedPoint(-1.00,  2.021170021833143);
+fitter.addObservedPoint(-0.99   2.221135431136975);
+fitter.addObservedPoint(-0.98   2.09985277659314);
+fitter.addObservedPoint(-0.97   2.0211192647627025);
+// lots of lines ommitted
+fitter.addObservedPoint( 0.99, -2.4345814727089854);
+PolynomialFunction fitted = fitter.fit();
+        
+
+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/overview.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/overview.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,281 @@ + + + + + + + + + + + + + + + Math - User Guide - Overview + + + + + + + +
+ +
+
+
+

Overview

+

0.1 About The User Guide

+

+ This guide is intended to help programmers quickly find what they need to develop + solutions using Commons Math. It also provides a supplement to the javadoc API documentation, + providing a little more explanation of the mathematical objects and functions included + in the package. +

+
+

0.2 What's in commons-math

+

+ Commons Math is made up of a small set of math/stat utilities addressing + programming problems like the ones in the list below. This list is not exhaustive, + it's just meant to give a feel for the kinds of things that Commons Math provides. +

  • Computing means, variances and other summary statistics for a list of numbers
  • +
  • Fitting a line to a set of data points using linear regression
  • +
  • Finding a smooth curve that passes through a collection of points (interpolation)
  • +
  • Fitting a parametric model to a set of measurements using least-squares methods
  • +
  • Solving equations involving real-valued functions (i.e. root-finding)
  • +
  • Solving systems of linear equations
  • +
  • Solving Ordinary Differential Equations
  • +
  • Minimizing multi-dimensional functions
  • +
  • Generating random numbers with more restrictions (e.g distribution, range) than what + is possible using the JDK
  • +
  • Generating random samples and/or datasets that are "like" the data in an input file
  • +
  • Performing statistical significance tests
  • +
  • Miscellaneous mathematical functions such as factorials, binomial + coefficients and "special functions" (e.g. gamma, beta functions)
  • +
+

+

+ We are actively seeking ideas for additional components that fit into the + Commons Math vision of a set of lightweight, + self-contained math/stat components useful for solving common programming problems. + Suggestions for new components or enhancements to existing functionality are always welcome! + All feedback/suggestions for improvement should be sent to the + commons-dev mailing list with + [math] at the beginning of the subject line. +

+
+

0.3 How commons-math is organized

+

+ Commons Math is divided into fourteen subpackages, based on functionality provided. +

  1. org.apache.commons.math.stat - statistics, statistical tests
  2. +
  3. org.apache.commons.math.analysis - rootfinding, integration, interpolation, polynomials
  4. +
  5. org.apache.commons.math.random - random numbers, strings and data generation
  6. +
  7. org.apache.commons.math.special - special functions (Gamma, Beta)
  8. +
  9. org.apache.commons.math.linear - matrices, solving linear systems
  10. +
  11. org.apache.commons.math.util - common math/stat functions extending java.lang.Math
  12. +
  13. org.apache.commons.math.complex - complex numbers
  14. +
  15. org.apache.commons.math.distribution - probability distributions
  16. +
  17. org.apache.commons.math.fraction - rational numbers
  18. +
  19. org.apache.commons.math.transform - transform methods (Fast Fourier)
  20. +
  21. org.apache.commons.math.geometry - 3D geometry (vectors and rotations)
  22. +
  23. org.apache.commons.math.optimization - function maximization or minimization
  24. +
  25. org.apache.commons.math.ode - Ordinary Differential Equations integration
  26. +
  27. org.apache.commons.math.genetics - Genetic Algorithms
  28. +
+ + Package javadocs are here

+
+

0.4 How interface contracts are specified in commons-math javadoc

+

+ You should always read the javadoc class and method comments carefully when using + Commons Math components in your programs. The javadoc provides references to the algorithms + that are used, usage notes about limitations, performance, etc. as well as interface contracts. + Interface contracts are specified in terms of preconditions (what has to be true in order + for the method to return valid results), special values returned (e.g. Double.NaN) + or exceptions that may be thrown if the preconditions are not met, and definitions for returned + values/objects or state changes.

+

+ When the actual parameters provided to a method or the internal state of an object + make a computation meaningless, an IllegalArgumentException or IllegalStateException may + be thrown. Exact conditions under which runtime exceptions (and any other exceptions) are + thrown are specified in the javadoc method comments. In some cases, to be consistent with + the IEEE 754 standard for floating point + arithmetic and with java.lang.Math, Commons Math methods return Double.NaN values. + Conditions under which Double.NaN or other special values are returned are fully specified + in the javadoc method comments. +

+
+

0.5 Dependencies

+

+ Commons Math requires JDK 1.5+ and has no runtime dependencies. +

+
+

0.6 License

+

+ Commons Math is distributed under the terms of the Apache License, Version 2.0: + . +

+

+ This product includes software developed by the University of Chicago, as Operator + of Argonne National Laboratory. This corresponds to software translated from the lmder, + lmpar and qrsolv Fortran routines from the Minpack package and distributed under the + following disclaimer: . +

+

+ This product includes software translated from the odex Fortran routine developed by + E. Hairer and G. Wanner and distributed under the following license: + . +

+

+ This product includes software translated from some LAPACK Fortran routines and + distributed under the following license: . +

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/random.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/random.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,530 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Data Generation + + + + + + + +
+ +
+
+
+

2 Data Generation

+

2.1 Overview

+

+ The Commons Math random package includes utilities for +

  • generating random numbers
  • +
  • generating random strings
  • +
  • generating cryptographically secure sequences of random numbers or + strings
  • +
  • generating random samples and permutations
  • +
  • analyzing distributions of values in an input file and generating + values "like" the values in the file
  • +
  • generating data for grouped frequency distributions or + histograms
  • +
+

+

+ The source of random data used by the data generation utilities is + pluggable. By default, the JDK-supplied PseudoRandom Number Generator + (PRNG) is used, but alternative generators can be "plugged in" using an + adaptor framework, which provides a generic facility for replacing + java.util.Random with an alternative PRNG. Another very + good PRNG suitable for Monte-Carlo analysis (but not + for cryptography) provided by the library is the Mersenne twister from + Makoto Matsumoto and Takuji Nishimura +

+

+ Sections 2.2-2.6 below show how to use the commons math API to generate + different kinds of random data. The examples all use the default + JDK-supplied PRNG. PRNG pluggability is covered in 2.7. The only + modification required to the examples to use alternative PRNGs is to + replace the argumentless constructor calls with invocations including + a RandomGenerator instance as a parameter. +

+
+

2.2 Random numbers

+

+ The + org.apache.commons.math.RandomData interface defines methods for + generating random sequences of numbers. The API contracts of these methods + use the following concepts: +

Random sequence of numbers from a probability distribution
+
There is no such thing as a single "random number." What can be + generated are sequences of numbers that appear to be random. When + using the built-in JDK function Math.random(), sequences of + values generated follow the + + Uniform Distribution, which means that the values are evenly spread + over the interval between 0 and 1, with no sub-interval having a greater + probability of containing generated values than any other interval of the + same length. The mathematical concept of a + + probability distribution basically amounts to asserting that different + ranges in the set of possible values of a random variable have + different probabilities of containing the value. Commons Math supports + generating random sequences from the following probability distributions. + The javadoc for the nextXxx methods in + RandomDataImpl describes the algorithms used to generate + random deviates from each of these distributions. + +
+
Cryptographically secure random sequences
+
It is possible for a sequence of numbers to appear random, but + nonetheless to be predictable based on the algorithm used to generate the + sequence. If in addition to randomness, strong unpredictability is + required, it is best to use a + + secure random number generator to generate values (or strings). The + nextSecureXxx methods in the RandomDataImpl implementation of + the RandomData interface use the JDK SecureRandom + PRNG to generate cryptographically secure sequences. The + setSecureAlgorithm method allows you to change the underlying + PRNG. These methods are much slower than the corresponding + "non-secure" versions, so they should only be used when cryptographic + security is required.
+
Seeding pseudo-random number generators
+
By default, the implementation provided in RandomDataImpl + uses the JDK-provided PRNG. Like most other PRNGs, the JDK generator + generates sequences of random numbers based on an initial "seed value". + For the non-secure methods, starting with the same seed always produces the + same sequence of values. Secure sequences started with the same seeds will + diverge. When a new RandomDataImpl is created, the underlying + random number generators are not initialized. The first + call to a data generation method, or to a reSeed() method + initializes the appropriate generator. If you do not explicitly seed the + generator, it is by default seeded with the current time in milliseconds. + Therefore, to generate sequences of random data values, you should always + instantiate oneRandomDataImpl and use it + repeatedly instead of creating new instances for subsequent values in the + sequence. For example, the following will generate a random sequence of 50 + long integers between 1 and 1,000,000, using the current time in + milliseconds as the seed for the JDK PRNG: +
+RandomData randomData = new RandomDataImpl(); 
+for (int i = 0; i < 1000; i++) {
+    value = randomData.nextLong(1, 1000000);
+}
+    
+
+ + The following will not in general produce a good random sequence, since the + PRNG is reseeded each time through the loop with the current time in + milliseconds: +
+for (int i = 0; i < 1000; i++) {
+    RandomDataImpl randomData = new RandomDataImpl(); 
+    value = randomData.nextLong(1, 1000000);
+}
+    
+
+ + The following will produce the same random sequence each time it is + executed: +
+RandomData randomData = new RandomDataImpl(); 
+randomData.reSeed(1000);
+for (int i = 0; i = 1000; i++) {
+    value = randomData.nextLong(1, 1000000);
+}
+    
+
+ + The following will produce a different random sequence each time it is + executed. +
+RandomData randomData = new RandomDataImpl(); 
+randomData.reSeedSecure(1000);
+for (int i = 0; i < 1000; i++) {
+    value = randomData.nextSecureLong(1, 1000000);
+}
+    
+
+
+
+

+
+

2.3 Random Vectors

+

+ Some algorithm requires random vectors instead of random scalars. When the + components of these vectors are uncorrelated, they may be generated simply + one at a time and packed together in the vector. The + org.apache.commons.math.UncorrelatedRandomVectorGenerator class + does however simplify this process by setting the mean and deviation of each + component once and generating complete vectors. When the components are correlated + however, generating them is much more difficult. The + org.apache.commons.math.CorrelatedRandomVectorGenerator class + provides this service. In this case, the user must set up a complete covariance matrix + instead of a simple standard deviations vector. This matrix gathers both the variance + and the correlation information of the probability law. +

+

+ The main use for correlated random vector generation is for Monte-Carlo + simulation of physical problems with several variables, for example to + generate error vectors to be added to a nominal vector. A particularly + common case is when the generated vector should be drawn from a + Multivariate Normal Distribution. +

+
+

2.4 Random Strings

+

+ The methods nextHexString and nextSecureHexString + can be used to generate random strings of hexadecimal characters. Both + of these methods produce sequences of strings with good dispersion + properties. The difference between the two methods is that the second is + cryptographically secure. Specifically, the implementation of + nextHexString(n) in RandomDataImpl uses the + following simple algorithm to generate a string of n hex digits: +

  1. n/2+1 binary bytes are generated using the underlying Random
  2. +
  3. Each binary byte is translated into 2 hex digits
  4. +
+ + The RandomDataImpl implementation of the "secure" version, + nextSecureHexString generates hex characters in 40-byte + "chunks" using a 3-step process: +
  1. 20 random bytes are generated using the underlying + SecureRandom.
  2. +
  3. SHA-1 hash is applied to yield a 20-byte binary digest.
  4. +
  5. Each byte of the binary digest is converted to 2 hex digits
  6. +
+ + Similarly to the secure random number generation methods, + nextSecureHexString is much slower than + the non-secure version. It should be used only for applications such as + generating unique session or transaction ids where predictability of + subsequent ids based on observation of previous values is a security + concern. If all that is needed is an even distribution of hex characters + in the generated strings, the non-secure method should be used. +

+
+

2.5 Random permutations, combinations, sampling

+

+ To select a random sample of objects in a collection, you can use the + nextSample method in the RandomData interface. + Specifically, if c is a collection containing at least + k objects, and randomData is a + RandomData instance randomData.nextSample(c, k) + will return an object[] array of length k + consisting of elements randomly selected from the collection. If + c contains duplicate references, there may be duplicate + references in the returned array; otherwise returned elements will be + unique -- i.e., the sampling is without replacement among the object + references in the collection.

+

+ If randomData is a RandomData instance, and + n and k are integers with + k <= n, then + randomData.nextPermutation(n, k) returns an int[] + array of length k whose whose entries are selected randomly, + without repetition, from the integers 0 through + n-1 (inclusive), i.e., + randomData.nextPermutation(n, k) returns a random + permutation of n taken k at a time. +

+
+

2.6 Generating data 'like' an input file

+

+ Using the ValueServer class, you can generate data based on + the values in an input file in one of two ways: +

Replay Mode
+
The following code will read data from url + (a java.net.URL instance), cycling through the values in the + file in sequence, reopening and starting at the beginning again when all + values have been read. +
+      ValueServer vs = new ValueServer();
+      vs.setValuesFileURL(url); 
+      vs.setMode(ValueServer.REPLAY_MODE);
+      vs.resetReplayFile();
+      double value = vs.getNext();
+      // ...Generate and use more values...
+      vs.closeReplayFile();
+      
+
+ + The values in the file are not stored in memory, so it does not matter + how large the file is, but you do need to explicitly close the file + as above. The expected file format is \n -delimited (i.e. one per line) + strings representing valid floating point numbers. +
+
Digest Mode
+
When used in Digest Mode, the ValueServer reads the entire input file + and estimates a probability density function based on data from the file. + The estimation method is essentially the + + Variable Kernel Method with Gaussian smoothing. Once the density + has been estimated, getNext() returns random values whose + probability distribution matches the empirical distribution -- i.e., if + you generate a large number of such values, their distribution should + "look like" the distribution of the values in the input file. The values + are not stored in memory in this case either, so there is no limit to the + size of the input file. Here is an example: +
+      ValueServer vs = new ValueServer();
+      vs.setValuesFileURL(url); 
+      vs.setMode(ValueServer.DIGEST_MODE);
+      vs.computeDistribution(500); //Read file and estimate distribution using 500 bins
+      double value = vs.getNext();
+      // ...Generate and use more values...
+      
+
+ + See the javadoc for ValueServer and + EmpiricalDistribution for more details. Note that + computeDistribution() opens and closes the input file + by itself. +
+
+

+
+

2.7 PRNG Pluggability

+

+ To enable alternative PRNGs to be "plugged in" to the commons-math data + generation utilities and to provide a generic means to replace + java.util.Random in applications, a random generator + adaptor framework has been added to commons-math. The + + org.apache.commons.math.RandomGenerator interface abstracts the public + interface of java.util.Random and any implementation of this + interface can be used as the source of random data for the commons-math + data generation classes. An abstract base class, + + org.apache.commons.math.AbstractRandomGenerator is provided to make + implementation easier. This class provides default implementations of + "derived" data generation methods based on the primitive, + nextDouble(). To support generic replacement of + java.util.Random, the + + org.apache.commons.math.RandomAdaptor class is provided, which + extends java.util.Random and wraps and delegates calls to + a RandomGenerator instance. +

+

+ Examples: +

Create a RandomGenerator based on RngPack's Mersenne Twister
+
To create a RandomGenerator using the RngPack Mersenne Twister PRNG + as the source of randomness, extend AbstractRandomGenerator + overriding the derived methods that the RngPack implementation provides: +
+import edu.cornell.lassp.houle.RngPack.RanMT;
+/**
+ * AbstractRandomGenerator based on RngPack RanMT generator.
+ */
+public class RngPackGenerator extends AbstractRandomGenerator {
+    
+    private RanMT random = new RanMT();
+    
+    public void setSeed(long seed) {
+       random = new RanMT(seed);
+    }
+    
+    public double nextDouble() {
+        return random.raw();
+    }
+    
+    public double nextGaussian() {
+        return random.gaussian();
+    }
+    
+    public int nextInt(int n) {
+        return random.choose(n);
+    }
+    
+    public boolean nextBoolean() {
+        return random.coin();
+    }
+}
+      
+
+
+
Use the Mersenne Twister RandomGenerator in place of + java.util.Random in RandomData
+
+RandomData randomData = new RandomDataImpl(new RngPackGenerator());
+      
+
+
+
Create an adaptor instance based on the Mersenne Twister generator + that can be used in place of a Random
+
+ RandomGenerator generator = new RngPackGenerator();
+ Random random = RandomAdaptor.createAdaptor(generator);
+ // random can now be used in place of a Random instance, data generation
+ // calls will be delegated to the wrapped Mersenne Twister
+      
+
+
+
+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/special.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/special.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,230 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Special Functions + + + + + + + +
+ +
+
+
+

5 Special Functions

+

5.1 Overview

+

+ The special functions portion of Commons-Math contains several useful functions not + provided by java.lang.Math. These functions mostly serve as building blocks + for other portions of Commons-Math but, as others may find them useful as stand-alone + methods, these special functions were included as part of the Commons-Math public API. +

+
+

5.2 Erf functions

+

org.apache.commons.math.special.Erf contains several useful functions involving the Error Function, Erf. + + + + + + + + +
FunctionMethodReference
Error FunctionerfSee Erf from MathWorld
+

+
+

5.3 Gamma functions

+

org.apache.commons.math.special.Gamma contains several useful functions involving the Gamma Function. + + + + + + + + + + + + +
FunctionMethodReference
Log GammalogGammaSee Gamma Function from MathWorld
Regularized GammaregularizedGammaPSee Regularized Gamma Function from MathWorld
+

+
+

5.4 Beta funtions

+

org.apache.commons.math.special.Beta contains several useful functions involving the Beta Function. + + + + + + + + + + + + +
FunctionMethodReference
Log BetalogBetaSee Beta Function from MathWorld
Regularized BetaregularizedBetaSee Regularized Beta Function from MathWorld
+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/stat.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/stat.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,1227 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Statistics + + + + + + + +
+ +
+
+
+

1 Statistics

+

1.1 Overview

+

+ The statistics package provides frameworks and implementations for + basic Descriptive statistics, frequency distributions, bivariate regression, + and t-, chi-square and ANOVA test statistics. +

+

Descriptive statistics
+
Frequency distributions
+
Simple Regression
+
Multiple Regression
+
Rank transformations
+
Covariance and correlation
+
Statistical Tests
+

+
+

1.2 Descriptive statistics

+

+ The stat package includes a framework and default implementations for + the following Descriptive statistics: +

  • arithmetic and geometric means
  • +
  • variance and standard deviation
  • +
  • sum, product, log sum, sum of squared values
  • +
  • minimum, maximum, median, and percentiles
  • +
  • skewness and kurtosis
  • +
  • first, second, third and fourth moments
  • +
+

+

+ With the exception of percentiles and the median, all of these + statistics can be computed without maintaining the full list of input + data values in memory. The stat package provides interfaces and + implementations that do not require value storage as well as + implementations that operate on arrays of stored values. +

+

+ The top level interface is + + org.apache.commons.math.stat.descriptive.UnivariateStatistic. + This interface, implemented by all statistics, consists of + evaluate() methods that take double[] arrays as arguments + and return the value of the statistic. This interface is extended by + + StorelessUnivariateStatistic, which adds increment(),getResult() and associated methods to support + "storageless" implementations that maintain counters, sums or other + state information as values are added using the increment() + method. +

+

+ Abstract implementations of the top level interfaces are provided in + + AbstractUnivariateStatistic and + + AbstractStorelessUnivariateStatistic respectively. +

+

+ Each statistic is implemented as a separate class, in one of the + subpackages (moment, rank, summary) and each extends one of the abstract + classes above (depending on whether or not value storage is required to + compute the statistic). There are several ways to instantiate and use statistics. + Statistics can be instantiated and used directly, but it is generally more convenient + (and efficient) to access them using the provided aggregates, + + DescriptiveStatistics and + + SummaryStatistics.

+

DescriptiveStatistics maintains the input data in memory + and has the capability of producing "rolling" statistics computed from a + "window" consisting of the most recently added values. +

+

SummaryStatistics does not store the input data values + in memory, so the statistics included in this aggregate are limited to those + that can be computed in one pass through the data without access to + the full array of values. +

+

+ + + + + + + + + + + + + + +
AggregateStatistics IncludedValues stored?"Rolling" capability?
+ DescriptiveStatisticsmin, max, mean, geometric mean, n, + sum, sum of squares, standard deviation, variance, percentiles, skewness, + kurtosis, medianYesYes
+ SummaryStatisticsmin, max, mean, geometric mean, n, + sum, sum of squares, standard deviation, varianceNoNo
+

+

SummaryStatistics can be aggregated using + + AggregateSummaryStatistics. This class can be used to concurrently gather statistics for multiple + datasets as well as for a combined sample including all of the data. +

+

MultivariateSummaryStatistics is similar to SummaryStatistics + but handles n-tuple values instead of scalar values. It can also compute the + full covariance matrix for the input data. +

+

+ Neither DescriptiveStatistics nor SummaryStatistics is + thread-safe. + SynchronizedDescriptiveStatistics and + + SynchronizedSummaryStatistics, respectively, provide thread-safe versions for applications that + require concurrent access to statistical aggregates by multiple threads. + + SynchronizedMultivariateSummaryStatistics provides threadsafe MultivariateSummaryStatistics.

+

+ There is also a utility class, + + StatUtils, that provides static methods for computing statistics + directly from double[] arrays. +

+

+ Here are some examples showing how to compute Descriptive statistics. +

Compute summary statistics for a list of double values
+
+
Using the DescriptiveStatistics aggregate + (values are stored in memory): +
+// Get a DescriptiveStatistics instance
+DescriptiveStatistics stats = new DescriptiveStatistics();
+
+// Add the data from the array
+for( int i = 0; i < inputArray.length; i++) {
+        stats.addValue(inputArray[i]);
+}
+
+// Compute some statistics
+double mean = stats.getMean();
+double std = stats.getStandardDeviation();
+double median = stats.getMedian();
+        
+
+
+
Using the SummaryStatistics aggregate (values are + not stored in memory): +
+// Get a SummaryStatistics instance
+SummaryStatistics stats = new SummaryStatistics();
+
+// Read data from an input stream,
+// adding values and updating sums, counters, etc.
+while (line != null) {
+        line = in.readLine();
+        stats.addValue(Double.parseDouble(line.trim()));
+}
+in.close();
+
+// Compute the statistics
+double mean = stats.getMean();
+double std = stats.getStandardDeviation();
+//double median = stats.getMedian(); <-- NOT AVAILABLE
+        
+
+
+
Using the StatUtils utility class: +
+// Compute statistics directly from the array
+// assume values is a double[] array
+double mean = StatUtils.mean(values);
+double std = StatUtils.variance(values);
+double median = StatUtils.percentile(50);
+
+// Compute the mean of the first three values in the array
+mean = StatUtils.mean(values, 0, 3);
+        
+
+
+
Maintain a "rolling mean" of the most recent 100 values from + an input stream
+
+
Use a DescriptiveStatistics instance with + window size set to 100 +
+// Create a DescriptiveStats instance and set the window size to 100
+DescriptiveStatistics stats = new DescriptiveStatistics();
+stats.setWindowSize(100);
+
+// Read data from an input stream,
+// displaying the mean of the most recent 100 observations
+// after every 100 observations
+long nLines = 0;
+while (line != null) {
+        line = in.readLine();
+        stats.addValue(Double.parseDouble(line.trim()));
+        if (nLines == 100) {
+                nLines = 0;
+                System.out.println(stats.getMean());
+       }
+}
+in.close();
+        
+
+
+
Compute statistics in a thread-safe manner
+
+
Use a SynchronizedDescriptiveStatistics instance +
+// Create a SynchronizedDescriptiveStatistics instance and
+// use as any other DescriptiveStatistics instance
+DescriptiveStatistics stats = new SynchronizedDescriptiveStatistics();
+        
+
+
+
Compute statistics for multiple samples and overall statistics concurrently
+
+
There are two ways to do this using AggregateSummaryStatistics. + The first is to use an AggregateSummaryStatistics instance to accumulate + overall statistics contributed by SummaryStatistics instances created using + + AggregateSummaryStatistics.createContributingStatistics(): +
+// Create a AggregateSummaryStatistics instance to accumulate the overall statistics 
+// and AggregatingSummaryStatistics for the subsamples
+AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
+SummaryStatistics setOneStats = aggregate.createContributingStatistics();
+SummaryStatistics setTwoStats = aggregate.createContributingStatistics();
+// Add values to the subsample aggregates
+setOneStats.addValue(2);
+setOneStats.addValue(3);
+setTwoStats.addValue(2);
+setTwoStats.addValue(4);
+...
+// Full sample data is reported by the aggregate
+double totalSampleSum = aggregate.getSum();
+        
+
+ + The above approach has the disadvantages that the addValue calls must be synchronized on the + SummaryStatistics instance maintained by the aggregate and each value addition updates the + aggregate as well as the subsample. For applications that can wait to do the aggregation until all values + have been added, a static + + aggregate method is available, as shown in the following example. + This method should be used when aggregation needs to be done across threads. +
+// Create SummaryStatistics instances for the subsample data
+SummaryStatistics setOneStats = new SummaryStatistics();
+SummaryStatistics setTwoStats = new SummaryStatistics();
+// Add values to the subsample SummaryStatistics instances
+setOneStats.addValue(2);
+setOneStats.addValue(3);
+setTwoStats.addValue(2);
+setTwoStats.addValue(4);
+...
+// Aggregate the subsample statistics
+Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
+aggregate.add(setOneStats);
+aggregate.add(setTwoStats);
+StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
+
+// Full sample data is reported by aggregatedStats
+double totalSampleSum = aggregatedStats.getSum();
+        
+
+
+
+

+
+

1.3 Frequency distributions

+

+ org.apache.commons.math.stat.descriptive.Frequency + provides a simple interface for maintaining counts and percentages of discrete + values. +

+

+ Strings, integers, longs and chars are all supported as value types, + as well as instances of any class that implements Comparable. + The ordering of values used in computing cumulative frequencies is by + default the natural ordering, but this can be overriden by supplying a + Comparator to the constructor. Adding values that are not + comparable to those that have already been added results in an + IllegalArgumentException.

+

+ Here are some examples. +

Compute a frequency distribution based on integer values
+
+
Mixing integers, longs, Integers and Longs: +
+ Frequency f = new Frequency();
+ f.addValue(1);
+ f.addValue(new Integer(1));
+ f.addValue(new Long(1));
+ f.addValue(2);
+ f.addValue(new Integer(-1));
+ System.out.prinltn(f.getCount(1));   // displays 3
+ System.out.println(f.getCumPct(0));  // displays 0.2
+ System.out.println(f.getPct(new Integer(1)));  // displays 0.6
+ System.out.println(f.getCumPct(-2));   // displays 0
+ System.out.println(f.getCumPct(10));  // displays 1
+          
+
+
+
Count string frequencies
+
+
Using case-sensitive comparison, alpha sort order (natural comparator): +
+Frequency f = new Frequency();
+f.addValue("one");
+f.addValue("One");
+f.addValue("oNe");
+f.addValue("Z");
+System.out.println(f.getCount("one")); // displays 1
+System.out.println(f.getCumPct("Z"));  // displays 0.5
+System.out.println(f.getCumPct("Ot")); // displays 0.25
+          
+
+
+
Using case-insensitive comparator: +
+Frequency f = new Frequency(String.CASE_INSENSITIVE_ORDER);
+f.addValue("one");
+f.addValue("One");
+f.addValue("oNe");
+f.addValue("Z");
+System.out.println(f.getCount("one"));  // displays 3
+System.out.println(f.getCumPct("z"));  // displays 1
+          
+
+
+
+

+
+

1.4 Simple regression

+

+ org.apache.commons.math.stat.regression.SimpleRegression + provides ordinary least squares regression with one independent variable, + estimating the linear model: +

+

y = intercept + slope * x

+

+ Standard errors for intercept and slope are + available as well as ANOVA, r-square and Pearson's r statistics. +

+

+ Observations (x,y pairs) can be added to the model one at a time or they + can be provided in a 2-dimensional array. The observations are not stored + in memory, so there is no limit to the number of observations that can be + added to the model. +

+

Usage Notes:

  • When there are fewer than two observations in the model, or when + there is no variation in the x values (i.e. all x values are the same) + all statistics return NaN. At least two observations with + different x coordinates are requred to estimate a bivariate regression + model.
  • +
  • getters for the statistics always compute values based on the current + set of observations -- i.e., you can get statistics, then add more data + and get updated statistics without using a new instance. There is no + "compute" method that updates all statistics. Each of the getters performs + the necessary computations to return the requested statistic.
  • +
+

+

Implementation Notes:

  • As observations are added to the model, the sum of x values, y values, + cross products (x times y), and squared deviations of x and y from their + respective means are updated using updating formulas defined in + "Algorithms for Computing the Sample Variance: Analysis and + Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J. + 1983, American Statistician, vol. 37, pp. 242-247, referenced in + Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985. All regression + statistics are computed from these sums.
  • +
  • Inference statistics (confidence intervals, parameter significance levels) + are based on on the assumption that the observations included in the model are + drawn from a + Bivariate Normal Distribution
  • +
+

+

+ Here are some examples. +

Estimate a model based on observations added one at a time
+
+
Instantiate a regression instance and add data points +
+regression = new SimpleRegression();
+regression.addData(1d, 2d);
+// At this point, with only one observation,
+// all regression statistics will return NaN
+
+regression.addData(3d, 3d);
+// With only two observations,
+// slope and intercept can be computed
+// but inference statistics will return NaN
+
+regression.addData(3d, 3d);
+// Now all statistics are defined.
+         
+
+
+
Compute some statistics based on observations added so far +
+System.out.println(regression.getIntercept());
+// displays intercept of regression line
+
+System.out.println(regression.getSlope());
+// displays slope of regression line
+
+System.out.println(regression.getSlopeStdErr());
+// displays slope standard error
+         
+
+
+
Use the regression model to predict the y value for a new x value +
+System.out.println(regression.predict(1.5d)
+// displays predicted y value for x = 1.5
+         
+
+ + More data points can be added and subsequent getXxx calls will incorporate + additional data in statistics. +
+
Estimate a model from a double[][] array of data points
+
+
Instantiate a regression object and load dataset +
+double[][] data = { { 1, 3 }, {2, 5 }, {3, 7 }, {4, 14 }, {5, 11 }};
+SimpleRegression regression = new SimpleRegression();
+regression.addData(data);
+          
+
+
+
Estimate regression model based on data +
+System.out.println(regression.getIntercept());
+// displays intercept of regression line
+
+System.out.println(regression.getSlope());
+// displays slope of regression line
+
+System.out.println(regression.getSlopeStdErr());
+// displays slope standard error
+         
+
+ + More data points -- even another double[][] array -- can be added and subsequent + getXxx calls will incorporate additional data in statistics. +
+
+

+
+

1.5 Multiple linear regression

+

+ org.apache.commons.math.stat.regression.MultipleLinearRegression + provides ordinary least squares regression with a generic multiple variable linear model, which + in matrix notation can be expressed as: +

+

y=X*b+u

+

+ where y is an n-vectorregressand, X is a [n,k] matrix whose k columns are called + regressors, b is k-vector of regression parameters and u is an n-vector + of error terms or residuals. The notation is quite standard in literature, + cf eg Davidson and MacKinnon, Econometrics Theory and Methods, 2004. +

+

+ Two implementations are provided: + org.apache.commons.math.stat.regression.OLSMultipleLinearRegression and + + org.apache.commons.math.stat.regression.GLSMultipleLinearRegression

+

+ Observations (x,y and covariance data matrices) can be added to the model via the addData(double[] y, double[][] x, double[][] covariance) method. + The observations are stored in memory until the next time the addData method is invoked. +

+

Usage Notes:

  • Data is validated when invoking the addData(double[] y, double[][] x, double[][] covariance) method and + IllegalArgumentException is thrown when inappropriate. +
  • +
  • Only the GLS regressions require the covariance matrix, so in the OLS regression it is ignored and can be safely + inputted as null.
  • +
+

+

+ Here are some examples. +

OLS regression
+
+
Instantiate an OLS regression object and load dataset +
+MultipleLinearRegression regression = new OLSMultipleLinearRegression();
+double[] y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
+double[] x = new double[6][];
+x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
+x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
+x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
+x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
+x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
+x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};          
+regression.addData(y, x, null); // we don't need covariance
+          
+
+
+
Estimate of regression values honours the MultipleLinearRegression interface: +
+double[] beta = regression.estimateRegressionParameters();        
+
+double[] residuals = regression.estimateResiduals();
+
+double[][] parametersVariance = regression.estimateRegressionParametersVariance();
+
+double regressandVariance = regression.estimateRegressandVariance();
+         
+
+
+
GLS regression
+
+
Instantiate an GLS regression object and load dataset +
+MultipleLinearRegression regression = new GLSMultipleLinearRegression();
+double[] y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
+double[] x = new double[6][];
+x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
+x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
+x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
+x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
+x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
+x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};          
+double[][] omega = new double[6][];
+omega[0] = new double[]{1.1, 0, 0, 0, 0, 0};
+omega[1] = new double[]{0, 2.2, 0, 0, 0, 0};
+omega[2] = new double[]{0, 0, 3.3, 0, 0, 0};
+omega[3] = new double[]{0, 0, 0, 4.4, 0, 0};
+omega[4] = new double[]{0, 0, 0, 0, 5.5, 0};
+omega[5] = new double[]{0, 0, 0, 0, 0, 6.6};
+regression.addData(y, x, omega); // we do need covariance
+          
+
+
+
Estimate of regression values honours the same MultipleLinearRegression interface as + the OLS regression. +
+
+

+
+

1.6 Rank transformations

+

+ Some statistical algorithms require that input data be replaced by ranks. + The + org.apache.commons.math.stat.ranking package provides rank transformation. + + RankingAlgorithm defines the interface for ranking. + + NaturalRanking provides an implementation that has two configuration options. +

  • + Ties strategy deterimines how ties in the source data are handled by the ranking
  • +
  • + NaN strategy determines how NaN values in the source data are handled.
  • +
+

+

+ Examples: +

+NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL,
+TiesStrategy.MAXIMUM);
+double[] data = { 20, 17, 30, 42.3, 17, 50,
+                  Double.NaN, Double.NEGATIVE_INFINITY, 17 };
+double[] ranks = ranking.rank(exampleData);
+         
+
+ + results in ranks containing {6, 5, 7, 8, 5, 9, 2, 2, 5}.
+new NaturalRanking(NaNStrategy.REMOVED,TiesStrategy.SEQUENTIAL).rank(exampleData);   
+         
+
+ + returns {5, 2, 6, 7, 3, 8, 1, 4}.

+

+ The default NaNStrategy is NaNStrategy.MAXIMAL. This makes NaN + values larger than any other value (including Double.POSITIVE_INFINITY). The + default TiesStrategy is TiesStrategy.AVERAGE, which assigns tied + values the average of the ranks applicable to the sequence of ties. See the + + NaturalRanking for more examples and + TiesStrategy and NaNStrategy + for details on these configuration options. +

+
+

1.7 Covariance and correlation

+

+ The + org.apache.commons.math.stat.correlation package computes covariances + and correlations for pairs of arrays or columns of a matrix. + + Covariance computes covariances, + + PearsonsCorrelation provides Pearson's Product-Moment correlation coefficients and + + SpearmansCorrelation computes Spearman's rank correlation. +

+

Implementation Notes

  • + Unbiased covariances are given by the formula
    +
    cov(X, Y) = sum [(xi - E(X))(yi - E(Y))] / (n - 1) + where E(X) is the mean of X and E(Y) + is the mean of the Y values. Non-bias-corrected estimates use + n in place of n - 1. Whether or not covariances are + bias-corrected is determined by the optional parameter, "biasCorrected," which + defaults to true.
  • +
  • + PearsonsCorrelation computes correlations defined by the formula
    +
    cor(X, Y) = sum[(xi - E(X))(yi - E(Y))] / [(n - 1)s(X)s(Y)]
    + + where E(X) and E(Y) are means of X and Y + and s(X), s(Y) are standard deviations. +
  • +
  • + SpearmansCorrelation applies a rank transformation to the input data and computes Pearson's + correlation on the ranked data. The ranking algorithm is configurable. By default, + + NaturalRanking with default strategies for handling ties and NaN values is used. +
  • +
+

+

Examples:

Covariance of 2 arrays
+
+
To compute the unbiased covariance between 2 double arrays, + x and y, use: +
+new Covariance().covariance(x, y)
+          
+
+ + For non-bias-corrected covariances, use +
+covariance(x, y, false)
+          
+
+
+
+
Covariance matrix
+
+
A covariance matrix over the columns of a source matrix data + can be computed using +
+new Covariance().computeCovarianceMatrix(data)
+          
+
+ + The i-jth entry of the returned matrix is the unbiased covariance of the ith and jth + columns of data. As above, to get non-bias-corrected covariances, + use +
+computeCovarianceMatrix(data, false)
+         
+
+
+
+
Pearson's correlation of 2 arrays
+
+
To compute the Pearson's product-moment correlation between two double arrays + x and y, use: +
+new PearsonsCorrelation().correlation(x, y)
+          
+
+
+
+
Pearson's correlation matrix
+
+
A (Pearson's) correlation matrix over the columns of a source matrix data + can be computed using +
+new PearsonsCorrelation().computeCorrelationMatrix(data)
+          
+
+ + The i-jth entry of the returned matrix is the Pearson's product-moment correlation between the + ith and jth columns of data.
+
+
Pearson's correlation significance and standard errors
+
+
To compute standard errors and/or significances of correlation coefficients + associated with Pearson's correlation coefficients, start by creating a + PearsonsCorrelation instance +
+PearsonsCorrelation correlation = new PearsonsCorrelation(data);
+          
+
+ + where data is either a rectangular array or a RealMatrix. + Then the matrix of standard errors is +
+correlation.getCorrelationStandardErrors();
+          
+
+ + The formula used to compute the standard error is
+SEr = ((1 - r2) / (n - 2))1/2
+ + where r is the estimated correlation coefficient and + n is the number of observations in the source dataset.
+
+p-values for the (2-sided) null hypotheses that elements of + a correlation matrix are zero populate the RealMatrix returned by +
+correlation.getCorrelationPValues()
+          
+
+getCorrelationPValues().getEntry(i,j) is the + probability that a random variable distributed as tn-2 takes + a value with absolute value greater than or equal to
+
|rij|((n - 2) / (1 - rij2))1/2, + where rij is the estimated correlation between the ith and jth + columns of the source array or RealMatrix. This is sometimes referred to as the + significance of the coefficient.
+
+ + For example, if data is a RealMatrix with 2 columns and 10 rows, then +
+new PearsonsCorrelation(data).getCorrelationPValues().getEntry(0,1)
+           
+
+ + is the significance of the Pearson's correlation coefficient between the two columns + of data. If this value is less than .01, we can say that the correlation + between the two columns of data is significant at the 99% level. +
+
+
Spearman's rank correlation coefficient
+
+
To compute the Spearman's rank-moment correlation between two double arrays + x and y: +
+new SpearmansCorrelation().correlation(x, y)
+          
+
+ + This is equivalent to +
+RankingAlgorithm ranking = new NaturalRanking();
+new PearsonsCorrelation().correlation(ranking.rank(x), ranking.rank(y))
+          
+
+
+
+
+

+
+

1.8 Statistical tests

+

+ The interfaces and implementations in the + + org.apache.commons.math.stat.inference package provide + + Student's t, + + Chi-Square and + + One-Way ANOVA test statistics as well as + + p-values associated with t-, + Chi-Square and One-Way ANOVA tests. The + interfaces are + + TTest, + + ChiSquareTest, and + + OneWayAnova with provided implementations + + TTestImpl, + + ChiSquareTestImpl and + + OneWayAnovaImpl, respectively. + The + + TestUtils class provides static methods to get test instances or + to compute test statistics directly. The examples below all use the + static methods in TestUtils to execute tests. To get + test object instances, either use e.g., + TestUtils.getTTest() or use the implementation constructors + directly, e.g., + new TTestImpl(). +

+

Implementation Notes

  • Both one- and two-sample t-tests are supported. Two sample tests + can be either paired or unpaired and the unpaired two-sample tests can + be conducted under the assumption of equal subpopulation variances or + without this assumption. When equal variances is assumed, a pooled + variance estimate is used to compute the t-statistic and the degrees + of freedom used in the t-test equals the sum of the sample sizes minus 2. + When equal variances is not assumed, the t-statistic uses both sample + variances and the + + Welch-Satterwaite approximation is used to compute the degrees + of freedom. Methods to return t-statistics and p-values are provided in each + case, as well as boolean-valued methods to perform fixed significance + level tests. The names of methods or methods that assume equal + subpopulation variances always start with "homoscedastic." Test or + test-statistic methods that just start with "t" do not assume equal + variances. See the examples below and the API documentation for + more details.
  • +
  • The validity of the p-values returned by the t-test depends on the + assumptions of the parametric t-test procedure, as discussed + + here
  • +
  • p-values returned by t-, chi-square and Anova tests are exact, based + on numerical approximations to the t-, chi-square and F distributions in the + distributions package.
  • +
  • p-values returned by t-tests are for two-sided tests and the boolean-valued + methods supporting fixed significance level tests assume that the hypotheses + are two-sided. One sided tests can be performed by dividing returned p-values + (resp. critical values) by 2.
  • +
  • Degrees of freedom for chi-square tests are integral values, based on the + number of observed or expected counts (number of observed counts - 1) + for the goodness-of-fit tests and (number of columns -1) * (number of rows - 1) + for independence tests.
  • +
+

+

Examples:

One-sample t tests
+
+
To compare the mean of a double[] array to a fixed value: +
+double[] observed = {1d, 2d, 3d};
+double mu = 2.5d;
+System.out.println(TestUtils.t(mu, observed));
+          
+
+ + The code above will display the t-statisitic associated with a one-sample + t-test comparing the mean of the observed values against + mu.
+
To compare the mean of a dataset described by a + + org.apache.commons.math.stat.descriptive.StatisticalSummary to a fixed value: +
+double[] observed ={1d, 2d, 3d};
+double mu = 2.5d;
+SummaryStatistics sampleStats = new SummaryStatistics();
+for (int i = 0; i < observed.length; i++) {
+    sampleStats.addValue(observed[i]);
+}
+System.out.println(TestUtils.t(mu, observed));
+
+
+
+
To compute the p-value associated with the null hypothesis that the mean + of a set of values equals a point estimate, against the two-sided alternative that + the mean is different from the target value: +
+double[] observed = {1d, 2d, 3d};
+double mu = 2.5d;
+System.out.println(TestUtils.tTest(mu, observed));
+           
+
+ + The snippet above will display the p-value associated with the null + hypothesis that the mean of the population from which the + observed values are drawn equals mu.
+
To perform the test using a fixed significance level, use: +
+TestUtils.tTest(mu, observed, alpha);
+          
+
+ + where 0 < alpha < 0.5 is the significance level of + the test. The boolean value returned will be true iff the + null hypothesis can be rejected with confidence 1 - alpha. + To test, for example at the 95% level of confidence, use + alpha = 0.05
+
+
Two-Sample t-tests
+
+
Example 1: Paired test evaluating + the null hypothesis that the mean difference between corresponding + (paired) elements of the double[] arrays + sample1 and sample2 is zero. + + To compute the t-statistic: +
+TestUtils.pairedT(sample1, sample2);
+          
+
+

+ To compute the p-value: +

+TestUtils.pairedTTest(sample1, sample2);
+           
+
+

+

+ To perform a fixed significance level test with alpha = .05: +

+TestUtils.pairedTTest(sample1, sample2, .05);
+           
+
+

+ + The last example will return true iff the p-value + returned by TestUtils.pairedTTest(sample1, sample2) + is less than .05
+
Example 2: unpaired, two-sided, two-sample t-test using + StatisticalSummary instances, without assuming that + subpopulation variances are equal. + + First create the StatisticalSummary instances. Both + DescriptiveStatistics and SummaryStatistics + implement this interface. Assume that summary1 and + summary2 are SummaryStatistics instances, + each of which has had at least 2 values added to the (virtual) dataset that + it describes. The sample sizes do not have to be the same -- all that is required + is that both samples have at least 2 elements. +

Note: The SummaryStatistics class does + not store the dataset that it describes in memory, but it does compute all + statistics necessary to perform t-tests, so this method can be used to + conduct t-tests with very large samples. One-sample tests can also be + performed this way. + (See Descriptive statistics for details + on the SummaryStatistics class.) +

+

+ To compute the t-statistic: +

+TestUtils.t(summary1, summary2);
+          
+
+

+

+ To compute the p-value: +

+TestUtils.tTest(sample1, sample2);
+           
+
+

+

+ To perform a fixed significance level test with alpha = .05: +

+TestUtils.tTest(sample1, sample2, .05);
+           
+
+

+

+ In each case above, the test does not assume that the subpopulation + variances are equal. To perform the tests under this assumption, + replace "t" at the beginning of the method name with "homoscedasticT" +

+
+
+
Chi-square tests
+
+
To compute a chi-square statistic measuring the agreement between a + long[] array of observed counts and a double[] + array of expected counts, use: +
+long[] observed = {10, 9, 11};
+double[] expected = {10.1, 9.8, 10.3};
+System.out.println(TestUtils.chiSquare(expected, observed));
+          
+
+ + the value displayed will be + sum((expected[i] - observed[i])^2 / expected[i])
+
To get the p-value associated with the null hypothesis that + observed conforms to expected use: +
+TestUtils.chiSquareTest(expected, observed);
+          
+
+
+
To test the null hypothesis that observed conforms to + expected with alpha siginficance level + (equiv. 100 * (1-alpha)% confidence) where + 0 < alpha < 1 use: +
+TestUtils.chiSquareTest(expected, observed, alpha);
+          
+
+ + The boolean value returned will be true iff the null hypothesis + can be rejected with confidence 1 - alpha. +
+
To compute a chi-square statistic statistic associated with a + + chi-square test of independence based on a two-dimensional (long[][]) + counts array viewed as a two-way table, use: +
+TestUtils.chiSquareTest(counts);
+          
+
+ + The rows of the 2-way table are + count[0], ... , count[count.length - 1].
+
+ The chi-square statistic returned is + sum((counts[i][j] - expected[i][j])^2/expected[i][j]) + where the sum is taken over all table entries and + expected[i][j] is the product of the row and column sums at + row i, column j divided by the total count. +
+
To compute the p-value associated with the null hypothesis that + the classifications represented by the counts in the columns of the input 2-way + table are independent of the rows, use: +
+ TestUtils.chiSquareTest(counts);
+          
+
+
+
To perform a chi-square test of independence with alpha + siginficance level (equiv. 100 * (1-alpha)% confidence) + where 0 < alpha < 1 use: +
+TestUtils.chiSquareTest(counts, alpha);
+          
+
+ + The boolean value returned will be true iff the null + hypothesis can be rejected with confidence 1 - alpha. +
+
+
One-Way Anova tests
+
+
To conduct a One-Way Analysis of Variance (ANOVA) to evaluate the + null hypothesis that the means of a collection of univariate datasets + are the same, start by loading the datasets into a collection, e.g. +
+double[] classA =
+   {93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0 };
+double[] classB =
+   {99.0, 92.0, 102.0, 100.0, 102.0, 89.0 };
+double[] classC =
+   {110.0, 115.0, 111.0, 117.0, 128.0, 117.0 };
+List classes = new ArrayList();
+classes.add(classA);
+classes.add(classB);
+classes.add(classC);
+          
+
+ + Then you can compute ANOVA F- or p-values associated with the + null hypothesis that the class means are all the same + using a OneWayAnova instance or TestUtils + methods: +
+double fStatistic = TestUtils.oneWayAnovaFValue(classes); // F-value
+double pValue = TestUtils.oneWayAnovaPValue(classes);     // P-value
+          
+
+ + To test perform a One-Way Anova test with signficance level set at 0.01 + (so the test will, assuming assumptions are met, reject the null + hypothesis incorrectly only about one in 100 times), use +
+TestUtils.oneWayAnovaTest(classes, 0.01); // returns a boolean
+                                          // true means reject null hypothesis
+          
+
+
+
+

+
+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/transform.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/transform.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,184 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Transform methods + + + + + + + +
+ +
+
+
+

10 Transform methods

+

+ This package provides a few transformers for signal analysis. All transformers + provide both direct and inverse transforms. +

  • FastFourierTransformer (produces Complex results)
  • +
  • FastCosineTransformer (produces real results)
  • +
  • FastSineTransformer (produces real results)
  • +
  • FastHadamardTransformer (produces real results)
  • +
+

+
+ +
+
+
+
+
+ + + diff -r e63a64652f4d -r 5f2c5fb36e93 libs/commons-math-2.1/docs/userguide/utilities.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/commons-math-2.1/docs/userguide/utilities.html Tue Jan 04 10:00:53 2011 +0100 @@ -0,0 +1,329 @@ + + + + + + + + + + + + + + + Math - The Commons Math User Guide - Utilites + + + + + + + +
+ +
+
+
+

6 Utilities

+

6.1 Overview

+

+ The + org.apache.commons.math.util package collects a group of array utilities, + value transformers, and numerical routines used by implementation classes in + commons-math. +

+
+

6.2 Double array utilities

+

+ To maintain statistics based on a "rolling" window of values, a resizable + array implementation was developed and is provided for reuse in the + util package. The core functionality provided is described in + the documentation for the interface, + + org.apache.commons.math.util.DoubleArray. This interface adds one + method, addElementRolling(double) to basic list accessors. + The addElementRolling method adds an element + (the actual parameter) to the end of the list and removes the first element + in the list. +

+

+ The + org.apache.commons.math.util.ResizableDoubleArray class provides a + configurable, array-backed implementation of the DoubleArray + interface. When addElementRolling is invoked, the underlying + array is expanded if necessary, the new element is added to the end of the + array and the "usable window" of the array is moved forward, so that + the first element is effectively discarded, what was the second becomes the + first, and so on. To efficiently manage storage, two maintenance + operations need to be periodically performed -- orphaned elements at the + beginning of the array need to be reclaimed and space for new elements at + the end needs to be created. Both of these operations are handled + automatically, with frequency / effect driven by the configuration + properties expansionMode, expansionFactor and + contractionCriteria. See + + ResizableDoubleArray + for details. +

+
+

6.3 int/double hash map

+

+ The + org.apache.commons.math.util.OpenIntToDoubleHashMap class provides a specialized + hash map implementation for int/double. This implementation has a much smaller memory + overhead than standard java.util.HashMap class. It uses open addressing + and primitive arrays, which greatly reduces the number of intermediate objects and + improve data locality. +

+
+

6.4 Continued Fractions

+

+ The + org.apache.commons.math.util.ContinuedFraction class provides a generic + way to create and evaluate continued fractions. The easiest way to create a + continued fraction is to subclass ContinuedFraction and + override the getA and getB methods which return + the continued fraction terms. The precise definition of these terms is + explained in + Continued Fraction, equation (1) from MathWorld. +

+

+ As an example, the constant Pi could be computed using the continued fraction + defined at + http://functions.wolfram.com/Constants/Pi/10/0002/. The following + anonymous class provides the implementation: +

ContinuedFraction c = new ContinuedFraction() {
+    public double getA(int n, double x) {
+        switch(n) {
+            case 0: return 3.0;
+            default: return 6.0;
+        }
+    }
+    
+    public double getB(int n, double x) {
+        double y = (2.0 * n) - 1.0;
+        return y * y;
+    }
+}
+
+

+

+ Then, to evalute Pi, simply call any of the evalute methods + (Note, the point of evalution in this example is meaningless since Pi is a + constant). +

+

+ For a more practical use of continued fractions, consider the exponential + function with the continued fraction definition of + + http://functions.wolfram.com/ElementaryFunctions/Exp/10/. The + following anonymous class provides its implementation: +

ContinuedFraction c = new ContinuedFraction() {
+    public double getA(int n, double x) {
+        if (n % 2 == 0) {
+            switch(n) {
+                case 0: return 1.0;
+                default: return 2.0;
+            }
+        } else {
+            return n;
+        }
+    }
+    
+    public double getB(int n, double x) {
+        if (n % 2 == 0) {
+            return -x;
+        } else {
+            return x;
+        }
+    }
+}
+
+

+

+ Then, to evalute ex for any value x, simply call any of the + evalute methods. +

+
+

6.5 binomial coefficients, factorials and other common math functions

+

+ A collection of reusable math functions is provided in the + MathUtils + utility class. MathUtils currently includes methods to compute the following:

  • + Binomial coeffiecients -- "n choose k" available as an (exact) long value, + binomialCoefficient(int, int) for small n, k; as a double, + binomialCoefficientDouble(int, int) for larger values; and in + a "super-sized" version, binomialCoefficientLog(int, int) + that returns the natural logarithm of the value.
  • +
  • + Factorials -- like binomial coefficients, these are available as exact long + values, factorial(int); doubles, + factorialDouble(int); or logs, factorialLog(int).
  • +
  • + Hyperbolic sine and cosine functions -- + cosh(double), sinh(double)
  • +
  • + sign (+1 if argument > 0, 0 if x = 0, and -1 if x < 0) and + indicator (+1.0 if argument >= 0 and -1.0 if argument < 0) functions + for variables of all primitive numeric types.
  • +
  • + a hash function, hash(double), returning a long-valued + hash code for a double value. +
  • +
  • + Convience methods to round floating-point number to arbitrary precision. +
  • +
  • + Least common multiple and greatest common denominator functions. +
  • +
+

+
+
+ +
+
+
+
+
+ + +