Mercurial > hg > de.mpg.mpiwg.itgroup.digilib.core
comparison libs/commons-math-2.1/docs/userguide/random.html @ 10:5f2c5fb36e93
commons-math-2.1 added
author | dwinter |
---|---|
date | Tue, 04 Jan 2011 10:00:53 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:e63a64652f4d | 10:5f2c5fb36e93 |
---|---|
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 <html xmlns="http://www.w3.org/1999/xhtml"> | |
14 <head> | |
15 <title>Math - The Commons Math User Guide - Data Generation</title> | |
16 <style type="text/css" media="all"> | |
17 @import url("../css/maven-base.css"); | |
18 @import url("../css/maven-theme.css"); | |
19 @import url("../css/site.css"); | |
20 </style> | |
21 <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" /> | |
22 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /> | |
23 </head> | |
24 <body class="composite"> | |
25 <div id="banner"> | |
26 <span id="bannerLeft"> | |
27 | |
28 Commons Math User Guide | |
29 | |
30 </span> | |
31 <div class="clear"> | |
32 <hr/> | |
33 </div> | |
34 </div> | |
35 <div id="breadcrumbs"> | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | |
42 | |
43 | |
44 <div class="xright"> | |
45 | |
46 | |
47 | |
48 | |
49 | |
50 | |
51 | |
52 </div> | |
53 <div class="clear"> | |
54 <hr/> | |
55 </div> | |
56 </div> | |
57 <div id="leftColumn"> | |
58 <div id="navcolumn"> | |
59 | |
60 | |
61 | |
62 | |
63 | |
64 | |
65 | |
66 | |
67 <h5>User Guide</h5> | |
68 <ul> | |
69 | |
70 <li class="none"> | |
71 <a href="../userguide/index.html">Contents</a> | |
72 </li> | |
73 | |
74 <li class="none"> | |
75 <a href="../userguide/overview.html">Overview</a> | |
76 </li> | |
77 | |
78 <li class="none"> | |
79 <a href="../userguide/stat.html">Statistics</a> | |
80 </li> | |
81 | |
82 <li class="none"> | |
83 <strong>Data Generation</strong> | |
84 </li> | |
85 | |
86 <li class="none"> | |
87 <a href="../userguide/linear.html">Linear Algebra</a> | |
88 </li> | |
89 | |
90 <li class="none"> | |
91 <a href="../userguide/analysis.html">Numerical Analysis</a> | |
92 </li> | |
93 | |
94 <li class="none"> | |
95 <a href="../userguide/special.html">Special Functions</a> | |
96 </li> | |
97 | |
98 <li class="none"> | |
99 <a href="../userguide/utilities.html">Utilities</a> | |
100 </li> | |
101 | |
102 <li class="none"> | |
103 <a href="../userguide/complex.html">Complex Numbers</a> | |
104 </li> | |
105 | |
106 <li class="none"> | |
107 <a href="../userguide/distribution.html">Distributions</a> | |
108 </li> | |
109 | |
110 <li class="none"> | |
111 <a href="../userguide/fraction.html">Fractions</a> | |
112 </li> | |
113 | |
114 <li class="none"> | |
115 <a href="../userguide/transform.html">Transform Methods</a> | |
116 </li> | |
117 | |
118 <li class="none"> | |
119 <a href="../userguide/geometry.html">3D Geometry</a> | |
120 </li> | |
121 | |
122 <li class="none"> | |
123 <a href="../userguide/optimization.html">Optimization</a> | |
124 </li> | |
125 | |
126 <li class="none"> | |
127 <a href="../userguide/ode.html">Ordinary Differential Equations</a> | |
128 </li> | |
129 | |
130 <li class="none"> | |
131 <a href="../userguide/genetics.html">Genetic Algorithms</a> | |
132 </li> | |
133 </ul> | |
134 <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"> | |
135 <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img> | |
136 </a> | |
137 | |
138 | |
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
145 </div> | |
146 </div> | |
147 <div id="bodyColumn"> | |
148 <div id="contentBox"> | |
149 <div class="section"><h2><a name="a2_Data_Generation"></a>2 Data Generation</h2> | |
150 <div class="section"><h3><a name="a2.1_Overview"></a>2.1 Overview</h3> | |
151 <p> | |
152 The Commons Math random package includes utilities for | |
153 <ul><li>generating random numbers</li> | |
154 <li>generating random strings</li> | |
155 <li>generating cryptographically secure sequences of random numbers or | |
156 strings</li> | |
157 <li>generating random samples and permutations</li> | |
158 <li>analyzing distributions of values in an input file and generating | |
159 values "like" the values in the file</li> | |
160 <li>generating data for grouped frequency distributions or | |
161 histograms</li> | |
162 </ul> | |
163 </p> | |
164 <p> | |
165 The source of random data used by the data generation utilities is | |
166 pluggable. By default, the JDK-supplied PseudoRandom Number Generator | |
167 (PRNG) is used, but alternative generators can be "plugged in" using an | |
168 adaptor framework, which provides a generic facility for replacing | |
169 <code>java.util.Random</code> with an alternative PRNG. Another very | |
170 good PRNG suitable for Monte-Carlo analysis (but <strong>not</strong> | |
171 for cryptography) provided by the library is the Mersenne twister from | |
172 Makoto Matsumoto and Takuji Nishimura | |
173 </p> | |
174 <p> | |
175 Sections 2.2-2.6 below show how to use the commons math API to generate | |
176 different kinds of random data. The examples all use the default | |
177 JDK-supplied PRNG. PRNG pluggability is covered in 2.7. The only | |
178 modification required to the examples to use alternative PRNGs is to | |
179 replace the argumentless constructor calls with invocations including | |
180 a <code>RandomGenerator</code> instance as a parameter. | |
181 </p> | |
182 </div> | |
183 <div class="section"><h3><a name="a2.2_Random_numbers"></a>2.2 Random numbers</h3> | |
184 <p> | |
185 The <a href="../apidocs/org/apache/commons/math/random/RandomData.html"> | |
186 org.apache.commons.math.RandomData</a> interface defines methods for | |
187 generating random sequences of numbers. The API contracts of these methods | |
188 use the following concepts: | |
189 <dl><dt>Random sequence of numbers from a probability distribution</dt> | |
190 <dd>There is no such thing as a single "random number." What can be | |
191 generated are <i>sequences</i> of numbers that appear to be random. When | |
192 using the built-in JDK function <code>Math.random(),</code> sequences of | |
193 values generated follow the | |
194 <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm" class="externalLink"> | |
195 Uniform Distribution</a>, which means that the values are evenly spread | |
196 over the interval between 0 and 1, with no sub-interval having a greater | |
197 probability of containing generated values than any other interval of the | |
198 same length. The mathematical concept of a | |
199 <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda36.htm" class="externalLink"> | |
200 probability distribution</a> basically amounts to asserting that different | |
201 ranges in the set of possible values of a random variable have | |
202 different probabilities of containing the value. Commons Math supports | |
203 generating random sequences from the following probability distributions. | |
204 The javadoc for the <code>nextXxx</code> methods in | |
205 <code>RandomDataImpl</code> describes the algorithms used to generate | |
206 random deviates from each of these distributions. | |
207 <ul><li><a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm" class="externalLink"> | |
208 uniform distribution</a></li> | |
209 <li><a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3667.htm" class="externalLink"> | |
210 exponential distribution</a></li> | |
211 <li><a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda366j.htm" class="externalLink"> | |
212 poisson distribution</a></li> | |
213 <li><a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3661.htm" class="externalLink"> | |
214 Gaussian distribution</a></li> | |
215 </ul> | |
216 </dd> | |
217 <dt>Cryptographically secure random sequences</dt> | |
218 <dd>It is possible for a sequence of numbers to appear random, but | |
219 nonetheless to be predictable based on the algorithm used to generate the | |
220 sequence. If in addition to randomness, strong unpredictability is | |
221 required, it is best to use a | |
222 <a href="http://www.wikipedia.org/wiki/Cryptographically_secure_pseudo-random_number_generator" class="externalLink"> | |
223 secure random number generator</a> to generate values (or strings). The | |
224 nextSecureXxx methods in the <code>RandomDataImpl</code> implementation of | |
225 the <code>RandomData</code> interface use the JDK <code>SecureRandom</code> | |
226 PRNG to generate cryptographically secure sequences. The | |
227 <code>setSecureAlgorithm</code> method allows you to change the underlying | |
228 PRNG. These methods are <strong>much slower</strong> than the corresponding | |
229 "non-secure" versions, so they should only be used when cryptographic | |
230 security is required.</dd> | |
231 <dt>Seeding pseudo-random number generators</dt> | |
232 <dd>By default, the implementation provided in <code>RandomDataImpl</code> | |
233 uses the JDK-provided PRNG. Like most other PRNGs, the JDK generator | |
234 generates sequences of random numbers based on an initial "seed value". | |
235 For the non-secure methods, starting with the same seed always produces the | |
236 same sequence of values. Secure sequences started with the same seeds will | |
237 diverge. When a new <code>RandomDataImpl</code> is created, the underlying | |
238 random number generators are <strong>not</strong> initialized. The first | |
239 call to a data generation method, or to a <code>reSeed()</code> method | |
240 initializes the appropriate generator. If you do not explicitly seed the | |
241 generator, it is by default seeded with the current time in milliseconds. | |
242 Therefore, to generate sequences of random data values, you should always | |
243 instantiate <strong>one</strong><code>RandomDataImpl</code> and use it | |
244 repeatedly instead of creating new instances for subsequent values in the | |
245 sequence. For example, the following will generate a random sequence of 50 | |
246 long integers between 1 and 1,000,000, using the current time in | |
247 milliseconds as the seed for the JDK PRNG: | |
248 <div class="source"><pre> | |
249 RandomData randomData = new RandomDataImpl(); | |
250 for (int i = 0; i < 1000; i++) { | |
251 value = randomData.nextLong(1, 1000000); | |
252 } | |
253 </pre> | |
254 </div> | |
255 | |
256 The following will not in general produce a good random sequence, since the | |
257 PRNG is reseeded each time through the loop with the current time in | |
258 milliseconds: | |
259 <div class="source"><pre> | |
260 for (int i = 0; i < 1000; i++) { | |
261 RandomDataImpl randomData = new RandomDataImpl(); | |
262 value = randomData.nextLong(1, 1000000); | |
263 } | |
264 </pre> | |
265 </div> | |
266 | |
267 The following will produce the same random sequence each time it is | |
268 executed: | |
269 <div class="source"><pre> | |
270 RandomData randomData = new RandomDataImpl(); | |
271 randomData.reSeed(1000); | |
272 for (int i = 0; i = 1000; i++) { | |
273 value = randomData.nextLong(1, 1000000); | |
274 } | |
275 </pre> | |
276 </div> | |
277 | |
278 The following will produce a different random sequence each time it is | |
279 executed. | |
280 <div class="source"><pre> | |
281 RandomData randomData = new RandomDataImpl(); | |
282 randomData.reSeedSecure(1000); | |
283 for (int i = 0; i < 1000; i++) { | |
284 value = randomData.nextSecureLong(1, 1000000); | |
285 } | |
286 </pre> | |
287 </div> | |
288 </dd> | |
289 </dl> | |
290 </p> | |
291 </div> | |
292 <div class="section"><h3><a name="a2.3_Random_Vectors"></a>2.3 Random Vectors</h3> | |
293 <p> | |
294 Some algorithm requires random vectors instead of random scalars. When the | |
295 components of these vectors are uncorrelated, they may be generated simply | |
296 one at a time and packed together in the vector. The <a href="../apidocs/org/apache/commons/math/random/UncorrelatedRandomVectorGenerator.html"> | |
297 org.apache.commons.math.UncorrelatedRandomVectorGenerator</a> class | |
298 does however simplify this process by setting the mean and deviation of each | |
299 component once and generating complete vectors. When the components are correlated | |
300 however, generating them is much more difficult. The <a href="../apidocs/org/apache/commons/math/random/CorrelatedRandomVectorGenerator.html"> | |
301 org.apache.commons.math.CorrelatedRandomVectorGenerator</a> class | |
302 provides this service. In this case, the user must set up a complete covariance matrix | |
303 instead of a simple standard deviations vector. This matrix gathers both the variance | |
304 and the correlation information of the probability law. | |
305 </p> | |
306 <p> | |
307 The main use for correlated random vector generation is for Monte-Carlo | |
308 simulation of physical problems with several variables, for example to | |
309 generate error vectors to be added to a nominal vector. A particularly | |
310 common case is when the generated vector should be drawn from a <a href="http://en.wikipedia.org/wiki/Multivariate_normal_distribution" class="externalLink"> | |
311 Multivariate Normal Distribution</a>. | |
312 </p> | |
313 </div> | |
314 <div class="section"><h3><a name="a2.4_Random_Strings"></a>2.4 Random Strings</h3> | |
315 <p> | |
316 The methods <code>nextHexString</code> and <code>nextSecureHexString</code> | |
317 can be used to generate random strings of hexadecimal characters. Both | |
318 of these methods produce sequences of strings with good dispersion | |
319 properties. The difference between the two methods is that the second is | |
320 cryptographically secure. Specifically, the implementation of | |
321 <code>nextHexString(n)</code> in <code>RandomDataImpl</code> uses the | |
322 following simple algorithm to generate a string of <code>n</code> hex digits: | |
323 <ol type="1"><li>n/2+1 binary bytes are generated using the underlying Random</li> | |
324 <li>Each binary byte is translated into 2 hex digits</li> | |
325 </ol> | |
326 | |
327 The <code>RandomDataImpl</code> implementation of the "secure" version, | |
328 <code>nextSecureHexString</code> generates hex characters in 40-byte | |
329 "chunks" using a 3-step process: | |
330 <ol type="1"><li>20 random bytes are generated using the underlying | |
331 <code>SecureRandom.</code></li> | |
332 <li>SHA-1 hash is applied to yield a 20-byte binary digest.</li> | |
333 <li>Each byte of the binary digest is converted to 2 hex digits</li> | |
334 </ol> | |
335 | |
336 Similarly to the secure random number generation methods, | |
337 <code>nextSecureHexString</code> is <strong>much slower</strong> than | |
338 the non-secure version. It should be used only for applications such as | |
339 generating unique session or transaction ids where predictability of | |
340 subsequent ids based on observation of previous values is a security | |
341 concern. If all that is needed is an even distribution of hex characters | |
342 in the generated strings, the non-secure method should be used. | |
343 </p> | |
344 </div> | |
345 <div class="section"><h3><a name="a2.5_Random_permutations_combinations_sampling"></a>2.5 Random permutations, combinations, sampling</h3> | |
346 <p> | |
347 To select a random sample of objects in a collection, you can use the | |
348 <code>nextSample</code> method in the <code>RandomData</code> interface. | |
349 Specifically, if <code>c</code> is a collection containing at least | |
350 <code>k</code> objects, and <code>randomData</code> is a | |
351 <code>RandomData</code> instance <code>randomData.nextSample(c, k)</code> | |
352 will return an <code>object[]</code> array of length <code>k</code> | |
353 consisting of elements randomly selected from the collection. If | |
354 <code>c</code> contains duplicate references, there may be duplicate | |
355 references in the returned array; otherwise returned elements will be | |
356 unique -- i.e., the sampling is without replacement among the object | |
357 references in the collection. </p> | |
358 <p> | |
359 If <code>randomData</code> is a <code>RandomData</code> instance, and | |
360 <code>n</code> and <code>k</code> are integers with | |
361 <code> k <= n</code>, then | |
362 <code>randomData.nextPermutation(n, k)</code> returns an <code>int[]</code> | |
363 array of length <code>k</code> whose whose entries are selected randomly, | |
364 without repetition, from the integers <code>0</code> through | |
365 <code>n-1</code> (inclusive), i.e., | |
366 <code>randomData.nextPermutation(n, k)</code> returns a random | |
367 permutation of <code>n</code> taken <code>k</code> at a time. | |
368 </p> | |
369 </div> | |
370 <div class="section"><h3><a name="a2.6_Generating_data_like_an_input_file"></a>2.6 Generating data 'like' an input file</h3> | |
371 <p> | |
372 Using the <code>ValueServer</code> class, you can generate data based on | |
373 the values in an input file in one of two ways: | |
374 <dl><dt>Replay Mode</dt> | |
375 <dd> The following code will read data from <code>url</code> | |
376 (a <code>java.net.URL</code> instance), cycling through the values in the | |
377 file in sequence, reopening and starting at the beginning again when all | |
378 values have been read. | |
379 <div class="source"><pre> | |
380 ValueServer vs = new ValueServer(); | |
381 vs.setValuesFileURL(url); | |
382 vs.setMode(ValueServer.REPLAY_MODE); | |
383 vs.resetReplayFile(); | |
384 double value = vs.getNext(); | |
385 // ...Generate and use more values... | |
386 vs.closeReplayFile(); | |
387 </pre> | |
388 </div> | |
389 | |
390 The values in the file are not stored in memory, so it does not matter | |
391 how large the file is, but you do need to explicitly close the file | |
392 as above. The expected file format is \n -delimited (i.e. one per line) | |
393 strings representing valid floating point numbers. | |
394 </dd> | |
395 <dt>Digest Mode</dt> | |
396 <dd>When used in Digest Mode, the ValueServer reads the entire input file | |
397 and estimates a probability density function based on data from the file. | |
398 The estimation method is essentially the | |
399 <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html" class="externalLink"> | |
400 Variable Kernel Method</a> with Gaussian smoothing. Once the density | |
401 has been estimated, <code>getNext()</code> returns random values whose | |
402 probability distribution matches the empirical distribution -- i.e., if | |
403 you generate a large number of such values, their distribution should | |
404 "look like" the distribution of the values in the input file. The values | |
405 are not stored in memory in this case either, so there is no limit to the | |
406 size of the input file. Here is an example: | |
407 <div class="source"><pre> | |
408 ValueServer vs = new ValueServer(); | |
409 vs.setValuesFileURL(url); | |
410 vs.setMode(ValueServer.DIGEST_MODE); | |
411 vs.computeDistribution(500); //Read file and estimate distribution using 500 bins | |
412 double value = vs.getNext(); | |
413 // ...Generate and use more values... | |
414 </pre> | |
415 </div> | |
416 | |
417 See the javadoc for <code>ValueServer</code> and | |
418 <code>EmpiricalDistribution</code> for more details. Note that | |
419 <code>computeDistribution()</code> opens and closes the input file | |
420 by itself. | |
421 </dd> | |
422 </dl> | |
423 </p> | |
424 </div> | |
425 <div class="section"><h3><a name="a2.7_PRNG_Pluggability"></a>2.7 PRNG Pluggability</h3> | |
426 <p> | |
427 To enable alternative PRNGs to be "plugged in" to the commons-math data | |
428 generation utilities and to provide a generic means to replace | |
429 <code>java.util.Random</code> in applications, a random generator | |
430 adaptor framework has been added to commons-math. The | |
431 <a href="../apidocs/org/apache/commons/math/random/RandomGenerator.html"> | |
432 org.apache.commons.math.RandomGenerator</a> interface abstracts the public | |
433 interface of <code>java.util.Random</code> and any implementation of this | |
434 interface can be used as the source of random data for the commons-math | |
435 data generation classes. An abstract base class, | |
436 <a href="../apidocs/org/apache/commons/math/random/AbstractRandomGenerator.html"> | |
437 org.apache.commons.math.AbstractRandomGenerator</a> is provided to make | |
438 implementation easier. This class provides default implementations of | |
439 "derived" data generation methods based on the primitive, | |
440 <code>nextDouble().</code> To support generic replacement of | |
441 <code>java.util.Random</code>, the | |
442 <a href="../apidocs/org/apache/commons/math/random/RandomAdaptor.html"> | |
443 org.apache.commons.math.RandomAdaptor</a> class is provided, which | |
444 extends <code>java.util.Random</code> and wraps and delegates calls to | |
445 a <code>RandomGenerator</code> instance. | |
446 </p> | |
447 <p> | |
448 Examples: | |
449 <dl><dt>Create a RandomGenerator based on RngPack's Mersenne Twister</dt> | |
450 <dd>To create a RandomGenerator using the RngPack Mersenne Twister PRNG | |
451 as the source of randomness, extend <code>AbstractRandomGenerator</code> | |
452 overriding the derived methods that the RngPack implementation provides: | |
453 <div class="source"><pre> | |
454 import edu.cornell.lassp.houle.RngPack.RanMT; | |
455 /** | |
456 * AbstractRandomGenerator based on RngPack RanMT generator. | |
457 */ | |
458 public class RngPackGenerator extends AbstractRandomGenerator { | |
459 | |
460 private RanMT random = new RanMT(); | |
461 | |
462 public void setSeed(long seed) { | |
463 random = new RanMT(seed); | |
464 } | |
465 | |
466 public double nextDouble() { | |
467 return random.raw(); | |
468 } | |
469 | |
470 public double nextGaussian() { | |
471 return random.gaussian(); | |
472 } | |
473 | |
474 public int nextInt(int n) { | |
475 return random.choose(n); | |
476 } | |
477 | |
478 public boolean nextBoolean() { | |
479 return random.coin(); | |
480 } | |
481 } | |
482 </pre> | |
483 </div> | |
484 </dd> | |
485 <dt>Use the Mersenne Twister RandomGenerator in place of | |
486 <code>java.util.Random</code> in <code>RandomData</code></dt> | |
487 <dd><div class="source"><pre> | |
488 RandomData randomData = new RandomDataImpl(new RngPackGenerator()); | |
489 </pre> | |
490 </div> | |
491 </dd> | |
492 <dt>Create an adaptor instance based on the Mersenne Twister generator | |
493 that can be used in place of a <code>Random</code></dt> | |
494 <dd><div class="source"><pre> | |
495 RandomGenerator generator = new RngPackGenerator(); | |
496 Random random = RandomAdaptor.createAdaptor(generator); | |
497 // random can now be used in place of a Random instance, data generation | |
498 // calls will be delegated to the wrapped Mersenne Twister | |
499 </pre> | |
500 </div> | |
501 </dd> | |
502 </dl> | |
503 </p> | |
504 </div> | |
505 </div> | |
506 | |
507 </div> | |
508 </div> | |
509 <div class="clear"> | |
510 <hr/> | |
511 </div> | |
512 <div id="footer"> | |
513 <div class="xright">© | |
514 2003-2010 | |
515 | |
516 | |
517 | |
518 | |
519 | |
520 | |
521 | |
522 | |
523 | |
524 </div> | |
525 <div class="clear"> | |
526 <hr/> | |
527 </div> | |
528 </div> | |
529 </body> | |
530 </html> |