blob: fa06f4c03c8e52311023c8f82ece4a65d1d787d3 [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* stat.c -- statistical tests of random number sequences. */
2
3/*
4Copyright 1999, 2000 Free Software Foundation, Inc.
5
6This file is part of the GNU MP Library test suite.
7
8The GNU MP Library test suite is free software; you can redistribute it
9and/or modify it under the terms of the GNU General Public License as
10published by the Free Software Foundation; either version 3 of the License,
11or (at your option) any later version.
12
13The GNU MP Library test suite is distributed in the hope that it will be
14useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16Public License for more details.
17
18You should have received a copy of the GNU General Public License along with
19the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
20
21/* Examples:
22
23 $ gen 1000 | stat
24Test 1000 real numbers.
25
26 $ gen 30000 | stat -2 1000
27Test 1000 real numbers 30 times and then test the 30 results in a
28``second level''.
29
30 $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
31Test 1000 integers 0 <= X <= 2^32-1.
32
33 $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
34Test 1000 integers 0 <= X <= 2^34-1.
35
36*/
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <unistd.h>
41#include <math.h>
42#include "gmpstat.h"
43
44#if !HAVE_DECL_OPTARG
45extern char *optarg;
46extern int optind, opterr;
47#endif
48
49#define FVECSIZ (100000L)
50
51int g_debug = 0;
52
53static void
54print_ks_results (mpf_t f_p, mpf_t f_p_prob,
55 mpf_t f_m, mpf_t f_m_prob,
56 FILE *fp)
57{
58 double p, pp, m, mp;
59
60 p = mpf_get_d (f_p);
61 m = mpf_get_d (f_m);
62 pp = mpf_get_d (f_p_prob);
63 mp = mpf_get_d (f_m_prob);
64
65 fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
66 fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
67}
68
69static void
70print_x2_table (unsigned int v, FILE *fp)
71{
72 double t[7];
73 int f;
74
75
76 fprintf (fp, "Chi-square table for v=%u\n", v);
77 fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
78 x2_table (t, v);
79 for (f = 0; f < 7; f++)
80 fprintf (fp, "%.2f\t", t[f]);
81 fputs ("\n", fp);
82}
83
84
85
86/* Pks () -- Distribution function for KS results with a big n (like 1000
87 or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
88/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */
89
90static void
91Pks (mpf_t p, mpf_t x)
92{
93 double dt; /* temp double */
94
95 mpf_set (p, x);
96 mpf_mul (p, p, p); /* p = x^2 */
97 mpf_mul_ui (p, p, 2); /* p = 2*x^2 */
98 mpf_neg (p, p); /* p = -2*x^2 */
99 /* No pow() in gmp. Use doubles. */
100 /* FIXME: Use exp()? */
101 dt = pow (M_E, mpf_get_d (p));
102 mpf_set_d (p, dt);
103 mpf_ui_sub (p, 1, p);
104}
105
106/* f_freq() -- frequency test on real numbers 0<=f<1*/
107static void
108f_freq (const unsigned l1runs, const unsigned l2runs,
109 mpf_t fvec[], const unsigned long n)
110{
111 unsigned f;
112 mpf_t f_p, f_p_prob;
113 mpf_t f_m, f_m_prob;
114 mpf_t *l1res; /* level 1 result array */
115
116 mpf_init (f_p); mpf_init (f_m);
117 mpf_init (f_p_prob); mpf_init (f_m_prob);
118
119
120 /* Allocate space for 1st level results. */
121 l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
122 if (NULL == l1res)
123 {
124 fprintf (stderr, "stat: malloc failure\n");
125 exit (1);
126 }
127
128 printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
129 printf ("\tKp\t\tKm\n");
130
131 for (f = 0; f < l2runs; f++)
132 {
133 /* f_printvec (fvec, n); */
134 mpf_freqt (f_p, f_m, fvec + f * n, n);
135
136 /* what's the probability of getting these results? */
137 ks_table (f_p_prob, f_p, n);
138 ks_table (f_m_prob, f_m, n);
139
140 if (l1runs == 0)
141 {
142 /*printf ("%u:\t", f + 1);*/
143 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
144 }
145 else
146 {
147 /* save result */
148 mpf_init_set (l1res[f], f_p);
149 mpf_init_set (l1res[f + l2runs], f_m);
150 }
151 }
152
153 /* Now, apply the KS test on the results from the 1st level rounds
154 with the distribution
155 F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */
156
157 if (l1runs != 0)
158 {
159 /*printf ("-------------------------------------\n");*/
160
161 /* The Kp's. */
162 ks (f_p, f_m, l1res, Pks, l2runs);
163 ks_table (f_p_prob, f_p, l2runs);
164 ks_table (f_m_prob, f_m, l2runs);
165 printf ("Kp:\t");
166 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
167
168 /* The Km's. */
169 ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
170 ks_table (f_p_prob, f_p, l2runs);
171 ks_table (f_m_prob, f_m, l2runs);
172 printf ("Km:\t");
173 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
174 }
175
176 mpf_clear (f_p); mpf_clear (f_m);
177 mpf_clear (f_p_prob); mpf_clear (f_m_prob);
178 free (l1res);
179}
180
181/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
182 0<=z<=MAX */
183static void
184z_freq (const unsigned l1runs,
185 const unsigned l2runs,
186 mpz_t zvec[],
187 const unsigned long n,
188 unsigned int max)
189{
190 mpf_t V; /* result */
191 double d_V; /* result as a double */
192
193 mpf_init (V);
194
195
196 printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
197 print_x2_table (max, stdout);
198
199 mpz_freqt (V, zvec, max, n);
200
201 d_V = mpf_get_d (V);
202 printf ("V = %.2f (n = %lu)\n", d_V, n);
203
204 mpf_clear (V);
205}
206
207unsigned int stat_debug = 0;
208
209int
210main (argc, argv)
211 int argc;
212 char *argv[];
213{
214 const char usage[] =
215 "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
216 " file filename\n" \
217 " -2 runs perform 2-level test with RUNS runs on 1st level\n" \
218 " -d increase debugging level\n" \
219 " -i max input is integers 0 <= Z <= MAX\n" \
220 " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \
221 " maximum value when converting real numbers to integers\n" \
222 "";
223
224 mpf_t fvec[FVECSIZ];
225 mpz_t zvec[FVECSIZ];
226 unsigned long int f, n, vecentries;
227 char *filen;
228 FILE *fp;
229 int c;
230 int omitoutput = 0;
231 int realinput = -1; /* 1: input is real numbers 0<=R<1;
232 0: input is integers 0 <= Z <= MAX. */
233 long l1runs = 0, /* 1st level runs */
234 l2runs = 1; /* 2nd level runs */
235 mpf_t f_temp;
236 mpz_t z_imax; /* max value when converting between
237 real number and integer. */
238 mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for
239 convenience */
240 mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for
241 convenience */
242
243
244 mpf_init (f_temp);
245 mpz_init_set_ui (z_imax, 0x7fffffff);
246 mpf_init (f_imax_plus1);
247 mpf_init (f_imax_minus1);
248
249 while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
250 switch (c)
251 {
252 case '2':
253 l1runs = atol (optarg);
254 l2runs = -1; /* set later on */
255 break;
256 case 'd': /* increase debug level */
257 stat_debug++;
258 break;
259 case 'i':
260 if (1 == realinput)
261 {
262 fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
263 exit (1);
264 }
265 if (mpz_set_str (z_imax, optarg, 0))
266 {
267 fprintf (stderr, "stat: bad max value %s\n", optarg);
268 exit (1);
269 }
270 realinput = 0;
271 break;
272 case 'r':
273 if (0 == realinput)
274 {
275 fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
276 exit (1);
277 }
278 if (mpz_set_str (z_imax, optarg, 0))
279 {
280 fprintf (stderr, "stat: bad max value %s\n", optarg);
281 exit (1);
282 }
283 realinput = 1;
284 break;
285 case 'o':
286 omitoutput = atoi (optarg);
287 break;
288 case '?':
289 default:
290 fputs (usage, stderr);
291 exit (1);
292 }
293 argc -= optind;
294 argv += optind;
295
296 if (argc < 1)
297 fp = stdin;
298 else
299 filen = argv[0];
300
301 if (fp != stdin)
302 if (NULL == (fp = fopen (filen, "r")))
303 {
304 perror (filen);
305 exit (1);
306 }
307
308 if (-1 == realinput)
309 realinput = 1; /* default is real numbers */
310
311 /* read file and fill appropriate vec */
312 if (1 == realinput) /* real input */
313 {
314 for (f = 0; f < FVECSIZ ; f++)
315 {
316 mpf_init (fvec[f]);
317 if (!mpf_inp_str (fvec[f], fp, 10))
318 break;
319 }
320 }
321 else /* integer input */
322 {
323 for (f = 0; f < FVECSIZ ; f++)
324 {
325 mpz_init (zvec[f]);
326 if (!mpz_inp_str (zvec[f], fp, 10))
327 break;
328 }
329 }
330 vecentries = n = f; /* number of entries read */
331 fclose (fp);
332
333 if (FVECSIZ == f)
334 fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
335 "of only %ld entries. sorry.\n", FVECSIZ);
336
337 printf ("Got %lu numbers.\n", n);
338
339 /* convert and fill the other vec */
340 /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
341 0<=z<=imax and we are truncating all fractions when
342 converting float to int, we have to add 1 to imax.*/
343 mpf_set_z (f_imax_plus1, z_imax);
344 mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
345 if (1 == realinput) /* fill zvec[] */
346 {
347 for (f = 0; f < n; f++)
348 {
349 mpf_mul (f_temp, fvec[f], f_imax_plus1);
350 mpz_init (zvec[f]);
351 mpz_set_f (zvec[f], f_temp); /* truncating fraction */
352 if (stat_debug > 1)
353 {
354 mpz_out_str (stderr, 10, zvec[f]);
355 fputs ("\n", stderr);
356 }
357 }
358 }
359 else /* integer input; fill fvec[] */
360 {
361 /* mpf_set_z (f_imax_minus1, z_imax);
362 mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
363 for (f = 0; f < n; f++)
364 {
365 mpf_init (fvec[f]);
366 mpf_set_z (fvec[f], zvec[f]);
367 mpf_div (fvec[f], fvec[f], f_imax_plus1);
368 if (stat_debug > 1)
369 {
370 mpf_out_str (stderr, 10, 0, fvec[f]);
371 fputs ("\n", stderr);
372 }
373 }
374 }
375
376 /* 2 levels? */
377 if (1 != l2runs)
378 {
379 l2runs = n / l1runs;
380 printf ("Doing %ld second level rounds "\
381 "with %ld entries in each round", l2runs, l1runs);
382 if (n % l1runs)
383 printf (" (discarding %ld entr%s)", n % l1runs,
384 n % l1runs == 1 ? "y" : "ies");
385 puts (".");
386 n = l1runs;
387 }
388
389#ifndef DONT_FFREQ
390 f_freq (l1runs, l2runs, fvec, n);
391#endif
392#ifdef DO_ZFREQ
393 z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
394#endif
395
396 mpf_clear (f_temp); mpz_clear (z_imax);
397 mpf_clear (f_imax_plus1);
398 mpf_clear (f_imax_minus1);
399 for (f = 0; f < vecentries; f++)
400 {
401 mpf_clear (fvec[f]);
402 mpz_clear (zvec[f]);
403 }
404
405 return 0;
406}