Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* stat.c -- statistical tests of random number sequences. */ |
| 2 | |
| 3 | /* |
| 4 | Copyright 1999, 2000 Free Software Foundation, Inc. |
| 5 | |
| 6 | This file is part of the GNU MP Library test suite. |
| 7 | |
| 8 | The GNU MP Library test suite is free software; you can redistribute it |
| 9 | and/or modify it under the terms of the GNU General Public License as |
| 10 | published by the Free Software Foundation; either version 3 of the License, |
| 11 | or (at your option) any later version. |
| 12 | |
| 13 | The GNU MP Library test suite is distributed in the hope that it will be |
| 14 | useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
| 16 | Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License along with |
| 19 | the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ |
| 20 | |
| 21 | /* Examples: |
| 22 | |
| 23 | $ gen 1000 | stat |
| 24 | Test 1000 real numbers. |
| 25 | |
| 26 | $ gen 30000 | stat -2 1000 |
| 27 | Test 1000 real numbers 30 times and then test the 30 results in a |
| 28 | ``second level''. |
| 29 | |
| 30 | $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff |
| 31 | Test 1000 integers 0 <= X <= 2^32-1. |
| 32 | |
| 33 | $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff |
| 34 | Test 1000 integers 0 <= X <= 2^34-1. |
| 35 | |
| 36 | */ |
| 37 | |
| 38 | #include <stdio.h> |
| 39 | #include <stdlib.h> |
| 40 | #include <unistd.h> |
| 41 | #include <math.h> |
| 42 | #include "gmpstat.h" |
| 43 | |
| 44 | #if !HAVE_DECL_OPTARG |
| 45 | extern char *optarg; |
| 46 | extern int optind, opterr; |
| 47 | #endif |
| 48 | |
| 49 | #define FVECSIZ (100000L) |
| 50 | |
| 51 | int g_debug = 0; |
| 52 | |
| 53 | static void |
| 54 | print_ks_results (mpf_t f_p, mpf_t f_p_prob, |
| 55 | mpf_t f_m, mpf_t f_m_prob, |
| 56 | FILE *fp) |
| 57 | { |
| 58 | double p, pp, m, mp; |
| 59 | |
| 60 | p = mpf_get_d (f_p); |
| 61 | m = mpf_get_d (f_m); |
| 62 | pp = mpf_get_d (f_p_prob); |
| 63 | mp = mpf_get_d (f_m_prob); |
| 64 | |
| 65 | fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0); |
| 66 | fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0); |
| 67 | } |
| 68 | |
| 69 | static void |
| 70 | print_x2_table (unsigned int v, FILE *fp) |
| 71 | { |
| 72 | double t[7]; |
| 73 | int f; |
| 74 | |
| 75 | |
| 76 | fprintf (fp, "Chi-square table for v=%u\n", v); |
| 77 | fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n"); |
| 78 | x2_table (t, v); |
| 79 | for (f = 0; f < 7; f++) |
| 80 | fprintf (fp, "%.2f\t", t[f]); |
| 81 | fputs ("\n", fp); |
| 82 | } |
| 83 | |
| 84 | |
| 85 | |
| 86 | /* Pks () -- Distribution function for KS results with a big n (like 1000 |
| 87 | or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */ |
| 88 | /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */ |
| 89 | |
| 90 | static void |
| 91 | Pks (mpf_t p, mpf_t x) |
| 92 | { |
| 93 | double dt; /* temp double */ |
| 94 | |
| 95 | mpf_set (p, x); |
| 96 | mpf_mul (p, p, p); /* p = x^2 */ |
| 97 | mpf_mul_ui (p, p, 2); /* p = 2*x^2 */ |
| 98 | mpf_neg (p, p); /* p = -2*x^2 */ |
| 99 | /* No pow() in gmp. Use doubles. */ |
| 100 | /* FIXME: Use exp()? */ |
| 101 | dt = pow (M_E, mpf_get_d (p)); |
| 102 | mpf_set_d (p, dt); |
| 103 | mpf_ui_sub (p, 1, p); |
| 104 | } |
| 105 | |
| 106 | /* f_freq() -- frequency test on real numbers 0<=f<1*/ |
| 107 | static void |
| 108 | f_freq (const unsigned l1runs, const unsigned l2runs, |
| 109 | mpf_t fvec[], const unsigned long n) |
| 110 | { |
| 111 | unsigned f; |
| 112 | mpf_t f_p, f_p_prob; |
| 113 | mpf_t f_m, f_m_prob; |
| 114 | mpf_t *l1res; /* level 1 result array */ |
| 115 | |
| 116 | mpf_init (f_p); mpf_init (f_m); |
| 117 | mpf_init (f_p_prob); mpf_init (f_m_prob); |
| 118 | |
| 119 | |
| 120 | /* Allocate space for 1st level results. */ |
| 121 | l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t)); |
| 122 | if (NULL == l1res) |
| 123 | { |
| 124 | fprintf (stderr, "stat: malloc failure\n"); |
| 125 | exit (1); |
| 126 | } |
| 127 | |
| 128 | printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n"); |
| 129 | printf ("\tKp\t\tKm\n"); |
| 130 | |
| 131 | for (f = 0; f < l2runs; f++) |
| 132 | { |
| 133 | /* f_printvec (fvec, n); */ |
| 134 | mpf_freqt (f_p, f_m, fvec + f * n, n); |
| 135 | |
| 136 | /* what's the probability of getting these results? */ |
| 137 | ks_table (f_p_prob, f_p, n); |
| 138 | ks_table (f_m_prob, f_m, n); |
| 139 | |
| 140 | if (l1runs == 0) |
| 141 | { |
| 142 | /*printf ("%u:\t", f + 1);*/ |
| 143 | print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); |
| 144 | } |
| 145 | else |
| 146 | { |
| 147 | /* save result */ |
| 148 | mpf_init_set (l1res[f], f_p); |
| 149 | mpf_init_set (l1res[f + l2runs], f_m); |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | /* Now, apply the KS test on the results from the 1st level rounds |
| 154 | with the distribution |
| 155 | F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */ |
| 156 | |
| 157 | if (l1runs != 0) |
| 158 | { |
| 159 | /*printf ("-------------------------------------\n");*/ |
| 160 | |
| 161 | /* The Kp's. */ |
| 162 | ks (f_p, f_m, l1res, Pks, l2runs); |
| 163 | ks_table (f_p_prob, f_p, l2runs); |
| 164 | ks_table (f_m_prob, f_m, l2runs); |
| 165 | printf ("Kp:\t"); |
| 166 | print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); |
| 167 | |
| 168 | /* The Km's. */ |
| 169 | ks (f_p, f_m, l1res + l2runs, Pks, l2runs); |
| 170 | ks_table (f_p_prob, f_p, l2runs); |
| 171 | ks_table (f_m_prob, f_m, l2runs); |
| 172 | printf ("Km:\t"); |
| 173 | print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); |
| 174 | } |
| 175 | |
| 176 | mpf_clear (f_p); mpf_clear (f_m); |
| 177 | mpf_clear (f_p_prob); mpf_clear (f_m_prob); |
| 178 | free (l1res); |
| 179 | } |
| 180 | |
| 181 | /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers |
| 182 | 0<=z<=MAX */ |
| 183 | static void |
| 184 | z_freq (const unsigned l1runs, |
| 185 | const unsigned l2runs, |
| 186 | mpz_t zvec[], |
| 187 | const unsigned long n, |
| 188 | unsigned int max) |
| 189 | { |
| 190 | mpf_t V; /* result */ |
| 191 | double d_V; /* result as a double */ |
| 192 | |
| 193 | mpf_init (V); |
| 194 | |
| 195 | |
| 196 | printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max); |
| 197 | print_x2_table (max, stdout); |
| 198 | |
| 199 | mpz_freqt (V, zvec, max, n); |
| 200 | |
| 201 | d_V = mpf_get_d (V); |
| 202 | printf ("V = %.2f (n = %lu)\n", d_V, n); |
| 203 | |
| 204 | mpf_clear (V); |
| 205 | } |
| 206 | |
| 207 | unsigned int stat_debug = 0; |
| 208 | |
| 209 | int |
| 210 | main (argc, argv) |
| 211 | int argc; |
| 212 | char *argv[]; |
| 213 | { |
| 214 | const char usage[] = |
| 215 | "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \ |
| 216 | " file filename\n" \ |
| 217 | " -2 runs perform 2-level test with RUNS runs on 1st level\n" \ |
| 218 | " -d increase debugging level\n" \ |
| 219 | " -i max input is integers 0 <= Z <= MAX\n" \ |
| 220 | " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \ |
| 221 | " maximum value when converting real numbers to integers\n" \ |
| 222 | ""; |
| 223 | |
| 224 | mpf_t fvec[FVECSIZ]; |
| 225 | mpz_t zvec[FVECSIZ]; |
| 226 | unsigned long int f, n, vecentries; |
| 227 | char *filen; |
| 228 | FILE *fp; |
| 229 | int c; |
| 230 | int omitoutput = 0; |
| 231 | int realinput = -1; /* 1: input is real numbers 0<=R<1; |
| 232 | 0: input is integers 0 <= Z <= MAX. */ |
| 233 | long l1runs = 0, /* 1st level runs */ |
| 234 | l2runs = 1; /* 2nd level runs */ |
| 235 | mpf_t f_temp; |
| 236 | mpz_t z_imax; /* max value when converting between |
| 237 | real number and integer. */ |
| 238 | mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for |
| 239 | convenience */ |
| 240 | mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for |
| 241 | convenience */ |
| 242 | |
| 243 | |
| 244 | mpf_init (f_temp); |
| 245 | mpz_init_set_ui (z_imax, 0x7fffffff); |
| 246 | mpf_init (f_imax_plus1); |
| 247 | mpf_init (f_imax_minus1); |
| 248 | |
| 249 | while ((c = getopt (argc, argv, "d2:i:r:")) != -1) |
| 250 | switch (c) |
| 251 | { |
| 252 | case '2': |
| 253 | l1runs = atol (optarg); |
| 254 | l2runs = -1; /* set later on */ |
| 255 | break; |
| 256 | case 'd': /* increase debug level */ |
| 257 | stat_debug++; |
| 258 | break; |
| 259 | case 'i': |
| 260 | if (1 == realinput) |
| 261 | { |
| 262 | fputs ("stat: options -i and -r are mutually exclusive\n", stderr); |
| 263 | exit (1); |
| 264 | } |
| 265 | if (mpz_set_str (z_imax, optarg, 0)) |
| 266 | { |
| 267 | fprintf (stderr, "stat: bad max value %s\n", optarg); |
| 268 | exit (1); |
| 269 | } |
| 270 | realinput = 0; |
| 271 | break; |
| 272 | case 'r': |
| 273 | if (0 == realinput) |
| 274 | { |
| 275 | fputs ("stat: options -i and -r are mutually exclusive\n", stderr); |
| 276 | exit (1); |
| 277 | } |
| 278 | if (mpz_set_str (z_imax, optarg, 0)) |
| 279 | { |
| 280 | fprintf (stderr, "stat: bad max value %s\n", optarg); |
| 281 | exit (1); |
| 282 | } |
| 283 | realinput = 1; |
| 284 | break; |
| 285 | case 'o': |
| 286 | omitoutput = atoi (optarg); |
| 287 | break; |
| 288 | case '?': |
| 289 | default: |
| 290 | fputs (usage, stderr); |
| 291 | exit (1); |
| 292 | } |
| 293 | argc -= optind; |
| 294 | argv += optind; |
| 295 | |
| 296 | if (argc < 1) |
| 297 | fp = stdin; |
| 298 | else |
| 299 | filen = argv[0]; |
| 300 | |
| 301 | if (fp != stdin) |
| 302 | if (NULL == (fp = fopen (filen, "r"))) |
| 303 | { |
| 304 | perror (filen); |
| 305 | exit (1); |
| 306 | } |
| 307 | |
| 308 | if (-1 == realinput) |
| 309 | realinput = 1; /* default is real numbers */ |
| 310 | |
| 311 | /* read file and fill appropriate vec */ |
| 312 | if (1 == realinput) /* real input */ |
| 313 | { |
| 314 | for (f = 0; f < FVECSIZ ; f++) |
| 315 | { |
| 316 | mpf_init (fvec[f]); |
| 317 | if (!mpf_inp_str (fvec[f], fp, 10)) |
| 318 | break; |
| 319 | } |
| 320 | } |
| 321 | else /* integer input */ |
| 322 | { |
| 323 | for (f = 0; f < FVECSIZ ; f++) |
| 324 | { |
| 325 | mpz_init (zvec[f]); |
| 326 | if (!mpz_inp_str (zvec[f], fp, 10)) |
| 327 | break; |
| 328 | } |
| 329 | } |
| 330 | vecentries = n = f; /* number of entries read */ |
| 331 | fclose (fp); |
| 332 | |
| 333 | if (FVECSIZ == f) |
| 334 | fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\ |
| 335 | "of only %ld entries. sorry.\n", FVECSIZ); |
| 336 | |
| 337 | printf ("Got %lu numbers.\n", n); |
| 338 | |
| 339 | /* convert and fill the other vec */ |
| 340 | /* since fvec[] contains 0<=f<1 and we want ivec[] to contain |
| 341 | 0<=z<=imax and we are truncating all fractions when |
| 342 | converting float to int, we have to add 1 to imax.*/ |
| 343 | mpf_set_z (f_imax_plus1, z_imax); |
| 344 | mpf_add_ui (f_imax_plus1, f_imax_plus1, 1); |
| 345 | if (1 == realinput) /* fill zvec[] */ |
| 346 | { |
| 347 | for (f = 0; f < n; f++) |
| 348 | { |
| 349 | mpf_mul (f_temp, fvec[f], f_imax_plus1); |
| 350 | mpz_init (zvec[f]); |
| 351 | mpz_set_f (zvec[f], f_temp); /* truncating fraction */ |
| 352 | if (stat_debug > 1) |
| 353 | { |
| 354 | mpz_out_str (stderr, 10, zvec[f]); |
| 355 | fputs ("\n", stderr); |
| 356 | } |
| 357 | } |
| 358 | } |
| 359 | else /* integer input; fill fvec[] */ |
| 360 | { |
| 361 | /* mpf_set_z (f_imax_minus1, z_imax); |
| 362 | mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/ |
| 363 | for (f = 0; f < n; f++) |
| 364 | { |
| 365 | mpf_init (fvec[f]); |
| 366 | mpf_set_z (fvec[f], zvec[f]); |
| 367 | mpf_div (fvec[f], fvec[f], f_imax_plus1); |
| 368 | if (stat_debug > 1) |
| 369 | { |
| 370 | mpf_out_str (stderr, 10, 0, fvec[f]); |
| 371 | fputs ("\n", stderr); |
| 372 | } |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | /* 2 levels? */ |
| 377 | if (1 != l2runs) |
| 378 | { |
| 379 | l2runs = n / l1runs; |
| 380 | printf ("Doing %ld second level rounds "\ |
| 381 | "with %ld entries in each round", l2runs, l1runs); |
| 382 | if (n % l1runs) |
| 383 | printf (" (discarding %ld entr%s)", n % l1runs, |
| 384 | n % l1runs == 1 ? "y" : "ies"); |
| 385 | puts ("."); |
| 386 | n = l1runs; |
| 387 | } |
| 388 | |
| 389 | #ifndef DONT_FFREQ |
| 390 | f_freq (l1runs, l2runs, fvec, n); |
| 391 | #endif |
| 392 | #ifdef DO_ZFREQ |
| 393 | z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax)); |
| 394 | #endif |
| 395 | |
| 396 | mpf_clear (f_temp); mpz_clear (z_imax); |
| 397 | mpf_clear (f_imax_plus1); |
| 398 | mpf_clear (f_imax_minus1); |
| 399 | for (f = 0; f < vecentries; f++) |
| 400 | { |
| 401 | mpf_clear (fvec[f]); |
| 402 | mpz_clear (zvec[f]); |
| 403 | } |
| 404 | |
| 405 | return 0; |
| 406 | } |