Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* Generate perfect square testing data. |
| 2 | |
| 3 | Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU MP Library. |
| 6 | |
| 7 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | it under the terms of either: |
| 9 | |
| 10 | * the GNU Lesser General Public License as published by the Free |
| 11 | Software Foundation; either version 3 of the License, or (at your |
| 12 | option) any later version. |
| 13 | |
| 14 | or |
| 15 | |
| 16 | * the GNU General Public License as published by the Free Software |
| 17 | Foundation; either version 2 of the License, or (at your option) any |
| 18 | later version. |
| 19 | |
| 20 | or both in parallel, as here. |
| 21 | |
| 22 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | for more details. |
| 26 | |
| 27 | You should have received copies of the GNU General Public License and the |
| 28 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | see https://www.gnu.org/licenses/. */ |
| 30 | |
| 31 | #include <stdio.h> |
| 32 | #include <stdlib.h> |
| 33 | |
| 34 | #include "bootstrap.c" |
| 35 | |
| 36 | |
| 37 | /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1 |
| 38 | (plus a PERFSQR_PP modulus), and generate tables indicating quadratic |
| 39 | residues and non-residues modulo small factors of that modulus. |
| 40 | |
| 41 | For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That |
| 42 | function exists specifically because 2^24-1 and 2^48-1 have nice sets of |
| 43 | prime factors. For other limb sizes it's considered, but if it doesn't |
| 44 | have good factors then mpn_mod_1 will be used instead. |
| 45 | |
| 46 | When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a |
| 47 | selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb, |
| 48 | with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <= |
| 49 | GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its |
| 50 | calculation within a single limb. |
| 51 | |
| 52 | In either case primes can be combined to make divisors. The table data |
| 53 | then effectively indicates remainders which are quadratic residues mod |
| 54 | all the primes. This sort of combining reduces the number of steps |
| 55 | needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time. |
| 56 | Nothing is gained or lost in terms of detections, the same total fraction |
| 57 | of non-residues will be identified. |
| 58 | |
| 59 | Nothing particularly sophisticated is attempted for combining factors to |
| 60 | make divisors. This is probably a kind of knapsack problem so it'd be |
| 61 | too hard to attempt anything completely general. For the usual 32 and 64 |
| 62 | bit limbs we get a good enough result just pairing the biggest and |
| 63 | smallest which fit together, repeatedly. |
| 64 | |
| 65 | Another aim is to get powerful combinations, ie. divisors which identify |
| 66 | biggest fraction of non-residues, and have those run first. Again for |
| 67 | the usual 32 and 64 bits it seems good enough just to pair for big |
| 68 | divisors then sort according to the resulting fraction of non-residues |
| 69 | identified. |
| 70 | |
| 71 | Also in this program, a table sq_res_0x100 of residues modulo 256 is |
| 72 | generated. This simply fills bits into limbs of the appropriate |
| 73 | build-time GMP_LIMB_BITS each. |
| 74 | |
| 75 | */ |
| 76 | |
| 77 | |
| 78 | /* Normally we aren't using const in gen*.c programs, so as not to have to |
| 79 | bother figuring out if it works, but using it with f_cmp_divisor and |
| 80 | f_cmp_fraction avoids warnings from the qsort calls. */ |
| 81 | |
| 82 | /* Same tests as gmp.h. */ |
| 83 | #if defined (__STDC__) \ |
| 84 | || defined (__cplusplus) \ |
| 85 | || defined (_AIX) \ |
| 86 | || defined (__DECC) \ |
| 87 | || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ |
| 88 | || defined (_MSC_VER) \ |
| 89 | || defined (_WIN32) |
| 90 | #define HAVE_CONST 1 |
| 91 | #endif |
| 92 | |
| 93 | #if ! HAVE_CONST |
| 94 | #define const |
| 95 | #endif |
| 96 | |
| 97 | |
| 98 | mpz_t *sq_res_0x100; /* table of limbs */ |
| 99 | int nsq_res_0x100; /* elements in sq_res_0x100 array */ |
| 100 | int sq_res_0x100_num; /* squares in sq_res_0x100 */ |
| 101 | double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */ |
| 102 | |
| 103 | int mod34_bits; /* 3*GMP_NUMB_BITS/4 */ |
| 104 | int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */ |
| 105 | int max_divisor; /* all divisors <= max_divisor */ |
| 106 | int max_divisor_bits; /* ceil(log2(max_divisor)) */ |
| 107 | double total_fraction; /* of squares */ |
| 108 | mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */ |
| 109 | mpz_t pp_norm; /* pp shifted so NUMB high bit set */ |
| 110 | mpz_t pp_inverted; /* invert_limb style inverse */ |
| 111 | mpz_t mod_mask; /* 2^mod_bits-1 */ |
| 112 | char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */ |
| 113 | |
| 114 | /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */ |
| 115 | struct rawfactor_t { |
| 116 | int divisor; |
| 117 | int multiplicity; |
| 118 | }; |
| 119 | struct rawfactor_t *rawfactor; |
| 120 | int nrawfactor; |
| 121 | |
| 122 | /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */ |
| 123 | struct factor_t { |
| 124 | int divisor; |
| 125 | mpz_t inverse; /* 1/divisor mod 2^mod_bits */ |
| 126 | mpz_t mask; /* indicating squares mod divisor */ |
| 127 | double fraction; /* squares/total */ |
| 128 | }; |
| 129 | struct factor_t *factor; |
| 130 | int nfactor; /* entries in use in factor array */ |
| 131 | int factor_alloc; /* entries allocated to factor array */ |
| 132 | |
| 133 | |
| 134 | int |
| 135 | f_cmp_divisor (const void *parg, const void *qarg) |
| 136 | { |
| 137 | const struct factor_t *p, *q; |
| 138 | p = (const struct factor_t *) parg; |
| 139 | q = (const struct factor_t *) qarg; |
| 140 | if (p->divisor > q->divisor) |
| 141 | return 1; |
| 142 | else if (p->divisor < q->divisor) |
| 143 | return -1; |
| 144 | else |
| 145 | return 0; |
| 146 | } |
| 147 | |
| 148 | int |
| 149 | f_cmp_fraction (const void *parg, const void *qarg) |
| 150 | { |
| 151 | const struct factor_t *p, *q; |
| 152 | p = (const struct factor_t *) parg; |
| 153 | q = (const struct factor_t *) qarg; |
| 154 | if (p->fraction > q->fraction) |
| 155 | return 1; |
| 156 | else if (p->fraction < q->fraction) |
| 157 | return -1; |
| 158 | else |
| 159 | return 0; |
| 160 | } |
| 161 | |
| 162 | /* Remove array[idx] by copying the remainder down, and adjust narray |
| 163 | accordingly. */ |
| 164 | #define COLLAPSE_ELEMENT(array, idx, narray) \ |
| 165 | do { \ |
| 166 | memmove (&(array)[idx], \ |
| 167 | &(array)[idx+1], \ |
| 168 | ((narray)-((idx)+1)) * sizeof (array[0])); \ |
| 169 | (narray)--; \ |
| 170 | } while (0) |
| 171 | |
| 172 | |
| 173 | /* return n*2^p mod m */ |
| 174 | int |
| 175 | mul_2exp_mod (int n, int p, int m) |
| 176 | { |
| 177 | while (--p >= 0) |
| 178 | n = (2 * n) % m; |
| 179 | return n; |
| 180 | } |
| 181 | |
| 182 | /* return -n mod m */ |
| 183 | int |
| 184 | neg_mod (int n, int m) |
| 185 | { |
| 186 | assert (n >= 0 && n < m); |
| 187 | return (n == 0 ? 0 : m-n); |
| 188 | } |
| 189 | |
| 190 | /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if |
| 191 | "-(idx<<mod_bits)" can be a square modulo m. */ |
| 192 | void |
| 193 | square_mask (mpz_t mask, int m) |
| 194 | { |
| 195 | int p, i, r, idx; |
| 196 | |
| 197 | p = mul_2exp_mod (1, mod_bits, m); |
| 198 | p = neg_mod (p, m); |
| 199 | |
| 200 | mpz_set_ui (mask, 0L); |
| 201 | for (i = 0; i < m; i++) |
| 202 | { |
| 203 | r = (i * i) % m; |
| 204 | idx = (r * p) % m; |
| 205 | mpz_setbit (mask, (unsigned long) idx); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | void |
| 210 | generate_sq_res_0x100 (int limb_bits) |
| 211 | { |
| 212 | int i, res; |
| 213 | |
| 214 | nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits; |
| 215 | sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100)); |
| 216 | |
| 217 | for (i = 0; i < nsq_res_0x100; i++) |
| 218 | mpz_init_set_ui (sq_res_0x100[i], 0L); |
| 219 | |
| 220 | for (i = 0; i < 0x100; i++) |
| 221 | { |
| 222 | res = (i * i) % 0x100; |
| 223 | mpz_setbit (sq_res_0x100[res / limb_bits], |
| 224 | (unsigned long) (res % limb_bits)); |
| 225 | } |
| 226 | |
| 227 | sq_res_0x100_num = 0; |
| 228 | for (i = 0; i < nsq_res_0x100; i++) |
| 229 | sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]); |
| 230 | sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0; |
| 231 | } |
| 232 | |
| 233 | void |
| 234 | generate_mod (int limb_bits, int nail_bits) |
| 235 | { |
| 236 | int numb_bits = limb_bits - nail_bits; |
| 237 | int i, divisor; |
| 238 | |
| 239 | mpz_init_set_ui (pp, 0L); |
| 240 | mpz_init_set_ui (pp_norm, 0L); |
| 241 | mpz_init_set_ui (pp_inverted, 0L); |
| 242 | |
| 243 | /* no more than limb_bits many factors in a one limb modulus (and of |
| 244 | course in reality nothing like that many) */ |
| 245 | factor_alloc = limb_bits; |
| 246 | factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor)); |
| 247 | rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor)); |
| 248 | |
| 249 | if (numb_bits % 4 != 0) |
| 250 | { |
| 251 | strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0"); |
| 252 | goto use_pp; |
| 253 | } |
| 254 | |
| 255 | max_divisor = 2*limb_bits; |
| 256 | max_divisor_bits = log2_ceil (max_divisor); |
| 257 | |
| 258 | if (numb_bits / 4 < max_divisor_bits) |
| 259 | { |
| 260 | /* Wind back to one limb worth of max_divisor, if that will let us use |
| 261 | mpn_mod_34lsub1. */ |
| 262 | max_divisor = limb_bits; |
| 263 | max_divisor_bits = log2_ceil (max_divisor); |
| 264 | |
| 265 | if (numb_bits / 4 < max_divisor_bits) |
| 266 | { |
| 267 | strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small"); |
| 268 | goto use_pp; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | { |
| 273 | /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */ |
| 274 | mpz_t m, q, r; |
| 275 | int multiplicity; |
| 276 | |
| 277 | mod34_bits = (numb_bits / 4) * 3; |
| 278 | |
| 279 | /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at |
| 280 | the mod34_bits mark, adding the two halves for a remainder of at most |
| 281 | mod34_bits+1 many bits */ |
| 282 | mod_bits = mod34_bits + 1; |
| 283 | |
| 284 | mpz_init_set_ui (m, 1L); |
| 285 | mpz_mul_2exp (m, m, mod34_bits); |
| 286 | mpz_sub_ui (m, m, 1L); |
| 287 | |
| 288 | mpz_init (q); |
| 289 | mpz_init (r); |
| 290 | |
| 291 | for (i = 3; i <= max_divisor; i+=2) |
| 292 | { |
| 293 | if (! isprime (i)) |
| 294 | continue; |
| 295 | |
| 296 | mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); |
| 297 | if (mpz_sgn (r) != 0) |
| 298 | continue; |
| 299 | |
| 300 | /* if a repeated prime is found it's used as an i^n in one factor */ |
| 301 | divisor = 1; |
| 302 | multiplicity = 0; |
| 303 | do |
| 304 | { |
| 305 | if (divisor > max_divisor / i) |
| 306 | break; |
| 307 | multiplicity++; |
| 308 | mpz_set (m, q); |
| 309 | mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); |
| 310 | } |
| 311 | while (mpz_sgn (r) == 0); |
| 312 | |
| 313 | assert (nrawfactor < factor_alloc); |
| 314 | rawfactor[nrawfactor].divisor = i; |
| 315 | rawfactor[nrawfactor].multiplicity = multiplicity; |
| 316 | nrawfactor++; |
| 317 | } |
| 318 | |
| 319 | mpz_clear (m); |
| 320 | mpz_clear (q); |
| 321 | mpz_clear (r); |
| 322 | } |
| 323 | |
| 324 | if (nrawfactor <= 2) |
| 325 | { |
| 326 | mpz_t new_pp; |
| 327 | |
| 328 | sprintf (mod34_excuse, "only %d small factor%s", |
| 329 | nrawfactor, nrawfactor == 1 ? "" : "s"); |
| 330 | |
| 331 | use_pp: |
| 332 | /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code |
| 333 | tried with just one */ |
| 334 | max_divisor = 2*limb_bits; |
| 335 | max_divisor_bits = log2_ceil (max_divisor); |
| 336 | |
| 337 | mpz_init (new_pp); |
| 338 | nrawfactor = 0; |
| 339 | mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits); |
| 340 | |
| 341 | /* one copy of each small prime */ |
| 342 | mpz_set_ui (pp, 1L); |
| 343 | for (i = 3; i <= max_divisor; i+=2) |
| 344 | { |
| 345 | if (! isprime (i)) |
| 346 | continue; |
| 347 | |
| 348 | mpz_mul_ui (new_pp, pp, (unsigned long) i); |
| 349 | if (mpz_sizeinbase (new_pp, 2) > mod_bits) |
| 350 | break; |
| 351 | mpz_set (pp, new_pp); |
| 352 | |
| 353 | assert (nrawfactor < factor_alloc); |
| 354 | rawfactor[nrawfactor].divisor = i; |
| 355 | rawfactor[nrawfactor].multiplicity = 1; |
| 356 | nrawfactor++; |
| 357 | } |
| 358 | |
| 359 | /* Plus an extra copy of one or more of the primes selected, if that |
| 360 | still fits in max_divisor and the total in mod_bits. Usually only |
| 361 | 3 or 5 will be candidates */ |
| 362 | for (i = nrawfactor-1; i >= 0; i--) |
| 363 | { |
| 364 | if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor) |
| 365 | continue; |
| 366 | mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor); |
| 367 | if (mpz_sizeinbase (new_pp, 2) > mod_bits) |
| 368 | continue; |
| 369 | mpz_set (pp, new_pp); |
| 370 | |
| 371 | rawfactor[i].multiplicity++; |
| 372 | } |
| 373 | |
| 374 | mod_bits = mpz_sizeinbase (pp, 2); |
| 375 | |
| 376 | mpz_set (pp_norm, pp); |
| 377 | while (mpz_sizeinbase (pp_norm, 2) < numb_bits) |
| 378 | mpz_add (pp_norm, pp_norm, pp_norm); |
| 379 | |
| 380 | mpz_preinv_invert (pp_inverted, pp_norm, numb_bits); |
| 381 | |
| 382 | mpz_clear (new_pp); |
| 383 | } |
| 384 | |
| 385 | /* start the factor array */ |
| 386 | for (i = 0; i < nrawfactor; i++) |
| 387 | { |
| 388 | int j; |
| 389 | assert (nfactor < factor_alloc); |
| 390 | factor[nfactor].divisor = 1; |
| 391 | for (j = 0; j < rawfactor[i].multiplicity; j++) |
| 392 | factor[nfactor].divisor *= rawfactor[i].divisor; |
| 393 | nfactor++; |
| 394 | } |
| 395 | |
| 396 | combine: |
| 397 | /* Combine entries in the factor array. Combine the smallest entry with |
| 398 | the biggest one that will fit with it (ie. under max_divisor), then |
| 399 | repeat that with the new smallest entry. */ |
| 400 | qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor); |
| 401 | for (i = nfactor-1; i >= 1; i--) |
| 402 | { |
| 403 | if (factor[i].divisor <= max_divisor / factor[0].divisor) |
| 404 | { |
| 405 | factor[0].divisor *= factor[i].divisor; |
| 406 | COLLAPSE_ELEMENT (factor, i, nfactor); |
| 407 | goto combine; |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | total_fraction = 1.0; |
| 412 | for (i = 0; i < nfactor; i++) |
| 413 | { |
| 414 | mpz_init (factor[i].inverse); |
| 415 | mpz_invert_ui_2exp (factor[i].inverse, |
| 416 | (unsigned long) factor[i].divisor, |
| 417 | (unsigned long) mod_bits); |
| 418 | |
| 419 | mpz_init (factor[i].mask); |
| 420 | square_mask (factor[i].mask, factor[i].divisor); |
| 421 | |
| 422 | /* fraction of possible squares */ |
| 423 | factor[i].fraction = (double) mpz_popcount (factor[i].mask) |
| 424 | / factor[i].divisor; |
| 425 | |
| 426 | /* total fraction of possible squares */ |
| 427 | total_fraction *= factor[i].fraction; |
| 428 | } |
| 429 | |
| 430 | /* best tests first (ie. smallest fraction) */ |
| 431 | qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction); |
| 432 | } |
| 433 | |
| 434 | void |
| 435 | print (int limb_bits, int nail_bits) |
| 436 | { |
| 437 | int i; |
| 438 | mpz_t mhi, mlo; |
| 439 | |
| 440 | printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n"); |
| 441 | printf ("\n"); |
| 442 | |
| 443 | printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n", |
| 444 | limb_bits, nail_bits); |
| 445 | printf ("Error, error, this data is for %d bit limb and %d bit nail\n", |
| 446 | limb_bits, nail_bits); |
| 447 | printf ("#endif\n"); |
| 448 | printf ("\n"); |
| 449 | |
| 450 | printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n"); |
| 451 | printf (" This test identifies %.2f%% as non-squares (%d/256). */\n", |
| 452 | (1.0 - sq_res_0x100_fraction) * 100.0, |
| 453 | 0x100 - sq_res_0x100_num); |
| 454 | printf ("static const mp_limb_t\n"); |
| 455 | printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100); |
| 456 | for (i = 0; i < nsq_res_0x100; i++) |
| 457 | { |
| 458 | printf (" CNST_LIMB(0x"); |
| 459 | mpz_out_str (stdout, 16, sq_res_0x100[i]); |
| 460 | printf ("),\n"); |
| 461 | } |
| 462 | printf ("};\n"); |
| 463 | printf ("\n"); |
| 464 | |
| 465 | if (mpz_sgn (pp) != 0) |
| 466 | { |
| 467 | printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse); |
| 468 | printf ("/* PERFSQR_PP = "); |
| 469 | } |
| 470 | else |
| 471 | printf ("/* 2^%d-1 = ", mod34_bits); |
| 472 | for (i = 0; i < nrawfactor; i++) |
| 473 | { |
| 474 | if (i != 0) |
| 475 | printf (" * "); |
| 476 | printf ("%d", rawfactor[i].divisor); |
| 477 | if (rawfactor[i].multiplicity != 1) |
| 478 | printf ("^%d", rawfactor[i].multiplicity); |
| 479 | } |
| 480 | printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : ""); |
| 481 | |
| 482 | printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits); |
| 483 | if (mpz_sgn (pp) != 0) |
| 484 | { |
| 485 | printf ("#define PERFSQR_PP CNST_LIMB(0x"); |
| 486 | mpz_out_str (stdout, 16, pp); |
| 487 | printf (")\n"); |
| 488 | printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x"); |
| 489 | mpz_out_str (stdout, 16, pp_norm); |
| 490 | printf (")\n"); |
| 491 | printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x"); |
| 492 | mpz_out_str (stdout, 16, pp_inverted); |
| 493 | printf (")\n"); |
| 494 | } |
| 495 | printf ("\n"); |
| 496 | |
| 497 | mpz_init (mhi); |
| 498 | mpz_init (mlo); |
| 499 | |
| 500 | printf ("/* This test identifies %.2f%% as non-squares. */\n", |
| 501 | (1.0 - total_fraction) * 100.0); |
| 502 | printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n"); |
| 503 | printf (" do { \\\n"); |
| 504 | printf (" mp_limb_t r; \\\n"); |
| 505 | if (mpz_sgn (pp) != 0) |
| 506 | printf (" PERFSQR_MOD_PP (r, up, usize); \\\n"); |
| 507 | else |
| 508 | printf (" PERFSQR_MOD_34 (r, up, usize); \\\n"); |
| 509 | |
| 510 | for (i = 0; i < nfactor; i++) |
| 511 | { |
| 512 | printf (" \\\n"); |
| 513 | printf (" /* %5.2f%% */ \\\n", |
| 514 | (1.0 - factor[i].fraction) * 100.0); |
| 515 | |
| 516 | printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x", |
| 517 | factor[i].divisor <= limb_bits ? 1 : 2, |
| 518 | factor[i].divisor); |
| 519 | mpz_out_str (stdout, 16, factor[i].inverse); |
| 520 | printf ("), \\\n"); |
| 521 | printf (" CNST_LIMB(0x"); |
| 522 | |
| 523 | if ( factor[i].divisor <= limb_bits) |
| 524 | { |
| 525 | mpz_out_str (stdout, 16, factor[i].mask); |
| 526 | } |
| 527 | else |
| 528 | { |
| 529 | mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits); |
| 530 | mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits); |
| 531 | mpz_out_str (stdout, 16, mhi); |
| 532 | printf ("), CNST_LIMB(0x"); |
| 533 | mpz_out_str (stdout, 16, mlo); |
| 534 | } |
| 535 | printf (")); \\\n"); |
| 536 | } |
| 537 | |
| 538 | printf (" } while (0)\n"); |
| 539 | printf ("\n"); |
| 540 | |
| 541 | printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n", |
| 542 | (1.0 - (total_fraction * 44.0/256.0)) * 100.0); |
| 543 | printf ("\n"); |
| 544 | |
| 545 | printf ("/* helper for tests/mpz/t-perfsqr.c */\n"); |
| 546 | printf ("#define PERFSQR_DIVISORS { 256,"); |
| 547 | for (i = 0; i < nfactor; i++) |
| 548 | printf (" %d,", factor[i].divisor); |
| 549 | printf (" }\n"); |
| 550 | |
| 551 | |
| 552 | mpz_clear (mhi); |
| 553 | mpz_clear (mlo); |
| 554 | } |
| 555 | |
| 556 | int |
| 557 | main (int argc, char *argv[]) |
| 558 | { |
| 559 | int limb_bits, nail_bits; |
| 560 | |
| 561 | if (argc != 3) |
| 562 | { |
| 563 | fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n"); |
| 564 | exit (1); |
| 565 | } |
| 566 | |
| 567 | limb_bits = atoi (argv[1]); |
| 568 | nail_bits = atoi (argv[2]); |
| 569 | |
| 570 | if (limb_bits <= 0 |
| 571 | || nail_bits < 0 |
| 572 | || nail_bits >= limb_bits) |
| 573 | { |
| 574 | fprintf (stderr, "Invalid limb/nail bits: %d %d\n", |
| 575 | limb_bits, nail_bits); |
| 576 | exit (1); |
| 577 | } |
| 578 | |
| 579 | generate_sq_res_0x100 (limb_bits); |
| 580 | generate_mod (limb_bits, nail_bits); |
| 581 | |
| 582 | print (limb_bits, nail_bits); |
| 583 | |
| 584 | return 0; |
| 585 | } |