Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* gen-trialdivtab.c |
| 2 | |
| 3 | Contributed to the GNU project by Torbjorn Granlund. |
| 4 | |
| 5 | Copyright 2009, 2012, 2013, 2016, 2018 Free Software Foundation, Inc. |
| 6 | |
| 7 | This file is part of the GNU MP Library. |
| 8 | |
| 9 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 10 | it under the terms of either: |
| 11 | |
| 12 | * the GNU Lesser General Public License as published by the Free |
| 13 | Software Foundation; either version 3 of the License, or (at your |
| 14 | option) any later version. |
| 15 | |
| 16 | or |
| 17 | |
| 18 | * the GNU General Public License as published by the Free Software |
| 19 | Foundation; either version 2 of the License, or (at your option) any |
| 20 | later version. |
| 21 | |
| 22 | or both in parallel, as here. |
| 23 | |
| 24 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 25 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 26 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 27 | for more details. |
| 28 | |
| 29 | You should have received copies of the GNU General Public License and the |
| 30 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 31 | see https://www.gnu.org/licenses/. */ |
| 32 | |
| 33 | /* |
| 34 | Generate tables for fast, division-free trial division for GMP. |
| 35 | |
| 36 | There is one main table, ptab. It contains primes, multiplied together, and |
| 37 | several types of pre-computed inverses. It refers to tables of the type |
| 38 | dtab, via the last two indices. That table contains the individual primes in |
| 39 | the range, except that the primes are not actually included in the table (see |
| 40 | the P macro; it sneakingly excludes the primes themselves). Instead, the |
| 41 | dtab tables contains tuples for each prime (modular-inverse, limit) used for |
| 42 | divisibility checks. |
| 43 | |
| 44 | This interface is not intended for division of very many primes, since then |
| 45 | other algorithms apply. |
| 46 | */ |
| 47 | |
| 48 | #include <stdlib.h> |
| 49 | #include <stdio.h> |
| 50 | #include "bootstrap.c" |
| 51 | |
| 52 | int sumspills (mpz_t, mpz_t *, int); |
| 53 | void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t); |
| 54 | |
| 55 | int limb_bits; |
| 56 | |
| 57 | mpz_t B; |
| 58 | |
| 59 | int |
| 60 | main (int argc, char *argv[]) |
| 61 | { |
| 62 | int t, p; |
| 63 | mpz_t ppp, acc, inv, gmp_numb_max, tmp, Bhalf; |
| 64 | mpz_t pre[7]; |
| 65 | int i; |
| 66 | int start_p, end_p, interval_start, interval_end, omitted_p; |
| 67 | const char *endtok; |
| 68 | int stop; |
| 69 | int np, start_idx; |
| 70 | |
| 71 | if (argc < 2) |
| 72 | { |
| 73 | fprintf (stderr, "usage: %s bits endprime\n", argv[0]); |
| 74 | exit (1); |
| 75 | } |
| 76 | |
| 77 | limb_bits = atoi (argv[1]); |
| 78 | |
| 79 | end_p = 1290; /* default end prime */ |
| 80 | if (argc == 3) |
| 81 | end_p = atoi (argv[2]); |
| 82 | |
| 83 | printf ("#if GMP_LIMB_BITS != %d\n", limb_bits); |
| 84 | printf ("#error This table is for GMP_LIMB_BITS = %d\n", limb_bits); |
| 85 | printf ("#endif\n\n"); |
| 86 | |
| 87 | printf ("#if GMP_NAIL_BITS != 0\n"); |
| 88 | printf ("#error This table does not support nails\n"); |
| 89 | printf ("#endif\n\n"); |
| 90 | |
| 91 | for (i = 0; i < 7; i++) |
| 92 | mpz_init (pre[i]); |
| 93 | |
| 94 | mpz_init (B); |
| 95 | mpz_setbit (B, limb_bits); |
| 96 | mpz_init_set (gmp_numb_max, B); |
| 97 | mpz_sub_ui (gmp_numb_max, gmp_numb_max, 1); |
| 98 | |
| 99 | mpz_init (tmp); |
| 100 | mpz_init (inv); |
| 101 | |
| 102 | mpz_init (Bhalf); |
| 103 | mpz_setbit (Bhalf, limb_bits - 1); |
| 104 | |
| 105 | start_p = 3; |
| 106 | |
| 107 | mpz_init_set_ui (ppp, 1); |
| 108 | mpz_init (acc); |
| 109 | interval_start = start_p; |
| 110 | omitted_p = 3; |
| 111 | interval_end = 0; |
| 112 | |
| 113 | /* printf ("static struct gmp_primes_dtab gmp_primes_dtab[] = {\n"); */ |
| 114 | |
| 115 | printf ("#ifdef WANT_dtab\n"); |
| 116 | |
| 117 | for (t = start_p; t <= end_p; t += 2) |
| 118 | { |
| 119 | if (! isprime (t)) |
| 120 | continue; |
| 121 | |
| 122 | mpz_mul_ui (acc, ppp, t); |
| 123 | stop = mpz_cmp (acc, Bhalf) >= 0; |
| 124 | if (!stop) |
| 125 | { |
| 126 | mpn_mod_1s_4p_cps (pre, acc); |
| 127 | stop = sumspills (acc, pre + 2, 5); |
| 128 | } |
| 129 | |
| 130 | if (stop) |
| 131 | { |
| 132 | for (p = interval_start; p <= interval_end; p += 2) |
| 133 | { |
| 134 | if (! isprime (p)) |
| 135 | continue; |
| 136 | |
| 137 | printf (" P(%d,", (int) p); |
| 138 | mpz_invert_ui_2exp (inv, p, limb_bits); |
| 139 | printf ("CNST_LIMB(0x"); mpz_out_str (stdout, 16, inv); printf ("),"); |
| 140 | |
| 141 | mpz_tdiv_q_ui (tmp, gmp_numb_max, p); |
| 142 | printf ("CNST_LIMB(0x"); mpz_out_str (stdout, 16, tmp); |
| 143 | printf (")),\n"); |
| 144 | } |
| 145 | mpz_set_ui (ppp, t); |
| 146 | interval_start = t; |
| 147 | omitted_p = t; |
| 148 | } |
| 149 | else |
| 150 | { |
| 151 | mpz_set (ppp, acc); |
| 152 | } |
| 153 | interval_end = t; |
| 154 | } |
| 155 | printf ("#define SMALLEST_OMITTED_PRIME %d\n", (int) omitted_p); |
| 156 | printf ("#endif\n"); |
| 157 | |
| 158 | printf ("#ifdef WANT_ptab\n"); |
| 159 | |
| 160 | /* printf ("static struct gmp_primes_ptab gmp_primes_ptab[] = {\n"); */ |
| 161 | |
| 162 | endtok = ""; |
| 163 | |
| 164 | mpz_set_ui (ppp, 1); |
| 165 | interval_start = start_p; |
| 166 | interval_end = 0; |
| 167 | np = 0; |
| 168 | start_idx = 0; |
| 169 | for (t = start_p; t <= end_p; t += 2) |
| 170 | { |
| 171 | if (! isprime (t)) |
| 172 | continue; |
| 173 | |
| 174 | mpz_mul_ui (acc, ppp, t); |
| 175 | |
| 176 | stop = mpz_cmp (acc, Bhalf) >= 0; |
| 177 | if (!stop) |
| 178 | { |
| 179 | mpn_mod_1s_4p_cps (pre, acc); |
| 180 | stop = sumspills (acc, pre + 2, 5); |
| 181 | } |
| 182 | |
| 183 | if (stop) |
| 184 | { |
| 185 | mpn_mod_1s_4p_cps (pre, ppp); |
| 186 | printf ("%s", endtok); |
| 187 | printf (" {CNST_LIMB(0x"); mpz_out_str (stdout, 16, ppp); |
| 188 | printf ("),{CNST_LIMB(0x"); mpz_out_str (stdout, 16, pre[0]); |
| 189 | printf ("),%d", (int) PTR(pre[1])[0]); |
| 190 | for (i = 0; i < 5; i++) |
| 191 | { |
| 192 | printf (","); |
| 193 | printf ("CNST_LIMB(0x"); mpz_out_str (stdout, 16, pre[2 + i]); |
| 194 | printf (")"); |
| 195 | } |
| 196 | printf ("},"); |
| 197 | printf ("%d,", start_idx); |
| 198 | printf ("%d}", np - start_idx); |
| 199 | |
| 200 | endtok = ",\n"; |
| 201 | mpz_set_ui (ppp, t); |
| 202 | interval_start = t; |
| 203 | start_idx = np; |
| 204 | } |
| 205 | else |
| 206 | { |
| 207 | mpz_set (ppp, acc); |
| 208 | } |
| 209 | interval_end = t; |
| 210 | np++; |
| 211 | } |
| 212 | |
| 213 | printf ("\n"); |
| 214 | printf ("#endif\n"); |
| 215 | |
| 216 | return 0; |
| 217 | } |
| 218 | |
| 219 | unsigned long |
| 220 | mpz_log2 (mpz_t x) |
| 221 | { |
| 222 | return mpz_sgn (x) ? mpz_sizeinbase (x, 2) : 0; |
| 223 | } |
| 224 | |
| 225 | void |
| 226 | mpn_mod_1s_4p_cps (mpz_t cps[7], mpz_t bparm) |
| 227 | { |
| 228 | mpz_t b, bi; |
| 229 | mpz_t B1modb, B2modb, B3modb, B4modb, B5modb; |
| 230 | mpz_t t; |
| 231 | int cnt; |
| 232 | |
| 233 | mpz_init_set (b, bparm); |
| 234 | |
| 235 | cnt = limb_bits - mpz_log2 (b); |
| 236 | |
| 237 | mpz_init (bi); |
| 238 | mpz_init (t); |
| 239 | mpz_init (B1modb); |
| 240 | mpz_init (B2modb); |
| 241 | mpz_init (B3modb); |
| 242 | mpz_init (B4modb); |
| 243 | mpz_init (B5modb); |
| 244 | |
| 245 | mpz_set_ui (t, 1); |
| 246 | mpz_mul_2exp (t, t, limb_bits - cnt); |
| 247 | mpz_sub (t, t, b); |
| 248 | mpz_mul_2exp (t, t, limb_bits); |
| 249 | mpz_tdiv_q (bi, t, b); /* bi = B^2/b, except msb */ |
| 250 | |
| 251 | mpz_set_ui (t, 1); |
| 252 | mpz_mul_2exp (t, t, limb_bits); /* t = B */ |
| 253 | mpz_tdiv_r (B1modb, t, b); |
| 254 | |
| 255 | mpz_mul_2exp (t, B1modb, limb_bits); |
| 256 | mpz_tdiv_r (B2modb, t, b); |
| 257 | |
| 258 | mpz_mul_2exp (t, B2modb, limb_bits); |
| 259 | mpz_tdiv_r (B3modb, t, b); |
| 260 | |
| 261 | mpz_mul_2exp (t, B3modb, limb_bits); |
| 262 | mpz_tdiv_r (B4modb, t, b); |
| 263 | |
| 264 | mpz_mul_2exp (t, B4modb, limb_bits); |
| 265 | mpz_tdiv_r (B5modb, t, b); |
| 266 | |
| 267 | mpz_set (cps[0], bi); |
| 268 | mpz_set_ui (cps[1], cnt); |
| 269 | mpz_tdiv_q_2exp (cps[2], B1modb, 0); |
| 270 | mpz_tdiv_q_2exp (cps[3], B2modb, 0); |
| 271 | mpz_tdiv_q_2exp (cps[4], B3modb, 0); |
| 272 | mpz_tdiv_q_2exp (cps[5], B4modb, 0); |
| 273 | mpz_tdiv_q_2exp (cps[6], B5modb, 0); |
| 274 | |
| 275 | mpz_clear (b); |
| 276 | mpz_clear (bi); |
| 277 | mpz_clear (t); |
| 278 | mpz_clear (B1modb); |
| 279 | mpz_clear (B2modb); |
| 280 | mpz_clear (B3modb); |
| 281 | mpz_clear (B4modb); |
| 282 | mpz_clear (B5modb); |
| 283 | } |
| 284 | |
| 285 | int |
| 286 | sumspills (mpz_t ppp, mpz_t *a, int n) |
| 287 | { |
| 288 | mpz_t s; |
| 289 | int i, ret; |
| 290 | |
| 291 | mpz_init_set (s, a[0]); |
| 292 | |
| 293 | for (i = 1; i < n; i++) |
| 294 | { |
| 295 | mpz_add (s, s, a[i]); |
| 296 | } |
| 297 | ret = mpz_cmp (s, B) >= 0; |
| 298 | mpz_clear (s); |
| 299 | |
| 300 | return ret; |
| 301 | } |