/* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.

Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
   IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
   IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
   DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2010-2012, 2015, 2016 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"

#if 0
static mp_limb_t
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
#endif

/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
static mp_limb_t
id_to_n (mp_limb_t id) { return id*3+1+(id&1); }

/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
static mp_limb_t
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }

#if 0
static mp_size_t
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
#endif
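
#if 0
/* Illustrative sketch only, not used by the library: the mod-6 wheel
   numbering assumed throughout this file.  Bit 0 of the sieve represents 5,
   bit 1 represents 7, bit 2 represents 11, and in general bit b represents
   bit_to_n (b) = (b*3+4)|1, the b-th integer congruent to +/-1 mod 6,
   counting from 5.  The hypothetical self-check below asserts that id_to_n
   and n_to_bit agree with this numbering for some small values.  */
static void
check_wheel_mapping (void)
{
  mp_limb_t b;

  for (b = 0; b < GMP_LIMB_BITS; ++b)
    {
      mp_limb_t n = (b * 3 + 4) | 1;   /* bit_to_n (b) */
      ASSERT_ALWAYS (id_to_n (b + 1) == n);
      ASSERT_ALWAYS (n_to_bit (n) == b);
    }
}
#endif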

#if GMP_LIMB_BITS > 61
#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
#if GMP_LIMB_BITS == 64
/* 110-bit pre-sieved mask for primes 5 and 11 */
#define SIEVE_MASK1 CNST_LIMB(0x81214a1204892058)
#define SIEVE_MASKT CNST_LIMB(0xc8130681244)
/* 182-bit pre-sieved mask for primes 7 and 13 */
#define SIEVE_2MSK1 CNST_LIMB(0x9402180c40230184)
#define SIEVE_2MSK2 CNST_LIMB(0x0285021088402120)
#define SIEVE_2MSKT CNST_LIMB(0xa41210084421)
#define SEED_LIMIT 210
#else
#define SEED_LIMIT 202
#endif
#else
#if GMP_LIMB_BITS > 30
#define SIEVE_SEED CNST_LIMB(0x69128480)
#if GMP_LIMB_BITS == 32
/* 70-bit pre-sieved mask for primes 5 and 7 */
#define SIEVE_MASK1 CNST_LIMB(0x12148960)
#define SIEVE_MASK2 CNST_LIMB(0x44a120cc)
#define SIEVE_MASKT CNST_LIMB(0x1a)
#define SEED_LIMIT 120
#else
#define SEED_LIMIT 114
#endif
#else
#if GMP_LIMB_BITS > 15
#define SIEVE_SEED CNST_LIMB(0x8480)
#define SEED_LIMIT 54
#else
#if GMP_LIMB_BITS > 7
#define SIEVE_SEED CNST_LIMB(0x80)
#define SEED_LIMIT 34
#else
#define SIEVE_SEED CNST_LIMB(0x0)
#define SEED_LIMIT 24
#endif /* 7 */
#endif /* 15 */
#endif /* 30 */
#endif /* 61 */
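
#if 0
/* Illustrative sketch only, not used by the library: SIEVE_SEED above is,
   presumably, the first limb of the sieve precomputed, i.e. bit b is set
   when bit_to_n (b) = (b*3+4)|1 is composite.  A naive regeneration by
   trial division, for cross-checking, could look like this (the function
   name is made up for this sketch).  */
static mp_limb_t
naive_sieve_seed (void)
{
  mp_limb_t seed = 0;
  int b;

  for (b = 0; b < GMP_LIMB_BITS; ++b)
    {
      mp_limb_t n = ((mp_limb_t) b * 3 + 4) | 1, d;

      /* n is never divisible by 2 or 3, so odd trial divisors from 5 up
	 to sqrt(n) are enough to detect a composite.  */
      for (d = 5; d * d <= n; d += 2)
	if (n % d == 0)
	  {
	    seed |= CNST_LIMB(1) << b;
	    break;
	  }
    }
  return seed;
}
#endif

/* SET_OFF1 (m1, m2, M1, M2, off, BITS): store in (m1, m2) the BITS-bit
   circular mask (M1, M2), rotated right by off bits, so that m1 holds the
   GMP_LIMB_BITS bits of the periodic pattern starting at bit offset off.
   Assumes GMP_LIMB_BITS < BITS <= 2*GMP_LIMB_BITS and 0 <= off < BITS.  */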
#define SET_OFF1(m1, m2, M1, M2, off, BITS) \
  if (off) { \
    if (off < GMP_LIMB_BITS) { \
      m1 = (M1 >> off) | (M2 << (GMP_LIMB_BITS - off)); \
      if (off <= BITS - GMP_LIMB_BITS) { \
	m2 = M1 << (BITS - GMP_LIMB_BITS - off) \
	  | M2 >> off; \
      } else { \
	m1 |= M1 << (BITS - off); \
	m2 = M1 >> (off + GMP_LIMB_BITS - BITS); \
      } \
    } else { \
      m1 = M1 << (BITS - off) \
	| M2 >> (off - GMP_LIMB_BITS); \
      m2 = M2 << (BITS - off) \
	| M1 >> (off + GMP_LIMB_BITS - BITS); \
    } \
  } else { \
    m1 = M1; m2 = M2; \
  }

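/* SET_OFF2 (m1, m2, m3, M1, M2, M3, off, BITS): three-limb variant of
   SET_OFF1, for masks with 2*GMP_LIMB_BITS < BITS <= 3*GMP_LIMB_BITS.  */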
#define SET_OFF2(m1, m2, m3, M1, M2, M3, off, BITS) \
  if (off) { \
    if (off <= GMP_LIMB_BITS) { \
      m1 = M2 << (GMP_LIMB_BITS - off); \
      m2 = M3 << (GMP_LIMB_BITS - off); \
      if (off != GMP_LIMB_BITS) { \
	m1 |= (M1 >> off); \
	m2 |= (M2 >> off); \
      } \
      if (off <= BITS - 2 * GMP_LIMB_BITS) { \
	m3 = M1 << (BITS - 2 * GMP_LIMB_BITS - off) \
	  | M3 >> off; \
      } else { \
	m2 |= M1 << (BITS - GMP_LIMB_BITS - off); \
	m3 = M1 >> (off + 2 * GMP_LIMB_BITS - BITS); \
      } \
    } else if (off < 2 * GMP_LIMB_BITS) { \
      m1 = M2 >> (off - GMP_LIMB_BITS) \
	| M3 << (2 * GMP_LIMB_BITS - off); \
      if (off <= BITS - GMP_LIMB_BITS) { \
	m2 = M3 >> (off - GMP_LIMB_BITS) \
	  | M1 << (BITS - GMP_LIMB_BITS - off); \
	m3 = M2 << (BITS - GMP_LIMB_BITS - off); \
	if (off != BITS - GMP_LIMB_BITS) { \
	  m3 |= M1 >> (off + 2 * GMP_LIMB_BITS - BITS); \
	} \
      } else { \
	m1 |= M1 << (BITS - off); \
	m2 = M2 << (BITS - off) \
	  | M1 >> (GMP_LIMB_BITS - BITS + off); \
	m3 = M2 >> (GMP_LIMB_BITS - BITS + off); \
      } \
    } else { \
      m1 = M1 << (BITS - off) \
	| M3 >> (off - 2 * GMP_LIMB_BITS); \
      m2 = M2 << (BITS - off) \
	| M1 >> (off + GMP_LIMB_BITS - BITS); \
      m3 = M3 << (BITS - off) \
	| M2 >> (off + GMP_LIMB_BITS - BITS); \
    } \
  } else { \
    m1 = M1; m2 = M2; m3 = M3; \
  }

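/* ROTATE1 (m1, m2, BITS): rotate the BITS-bit mask (m1, m2) right by
   GMP_LIMB_BITS bits, so that m1 again holds the next GMP_LIMB_BITS bits of
   the periodic pattern.  */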
#define ROTATE1(m1, m2, BITS) \
  do { \
    mp_limb_t __tmp; \
    __tmp = m1 >> (2 * GMP_LIMB_BITS - BITS); \
    m1 = (m1 << (BITS - GMP_LIMB_BITS)) | m2; \
    m2 = __tmp; \
  } while (0)

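/* ROTATE2 (m1, m2, m3, BITS): three-limb analogue of ROTATE1, rotating the
   BITS-bit mask (m1, m2, m3) right by 2*GMP_LIMB_BITS bits, i.e. advancing
   the pattern by two limbs at a time.  */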
#define ROTATE2(m1, m2, m3, BITS) \
  do { \
    mp_limb_t __tmp; \
    __tmp = m2 >> (3 * GMP_LIMB_BITS - BITS); \
    m2 = m2 << (BITS - GMP_LIMB_BITS * 2) \
      | m1 >> (3 * GMP_LIMB_BITS - BITS); \
    m1 = m1 << (BITS - GMP_LIMB_BITS * 2) | m3; \
    m3 = __tmp; \
  } while (0)

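/* fill_bitpattern (bit_array, limbs, offset): fill LIMBS limbs of BIT_ARRAY
   with the precomputed periodic mask(s) for the smallest wheel primes (5 and
   11, and 7 and 13, when masks for the limb size are available), rotated by
   OFFSET bits.  OFFSET is measured from the limb that follows SIEVE_SEED;
   callers pass their position in the sieve minus GMP_LIMB_BITS.  Returns
   the index of the first sieve bit not already handled by the masks, i.e.
   where the caller must start sieving (0 when no mask is available).  */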
static mp_limb_t
fill_bitpattern (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset)
{
#ifdef SIEVE_2MSK2
  mp_limb_t m11, m12, m21, m22, m23;

  if (offset == 0) { /* Fast path; the masks are used as they are. */
    m11 = SIEVE_MASK1;
    m12 = SIEVE_MASKT;
    m21 = SIEVE_2MSK1;
    m22 = SIEVE_2MSK2;
    m23 = SIEVE_2MSKT;
  } else { /* The code below would also handle offset == 0 correctly. */
    m21 = offset % 110;
    SET_OFF1 (m11, m12, SIEVE_MASK1, SIEVE_MASKT, m21, 110);
    offset %= 182;
    SET_OFF2 (m21, m22, m23, SIEVE_2MSK1, SIEVE_2MSK2, SIEVE_2MSKT, offset, 182);
  }
  /* THINK: Consider handling odd values of 'limbs' outside the loop,
     to have a single exit condition. */
  do {
    bit_array[0] = m11 | m21;
    if (--limbs == 0)
      break;
    ROTATE1 (m11, m12, 110);
    bit_array[1] = m11 | m22;
    bit_array += 2;
    ROTATE1 (m11, m12, 110);
    ROTATE2 (m21, m22, m23, 182);
  } while (--limbs != 0);
  return 4;
#else
#ifdef SIEVE_MASK2
  mp_limb_t mask, mask2, tail;

  if (offset == 0) { /* Fast path; the masks are used as they are. */
    mask = SIEVE_MASK1;
    mask2 = SIEVE_MASK2;
    tail = SIEVE_MASKT;
  } else { /* The code below would also handle offset == 0 correctly. */
    offset %= 70;
    SET_OFF2 (mask, mask2, tail, SIEVE_MASK1, SIEVE_MASK2, SIEVE_MASKT, offset, 70);
  }
  /* THINK: Consider handling odd values of 'limbs' outside the loop,
     to have a single exit condition. */
  do {
    bit_array[0] = mask;
    if (--limbs == 0)
      break;
    bit_array[1] = mask2;
    bit_array += 2;
    ROTATE2 (mask, mask2, tail, 70);
  } while (--limbs != 0);
  return 2;
#else
  MPN_FILL (bit_array, limbs, CNST_LIMB(0));
  return 0;
#endif
#endif
}

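/* first_block_primesieve (bit_array, n): sieve the initial block, setting a
   bit for every composite congruent to +/-1 mod 6 up to N.  The first limb
   is the precomputed SIEVE_SEED; the remaining limbs are seeded with
   fill_bitpattern and then sieved in the classical way, crossing off, for
   each prime p found in the block itself, the multiples p*m with m congruent
   to +/-1 mod 6 and m >= p (i.e. starting from p^2).  */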
static void
first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
{
  mp_size_t bits, limbs;
  mp_limb_t i;

  ASSERT (n > 4);

  bits = n_to_bit(n);
  limbs = bits / GMP_LIMB_BITS;

  if (limbs != 0)
    i = fill_bitpattern (bit_array + 1, limbs, 0);
  bit_array[0] = SIEVE_SEED;

  if ((bits + 1) % GMP_LIMB_BITS != 0)
    bit_array[limbs] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);

  if (n > SEED_LIMIT) {
    mp_limb_t mask, index;

    ASSERT (i < GMP_LIMB_BITS);

    if (n_to_bit (SEED_LIMIT + 1) < GMP_LIMB_BITS)
      i = 0;
    mask = CNST_LIMB(1) << i;
    index = 0;
    do {
      ++i;
      if ((bit_array[index] & mask) == 0)
	{
	  mp_size_t step, lindex;
	  mp_limb_t lmask;
	  unsigned maskrot;

	  step = id_to_n(i);
	  /* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
	  lindex = i*(step+1)-1+(-(i&1)&(i+1));
	  /* lindex = i*(step+1+(i&1))-1+(i&1); */
	  if (lindex > bits)
	    break;

	  step <<= 1;
	  maskrot = step % GMP_LIMB_BITS;

	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
	  do {
	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
	    lindex += step;
	  } while (lindex <= bits);

	  /* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
	  lindex = i*(i*3+6)+(i&1);

	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
	  for ( ; lindex <= bits; lindex += step) {
	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
	  }
	}
      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
      index += mask & 1;
    } while (1);
  }
}

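/* block_resieve (bit_array, limbs, offset, sieve): sieve one block of LIMBS
   limbs, representing sieve bits [OFFSET, OFFSET + LIMBS*GMP_LIMB_BITS).
   The block is seeded with fill_bitpattern, then composites are crossed off
   exactly as in first_block_primesieve, except that the primes are read from
   the already sieved initial block SIEVE and the start of each crossing-off
   is advanced to the first multiple falling inside the block.  */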
static void
block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
	       mp_srcptr sieve)
{
  mp_size_t bits, off = offset;
  mp_limb_t mask, index, i;

  ASSERT (limbs > 0);
  ASSERT (offset >= GMP_LIMB_BITS);

  bits = limbs * GMP_LIMB_BITS - 1;

  i = fill_bitpattern (bit_array, limbs, offset - GMP_LIMB_BITS);

  ASSERT (i < GMP_LIMB_BITS);

  mask = CNST_LIMB(1) << i;
  index = 0;
  do {
    ++i;
    if ((sieve[index] & mask) == 0)
      {
	mp_size_t step, lindex;
	mp_limb_t lmask;
	unsigned maskrot;

	step = id_to_n(i);

	/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
	lindex = i*(step+1)-1+(-(i&1)&(i+1));
	/* lindex = i*(step+1+(i&1))-1+(i&1); */
	if (lindex > bits + off)
	  break;

	step <<= 1;
	maskrot = step % GMP_LIMB_BITS;

	if (lindex < off)
	  lindex += step * ((off - lindex - 1) / step + 1);

	lindex -= off;

	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
	for ( ; lindex <= bits; lindex += step) {
	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
	}

	/* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
	lindex = i*(i*3+6)+(i&1);

	if (lindex < off)
	  lindex += step * ((off - lindex - 1) / step + 1);

	lindex -= off;

	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
	for ( ; lindex <= bits; lindex += step) {
	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
	}
      }
    mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
    index += mask & 1;
  } while (1);
}

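/* Size in limbs of the blocks sieved by block_resieve.  The sieve is built
   one block at a time, each block reusing the primes found in the initial
   block; a fixed block size presumably keeps the working set cache
   friendly.  */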
#define BLOCK_SIZE 2048

/* Fills bit_array with the characteristic function of composite
   numbers up to the parameter n.  I.e. a bit set to "1" represents a
   composite, a "0" represents a prime.

   The primesieve_size(n) limbs pointed to by bit_array are
   overwritten.  The returned value counts the primes in the
   interval [4, n].  Note that n > 4 is required.

   Even numbers and multiples of 3 are excluded "a priori"; only
   numbers congruent to +/- 1 mod 6 have a bit in the array.

   Once sieved, a bit b that is ZERO represents a prime, namely
   bit_to_n(b) if the least significant bit is counted as bit 0, or
   id_to_n(b) if the first bit is counted as "1".
 */

mp_limb_t
gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
{
  mp_size_t size;
  mp_limb_t bits;

  ASSERT (n > 4);

  bits = n_to_bit(n);
  size = bits / GMP_LIMB_BITS + 1;

  if (size > BLOCK_SIZE * 2) {
    mp_size_t off;
    off = BLOCK_SIZE + (size % BLOCK_SIZE);
    first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));
    do {
      block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array);
    } while ((off += BLOCK_SIZE) < size);
  } else {
    first_block_primesieve (bit_array, n);
  }

  if ((bits + 1) % GMP_LIMB_BITS != 0)
    bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);

  return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
}
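
#if 0
/* Illustrative sketch only, not part of the library: one way a caller
   inside GMP might walk the sieve produced by gmp_primesieve.  The function
   name is made up for this sketch, and the TMP_* allocation macros and
   ASSERT are assumed from gmp-impl.h.  A zero bit b represents the prime
   bit_to_n (b) = (b*3+4)|1; primes 2 and 3 have no bit in the array and
   would need separate handling for other uses.  Returns the largest prime
   <= n, for n > 4.  */
static mp_limb_t
largest_prime_up_to (mp_limb_t n)
{
  mp_size_t size;
  mp_ptr sieve;
  mp_limb_t b, result = 0;
  TMP_DECL;

  ASSERT (n > 4);
  TMP_MARK;
  size = n_to_bit (n) / GMP_LIMB_BITS + 1;   /* as computed in gmp_primesieve */
  sieve = TMP_ALLOC_LIMBS (size);
  gmp_primesieve (sieve, n);   /* return value (prime count) ignored here */

  /* Scan from the top; padding bits above n_to_bit(n) are already set.  */
  for (b = (mp_limb_t) size * GMP_LIMB_BITS; b-- > 0; )
    if (((sieve[b / GMP_LIMB_BITS] >> (b % GMP_LIMB_BITS)) & 1) == 0)
      {
	result = (b * 3 + 4) | 1;   /* bit_to_n (b) */
	break;
      }

  TMP_FREE;
  return result;
}
#endif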

#undef BLOCK_SIZE
#undef SEED_LIMIT
#undef SIEVE_SEED
#undef SIEVE_MASK1
#undef SIEVE_MASK2
#undef SIEVE_MASKT
#undef SIEVE_2MSK1
#undef SIEVE_2MSK2
#undef SIEVE_2MSKT
#undef SET_OFF1
#undef SET_OFF2
#undef ROTATE1
#undef ROTATE2