| /* mpz_bin_uiui - compute n over k. |
| |
| Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. |
| |
| Copyright 2010-2012, 2015-2018 Free Software Foundation, Inc. |
| |
| This file is part of the GNU MP Library. |
| |
| The GNU MP Library is free software; you can redistribute it and/or modify |
| it under the terms of either: |
| |
| * the GNU Lesser General Public License as published by the Free |
| Software Foundation; either version 3 of the License, or (at your |
| option) any later version. |
| |
| or |
| |
| * the GNU General Public License as published by the Free Software |
| Foundation; either version 2 of the License, or (at your option) any |
| later version. |
| |
| or both in parallel, as here. |
| |
| The GNU MP Library is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received copies of the GNU General Public License and the |
| GNU Lesser General Public License along with the GNU MP Library. If not, |
| see https://www.gnu.org/licenses/. */ |
| |
| #include "gmp-impl.h" |
| #include "longlong.h" |
| |
| #ifndef BIN_GOETGHELUCK_THRESHOLD |
| #define BIN_GOETGHELUCK_THRESHOLD 512 |
| #endif |
| #ifndef BIN_UIUI_ENABLE_SMALLDC |
| #define BIN_UIUI_ENABLE_SMALLDC 1 |
| #endif |
| #ifndef BIN_UIUI_RECURSIVE_SMALLDC |
| #define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32) |
| #endif |
| |
| /* Algorithm: |
| |
| Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8) |
| which are then accumulated into mpn numbers. The first inner loop |
| accumulates divisor factors, the 2nd inner loop accumulates exactly the same |
| number of dividend factors. We avoid accumulating more for the divisor, |
| even with its smaller factors, since we else cannot guarantee divisibility. |
| |
| Since we know each division will yield an integer, we compute the quotient |
| using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod |
| 2^t. |
| |
| Improvements: |
| |
| (1) An obvious improvement to this code would be to compute mod 2^t |
| everywhere. Unfortunately, we cannot determine t beforehand, unless we |
| invoke some approximation, such as Stirling's formula. Of course, we don't |
| need t to be tight. However, it is not clear that this would help much, |
| our numbers are kept reasonably small already. |
| |
| (2) Compute nmax/kmax semi-accurately, without scalar division or a loop. |
| Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index, |
| would make it both reasonably accurate and fast. (We could use a table |
| stored into a limb, perhaps.) The table should take the removed factors of |
| 2 into account (those done on-the-fly in mulN). |
| |
| (3) The first time in the loop we compute the odd part of a |
| factorial in kp, we might use oddfac_1 for this task. |
| */ |
| |
| /* This threshold determines how large divisor to accumulate before we call |
| bdiv. Perhaps we should never call bdiv, and accumulate all we are told, |
| since we are just basecase code anyway? Presumably, this depends on the |
| relative speed of the asymptotically fast code and this code. */ |
| #define SOME_THRESHOLD 20 |
| |
| /* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME: |
| All versions of MAXFACS don't take this 2 removal into account now, meaning |
| that then, shifting just adds some overhead. (We remove factors from the |
| completed limb anyway.) */ |
| |
| static mp_limb_t |
| mul1 (mp_limb_t m) |
| { |
| return m; |
| } |
| |
| static mp_limb_t |
| mul2 (mp_limb_t m) |
| { |
| /* We need to shift before multiplying, to avoid an overflow. */ |
| mp_limb_t m01 = (m | 1) * ((m + 1) >> 1); |
| return m01; |
| } |
| |
| static mp_limb_t |
| mul3 (mp_limb_t m) |
| { |
| mp_limb_t m01 = (m + 0) * (m + 1) >> 1; |
| mp_limb_t m2 = (m + 2); |
| return m01 * m2; |
| } |
| |
| static mp_limb_t |
| mul4 (mp_limb_t m) |
| { |
| mp_limb_t m03 = (m + 0) * (m + 3) >> 1; |
| return m03 * (m03 + 1); /* mul2 (m03) ? */ |
| } |
| |
| static mp_limb_t |
| mul5 (mp_limb_t m) |
| { |
| mp_limb_t m03 = (m + 0) * (m + 3) >> 1; |
| mp_limb_t m034 = m03 * (m + 4); |
| return (m03 + 1) * m034; |
| } |
| |
| static mp_limb_t |
| mul6 (mp_limb_t m) |
| { |
| mp_limb_t m05 = (m + 0) * (m + 5); |
| mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; |
| return m1234 * (m05 >> 1); |
| } |
| |
| static mp_limb_t |
| mul7 (mp_limb_t m) |
| { |
| mp_limb_t m05 = (m + 0) * (m + 5); |
| mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; |
| mp_limb_t m056 = m05 * (m + 6) >> 1; |
| return m1234 * m056; |
| } |
| |
| static mp_limb_t |
| mul8 (mp_limb_t m) |
| { |
| mp_limb_t m07 = (m + 0) * (m + 7); |
| mp_limb_t m0257 = m07 * (m07 + 10) >> 3; |
| mp_limb_t m1346 = m07 + 9 + m0257; |
| return m0257 * m1346; |
| } |
| |
| /* |
| static mp_limb_t |
| mul9 (mp_limb_t m) |
| { |
| return (m + 8) * mul8 (m) ; |
| } |
| |
| static mp_limb_t |
| mul10 (mp_limb_t m) |
| { |
| mp_limb_t m09 = (m + 0) * (m + 9); |
| mp_limb_t m18 = (m09 >> 1) + 4; |
| mp_limb_t m0369 = m09 * (m09 + 18) >> 3; |
| mp_limb_t m2457 = m09 * 2 + 35 + m0369; |
| return ((m0369 * m2457) >> 1) * m18; |
| } |
| */ |
| |
| typedef mp_limb_t (* mulfunc_t) (mp_limb_t); |
| |
| static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */}; |
| #define M (numberof(mulfunc)) |
| |
| /* Number of factors-of-2 removed by the corresponding mulN function. */ |
| static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/}; |
| |
| #if 1 |
| /* This variant is inaccurate but share the code with other functions. */ |
| #define MAXFACS(max,l) \ |
| do { \ |
| (max) = log_n_max (l); \ |
| } while (0) |
| #else |
| |
| /* This variant is exact(?) but uses a loop. It takes the 2 removal |
| of mulN into account. */ |
| static const unsigned long ftab[] = |
| #if GMP_NUMB_BITS == 64 |
| /* 1 to 8 factors per iteration */ |
| {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */}; |
| #endif |
| #if GMP_NUMB_BITS == 32 |
| /* 1 to 7 factors per iteration */ |
| {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */}; |
| #endif |
| |
| #define MAXFACS(max,l) \ |
| do { \ |
| int __i; \ |
| for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \ |
| ; \ |
| (max) = __i + 1; \ |
| } while (0) |
| #endif |
| |
| /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis |
| is an odd integer. */ |
| static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; |
| |
| static void |
| mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) |
| { |
| unsigned nmax, kmax, nmaxnow, numfac; |
| mp_ptr np, kp; |
| mp_size_t nn, kn, alloc; |
| mp_limb_t i, j, t, iii, jjj, cy, dinv; |
| int cnt; |
| mp_size_t maxn; |
| TMP_DECL; |
| |
| ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT); |
| TMP_MARK; |
| |
| maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */ |
| |
| /* FIXME: This allocation might be insufficient, but is usually way too |
| large. */ |
| alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD); |
| alloc = MIN (alloc, (mp_size_t) k) + 1; |
| TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1); |
| |
| MAXFACS (nmax, n); |
| ASSERT (nmax <= M); |
| MAXFACS (kmax, k); |
| ASSERT (kmax <= M); |
| ASSERT (k >= M); |
| |
| i = n - k + 1; |
| |
| np[0] = 1; nn = 1; |
| |
| numfac = 1; |
| j = ODD_FACTORIAL_TABLE_LIMIT + 1; |
| jjj = ODD_FACTORIAL_TABLE_MAX; |
| ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); |
| |
| while (1) |
| { |
| kp[0] = jjj; /* store new factors */ |
| kn = 1; |
| t = k - j + 1; |
| kmax = MIN (kmax, t); |
| |
| while (kmax != 0 && kn < SOME_THRESHOLD) |
| { |
| jjj = mulfunc[kmax - 1] (j); |
| j += kmax; /* number of factors used */ |
| count_trailing_zeros (cnt, jjj); /* count low zeros */ |
| jjj >>= cnt; /* remove remaining low zeros */ |
| cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */ |
| kp[kn] = cy; |
| kn += cy != 0; |
| t = k - j + 1; |
| kmax = MIN (kmax, t); |
| } |
| numfac = j - numfac; |
| |
| while (numfac != 0) |
| { |
| nmaxnow = MIN (nmax, numfac); |
| iii = mulfunc[nmaxnow - 1] (i); |
| i += nmaxnow; /* number of factors used */ |
| count_trailing_zeros (cnt, iii); /* count low zeros */ |
| iii >>= cnt; /* remove remaining low zeros */ |
| cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */ |
| np[nn] = cy; |
| nn += cy != 0; |
| numfac -= nmaxnow; |
| } |
| |
| ASSERT (nn < alloc); |
| |
| binvert_limb (dinv, kp[0]); |
| nn += (np[nn - 1] >= kp[kn - 1]); |
| nn -= kn; |
| mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); |
| mpn_neg (np, np, nn); |
| |
| if (kmax == 0) |
| break; |
| numfac = j; |
| |
| jjj = mulfunc[kmax - 1] (j); |
| j += kmax; /* number of factors used */ |
| count_trailing_zeros (cnt, jjj); /* count low zeros */ |
| jjj >>= cnt; /* remove remaining low zeros */ |
| } |
| |
| /* Put back the right number of factors of 2. */ |
| popc_limb (cnt, n - k); |
| popc_limb (j, k); |
| cnt += j; |
| popc_limb (j, n); |
| cnt -= j; |
| if (cnt != 0) |
| { |
| ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */ |
| cy = mpn_lshift (np, np, nn, cnt); |
| np[nn] = cy; |
| nn += cy != 0; |
| } |
| |
| nn -= np[nn - 1] == 0; /* normalisation */ |
| |
| kp = MPZ_NEWALLOC (r, nn); |
| SIZ(r) = nn; |
| MPN_COPY (kp, np, nn); |
| TMP_FREE; |
| } |
| |
| static void |
| mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) |
| { |
| unsigned nmax, numfac; |
| mp_ptr rp; |
| mp_size_t rn, alloc; |
| mp_limb_t i, iii, cy; |
| unsigned i2cnt, cnt; |
| |
| MAXFACS (nmax, n); |
| nmax = MIN (nmax, M); |
| |
| i = n - k + 1; |
| |
| i2cnt = __gmp_fac2cnt_table[k / 2 - 1]; /* low zeros count */ |
| if (nmax >= k) |
| { |
| MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >> |
| (i2cnt - tcnttab[k - 1]); |
| SIZ(r) = 1; |
| return; |
| } |
| |
| count_leading_zeros (cnt, (mp_limb_t) n); |
| cnt = GMP_LIMB_BITS - cnt; |
| alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */ |
| rp = MPZ_NEWALLOC (r, alloc); |
| |
| rp[0] = mulfunc[nmax - 1] (i); |
| rn = 1; |
| i += nmax; /* number of factors used */ |
| i2cnt -= tcnttab[nmax - 1]; /* low zeros count */ |
| numfac = k - nmax; |
| do |
| { |
| nmax = MIN (nmax, numfac); |
| iii = mulfunc[nmax - 1] (i); |
| i += nmax; /* number of factors used */ |
| i2cnt -= tcnttab[nmax - 1]; /* update low zeros count */ |
| cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */ |
| rp[rn] = cy; |
| rn += cy != 0; |
| numfac -= nmax; |
| } while (numfac != 0); |
| |
| ASSERT (rn < alloc); |
| |
| mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt); |
| /* A two-fold, branch-free normalisation is possible :*/ |
| /* rn -= rp[rn - 1] == 0; */ |
| /* rn -= rp[rn - 1] == 0; */ |
| MPN_NORMALIZE_NOT_ZERO (rp, rn); |
| |
| SIZ(r) = rn; |
| } |
| |
| /* Algorithm: |
| |
| Plain and simply multiply things together. |
| |
| We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such |
| that k!/2^t is odd). |
| |
| */ |
| |
| static mp_limb_t |
| bc_bin_uiui (unsigned int n, unsigned int k) |
| { |
| return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) |
| << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1])) |
| & GMP_NUMB_MASK; |
| } |
| |
| /* Algorithm: |
| |
| Recursively exploit the relation |
| bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) . |
| |
| Values for binomial(k,k>>1) that fit in a limb are precomputed |
| (with inverses). |
| */ |
| |
| /* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = |
| binomial(i*2,i)/2^t (where t is chosen so that it is odd). */ |
| static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE }; |
| |
| /* bin2kkinv[i] = bin2kk[i]^-1 mod B */ |
| static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE }; |
| |
| /* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */ |
| static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE }; |
| |
| static void |
| mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) |
| { |
| mp_ptr rp; |
| mp_size_t rn; |
| unsigned long int hk; |
| |
| hk = k >> 1; |
| |
| if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT) |
| mpz_smallk_bin_uiui (r, n, hk); |
| else |
| mpz_smallkdc_bin_uiui (r, n, hk); |
| k -= hk; |
| n -= hk; |
| if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { |
| mp_limb_t cy; |
| rn = SIZ (r); |
| rp = MPZ_REALLOC (r, rn + 1); |
| cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k)); |
| rp [rn] = cy; |
| rn += cy != 0; |
| } else { |
| mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3]; |
| mpz_t t; |
| |
| ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3; |
| PTR (t) = buffer; |
| if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT) |
| mpz_smallk_bin_uiui (t, n, k); |
| else |
| mpz_smallkdc_bin_uiui (t, n, k); |
| mpz_mul (r, r, t); |
| rp = PTR (r); |
| rn = SIZ (r); |
| } |
| |
| mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET], |
| bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET], |
| fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk)); |
| /* A two-fold, branch-free normalisation is possible :*/ |
| /* rn -= rp[rn - 1] == 0; */ |
| /* rn -= rp[rn - 1] == 0; */ |
| MPN_NORMALIZE_NOT_ZERO (rp, rn); |
| |
| SIZ(r) = rn; |
| } |
| |
| /* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K). |
| * |
| * Contributed to the GNU project by Marco Bodrato. |
| * |
| * Implementation of the algorithm by P. Goetgheluck, "Computing |
| * Binomial Coefficients", The American Mathematical Monthly, Vol. 94, |
| * No. 4 (April 1987), pp. 360-365. |
| * |
| * Acknowledgment: Peter Luschny did spot the slowness of the previous |
| * code and suggested the reference. |
| */ |
| |
| /* TODO: Remove duplicated constants / macros / static functions... |
| */ |
| |
| /*************************************************************/ |
| /* Section macros: common macros, for swing/fac/bin (&sieve) */ |
| /*************************************************************/ |
| |
| #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ |
| if ((PR) > (MAX_PR)) { \ |
| (VEC)[(I)++] = (PR); \ |
| (PR) = 1; \ |
| } |
| |
| #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ |
| do { \ |
| if ((PR) > (MAX_PR)) { \ |
| (VEC)[(I)++] = (PR); \ |
| (PR) = (P); \ |
| } else \ |
| (PR) *= (P); \ |
| } while (0) |
| |
| #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ |
| __max_i = (end); \ |
| \ |
| do { \ |
| ++__i; \ |
| if (((sieve)[__index] & __mask) == 0) \ |
| { \ |
| mp_limb_t prime; \ |
| prime = id_to_n(__i) |
| |
| #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ |
| do { \ |
| mp_limb_t __mask, __index, __max_i, __i; \ |
| \ |
| __i = (start)-(off); \ |
| __index = __i / GMP_LIMB_BITS; \ |
| __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ |
| __i += (off); \ |
| \ |
| LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) |
| |
| #define LOOP_ON_SIEVE_STOP \ |
| } \ |
| __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ |
| __index += __mask & 1; \ |
| } while (__i <= __max_i) |
| |
| #define LOOP_ON_SIEVE_END \ |
| LOOP_ON_SIEVE_STOP; \ |
| } while (0) |
| |
| /*********************************************************/ |
| /* Section sieve: sieving functions and tools for primes */ |
| /*********************************************************/ |
| |
| #if WANT_ASSERT |
| static mp_limb_t |
| bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } |
| #endif |
| |
| /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ |
| static mp_limb_t |
| id_to_n (mp_limb_t id) { return id*3+1+(id&1); } |
| |
| /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ |
| static mp_limb_t |
| n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } |
| |
| static mp_size_t |
| primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } |
| |
| /*********************************************************/ |
| /* Section binomial: fast binomial implementation */ |
| /*********************************************************/ |
| |
| #define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ |
| do { \ |
| mp_limb_t __a, __b, __prime, __ma,__mb; \ |
| __prime = (P); \ |
| __a = (N); __b = (K); __mb = 0; \ |
| FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ |
| do { \ |
| __mb += __b % __prime; __b /= __prime; \ |
| __ma = __a % __prime; __a /= __prime; \ |
| if (__ma < __mb) { \ |
| __mb = 1; (PR) *= __prime; \ |
| } else __mb = 0; \ |
| } while (__a >= __prime); \ |
| } while (0) |
| |
| #define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ |
| do { \ |
| mp_limb_t __prime; \ |
| __prime = (P); \ |
| if (((N) % __prime) < ((K) % __prime)) { \ |
| FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \ |
| } \ |
| } while (0) |
| |
| /* Returns an approximation of the sqare root of x. |
| * It gives: |
| * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2 |
| * or |
| * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8 |
| */ |
| static mp_limb_t |
| limb_apprsqrt (mp_limb_t x) |
| { |
| int s; |
| |
| ASSERT (x > 2); |
| count_leading_zeros (s, x); |
| s = (GMP_LIMB_BITS - s) >> 1; |
| return ((CNST_LIMB(1) << s) + (x >> s)) >> 1; |
| } |
| |
| static void |
| mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) |
| { |
| mp_limb_t *sieve, *factors, count; |
| mp_limb_t prod, max_prod; |
| mp_size_t j; |
| TMP_DECL; |
| |
| ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13); |
| ASSERT (n >= 25); |
| |
| TMP_MARK; |
| sieve = TMP_ALLOC_LIMBS (primesieve_size (n)); |
| |
| count = gmp_primesieve (sieve, n) + 1; |
| factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1); |
| |
| max_prod = GMP_NUMB_MAX / n; |
| |
| /* Handle primes = 2, 3 separately. */ |
| popc_limb (count, n - k); |
| popc_limb (j, k); |
| count += j; |
| popc_limb (j, n); |
| count -= j; |
| prod = CNST_LIMB(1) << count; |
| |
| j = 0; |
| COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j); |
| |
| /* Accumulate prime factors from 5 to n/2 */ |
| { |
| mp_limb_t s; |
| |
| s = limb_apprsqrt(n); |
| s = n_to_bit (s); |
| ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n); |
| ASSERT (s <= n_to_bit (n >> 1)); |
| LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); |
| COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); |
| LOOP_ON_SIEVE_STOP; |
| |
| ASSERT (max_prod <= GMP_NUMB_MAX / 2); |
| max_prod <<= 1; |
| |
| LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1),sieve); |
| SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); |
| LOOP_ON_SIEVE_END; |
| |
| max_prod >>= 1; |
| } |
| |
| /* Store primes from (n-k)+1 to n */ |
| ASSERT (n_to_bit (n - k) < n_to_bit (n)); |
| |
| LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve); |
| FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); |
| LOOP_ON_SIEVE_END; |
| |
| if (LIKELY (j != 0)) |
| { |
| factors[j++] = prod; |
| mpz_prodlimbs (r, factors, j); |
| } |
| else |
| { |
| MPZ_NEWALLOC (r, 1)[0] = prod; |
| SIZ (r) = 1; |
| } |
| TMP_FREE; |
| } |
| |
| #undef COUNT_A_PRIME |
| #undef SH_COUNT_A_PRIME |
| #undef LOOP_ON_SIEVE_END |
| #undef LOOP_ON_SIEVE_STOP |
| #undef LOOP_ON_SIEVE_BEGIN |
| #undef LOOP_ON_SIEVE_CONTINUE |
| |
| /*********************************************************/ |
| /* End of implementation of Goetgheluck's algorithm */ |
| /*********************************************************/ |
| |
| void |
| mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) |
| { |
| if (UNLIKELY (n < k)) { |
| SIZ (r) = 0; |
| #if BITS_PER_ULONG > GMP_NUMB_BITS |
| } else if (UNLIKELY (n > GMP_NUMB_MAX)) { |
| mpz_t tmp; |
| |
| mpz_init_set_ui (tmp, n); |
| mpz_bin_ui (r, tmp, k); |
| mpz_clear (tmp); |
| #endif |
| } else { |
| ASSERT (n <= GMP_NUMB_MAX); |
| /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ |
| k = MIN (k, n - k); |
| if (k < 2) { |
| MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */ |
| SIZ(r) = 1; |
| } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */ |
| MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k); |
| SIZ(r) = 1; |
| } else if (k <= ODD_FACTORIAL_TABLE_LIMIT) |
| mpz_smallk_bin_uiui (r, n, k); |
| else if (BIN_UIUI_ENABLE_SMALLDC && |
| k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2) |
| mpz_smallkdc_bin_uiui (r, n, k); |
| else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) && |
| k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */ |
| mpz_goetgheluck_bin_uiui (r, n, k); |
| else |
| mpz_bdiv_bin_uiui (r, n, k); |
| } |
| } |