blob: b77628fdfc23a4ec112eedd5af9c8983b184c799 [file] [log] [blame]
Austin Schuhbb1338c2024-06-15 19:31:16 -07001/* mpz_bin_uiui - compute n over k.
2
3Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
4
5Copyright 2010-2012, 2015-2018 Free Software Foundation, Inc.
6
7This file is part of the GNU MP Library.
8
9The GNU MP Library is free software; you can redistribute it and/or modify
10it under the terms of either:
11
12 * the GNU Lesser General Public License as published by the Free
13 Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16or
17
18 * the GNU General Public License as published by the Free Software
19 Foundation; either version 2 of the License, or (at your option) any
20 later version.
21
22or both in parallel, as here.
23
24The GNU MP Library is distributed in the hope that it will be useful, but
25WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27for more details.
28
29You should have received copies of the GNU General Public License and the
30GNU Lesser General Public License along with the GNU MP Library. If not,
31see https://www.gnu.org/licenses/. */
32
33#include "gmp-impl.h"
34#include "longlong.h"
35
36#ifndef BIN_GOETGHELUCK_THRESHOLD
37#define BIN_GOETGHELUCK_THRESHOLD 512
38#endif
39#ifndef BIN_UIUI_ENABLE_SMALLDC
40#define BIN_UIUI_ENABLE_SMALLDC 1
41#endif
42#ifndef BIN_UIUI_RECURSIVE_SMALLDC
43#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
44#endif
45
46/* Algorithm:
47
48 Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
49 which are then accumulated into mpn numbers. The first inner loop
50 accumulates divisor factors, the 2nd inner loop accumulates exactly the same
51 number of dividend factors. We avoid accumulating more for the divisor,
52 even with its smaller factors, since we else cannot guarantee divisibility.
53
54 Since we know each division will yield an integer, we compute the quotient
55 using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
56 2^t.
57
58 Improvements:
59
60 (1) An obvious improvement to this code would be to compute mod 2^t
61 everywhere. Unfortunately, we cannot determine t beforehand, unless we
62 invoke some approximation, such as Stirling's formula. Of course, we don't
63 need t to be tight. However, it is not clear that this would help much,
64 our numbers are kept reasonably small already.
65
66 (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
67 Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
68 would make it both reasonably accurate and fast. (We could use a table
69 stored into a limb, perhaps.) The table should take the removed factors of
70 2 into account (those done on-the-fly in mulN).
71
72 (3) The first time in the loop we compute the odd part of a
73 factorial in kp, we might use oddfac_1 for this task.
74 */
75
76/* This threshold determines how large divisor to accumulate before we call
77 bdiv. Perhaps we should never call bdiv, and accumulate all we are told,
78 since we are just basecase code anyway? Presumably, this depends on the
79 relative speed of the asymptotically fast code and this code. */
80#define SOME_THRESHOLD 20
81
82/* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME:
83 All versions of MAXFACS don't take this 2 removal into account now, meaning
84 that then, shifting just adds some overhead. (We remove factors from the
85 completed limb anyway.) */
86
87static mp_limb_t
88mul1 (mp_limb_t m)
89{
90 return m;
91}
92
93static mp_limb_t
94mul2 (mp_limb_t m)
95{
96 /* We need to shift before multiplying, to avoid an overflow. */
97 mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
98 return m01;
99}
100
101static mp_limb_t
102mul3 (mp_limb_t m)
103{
104 mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
105 mp_limb_t m2 = (m + 2);
106 return m01 * m2;
107}
108
109static mp_limb_t
110mul4 (mp_limb_t m)
111{
112 mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
113 return m03 * (m03 + 1); /* mul2 (m03) ? */
114}
115
116static mp_limb_t
117mul5 (mp_limb_t m)
118{
119 mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
120 mp_limb_t m034 = m03 * (m + 4);
121 return (m03 + 1) * m034;
122}
123
124static mp_limb_t
125mul6 (mp_limb_t m)
126{
127 mp_limb_t m05 = (m + 0) * (m + 5);
128 mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
129 return m1234 * (m05 >> 1);
130}
131
132static mp_limb_t
133mul7 (mp_limb_t m)
134{
135 mp_limb_t m05 = (m + 0) * (m + 5);
136 mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
137 mp_limb_t m056 = m05 * (m + 6) >> 1;
138 return m1234 * m056;
139}
140
141static mp_limb_t
142mul8 (mp_limb_t m)
143{
144 mp_limb_t m07 = (m + 0) * (m + 7);
145 mp_limb_t m0257 = m07 * (m07 + 10) >> 3;
146 mp_limb_t m1346 = m07 + 9 + m0257;
147 return m0257 * m1346;
148}
149
150/*
151static mp_limb_t
152mul9 (mp_limb_t m)
153{
154 return (m + 8) * mul8 (m) ;
155}
156
157static mp_limb_t
158mul10 (mp_limb_t m)
159{
160 mp_limb_t m09 = (m + 0) * (m + 9);
161 mp_limb_t m18 = (m09 >> 1) + 4;
162 mp_limb_t m0369 = m09 * (m09 + 18) >> 3;
163 mp_limb_t m2457 = m09 * 2 + 35 + m0369;
164 return ((m0369 * m2457) >> 1) * m18;
165}
166*/
167
168typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
169
170static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */};
171#define M (numberof(mulfunc))
172
173/* Number of factors-of-2 removed by the corresponding mulN function. */
174static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/};
175
176#if 1
177/* This variant is inaccurate but share the code with other functions. */
178#define MAXFACS(max,l) \
179 do { \
180 (max) = log_n_max (l); \
181 } while (0)
182#else
183
184/* This variant is exact(?) but uses a loop. It takes the 2 removal
185 of mulN into account. */
186static const unsigned long ftab[] =
187#if GMP_NUMB_BITS == 64
188 /* 1 to 8 factors per iteration */
189 {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */};
190#endif
191#if GMP_NUMB_BITS == 32
192 /* 1 to 7 factors per iteration */
193 {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */};
194#endif
195
196#define MAXFACS(max,l) \
197 do { \
198 int __i; \
199 for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \
200 ; \
201 (max) = __i + 1; \
202 } while (0)
203#endif
204
205/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
206 is an odd integer. */
207static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
208
209static void
210mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
211{
212 unsigned nmax, kmax, nmaxnow, numfac;
213 mp_ptr np, kp;
214 mp_size_t nn, kn, alloc;
215 mp_limb_t i, j, t, iii, jjj, cy, dinv;
216 int cnt;
217 mp_size_t maxn;
218 TMP_DECL;
219
220 ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
221 TMP_MARK;
222
223 maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */
224
225 /* FIXME: This allocation might be insufficient, but is usually way too
226 large. */
227 alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
228 alloc = MIN (alloc, (mp_size_t) k) + 1;
229 TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1);
230
231 MAXFACS (nmax, n);
232 ASSERT (nmax <= M);
233 MAXFACS (kmax, k);
234 ASSERT (kmax <= M);
235 ASSERT (k >= M);
236
237 i = n - k + 1;
238
239 np[0] = 1; nn = 1;
240
241 numfac = 1;
242 j = ODD_FACTORIAL_TABLE_LIMIT + 1;
243 jjj = ODD_FACTORIAL_TABLE_MAX;
244 ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
245
246 while (1)
247 {
248 kp[0] = jjj; /* store new factors */
249 kn = 1;
250 t = k - j + 1;
251 kmax = MIN (kmax, t);
252
253 while (kmax != 0 && kn < SOME_THRESHOLD)
254 {
255 jjj = mulfunc[kmax - 1] (j);
256 j += kmax; /* number of factors used */
257 count_trailing_zeros (cnt, jjj); /* count low zeros */
258 jjj >>= cnt; /* remove remaining low zeros */
259 cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */
260 kp[kn] = cy;
261 kn += cy != 0;
262 t = k - j + 1;
263 kmax = MIN (kmax, t);
264 }
265 numfac = j - numfac;
266
267 while (numfac != 0)
268 {
269 nmaxnow = MIN (nmax, numfac);
270 iii = mulfunc[nmaxnow - 1] (i);
271 i += nmaxnow; /* number of factors used */
272 count_trailing_zeros (cnt, iii); /* count low zeros */
273 iii >>= cnt; /* remove remaining low zeros */
274 cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */
275 np[nn] = cy;
276 nn += cy != 0;
277 numfac -= nmaxnow;
278 }
279
280 ASSERT (nn < alloc);
281
282 binvert_limb (dinv, kp[0]);
283 nn += (np[nn - 1] >= kp[kn - 1]);
284 nn -= kn;
285 mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
286 mpn_neg (np, np, nn);
287
288 if (kmax == 0)
289 break;
290 numfac = j;
291
292 jjj = mulfunc[kmax - 1] (j);
293 j += kmax; /* number of factors used */
294 count_trailing_zeros (cnt, jjj); /* count low zeros */
295 jjj >>= cnt; /* remove remaining low zeros */
296 }
297
298 /* Put back the right number of factors of 2. */
299 popc_limb (cnt, n - k);
300 popc_limb (j, k);
301 cnt += j;
302 popc_limb (j, n);
303 cnt -= j;
304 if (cnt != 0)
305 {
306 ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
307 cy = mpn_lshift (np, np, nn, cnt);
308 np[nn] = cy;
309 nn += cy != 0;
310 }
311
312 nn -= np[nn - 1] == 0; /* normalisation */
313
314 kp = MPZ_NEWALLOC (r, nn);
315 SIZ(r) = nn;
316 MPN_COPY (kp, np, nn);
317 TMP_FREE;
318}
319
320static void
321mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
322{
323 unsigned nmax, numfac;
324 mp_ptr rp;
325 mp_size_t rn, alloc;
326 mp_limb_t i, iii, cy;
327 unsigned i2cnt, cnt;
328
329 MAXFACS (nmax, n);
330 nmax = MIN (nmax, M);
331
332 i = n - k + 1;
333
334 i2cnt = __gmp_fac2cnt_table[k / 2 - 1]; /* low zeros count */
335 if (nmax >= k)
336 {
337 MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >>
338 (i2cnt - tcnttab[k - 1]);
339 SIZ(r) = 1;
340 return;
341 }
342
343 count_leading_zeros (cnt, (mp_limb_t) n);
344 cnt = GMP_LIMB_BITS - cnt;
345 alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */
346 rp = MPZ_NEWALLOC (r, alloc);
347
348 rp[0] = mulfunc[nmax - 1] (i);
349 rn = 1;
350 i += nmax; /* number of factors used */
351 i2cnt -= tcnttab[nmax - 1]; /* low zeros count */
352 numfac = k - nmax;
353 do
354 {
355 nmax = MIN (nmax, numfac);
356 iii = mulfunc[nmax - 1] (i);
357 i += nmax; /* number of factors used */
358 i2cnt -= tcnttab[nmax - 1]; /* update low zeros count */
359 cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */
360 rp[rn] = cy;
361 rn += cy != 0;
362 numfac -= nmax;
363 } while (numfac != 0);
364
365 ASSERT (rn < alloc);
366
367 mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt);
368 /* A two-fold, branch-free normalisation is possible :*/
369 /* rn -= rp[rn - 1] == 0; */
370 /* rn -= rp[rn - 1] == 0; */
371 MPN_NORMALIZE_NOT_ZERO (rp, rn);
372
373 SIZ(r) = rn;
374}
375
376/* Algorithm:
377
378 Plain and simply multiply things together.
379
380 We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
381 that k!/2^t is odd).
382
383*/
384
385static mp_limb_t
386bc_bin_uiui (unsigned int n, unsigned int k)
387{
388 return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
389 << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
390 & GMP_NUMB_MASK;
391}
392
393/* Algorithm:
394
395 Recursively exploit the relation
396 bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) .
397
398 Values for binomial(k,k>>1) that fit in a limb are precomputed
399 (with inverses).
400*/
401
402/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
403 binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
404static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
405
406/* bin2kkinv[i] = bin2kk[i]^-1 mod B */
407static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
408
409/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */
410static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
411
412static void
413mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
414{
415 mp_ptr rp;
416 mp_size_t rn;
417 unsigned long int hk;
418
419 hk = k >> 1;
420
421 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
422 mpz_smallk_bin_uiui (r, n, hk);
423 else
424 mpz_smallkdc_bin_uiui (r, n, hk);
425 k -= hk;
426 n -= hk;
427 if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
428 mp_limb_t cy;
429 rn = SIZ (r);
430 rp = MPZ_REALLOC (r, rn + 1);
431 cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
432 rp [rn] = cy;
433 rn += cy != 0;
434 } else {
435 mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
436 mpz_t t;
437
438 ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
439 PTR (t) = buffer;
440 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
441 mpz_smallk_bin_uiui (t, n, k);
442 else
443 mpz_smallkdc_bin_uiui (t, n, k);
444 mpz_mul (r, r, t);
445 rp = PTR (r);
446 rn = SIZ (r);
447 }
448
449 mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
450 bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
451 fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
452 /* A two-fold, branch-free normalisation is possible :*/
453 /* rn -= rp[rn - 1] == 0; */
454 /* rn -= rp[rn - 1] == 0; */
455 MPN_NORMALIZE_NOT_ZERO (rp, rn);
456
457 SIZ(r) = rn;
458}
459
460/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
461 *
462 * Contributed to the GNU project by Marco Bodrato.
463 *
464 * Implementation of the algorithm by P. Goetgheluck, "Computing
465 * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
466 * No. 4 (April 1987), pp. 360-365.
467 *
468 * Acknowledgment: Peter Luschny did spot the slowness of the previous
469 * code and suggested the reference.
470 */
471
472/* TODO: Remove duplicated constants / macros / static functions...
473 */
474
475/*************************************************************/
476/* Section macros: common macros, for swing/fac/bin (&sieve) */
477/*************************************************************/
478
479#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
480 if ((PR) > (MAX_PR)) { \
481 (VEC)[(I)++] = (PR); \
482 (PR) = 1; \
483 }
484
485#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
486 do { \
487 if ((PR) > (MAX_PR)) { \
488 (VEC)[(I)++] = (PR); \
489 (PR) = (P); \
490 } else \
491 (PR) *= (P); \
492 } while (0)
493
494#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
495 __max_i = (end); \
496 \
497 do { \
498 ++__i; \
499 if (((sieve)[__index] & __mask) == 0) \
500 { \
501 mp_limb_t prime; \
502 prime = id_to_n(__i)
503
504#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
505 do { \
506 mp_limb_t __mask, __index, __max_i, __i; \
507 \
508 __i = (start)-(off); \
509 __index = __i / GMP_LIMB_BITS; \
510 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
511 __i += (off); \
512 \
513 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
514
515#define LOOP_ON_SIEVE_STOP \
516 } \
517 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
518 __index += __mask & 1; \
519 } while (__i <= __max_i)
520
521#define LOOP_ON_SIEVE_END \
522 LOOP_ON_SIEVE_STOP; \
523 } while (0)
524
525/*********************************************************/
526/* Section sieve: sieving functions and tools for primes */
527/*********************************************************/
528
529#if WANT_ASSERT
530static mp_limb_t
531bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
532#endif
533
534/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
535static mp_limb_t
536id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
537
538/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
539static mp_limb_t
540n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
541
542static mp_size_t
543primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
544
545/*********************************************************/
546/* Section binomial: fast binomial implementation */
547/*********************************************************/
548
549#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
550 do { \
551 mp_limb_t __a, __b, __prime, __ma,__mb; \
552 __prime = (P); \
553 __a = (N); __b = (K); __mb = 0; \
554 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
555 do { \
556 __mb += __b % __prime; __b /= __prime; \
557 __ma = __a % __prime; __a /= __prime; \
558 if (__ma < __mb) { \
559 __mb = 1; (PR) *= __prime; \
560 } else __mb = 0; \
561 } while (__a >= __prime); \
562 } while (0)
563
564#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
565 do { \
566 mp_limb_t __prime; \
567 __prime = (P); \
568 if (((N) % __prime) < ((K) % __prime)) { \
569 FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \
570 } \
571 } while (0)
572
573/* Returns an approximation of the sqare root of x.
574 * It gives:
575 * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
576 * or
577 * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
578 */
579static mp_limb_t
580limb_apprsqrt (mp_limb_t x)
581{
582 int s;
583
584 ASSERT (x > 2);
585 count_leading_zeros (s, x);
586 s = (GMP_LIMB_BITS - s) >> 1;
587 return ((CNST_LIMB(1) << s) + (x >> s)) >> 1;
588}
589
590static void
591mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
592{
593 mp_limb_t *sieve, *factors, count;
594 mp_limb_t prod, max_prod;
595 mp_size_t j;
596 TMP_DECL;
597
598 ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
599 ASSERT (n >= 25);
600
601 TMP_MARK;
602 sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
603
604 count = gmp_primesieve (sieve, n) + 1;
605 factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
606
607 max_prod = GMP_NUMB_MAX / n;
608
609 /* Handle primes = 2, 3 separately. */
610 popc_limb (count, n - k);
611 popc_limb (j, k);
612 count += j;
613 popc_limb (j, n);
614 count -= j;
615 prod = CNST_LIMB(1) << count;
616
617 j = 0;
618 COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
619
620 /* Accumulate prime factors from 5 to n/2 */
621 {
622 mp_limb_t s;
623
624 s = limb_apprsqrt(n);
625 s = n_to_bit (s);
626 ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
627 ASSERT (s <= n_to_bit (n >> 1));
628 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
629 COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
630 LOOP_ON_SIEVE_STOP;
631
632 ASSERT (max_prod <= GMP_NUMB_MAX / 2);
633 max_prod <<= 1;
634
635 LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1),sieve);
636 SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
637 LOOP_ON_SIEVE_END;
638
639 max_prod >>= 1;
640 }
641
642 /* Store primes from (n-k)+1 to n */
643 ASSERT (n_to_bit (n - k) < n_to_bit (n));
644
645 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
646 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
647 LOOP_ON_SIEVE_END;
648
649 if (LIKELY (j != 0))
650 {
651 factors[j++] = prod;
652 mpz_prodlimbs (r, factors, j);
653 }
654 else
655 {
656 MPZ_NEWALLOC (r, 1)[0] = prod;
657 SIZ (r) = 1;
658 }
659 TMP_FREE;
660}
661
662#undef COUNT_A_PRIME
663#undef SH_COUNT_A_PRIME
664#undef LOOP_ON_SIEVE_END
665#undef LOOP_ON_SIEVE_STOP
666#undef LOOP_ON_SIEVE_BEGIN
667#undef LOOP_ON_SIEVE_CONTINUE
668
669/*********************************************************/
670/* End of implementation of Goetgheluck's algorithm */
671/*********************************************************/
672
673void
674mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
675{
676 if (UNLIKELY (n < k)) {
677 SIZ (r) = 0;
678#if BITS_PER_ULONG > GMP_NUMB_BITS
679 } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
680 mpz_t tmp;
681
682 mpz_init_set_ui (tmp, n);
683 mpz_bin_ui (r, tmp, k);
684 mpz_clear (tmp);
685#endif
686 } else {
687 ASSERT (n <= GMP_NUMB_MAX);
688 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
689 k = MIN (k, n - k);
690 if (k < 2) {
691 MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
692 SIZ(r) = 1;
693 } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
694 MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k);
695 SIZ(r) = 1;
696 } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
697 mpz_smallk_bin_uiui (r, n, k);
698 else if (BIN_UIUI_ENABLE_SMALLDC &&
699 k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
700 mpz_smallkdc_bin_uiui (r, n, k);
701 else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
702 k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
703 mpz_goetgheluck_bin_uiui (r, n, k);
704 else
705 mpz_bdiv_bin_uiui (r, n, k);
706 }
707}