blob: 9c99d1e494d4896a2a97cf8865c6d1a25a0fcb70 [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3Contributed to the GNU project by Marco Bodrato.
4
5THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10Copyright 2010-2012, 2015-2017 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27or both in parallel, as here.
28
29The GNU MP Library is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32for more details.
33
34You should have received copies of the GNU General Public License and the
35GNU Lesser General Public License along with the GNU MP Library. If not,
36see https://www.gnu.org/licenses/. */
37
38#include "gmp-impl.h"
39#include "longlong.h"
40
41/* TODO:
42 - split this file in smaller parts with functions that can be recycled for different computations.
43 */
44
45/**************************************************************/
46/* Section macros: common macros, for mswing/fac/bin (&sieve) */
47/**************************************************************/
48
49#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
50 if ((PR) > (MAX_PR)) { \
51 (VEC)[(I)++] = (PR); \
52 (PR) = 1; \
53 }
54
55#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
56 do { \
57 if ((PR) > (MAX_PR)) { \
58 (VEC)[(I)++] = (PR); \
59 (PR) = (P); \
60 } else \
61 (PR) *= (P); \
62 } while (0)
63
64#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
65 __max_i = (end); \
66 \
67 do { \
68 ++__i; \
69 if (((sieve)[__index] & __mask) == 0) \
70 { \
71 mp_limb_t prime; \
72 prime = id_to_n(__i)
73
74#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
75 do { \
76 mp_limb_t __mask, __index, __max_i, __i; \
77 \
78 __i = (start)-(off); \
79 __index = __i / GMP_LIMB_BITS; \
80 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
81 __i += (off); \
82 \
83 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84
85#define LOOP_ON_SIEVE_STOP \
86 } \
87 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
88 __index += __mask & 1; \
89 } while (__i <= __max_i)
90
91#define LOOP_ON_SIEVE_END \
92 LOOP_ON_SIEVE_STOP; \
93 } while (0)
94
95/*********************************************************/
96/* Section sieve: sieving functions and tools for primes */
97/*********************************************************/
98
99#if WANT_ASSERT
100static mp_limb_t
101bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102#endif
103
104/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105static mp_limb_t
106id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
107
108/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109static mp_limb_t
110n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112#if WANT_ASSERT
113static mp_size_t
114primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115#endif
116
117/*********************************************************/
118/* Section mswing: 2-multiswing factorial */
119/*********************************************************/
120
121/* Returns an approximation of the sqare root of x.
122 * It gives:
123 * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124 * or
125 * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126 */
127static mp_limb_t
128limb_apprsqrt (mp_limb_t x)
129{
130 int s;
131
132 ASSERT (x > 2);
133 count_leading_zeros (s, x);
134 s = (GMP_LIMB_BITS - s) >> 1;
135 return ((CNST_LIMB(1) << s) + (x >> s)) >> 1;
136}
137
138#if 0
139/* A count-then-exponentiate variant for SWING_A_PRIME */
140#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
141 do { \
142 mp_limb_t __q, __prime; \
143 int __exp; \
144 __prime = (P); \
145 __exp = 0; \
146 __q = (N); \
147 do { \
148 __q /= __prime; \
149 __exp += __q & 1; \
150 } while (__q >= __prime); \
151 if (__exp) { /* Store $prime^{exp}$ */ \
152 for (__q = __prime; --__exp; __q *= __prime); \
153 FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154 }; \
155 } while (0)
156#else
157#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158 do { \
159 mp_limb_t __q, __prime; \
160 __prime = (P); \
161 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162 __q = (N); \
163 do { \
164 __q /= __prime; \
165 if ((__q & 1) != 0) (PR) *= __prime; \
166 } while (__q >= __prime); \
167 } while (0)
168#endif
169
170#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
171 do { \
172 mp_limb_t __prime; \
173 __prime = (P); \
174 if ((((N) / __prime) & 1) != 0) \
175 FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
176 } while (0)
177
178/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179 factorial of the parameter n. The result x is an odd positive
180 integer so that multiswing(n,2) = x 2^a.
181
182 Uses the algorithm described by Peter Luschny in "Divide, Swing and
183 Conquer the Factorial!".
184
185 The pointer sieve points to primesieve_size(n) limbs containing a
186 bit-array where primes are marked as 0.
187 Enough (FIXME: explain :-) limbs must be pointed by factors.
188 */
189
190static void
191mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192{
193 mp_limb_t prod, max_prod;
194 mp_size_t j;
195
196 ASSERT (n > 25);
197
198 j = 0;
199 prod = -(n & 1);
200 n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201
202 prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203 max_prod = GMP_NUMB_MAX / (n-1);
204
205 /* Handle prime = 3 separately. */
206 SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207
208 /* Swing primes from 5 to n/3 */
209 {
210 mp_limb_t s, l_max_prod;
211
212 s = limb_apprsqrt(n);
213 ASSERT (s >= 5);
214 s = n_to_bit (s);
215 ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216 ASSERT (s < n_to_bit (n / 3));
217 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218 SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219 LOOP_ON_SIEVE_STOP;
220
221 ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222
223 l_max_prod = max_prod * 3;
224
225 LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3), sieve);
226 SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227 LOOP_ON_SIEVE_END;
228 }
229
230 /* Store primes from (n+1)/2 to n */
231 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233 LOOP_ON_SIEVE_END;
234
235 if (LIKELY (j != 0))
236 {
237 factors[j++] = prod;
238 mpz_prodlimbs (x, factors, j);
239 }
240 else
241 {
242 ASSERT (ALLOC (x) > 0);
243 PTR (x)[0] = prod;
244 SIZ (x) = 1;
245 }
246}
247
248#undef SWING_A_PRIME
249#undef SH_SWING_A_PRIME
250#undef LOOP_ON_SIEVE_END
251#undef LOOP_ON_SIEVE_STOP
252#undef LOOP_ON_SIEVE_BEGIN
253#undef LOOP_ON_SIEVE_CONTINUE
254#undef FACTOR_LIST_APPEND
255
256/*********************************************************/
257/* Section oddfac: odd factorial, needed also by binomial*/
258/*********************************************************/
259
260#if TUNE_PROGRAM_BUILD
261#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
262#else
263#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
264#endif
265
266/* mpz_oddfac_1 computes the odd part of the factorial of the
267 parameter n. I.e. n! = x 2^a, where x is the returned value: an
268 odd positive integer.
269
270 If flag != 0 a square is skipped in the DSC part, e.g.
271 if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
272
273 If n is too small, flag is ignored, and an ASSERT can be triggered.
274
275 TODO: FAC_DSC_THRESHOLD is used here with two different roles:
276 - to decide when prime factorisation is needed,
277 - to stop the recursion, once sieving is done.
278 Maybe two thresholds can do a better job.
279 */
280void
281mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
282{
283 ASSERT (n <= GMP_NUMB_MAX);
284 ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
285
286 if (n <= ODD_FACTORIAL_TABLE_LIMIT)
287 {
288 MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
289 SIZ (x) = 1;
290 }
291 else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
292 {
293 mp_ptr px;
294
295 px = MPZ_NEWALLOC (x, 2);
296 umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
297 SIZ (x) = 2;
298 }
299 else
300 {
301 unsigned s;
302 mp_ptr factors;
303
304 s = 0;
305 {
306 mp_limb_t tn;
307 mp_limb_t prod, max_prod, i;
308 mp_size_t j;
309 TMP_SDECL;
310
311#if TUNE_PROGRAM_BUILD
312 ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
313 ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
314#endif
315
316 /* Compute the number of recursive steps for the DSC algorithm. */
317 for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
318 tn >>= 1;
319
320 j = 0;
321
322 TMP_SMARK;
323 factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
324 ASSERT (tn >= FACTORS_PER_LIMB);
325
326 prod = 1;
327#if TUNE_PROGRAM_BUILD
328 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
329#else
330 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
331#endif
332
333 ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
334 do {
335 i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
336 factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
337 do {
338 FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
339 i += 2;
340 } while (i <= tn);
341 max_prod <<= 1;
342 tn >>= 1;
343 } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
344
345 factors[j++] = prod;
346 factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
347 factors[j++] = __gmp_oddfac_table[tn >> 1];
348 mpz_prodlimbs (x, factors, j);
349
350 TMP_SFREE;
351 }
352
353 if (s != 0)
354 /* Use the algorithm described by Peter Luschny in "Divide,
355 Swing and Conquer the Factorial!".
356
357 Improvement: there are two temporary buffers, factors and
358 square, that are never used together; with a good estimate
359 of the maximal needed size, they could share a single
360 allocation.
361 */
362 {
363 mpz_t mswing;
364 mp_ptr sieve;
365 mp_size_t size;
366 TMP_DECL;
367
368 TMP_MARK;
369
370 flag--;
371 size = n / GMP_NUMB_BITS + 4;
372 ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
373 /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
374 one more can be overwritten by mul, another for the sieve */
375 MPZ_TMP_INIT (mswing, size);
376 /* Initialize size, so that ASSERT can check it correctly. */
377 ASSERT_CODE (SIZ (mswing) = 0);
378
379 /* Put the sieve on the second half, it will be overwritten by the last mswing. */
380 sieve = PTR (mswing) + size / 2 + 1;
381
382 size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
383
384 factors = TMP_ALLOC_LIMBS (size);
385 do {
386 mp_ptr square, px;
387 mp_size_t nx, ns;
388 mp_limb_t cy;
389 TMP_DECL;
390
391 s--;
392 ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
393 mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
394
395 TMP_MARK;
396 nx = SIZ (x);
397 if (s == flag) {
398 size = nx;
399 square = TMP_ALLOC_LIMBS (size);
400 MPN_COPY (square, PTR (x), nx);
401 } else {
402 size = nx << 1;
403 square = TMP_ALLOC_LIMBS (size);
404 mpn_sqr (square, PTR (x), nx);
405 size -= (square[size - 1] == 0);
406 }
407 ns = SIZ (mswing);
408 nx = size + ns;
409 px = MPZ_NEWALLOC (x, nx);
410 ASSERT (ns <= size);
411 cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
412
413 SIZ(x) = nx - (cy == 0);
414 TMP_FREE;
415 } while (s != 0);
416 TMP_FREE;
417 }
418 }
419}
420
421#undef FACTORS_PER_LIMB
422#undef FACTOR_LIST_STORE