blob: 28281030b7cab0f4131edff435459eed7dafbd6a [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* mpn_powm -- Compute R = U^E mod M.
2
3 Contributed to the GNU project by Torbjorn Granlund.
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9Copyright 2007-2012, 2019 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26or both in parallel, as here.
27
28The GNU MP Library is distributed in the hope that it will be useful, but
29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31for more details.
32
33You should have received copies of the GNU General Public License and the
34GNU Lesser General Public License along with the GNU MP Library. If not,
35see https://www.gnu.org/licenses/. */
36
37
38/*
39 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40
41 1. W <- U
42
43 2. T <- (B^n * U) mod M Convert to REDC form
44
45 3. Compute table U^1, U^3, U^5... of E-dependent size
46
47 4. While there are more bits in E
48 W <- power left-to-right base-k
49
50
51 TODO:
52
53 * Make getbits a macro, thereby allowing it to update the index operand.
54 That will simplify the code using getbits. (Perhaps make getbits' sibling
55 getbit then have similar form, for symmetry.)
56
57 * Write an itch function. Or perhaps get rid of tp parameter since the huge
58 pp area is allocated locally anyway?
59
60 * Choose window size without looping. (Superoptimize or think(tm).)
61
62 * Handle small bases with initial, reduction-free exponentiation.
63
64 * Call new division functions, not mpn_tdiv_qr.
65
66 * Consider special code for one-limb M.
67
68 * How should we handle the redc1/redc2/redc_n choice?
69 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
70 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72 This disregards the addmul_N constant term, but we could think of
73 that as part of the respective mullo.
74
75 * When U (the base) is small, we should start the exponentiation with plain
76 operations, then convert that partial result to REDC form.
77
78 * When U is just one limb, should it be handled without the k-ary tricks?
79 We could keep a factor of B^n in W, but use U' = BU as base. After
80 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81 mod M.
82*/
83
84#include "gmp-impl.h"
85#include "longlong.h"
86
87#undef MPN_REDC_0
88#define MPN_REDC_0(rp, up, mp, invm) \
89 do { \
90 mp_limb_t p1, r0, u0, _dummy; \
91 u0 = *(up); \
92 umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK); \
93 ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0); \
94 p1 += (u0 != 0); \
95 r0 = (up)[1] + p1; \
96 if (p1 > r0) \
97 r0 -= *(mp); \
98 *(rp) = r0; \
99 } while (0)
100
101#undef MPN_REDC_1
102#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103#define MPN_REDC_1(rp, up, mp, n, invm) \
104 do { \
105 mp_limb_t cy; \
106 cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \
107 if (cy != 0) \
108 mpn_sub_n (rp, up + n, mp, n); \
109 else \
110 MPN_COPY (rp, up + n, n); \
111 } while (0)
112#else
113#define MPN_REDC_1(rp, up, mp, n, invm) \
114 do { \
115 mp_limb_t cy; \
116 cy = mpn_redc_1 (rp, up, mp, n, invm); \
117 if (cy != 0) \
118 mpn_sub_n (rp, rp, mp, n); \
119 } while (0)
120#endif
121
122#undef MPN_REDC_2
123#define MPN_REDC_2(rp, up, mp, n, mip) \
124 do { \
125 mp_limb_t cy; \
126 cy = mpn_redc_2 (rp, up, mp, n, mip); \
127 if (cy != 0) \
128 mpn_sub_n (rp, rp, mp, n); \
129 } while (0)
130
131#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132#define WANT_REDC_2 1
133#endif
134
135#define getbit(p,bi) \
136 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137
138static inline mp_limb_t
139getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140{
141 int nbits_in_r;
142 mp_limb_t r;
143 mp_size_t i;
144
145 if (bi < nbits)
146 {
147 return p[0] & (((mp_limb_t) 1 << bi) - 1);
148 }
149 else
150 {
151 bi -= nbits; /* bit index of low bit to extract */
152 i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
153 bi %= GMP_NUMB_BITS; /* bit index in low word */
154 r = p[i] >> bi; /* extract (low) bits */
155 nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
156 if (nbits_in_r < nbits) /* did we get enough bits? */
157 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
158 return r & (((mp_limb_t) 1 << nbits) - 1);
159 }
160}
161
162static inline int
163win_size (mp_bitcnt_t eb)
164{
165 int k;
166 static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167 for (k = 1; eb > x[k]; k++)
168 ;
169 return k;
170}
171
172/* Convert U to REDC form, U_r = B^n * U mod M */
173static void
174redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175{
176 mp_ptr tp, qp;
177 TMP_DECL;
178 TMP_MARK;
179
180 TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181
182 MPN_ZERO (tp, n);
183 MPN_COPY (tp + n, up, un);
184 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185 TMP_FREE;
186}
187
188/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
189 Requires that mp[n-1..0] is odd.
190 Requires that ep[en-1..0] is > 1.
191 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
192void
193mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
194 mp_srcptr ep, mp_size_t en,
195 mp_srcptr mp, mp_size_t n, mp_ptr tp)
196{
197 mp_limb_t ip[2], *mip;
198 int cnt;
199 mp_bitcnt_t ebi;
200 int windowsize, this_windowsize;
201 mp_limb_t expbits;
202 mp_ptr pp, this_pp;
203 long i;
204 TMP_DECL;
205
206 ASSERT (en > 1 || (en == 1 && ep[0] > 1));
207 ASSERT (n >= 1 && ((mp[0] & 1) != 0));
208
209 TMP_MARK;
210
211 MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
212
213#if 0
214 if (bn < n)
215 {
216 /* Do the first few exponent bits without mod reductions,
217 until the result is greater than the mod argument. */
218 for (;;)
219 {
220 mpn_sqr (tp, this_pp, tn);
221 tn = tn * 2 - 1, tn += tp[tn] != 0;
222 if (getbit (ep, ebi) != 0)
223 mpn_mul (..., tp, tn, bp, bn);
224 ebi--;
225 }
226 }
227#endif
228
229 windowsize = win_size (ebi);
230
231#if WANT_REDC_2
232 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
233 {
234 mip = ip;
235 binvert_limb (mip[0], mp[0]);
236 mip[0] = -mip[0];
237 }
238 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
239 {
240 mip = ip;
241 mpn_binvert (mip, mp, 2, tp);
242 mip[0] = -mip[0]; mip[1] = ~mip[1];
243 }
244#else
245 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
246 {
247 mip = ip;
248 binvert_limb (mip[0], mp[0]);
249 mip[0] = -mip[0];
250 }
251#endif
252 else
253 {
254 mip = TMP_ALLOC_LIMBS (n);
255 mpn_binvert (mip, mp, n, tp);
256 }
257
258 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
259
260 this_pp = pp;
261 redcify (this_pp, bp, bn, mp, n);
262
263 /* Store b^2 at rp. */
264 mpn_sqr (tp, this_pp, n);
265#if 0
266 if (n == 1) {
267 MPN_REDC_0 (rp, tp, mp, mip[0]);
268 } else
269#endif
270#if WANT_REDC_2
271 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
272 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
273 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
274 MPN_REDC_2 (rp, tp, mp, n, mip);
275#else
276 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
277 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
278#endif
279 else
280 mpn_redc_n (rp, tp, mp, n, mip);
281
282 /* Precompute odd powers of b and put them in the temporary area at pp. */
283 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
284#if 1
285 if (n == 1) {
286 umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
287 ++this_pp ;
288 MPN_REDC_0 (this_pp, tp, mp, mip[0]);
289 } else
290#endif
291 {
292 mpn_mul_n (tp, this_pp, rp, n);
293 this_pp += n;
294#if WANT_REDC_2
295 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
296 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
297 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
298 MPN_REDC_2 (this_pp, tp, mp, n, mip);
299#else
300 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
301 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
302#endif
303 else
304 mpn_redc_n (this_pp, tp, mp, n, mip);
305 }
306
307 expbits = getbits (ep, ebi, windowsize);
308 if (ebi < windowsize)
309 ebi = 0;
310 else
311 ebi -= windowsize;
312
313 count_trailing_zeros (cnt, expbits);
314 ebi += cnt;
315 expbits >>= cnt;
316
317 MPN_COPY (rp, pp + n * (expbits >> 1), n);
318
319#define INNERLOOP \
320 while (ebi != 0) \
321 { \
322 while (getbit (ep, ebi) == 0) \
323 { \
324 MPN_SQR (tp, rp, n); \
325 MPN_REDUCE (rp, tp, mp, n, mip); \
326 if (--ebi == 0) \
327 goto done; \
328 } \
329 \
330 /* The next bit of the exponent is 1. Now extract the largest \
331 block of bits <= windowsize, and such that the least \
332 significant bit is 1. */ \
333 \
334 expbits = getbits (ep, ebi, windowsize); \
335 this_windowsize = windowsize; \
336 if (ebi < windowsize) \
337 { \
338 this_windowsize -= windowsize - ebi; \
339 ebi = 0; \
340 } \
341 else \
342 ebi -= windowsize; \
343 \
344 count_trailing_zeros (cnt, expbits); \
345 this_windowsize -= cnt; \
346 ebi += cnt; \
347 expbits >>= cnt; \
348 \
349 do \
350 { \
351 MPN_SQR (tp, rp, n); \
352 MPN_REDUCE (rp, tp, mp, n, mip); \
353 } \
354 while (--this_windowsize != 0); \
355 \
356 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
357 MPN_REDUCE (rp, tp, mp, n, mip); \
358 }
359
360
361 if (n == 1)
362 {
363#undef MPN_MUL_N
364#undef MPN_SQR
365#undef MPN_REDUCE
366#define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b))
367#define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a))
368#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(rp, tp, mp, mip[0])
369 INNERLOOP;
370 }
371 else
372#if WANT_REDC_2
373 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
374 {
375 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
376 {
377 if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
378 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
379 {
380#undef MPN_MUL_N
381#undef MPN_SQR
382#undef MPN_REDUCE
383#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
384#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
385#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
386 INNERLOOP;
387 }
388 else
389 {
390#undef MPN_MUL_N
391#undef MPN_SQR
392#undef MPN_REDUCE
393#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
394#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
395#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
396 INNERLOOP;
397 }
398 }
399 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
400 {
401 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
402 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
403 {
404#undef MPN_MUL_N
405#undef MPN_SQR
406#undef MPN_REDUCE
407#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
408#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
409#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
410 INNERLOOP;
411 }
412 else
413 {
414#undef MPN_MUL_N
415#undef MPN_SQR
416#undef MPN_REDUCE
417#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
418#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
419#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
420 INNERLOOP;
421 }
422 }
423 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
424 {
425#undef MPN_MUL_N
426#undef MPN_SQR
427#undef MPN_REDUCE
428#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
429#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
430#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
431 INNERLOOP;
432 }
433 else
434 {
435#undef MPN_MUL_N
436#undef MPN_SQR
437#undef MPN_REDUCE
438#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
439#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
440#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
441 INNERLOOP;
442 }
443 }
444 else
445 {
446 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
447 {
448 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
449 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
450 {
451#undef MPN_MUL_N
452#undef MPN_SQR
453#undef MPN_REDUCE
454#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
455#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
456#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
457 INNERLOOP;
458 }
459 else
460 {
461#undef MPN_MUL_N
462#undef MPN_SQR
463#undef MPN_REDUCE
464#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
465#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
466#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
467 INNERLOOP;
468 }
469 }
470 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
471 {
472#undef MPN_MUL_N
473#undef MPN_SQR
474#undef MPN_REDUCE
475#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
476#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
477#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
478 INNERLOOP;
479 }
480 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
481 {
482#undef MPN_MUL_N
483#undef MPN_SQR
484#undef MPN_REDUCE
485#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
486#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
487#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
488 INNERLOOP;
489 }
490 else
491 {
492#undef MPN_MUL_N
493#undef MPN_SQR
494#undef MPN_REDUCE
495#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
496#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
497#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
498 INNERLOOP;
499 }
500 }
501
502#else /* WANT_REDC_2 */
503
504 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
505 {
506 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
507 {
508 if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
509 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
510 {
511#undef MPN_MUL_N
512#undef MPN_SQR
513#undef MPN_REDUCE
514#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
515#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
516#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
517 INNERLOOP;
518 }
519 else
520 {
521#undef MPN_MUL_N
522#undef MPN_SQR
523#undef MPN_REDUCE
524#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
525#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
526#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
527 INNERLOOP;
528 }
529 }
530 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
531 {
532 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
533 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
534 {
535#undef MPN_MUL_N
536#undef MPN_SQR
537#undef MPN_REDUCE
538#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
539#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
540#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
541 INNERLOOP;
542 }
543 else
544 {
545#undef MPN_MUL_N
546#undef MPN_SQR
547#undef MPN_REDUCE
548#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
549#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
550#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
551 INNERLOOP;
552 }
553 }
554 else
555 {
556#undef MPN_MUL_N
557#undef MPN_SQR
558#undef MPN_REDUCE
559#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
560#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
561#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
562 INNERLOOP;
563 }
564 }
565 else
566 {
567 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
568 {
569 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
570 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
571 {
572#undef MPN_MUL_N
573#undef MPN_SQR
574#undef MPN_REDUCE
575#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
576#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
577#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
578 INNERLOOP;
579 }
580 else
581 {
582#undef MPN_MUL_N
583#undef MPN_SQR
584#undef MPN_REDUCE
585#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
586#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
587#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
588 INNERLOOP;
589 }
590 }
591 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
592 {
593#undef MPN_MUL_N
594#undef MPN_SQR
595#undef MPN_REDUCE
596#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
597#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
598#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
599 INNERLOOP;
600 }
601 else
602 {
603#undef MPN_MUL_N
604#undef MPN_SQR
605#undef MPN_REDUCE
606#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
607#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
608#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
609 INNERLOOP;
610 }
611 }
612#endif /* WANT_REDC_2 */
613
614 done:
615
616 MPN_COPY (tp, rp, n);
617 MPN_ZERO (tp + n, n);
618
619#if WANT_REDC_2
620 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
621 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
622 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
623 MPN_REDC_2 (rp, tp, mp, n, mip);
624#else
625 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
626 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
627#endif
628 else
629 mpn_redc_n (rp, tp, mp, n, mip);
630
631 if (mpn_cmp (rp, mp, n) >= 0)
632 mpn_sub_n (rp, rp, mp, n);
633
634 TMP_FREE;
635}