/* mpn_powm -- Compute R = U^E mod M.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2007-2012, 2019 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.

  1. W <- U

  2. T <- (B^n * U) mod M                Convert to REDC form

  3. Compute table U^1, U^3, U^5... of E-dependent size

  4. While there are more bits in E
       W <- power left-to-right base-k


  TODO:

   * Make getbits a macro, thereby allowing it to update the index operand.
     That will simplify the code using getbits.  (Perhaps then give getbits'
     sibling getbit a similar form, for symmetry.)

   * Write an itch function.  Or perhaps get rid of the tp parameter, since
     the huge pp area is allocated locally anyway?

   * Choose window size without looping.  (Superoptimize or think(tm).)

   * Handle small bases with initial, reduction-free exponentiation.

   * Call new division functions, not mpn_tdiv_qr.

   * Consider special code for one-limb M.

   * How should we handle the redc1/redc2/redc_n choice?
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
     This disregards the addmul_N constant term, but we could think of
     that as part of the respective mullo.

   * When U (the base) is small, we should start the exponentiation with
     plain operations, then convert that partial result to REDC form.

   * When U is just one limb, should it be handled without the k-ary tricks?
     We could keep a factor of B^n in W, but use U' = BU as base.  After
     multiplying by this (pseudo two-limb) number, we need to multiply by
     1/B mod M.
*/

#include "gmp-impl.h"
#include "longlong.h"
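
/* Illustrative sketch, not part of the GMP code: a single-word model of the
   left-to-right windowed method outlined above, using a fixed window and a
   full power table instead of the odd-powers table and REDC arithmetic used
   below.  The function name and the <stdint.h> types are ours, chosen for
   the sketch only; it is kept under #if 0 like the other sketches here.  */
#if 0
#include <stdint.h>

static uint32_t
powm_fixed_window_model (uint32_t u, uint32_t e, uint32_t m)
{
  enum { K = 4 };               /* window size in bits; K divides 32 */
  uint32_t tab[1u << K];        /* tab[j] = u^j mod m */
  uint32_t w = 1 % m;
  int i, j;

  tab[0] = 1 % m;
  for (j = 1; j < (1 << K); j++)
    tab[j] = (uint32_t) (((uint64_t) tab[j - 1] * u) % m);

  /* Scan the exponent in K-bit windows, most significant window first:
     square K times, then multiply by the table entry for the window.  */
  for (i = 32 / K - 1; i >= 0; i--)
    {
      for (j = 0; j < K; j++)
        w = (uint32_t) (((uint64_t) w * w) % m);
      w = (uint32_t) (((uint64_t) w * tab[(e >> (i * K)) & ((1u << K) - 1)]) % m);
    }
  return w;                     /* w = u^e mod m */
}
#endif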

/* One-limb Montgomery reduction: with B = 2^GMP_NUMB_BITS and
   invm = -1/(*mp) mod B, store at *rp a single limb congruent to
   {up,2} * B^-1 modulo *mp.  The result is not necessarily fully
   reduced below *mp.  */
#undef MPN_REDC_0
#define MPN_REDC_0(rp, up, mp, invm)                                    \
  do {                                                                  \
    mp_limb_t p1, r0, u0, _dummy;                                       \
    u0 = *(up);                                                         \
    umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);       \
    ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);                      \
    p1 += (u0 != 0);                                                    \
    r0 = (up)[1] + p1;                                                  \
    if (p1 > r0)                                                        \
      r0 -= *(mp);                                                      \
    *(rp) = r0;                                                         \
  } while (0)

#undef MPN_REDC_1
#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
#define MPN_REDC_1(rp, up, mp, n, invm)                                 \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);                     \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, up + n, mp, n);                                    \
    else                                                                \
      MPN_COPY (rp, up + n, n);                                         \
  } while (0)
#else
#define MPN_REDC_1(rp, up, mp, n, invm)                                 \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_redc_1 (rp, up, mp, n, invm);                              \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, rp, mp, n);                                        \
  } while (0)
#endif

#undef MPN_REDC_2
#define MPN_REDC_2(rp, up, mp, n, mip)                                  \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_redc_2 (rp, up, mp, n, mip);                               \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, rp, mp, n);                                        \
  } while (0)

#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
#define WANT_REDC_2 1
#endif

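/* getbit and getbits index bits with a "one past the top" convention that
   matches the bit count produced by MPN_SIZEINBASE_2EXP: getbit (p, bi)
   reads bit number bi - 1 of p, and getbits (p, bi, nbits) returns the
   nbits bits just below bit position bi (or the low bi bits when
   bi < nbits).  For example, with p[0] = 0xABCD, getbits (p, 8, 4)
   returns 0xC.  */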
#define getbit(p,bi) \
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)

static inline mp_limb_t
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
{
  int nbits_in_r;
  mp_limb_t r;
  mp_size_t i;

  if (bi < nbits)
    {
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
    }
  else
    {
      bi -= nbits;                      /* bit index of low bit to extract */
      i = bi / GMP_NUMB_BITS;           /* word index of low bit to extract */
      bi %= GMP_NUMB_BITS;              /* bit index in low word */
      r = p[i] >> bi;                   /* extract (low) bits */
      nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
      if (nbits_in_r < nbits)           /* did we get enough bits? */
        r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
      return r & (((mp_limb_t) 1 << nbits) - 1);
    }
}

/* Pick the sliding-window size in bits for an eb-bit exponent: x[k] is the
   largest exponent bit count that still selects a k-bit window, so e.g. a
   512-bit exponent gives k = 5.  */
static inline int
win_size (mp_bitcnt_t eb)
{
  int k;
  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
  for (k = 1; eb > x[k]; k++)
    ;
  return k;
}

/* Convert U to REDC form, U_r = B^n * U mod M */
static void
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
{
  mp_ptr tp, qp;
  TMP_DECL;
  TMP_MARK;

  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);

  MPN_ZERO (tp, n);
  MPN_COPY (tp + n, up, un);
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
  TMP_FREE;
}
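
/* For reference only, not used here: at the mpz level the same conversion is
   r = (u * 2^(n*GMP_NUMB_BITS)) mod m, i.e. with mpz_t operands holding the
   values of {up,un} and {mp,n} one could write

     mpz_mul_2exp (r, u, (mp_bitcnt_t) n * GMP_NUMB_BITS);
     mpz_mod (r, r, m);
*/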

/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.
   Requires that ep[en-1..0] is > 1.
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
void
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
          mp_srcptr ep, mp_size_t en,
          mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t ip[2], *mip;
  int cnt;
  mp_bitcnt_t ebi;
  int windowsize, this_windowsize;
  mp_limb_t expbits;
  mp_ptr pp, this_pp;
  long i;
  TMP_DECL;

  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  TMP_MARK;

  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);

#if 0
  if (bn < n)
    {
      /* Do the first few exponent bits without mod reductions,
         until the result is greater than the mod argument.  */
      for (;;)
        {
          mpn_sqr (tp, this_pp, tn);
          tn = tn * 2 - 1, tn += tp[tn] != 0;
          if (getbit (ep, ebi) != 0)
            mpn_mul (..., tp, tn, bp, bn);
          ebi--;
        }
    }
#endif

  windowsize = win_size (ebi);

#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    {
      mip = ip;
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
    }
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    {
      mip = ip;
      mpn_binvert (mip, mp, 2, tp);
      mip[0] = -mip[0]; mip[1] = ~mip[1];
    }
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    {
      mip = ip;
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
    }
#endif
  else
    {
      mip = TMP_ALLOC_LIMBS (n);
      mpn_binvert (mip, mp, n, tp);
    }

  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));

  this_pp = pp;
  redcify (this_pp, bp, bn, mp, n);

  /* Store b^2 at rp.  */
  mpn_sqr (tp, this_pp, n);
#if 0
  if (n == 1) {
    MPN_REDC_0 (rp, tp, mp, mip[0]);
  } else
#endif
#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    MPN_REDC_2 (rp, tp, mp, n, mip);
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
  else
    mpn_redc_n (rp, tp, mp, n, mip);

  /* Precompute odd powers of b and put them in the temporary area at pp.  */
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
#if 1
    if (n == 1) {
      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
      ++this_pp;
      MPN_REDC_0 (this_pp, tp, mp, mip[0]);
    } else
#endif
    {
      mpn_mul_n (tp, this_pp, rp, n);
      this_pp += n;
#if WANT_REDC_2
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
        MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
        MPN_REDC_2 (this_pp, tp, mp, n, mip);
#else
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
        MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
#endif
      else
        mpn_redc_n (this_pp, tp, mp, n, mip);
    }

  expbits = getbits (ep, ebi, windowsize);
  if (ebi < windowsize)
    ebi = 0;
  else
    ebi -= windowsize;

  count_trailing_zeros (cnt, expbits);
  ebi += cnt;
  expbits >>= cnt;

  MPN_COPY (rp, pp + n * (expbits >> 1), n);

#define INNERLOOP                                                       \
  while (ebi != 0)                                                      \
    {                                                                   \
      while (getbit (ep, ebi) == 0)                                     \
        {                                                               \
          MPN_SQR (tp, rp, n);                                          \
          MPN_REDUCE (rp, tp, mp, n, mip);                              \
          if (--ebi == 0)                                               \
            goto done;                                                  \
        }                                                               \
                                                                        \
      /* The next bit of the exponent is 1.  Now extract the largest   \
         block of bits <= windowsize, and such that the least          \
         significant bit is 1.  */                                     \
                                                                        \
      expbits = getbits (ep, ebi, windowsize);                          \
      this_windowsize = windowsize;                                     \
      if (ebi < windowsize)                                             \
        {                                                               \
          this_windowsize -= windowsize - ebi;                          \
          ebi = 0;                                                      \
        }                                                               \
      else                                                              \
        ebi -= windowsize;                                              \
                                                                        \
      count_trailing_zeros (cnt, expbits);                              \
      this_windowsize -= cnt;                                           \
      ebi += cnt;                                                       \
      expbits >>= cnt;                                                  \
                                                                        \
      do                                                                \
        {                                                               \
          MPN_SQR (tp, rp, n);                                          \
          MPN_REDUCE (rp, tp, mp, n, mip);                              \
        }                                                               \
      while (--this_windowsize != 0);                                   \
                                                                        \
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);                   \
      MPN_REDUCE (rp, tp, mp, n, mip);                                  \
    }


  if (n == 1)
    {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  umul_ppmm((r)[1], *(r), *(a), *(b))
#define MPN_SQR(r,a,n)  umul_ppmm((r)[1], *(r), *(a), *(a))
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_0(rp, tp, mp, mip[0])
      INNERLOOP;
    }
  else
#if WANT_REDC_2
    if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
          {
            if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
    else
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
            INNERLOOP;
          }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }

#else  /* WANT_REDC_2 */

    if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
          {
            if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
                INNERLOOP;
              }
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
    else
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
#endif  /* WANT_REDC_2 */

 done:

  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);

#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    MPN_REDC_2 (rp, tp, mp, n, mip);
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
  else
    mpn_redc_n (rp, tp, mp, n, mip);

  if (mpn_cmp (rp, mp, n) >= 0)
    mpn_sub_n (rp, rp, mp, n);

  TMP_FREE;
}
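
/* Usage sketch, not part of this file: the documented way to reach this code
   is through mpz_powm; mpn_powm above is internal and its interface may
   change.  A direct mpn-level call would also have to supply scratch space
   of MAX (mpn_binvert_itch (n), 2*n) limbs, per the comment above mpn_powm.  */
#if 0
#include <gmp.h>

static void
example (void)
{
  mpz_t r, b, e, m;

  mpz_inits (r, b, e, m, NULL);
  mpz_set_ui (b, 2);
  mpz_set_ui (e, 13);
  mpz_set_ui (m, 9);            /* odd modulus, exponent > 1 */
  mpz_powm (r, b, e, m);        /* r = 2^13 mod 9 = 2 */
  mpz_clears (r, b, e, m, NULL);
}
#endif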