Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b). |
| 2 | |
| 3 | Contributed to the GNU project by Martin Boij (as part of perfpow.c). |
| 4 | |
| 5 | Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc. |
| 6 | |
| 7 | This file is part of the GNU MP Library. |
| 8 | |
| 9 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 10 | it under the terms of either: |
| 11 | |
| 12 | * the GNU Lesser General Public License as published by the Free |
| 13 | Software Foundation; either version 3 of the License, or (at your |
| 14 | option) any later version. |
| 15 | |
| 16 | or |
| 17 | |
| 18 | * the GNU General Public License as published by the Free Software |
| 19 | Foundation; either version 2 of the License, or (at your option) any |
| 20 | later version. |
| 21 | |
| 22 | or both in parallel, as here. |
| 23 | |
| 24 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 25 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 26 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 27 | for more details. |
| 28 | |
| 29 | You should have received copies of the GNU General Public License and the |
| 30 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 31 | see https://www.gnu.org/licenses/. */ |
| 32 | |
| 33 | #include "gmp-impl.h" |
| 34 | |
| 35 | /* Computes a^2e (mod B). Uses right-to-left binary algorithm, since |
| 36 | typical use will have e small. */ |
| 37 | static mp_limb_t |
| 38 | powsquaredlimb (mp_limb_t a, mp_limb_t e) |
| 39 | { |
| 40 | mp_limb_t r; |
| 41 | |
| 42 | r = 1; |
| 43 | /* if (LIKELY (e != 0)) */ |
| 44 | do { |
| 45 | a *= a; |
| 46 | if (e & 1) |
| 47 | r *= a; |
| 48 | e >>= 1; |
| 49 | } while (e != 0); |
| 50 | |
| 51 | return r; |
| 52 | } |
| 53 | |
| 54 | /* Compute r such that r^k * y = 1 (mod B^n). |
| 55 | |
| 56 | Iterates |
| 57 | r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b) |
| 58 | using Hensel lifting, each time doubling the number of known bits in r. |
| 59 | |
| 60 | Works just for odd k. Else the Hensel lifting degenerates. |
| 61 | |
| 62 | FIXME: |
| 63 | |
| 64 | (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows). |
| 65 | |
| 66 | (2) Rewrite iteration as |
| 67 | r' <-- r - k^{-1} r (r^k y - 1) |
| 68 | and take advantage of the zero low part of r^k y - 1. |
| 69 | |
| 70 | (3) Use wrap-around trick. |
| 71 | |
| 72 | (4) Use a small table to get starting value. |
| 73 | |
| 74 | Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo |
| 75 | Currently mpn_powlo requires 3*bn |
| 76 | so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS). |
| 77 | */ |
| 78 | |
| 79 | void |
| 80 | mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp) |
| 81 | { |
| 82 | mp_ptr tp2, tp3; |
| 83 | mp_limb_t kinv, k2, r0, y0; |
| 84 | mp_size_t order[GMP_LIMB_BITS + 1]; |
| 85 | int d; |
| 86 | |
| 87 | ASSERT (bn > 0); |
| 88 | ASSERT ((k & 1) != 0); |
| 89 | |
| 90 | tp2 = tp + bn; |
| 91 | tp3 = tp + bn + ((bn + 3) >> 1); |
| 92 | k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */ |
| 93 | |
| 94 | binvert_limb (kinv, k); |
| 95 | |
| 96 | /* 4-bit initial approximation: |
| 97 | |
| 98 | y%16 | 1 3 5 7 9 11 13 15, |
| 99 | k%4 +-------------------------+k2%2 |
| 100 | 1 | 1 11 13 7 9 3 5 15 | 1 |
| 101 | 3 | 1 3 5 7 9 11 13 15 | 0 |
| 102 | |
| 103 | */ |
| 104 | y0 = yp[0]; |
| 105 | |
| 106 | r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8); /* 4 bits */ |
| 107 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */ |
| 108 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */ |
| 109 | #if GMP_NUMB_BITS > 16 |
| 110 | { |
| 111 | unsigned prec = 16; |
| 112 | do |
| 113 | { |
| 114 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2)); |
| 115 | prec *= 2; |
| 116 | } |
| 117 | while (prec < GMP_NUMB_BITS); |
| 118 | } |
| 119 | #endif |
| 120 | |
| 121 | rp[0] = r0; |
| 122 | if (bn == 1) |
| 123 | return; |
| 124 | |
| 125 | d = 0; |
| 126 | for (; bn != 2; bn = (bn + 1) >> 1) |
| 127 | order[d++] = bn; |
| 128 | |
| 129 | order[d] = 2; |
| 130 | bn = 1; |
| 131 | |
| 132 | do |
| 133 | { |
| 134 | mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */ |
| 135 | tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1); |
| 136 | |
| 137 | bn = order[d]; |
| 138 | |
| 139 | mpn_powlo (rp, tp, &k2, 1, bn, tp3); |
| 140 | mpn_mullo_n (tp, yp, rp, bn); |
| 141 | |
| 142 | /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */ |
| 143 | /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/ |
| 144 | { |
| 145 | mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */ |
| 146 | int borrow; |
| 147 | borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0; |
| 148 | if (bn > pbn) /* 3 < bn */ |
| 149 | { |
| 150 | if (borrow) |
| 151 | mpn_com (tp + pbn, tp + pbn, bn - pbn); |
| 152 | else |
| 153 | mpn_neg (tp + pbn, tp + pbn, bn - pbn); |
| 154 | } |
| 155 | } |
| 156 | mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0); |
| 157 | } |
| 158 | while (--d >= 0); |
| 159 | } |