Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame^] | 1 | /* mpn_broot -- Compute hensel sqrt |
| 2 | |
| 3 | Contributed to the GNU project by Niels Möller |
| 4 | |
| 5 | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
| 6 | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
| 7 | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. |
| 8 | |
| 9 | Copyright 2012 Free Software Foundation, Inc. |
| 10 | |
| 11 | This file is part of the GNU MP Library. |
| 12 | |
| 13 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 14 | it under the terms of either: |
| 15 | |
| 16 | * the GNU Lesser General Public License as published by the Free |
| 17 | Software Foundation; either version 3 of the License, or (at your |
| 18 | option) any later version. |
| 19 | |
| 20 | or |
| 21 | |
| 22 | * the GNU General Public License as published by the Free Software |
| 23 | Foundation; either version 2 of the License, or (at your option) any |
| 24 | later version. |
| 25 | |
| 26 | or both in parallel, as here. |
| 27 | |
| 28 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 29 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 30 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 31 | for more details. |
| 32 | |
| 33 | You should have received copies of the GNU General Public License and the |
| 34 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 35 | see https://www.gnu.org/licenses/. */ |
| 36 | |
| 37 | #include "gmp-impl.h" |
| 38 | |
| 39 | /* Computes a^e (mod B). Uses right-to-left binary algorithm, since |
| 40 | typical use will have e small. */ |
| 41 | static mp_limb_t |
| 42 | powlimb (mp_limb_t a, mp_limb_t e) |
| 43 | { |
| 44 | mp_limb_t r = 1; |
| 45 | mp_limb_t s = a; |
| 46 | |
| 47 | for (r = 1, s = a; e > 0; e >>= 1, s *= s) |
| 48 | if (e & 1) |
| 49 | r *= s; |
| 50 | |
| 51 | return r; |
| 52 | } |
| 53 | |
| 54 | /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd. |
| 55 | |
| 56 | Iterates |
| 57 | |
| 58 | r' <-- r - r * (a^{k-1} r^k - 1) / n |
| 59 | |
| 60 | If |
| 61 | |
| 62 | a^{k-1} r^k = 1 (mod 2^m), |
| 63 | |
| 64 | then |
| 65 | |
| 66 | a^{k-1} r'^k = 1 (mod 2^{2m}), |
| 67 | |
| 68 | Compute the update term as |
| 69 | |
| 70 | r' = r - (a^{k-1} r^{k+1} - r) / k |
| 71 | |
| 72 | where we still have cancellation of low limbs. |
| 73 | |
| 74 | */ |
| 75 | void |
| 76 | mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) |
| 77 | { |
| 78 | mp_size_t sizes[GMP_LIMB_BITS * 2]; |
| 79 | mp_ptr akm1, tp, rnp, ep; |
| 80 | mp_limb_t a0, r0, km1, kp1h, kinv; |
| 81 | mp_size_t rn; |
| 82 | unsigned i; |
| 83 | |
| 84 | TMP_DECL; |
| 85 | |
| 86 | ASSERT (n > 0); |
| 87 | ASSERT (ap[0] & 1); |
| 88 | ASSERT (k & 1); |
| 89 | ASSERT (k >= 3); |
| 90 | |
| 91 | TMP_MARK; |
| 92 | |
| 93 | akm1 = TMP_ALLOC_LIMBS (4*n); |
| 94 | tp = akm1 + n; |
| 95 | |
| 96 | km1 = k-1; |
| 97 | /* FIXME: Could arrange the iteration so we don't need to compute |
| 98 | this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note |
| 99 | that we can use wraparound also for a*r, since the low half is |
| 100 | unchanged from the previous iteration. Or possibly mulmid. Also, |
| 101 | a r = a^{1/k}, so we get that value too, for free? */ |
| 102 | mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */ |
| 103 | |
| 104 | a0 = ap[0]; |
| 105 | binvert_limb (kinv, k); |
| 106 | |
| 107 | /* 4 bits: a^{1/k - 1} (mod 16): |
| 108 | |
| 109 | a % 8 |
| 110 | 1 3 5 7 |
| 111 | k%4 +------- |
| 112 | 1 |1 1 1 1 |
| 113 | 3 |1 9 9 1 |
| 114 | */ |
| 115 | r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8); |
| 116 | r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */ |
| 117 | r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */ |
| 118 | r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */ |
| 119 | #if GMP_NUMB_BITS > 32 |
| 120 | { |
| 121 | unsigned prec = 32; |
| 122 | do |
| 123 | { |
| 124 | r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); |
| 125 | prec *= 2; |
| 126 | } |
| 127 | while (prec < GMP_NUMB_BITS); |
| 128 | } |
| 129 | #endif |
| 130 | |
| 131 | rp[0] = r0; |
| 132 | if (n == 1) |
| 133 | { |
| 134 | TMP_FREE; |
| 135 | return; |
| 136 | } |
| 137 | |
| 138 | /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */ |
| 139 | kp1h = k/2 + 1; |
| 140 | |
| 141 | /* FIXME: Special case for two limb iteration. */ |
| 142 | rnp = TMP_ALLOC_LIMBS (2*n + 1); |
| 143 | ep = rnp + n; |
| 144 | |
| 145 | /* FIXME: Possible to this on the fly with some bit fiddling. */ |
| 146 | for (i = 0; n > 1; n = (n + 1)/2) |
| 147 | sizes[i++] = n; |
| 148 | |
| 149 | rn = 1; |
| 150 | |
| 151 | while (i-- > 0) |
| 152 | { |
| 153 | /* Compute x^{k+1}. */ |
| 154 | mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the |
| 155 | final iteration. */ |
| 156 | mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp); |
| 157 | |
| 158 | /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */ |
| 159 | |
| 160 | mpn_mullo_n (ep, rnp, akm1, sizes[i]); |
| 161 | ASSERT (mpn_cmp (ep, rp, rn) == 0); |
| 162 | |
| 163 | ASSERT (sizes[i] <= 2*rn); |
| 164 | mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0); |
| 165 | mpn_neg (rp + rn, rp + rn, sizes[i] - rn); |
| 166 | rn = sizes[i]; |
| 167 | } |
| 168 | TMP_FREE; |
| 169 | } |
| 170 | |
| 171 | /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */ |
| 172 | void |
| 173 | mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) |
| 174 | { |
| 175 | mp_ptr tp; |
| 176 | TMP_DECL; |
| 177 | |
| 178 | ASSERT (n > 0); |
| 179 | ASSERT (ap[0] & 1); |
| 180 | ASSERT (k & 1); |
| 181 | |
| 182 | if (k == 1) |
| 183 | { |
| 184 | MPN_COPY (rp, ap, n); |
| 185 | return; |
| 186 | } |
| 187 | |
| 188 | TMP_MARK; |
| 189 | tp = TMP_ALLOC_LIMBS (n); |
| 190 | |
| 191 | mpn_broot_invm1 (tp, ap, n, k); |
| 192 | mpn_mullo_n (rp, tp, ap, n); |
| 193 | |
| 194 | TMP_FREE; |
| 195 | } |