Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base. |
| 2 | |
| 3 | Copyright 2007-2009, 2012, 2015, 2016, 2018 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU MP Library. |
| 6 | |
| 7 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | it under the terms of either: |
| 9 | |
| 10 | * the GNU Lesser General Public License as published by the Free |
| 11 | Software Foundation; either version 3 of the License, or (at your |
| 12 | option) any later version. |
| 13 | |
| 14 | or |
| 15 | |
| 16 | * the GNU General Public License as published by the Free Software |
| 17 | Foundation; either version 2 of the License, or (at your option) any |
| 18 | later version. |
| 19 | |
| 20 | or both in parallel, as here. |
| 21 | |
| 22 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | for more details. |
| 26 | |
| 27 | You should have received copies of the GNU General Public License and the |
| 28 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | see https://www.gnu.org/licenses/. */ |
| 30 | |
| 31 | |
| 32 | #include "gmp-impl.h" |
| 33 | #include "longlong.h" |
| 34 | |
| 35 | |
| 36 | #define getbit(p,bi) \ |
| 37 | ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) |
| 38 | |
| 39 | static inline mp_limb_t |
| 40 | getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits) |
| 41 | { |
| 42 | unsigned nbits_in_r; |
| 43 | mp_limb_t r; |
| 44 | mp_size_t i; |
| 45 | |
| 46 | if (bi < nbits) |
| 47 | { |
| 48 | return p[0] & (((mp_limb_t) 1 << bi) - 1); |
| 49 | } |
| 50 | else |
| 51 | { |
| 52 | bi -= nbits; /* bit index of low bit to extract */ |
| 53 | i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ |
| 54 | bi %= GMP_NUMB_BITS; /* bit index in low word */ |
| 55 | r = p[i] >> bi; /* extract (low) bits */ |
| 56 | nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ |
| 57 | if (nbits_in_r < nbits) /* did we get enough bits? */ |
| 58 | r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ |
| 59 | return r & (((mp_limb_t ) 1 << nbits) - 1); |
| 60 | } |
| 61 | } |
| 62 | |
| 63 | static inline unsigned |
| 64 | win_size (mp_bitcnt_t eb) |
| 65 | { |
| 66 | unsigned k; |
| 67 | static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; |
| 68 | ASSERT (eb > 1); |
| 69 | for (k = 0; eb > x[k++];) |
| 70 | ; |
| 71 | return k; |
| 72 | } |
| 73 | |
| 74 | /* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base. |
| 75 | Requires that ep[en-1] is non-zero. |
| 76 | Uses scratch space tp[3n-1..0], i.e., 3n words. */ |
| 77 | /* We only use n words in the scratch space, we should pass tp + n to |
| 78 | mullo/sqrlo as a temporary area, it is needed. */ |
| 79 | void |
| 80 | mpn_powlo (mp_ptr rp, mp_srcptr bp, |
| 81 | mp_srcptr ep, mp_size_t en, |
| 82 | mp_size_t n, mp_ptr tp) |
| 83 | { |
| 84 | unsigned cnt; |
| 85 | mp_bitcnt_t ebi; |
| 86 | unsigned windowsize, this_windowsize; |
| 87 | mp_limb_t expbits; |
| 88 | mp_limb_t *pp; |
| 89 | long i; |
| 90 | int flipflop; |
| 91 | TMP_DECL; |
| 92 | |
| 93 | ASSERT (en > 1 || (en == 1 && ep[0] > 1)); |
| 94 | |
| 95 | TMP_MARK; |
| 96 | |
| 97 | MPN_SIZEINBASE_2EXP(ebi, ep, en, 1); |
| 98 | |
| 99 | windowsize = win_size (ebi); |
| 100 | if (windowsize > 1) |
| 101 | { |
| 102 | mp_limb_t *this_pp, *last_pp; |
| 103 | ASSERT (windowsize < ebi); |
| 104 | |
| 105 | pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1))); |
| 106 | |
| 107 | this_pp = pp; |
| 108 | |
| 109 | MPN_COPY (this_pp, bp, n); |
| 110 | |
| 111 | /* Store b^2 in tp. */ |
| 112 | mpn_sqrlo (tp, bp, n); |
| 113 | |
| 114 | /* Precompute odd powers of b and put them in the temporary area at pp. */ |
| 115 | i = (1 << (windowsize - 1)) - 1; |
| 116 | do |
| 117 | { |
| 118 | last_pp = this_pp; |
| 119 | this_pp += n; |
| 120 | mpn_mullo_n (this_pp, last_pp, tp, n); |
| 121 | } while (--i != 0); |
| 122 | |
| 123 | expbits = getbits (ep, ebi, windowsize); |
| 124 | |
| 125 | /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */ |
| 126 | count_trailing_zeros (cnt, expbits); |
| 127 | ebi -= windowsize; |
| 128 | ebi += cnt; |
| 129 | expbits >>= cnt; |
| 130 | |
| 131 | MPN_COPY (rp, pp + n * (expbits >> 1), n); |
| 132 | } |
| 133 | else |
| 134 | { |
| 135 | pp = tp + n; |
| 136 | MPN_COPY (pp, bp, n); |
| 137 | MPN_COPY (rp, bp, n); |
| 138 | --ebi; |
| 139 | } |
| 140 | |
| 141 | flipflop = 0; |
| 142 | |
| 143 | do |
| 144 | { |
| 145 | while (getbit (ep, ebi) == 0) |
| 146 | { |
| 147 | mpn_sqrlo (tp, rp, n); |
| 148 | MP_PTR_SWAP (rp, tp); |
| 149 | flipflop = ! flipflop; |
| 150 | if (--ebi == 0) |
| 151 | goto done; |
| 152 | } |
| 153 | |
| 154 | /* The next bit of the exponent is 1. Now extract the largest block of |
| 155 | bits <= windowsize, and such that the least significant bit is 1. */ |
| 156 | |
| 157 | expbits = getbits (ep, ebi, windowsize); |
| 158 | this_windowsize = MIN (windowsize, ebi); |
| 159 | ebi -= this_windowsize; |
| 160 | |
| 161 | count_trailing_zeros (cnt, expbits); |
| 162 | this_windowsize -= cnt; |
| 163 | ebi += cnt; |
| 164 | expbits >>= cnt; |
| 165 | |
| 166 | while (this_windowsize > 1) |
| 167 | { |
| 168 | mpn_sqrlo (tp, rp, n); |
| 169 | mpn_sqrlo (rp, tp, n); |
| 170 | this_windowsize -= 2; |
| 171 | } |
| 172 | |
| 173 | if (this_windowsize != 0) |
| 174 | mpn_sqrlo (tp, rp, n); |
| 175 | else |
| 176 | { |
| 177 | MP_PTR_SWAP (rp, tp); |
| 178 | flipflop = ! flipflop; |
| 179 | } |
| 180 | |
| 181 | mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n); |
| 182 | } while (ebi != 0); |
| 183 | |
| 184 | done: |
| 185 | if (flipflop) |
| 186 | MPN_COPY (tp, rp, n); |
| 187 | TMP_FREE; |
| 188 | } |