Austin Schuh | bb1338c | 2024-06-15 19:31:16 -0700 | [diff] [blame^] | 1 | /* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments. |
| 2 | |
| 3 | THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO |
| 4 | INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. |
| 5 | |
| 6 | Copyright 1999-2002, 2010 Free Software Foundation, Inc. |
| 7 | |
| 8 | This file is part of the GNU MP Library. |
| 9 | |
| 10 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 11 | it under the terms of either: |
| 12 | |
| 13 | * the GNU Lesser General Public License as published by the Free |
| 14 | Software Foundation; either version 3 of the License, or (at your |
| 15 | option) any later version. |
| 16 | |
| 17 | or |
| 18 | |
| 19 | * the GNU General Public License as published by the Free Software |
| 20 | Foundation; either version 2 of the License, or (at your option) any |
| 21 | later version. |
| 22 | |
| 23 | or both in parallel, as here. |
| 24 | |
| 25 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 26 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 27 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 28 | for more details. |
| 29 | |
| 30 | You should have received copies of the GNU General Public License and the |
| 31 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 32 | see https://www.gnu.org/licenses/. */ |
| 33 | |
| 34 | #include "gmp-impl.h" |
| 35 | #include "longlong.h" |
| 36 | |
| 37 | |
| 38 | /* Use the simple loop by default. The generic count_trailing_zeros is not |
| 39 | very fast, and the extra trickery of method 3 has proven to be less use |
| 40 | than might have been though. */ |
| 41 | #ifndef JACOBI_BASE_METHOD |
| 42 | #define JACOBI_BASE_METHOD 2 |
| 43 | #endif |
| 44 | |
| 45 | |
| 46 | /* Use count_trailing_zeros. */ |
| 47 | #if JACOBI_BASE_METHOD == 1 |
| 48 | #define PROCESS_TWOS_ANY \ |
| 49 | { \ |
| 50 | mp_limb_t twos; \ |
| 51 | count_trailing_zeros (twos, a); \ |
| 52 | result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \ |
| 53 | a >>= twos; \ |
| 54 | } |
| 55 | #define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY |
| 56 | #endif |
| 57 | |
| 58 | /* Use a simple loop. A disadvantage of this is that there's a branch on a |
| 59 | 50/50 chance of a 0 or 1 low bit. */ |
| 60 | #if JACOBI_BASE_METHOD == 2 |
| 61 | #define PROCESS_TWOS_EVEN \ |
| 62 | { \ |
| 63 | int two; \ |
| 64 | two = JACOBI_TWO_U_BIT1 (b); \ |
| 65 | do \ |
| 66 | { \ |
| 67 | a >>= 1; \ |
| 68 | result_bit1 ^= two; \ |
| 69 | ASSERT (a != 0); \ |
| 70 | } \ |
| 71 | while ((a & 1) == 0); \ |
| 72 | } |
| 73 | #define PROCESS_TWOS_ANY \ |
| 74 | if ((a & 1) == 0) \ |
| 75 | PROCESS_TWOS_EVEN; |
| 76 | #endif |
| 77 | |
| 78 | /* Process one bit arithmetically, then a simple loop. This cuts the loop |
| 79 | condition down to a 25/75 chance, which should branch predict better. |
| 80 | The CPU will need a reasonable variable left shift. */ |
| 81 | #if JACOBI_BASE_METHOD == 3 |
| 82 | #define PROCESS_TWOS_EVEN \ |
| 83 | { \ |
| 84 | int two, mask, shift; \ |
| 85 | \ |
| 86 | two = JACOBI_TWO_U_BIT1 (b); \ |
| 87 | mask = (~a & 2); \ |
| 88 | a >>= 1; \ |
| 89 | \ |
| 90 | shift = (~a & 1); \ |
| 91 | a >>= shift; \ |
| 92 | result_bit1 ^= two ^ (two & mask); \ |
| 93 | \ |
| 94 | while ((a & 1) == 0) \ |
| 95 | { \ |
| 96 | a >>= 1; \ |
| 97 | result_bit1 ^= two; \ |
| 98 | ASSERT (a != 0); \ |
| 99 | } \ |
| 100 | } |
| 101 | #define PROCESS_TWOS_ANY \ |
| 102 | { \ |
| 103 | int two, mask, shift; \ |
| 104 | \ |
| 105 | two = JACOBI_TWO_U_BIT1 (b); \ |
| 106 | shift = (~a & 1); \ |
| 107 | a >>= shift; \ |
| 108 | \ |
| 109 | mask = shift << 1; \ |
| 110 | result_bit1 ^= (two & mask); \ |
| 111 | \ |
| 112 | while ((a & 1) == 0) \ |
| 113 | { \ |
| 114 | a >>= 1; \ |
| 115 | result_bit1 ^= two; \ |
| 116 | ASSERT (a != 0); \ |
| 117 | } \ |
| 118 | } |
| 119 | #endif |
| 120 | |
| 121 | #if JACOBI_BASE_METHOD < 4 |
| 122 | /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but |
| 123 | with a restricted range of inputs accepted, namely b>1, b odd. |
| 124 | |
| 125 | The initial result_bit1 is taken as a parameter for the convenience of |
| 126 | mpz_kronecker_ui() et al. The sign changes both here and in those |
| 127 | routines accumulate nicely in bit 1, see the JACOBI macros. |
| 128 | |
| 129 | The return value here is the normal +1, 0, or -1. Note that +1 and -1 |
| 130 | have bit 1 in the "BIT1" sense, which could be useful if the caller is |
| 131 | accumulating it into some extended calculation. |
| 132 | |
| 133 | Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be |
| 134 | possible, but a couple of tests suggest it's not a significant speedup, |
| 135 | and may even be a slowdown, so what's here is good enough for now. */ |
| 136 | |
| 137 | int |
| 138 | mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1) |
| 139 | { |
| 140 | ASSERT (b & 1); /* b odd */ |
| 141 | ASSERT (b != 1); |
| 142 | |
| 143 | if (a == 0) |
| 144 | return 0; |
| 145 | |
| 146 | PROCESS_TWOS_ANY; |
| 147 | if (a == 1) |
| 148 | goto done; |
| 149 | |
| 150 | if (a >= b) |
| 151 | goto a_gt_b; |
| 152 | |
| 153 | for (;;) |
| 154 | { |
| 155 | result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b); |
| 156 | MP_LIMB_T_SWAP (a, b); |
| 157 | |
| 158 | a_gt_b: |
| 159 | do |
| 160 | { |
| 161 | /* working on (a/b), a,b odd, a>=b */ |
| 162 | ASSERT (a & 1); |
| 163 | ASSERT (b & 1); |
| 164 | ASSERT (a >= b); |
| 165 | |
| 166 | if ((a -= b) == 0) |
| 167 | return 0; |
| 168 | |
| 169 | PROCESS_TWOS_EVEN; |
| 170 | if (a == 1) |
| 171 | goto done; |
| 172 | } |
| 173 | while (a >= b); |
| 174 | } |
| 175 | |
| 176 | done: |
| 177 | return JACOBI_BIT1_TO_PN (result_bit1); |
| 178 | } |
| 179 | #endif |
| 180 | |
| 181 | #if JACOBI_BASE_METHOD == 4 |
| 182 | /* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a |
| 183 | * parameter. We have no need for the convention that the sign is in |
| 184 | * bit 1, internally we use bit 0. */ |
| 185 | |
| 186 | /* FIXME: Could try table-based count_trailing_zeros. */ |
| 187 | int |
| 188 | mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit) |
| 189 | { |
| 190 | int c; |
| 191 | |
| 192 | ASSERT (b & 1); |
| 193 | ASSERT (b > 1); |
| 194 | |
| 195 | if (a == 0) |
| 196 | /* This is the only line which depends on b > 1 */ |
| 197 | return 0; |
| 198 | |
| 199 | bit >>= 1; |
| 200 | |
| 201 | /* Below, we represent a and b shifted right so that the least |
| 202 | significant one bit is implicit. */ |
| 203 | |
| 204 | b >>= 1; |
| 205 | |
| 206 | count_trailing_zeros (c, a); |
| 207 | bit ^= c & (b ^ (b >> 1)); |
| 208 | |
| 209 | /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */ |
| 210 | a >>= c; |
| 211 | a >>= 1; |
| 212 | |
| 213 | do |
| 214 | { |
| 215 | mp_limb_t t = a - b; |
| 216 | mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t); |
| 217 | |
| 218 | if (t == 0) |
| 219 | return 0; |
| 220 | |
| 221 | /* If b > a, invoke reciprocity */ |
| 222 | bit ^= (bgta & a & b); |
| 223 | |
| 224 | /* b <-- min (a, b) */ |
| 225 | b += (bgta & t); |
| 226 | |
| 227 | /* a <-- |a - b| */ |
| 228 | a = (t ^ bgta) - bgta; |
| 229 | |
| 230 | /* Number of trailing zeros is the same no matter if we look at |
| 231 | * t or a, but using t gives more parallelism. */ |
| 232 | count_trailing_zeros (c, t); |
| 233 | c ++; |
| 234 | /* (2/b) = -1 if b = 3 or 5 mod 8 */ |
| 235 | bit ^= c & (b ^ (b >> 1)); |
| 236 | a >>= c; |
| 237 | } |
| 238 | while (b > 0); |
| 239 | |
| 240 | return 1-2*(bit & 1); |
| 241 | } |
| 242 | #endif /* JACOBI_BASE_METHOD == 4 */ |