Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* jacobi.c |
| 2 | |
| 3 | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
| 4 | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
| 5 | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
| 6 | |
| 7 | Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation, |
| 8 | Inc. |
| 9 | |
| 10 | This file is part of the GNU MP Library. |
| 11 | |
| 12 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 13 | it under the terms of either: |
| 14 | |
| 15 | * the GNU Lesser General Public License as published by the Free |
| 16 | Software Foundation; either version 3 of the License, or (at your |
| 17 | option) any later version. |
| 18 | |
| 19 | or |
| 20 | |
| 21 | * the GNU General Public License as published by the Free Software |
| 22 | Foundation; either version 2 of the License, or (at your option) any |
| 23 | later version. |
| 24 | |
| 25 | or both in parallel, as here. |
| 26 | |
| 27 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 28 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 29 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 30 | for more details. |
| 31 | |
| 32 | You should have received copies of the GNU General Public License and the |
| 33 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 34 | see https://www.gnu.org/licenses/. */ |
| 35 | |
| 36 | #include "gmp-impl.h" |
| 37 | #include "longlong.h" |
| 38 | |
| 39 | #ifndef JACOBI_DC_THRESHOLD |
| 40 | #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD |
| 41 | #endif |
| 42 | |
| 43 | /* Schönhage's rules: |
| 44 | * |
| 45 | * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3 |
| 46 | * |
| 47 | * If r1 is odd, then |
| 48 | * |
| 49 | * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1) |
| 50 | * |
| 51 | * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)]. |
| 52 | * |
| 53 | * If r1 is even, r2 must be odd. We have |
| 54 | * |
| 55 | * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0) |
| 56 | * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1) |
| 57 | * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1) |
| 58 | * |
| 59 | * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating |
| 60 | * q1 times gives |
| 61 | * |
| 62 | * (r1 | r0) = (r1 | r2) = (r3 | r2) |
| 63 | * |
| 64 | * On the other hand, if r1 = 2 (mod 4), the sign factor is |
| 65 | * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent |
| 66 | * |
| 67 | * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2 |
| 68 | * = q1 (r0-1)/2 + q1 (q1-1)/2 |
| 69 | * |
| 70 | * and we can summarize the even case as |
| 71 | * |
| 72 | * (r1 | r0) = t(r1, r0, q1) (r3 | r2) |
| 73 | * |
| 74 | * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)} |
| 75 | * |
| 76 | * What about termination? The remainder sequence ends with (0|1) = 1 |
| 77 | * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is |
| 78 | * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and |
| 79 | * hence non-zero. We may have r3 = r1 - q2 r2 = 0. |
| 80 | * |
| 81 | * Examples: (11|15) = - (15|11) = - (4|11) |
| 82 | * (4|11) = (4| 3) = (1| 3) |
| 83 | * (1| 3) = (3|1) = (0|1) = 1 |
| 84 | * |
| 85 | * (2|7) = (2|1) = (0|1) = 1 |
| 86 | * |
| 87 | * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5) |
| 88 | * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3) |
| 89 | * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1) |
| 90 | * |
| 91 | */ |
| 92 | |
| 93 | /* In principle, the state consists of four variables: e (one bit), a, |
| 94 | b (two bits each), d (one bit). Collected factors are (-1)^e. a and |
| 95 | b are the least significant bits of the current remainders. d |
| 96 | (denominator) is 0 if we're currently subtracting multiplies of a |
| 97 | from b, and 1 if we're subtracting b from a. |
| 98 | |
| 99 | e is stored in the least significant bit, while a, b and d are |
| 100 | coded as only 13 distinct values in bits 1-4, according to the |
| 101 | following table. For rows not mentioning d, the value is either |
| 102 | implied, or it doesn't matter. */ |
| 103 | |
| 104 | #if WANT_ASSERT |
| 105 | static const struct |
| 106 | { |
| 107 | unsigned char a; |
| 108 | unsigned char b; |
| 109 | } decode_table[13] = { |
| 110 | /* 0 */ { 0, 1 }, |
| 111 | /* 1 */ { 0, 3 }, |
| 112 | /* 2 */ { 1, 1 }, |
| 113 | /* 3 */ { 1, 3 }, |
| 114 | /* 4 */ { 2, 1 }, |
| 115 | /* 5 */ { 2, 3 }, |
| 116 | /* 6 */ { 3, 1 }, |
| 117 | /* 7 */ { 3, 3 }, /* d = 1 */ |
| 118 | /* 8 */ { 1, 0 }, |
| 119 | /* 9 */ { 1, 2 }, |
| 120 | /* 10 */ { 3, 0 }, |
| 121 | /* 11 */ { 3, 2 }, |
| 122 | /* 12 */ { 3, 3 }, /* d = 0 */ |
| 123 | }; |
| 124 | #define JACOBI_A(bits) (decode_table[(bits)>>1].a) |
| 125 | #define JACOBI_B(bits) (decode_table[(bits)>>1].b) |
| 126 | #endif /* WANT_ASSERT */ |
| 127 | |
| 128 | const unsigned char jacobi_table[208] = { |
| 129 | #include "jacobitab.h" |
| 130 | }; |
| 131 | |
| 132 | #define BITS_FAIL 31 |
| 133 | |
| 134 | static void |
| 135 | jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, |
| 136 | mp_srcptr qp, mp_size_t qn, int d) |
| 137 | { |
| 138 | unsigned *bitsp = (unsigned *) p; |
| 139 | |
| 140 | if (gp) |
| 141 | { |
| 142 | ASSERT (gn > 0); |
| 143 | if (gn != 1 || gp[0] != 1) |
| 144 | { |
| 145 | *bitsp = BITS_FAIL; |
| 146 | return; |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | if (qp) |
| 151 | { |
| 152 | ASSERT (qn > 0); |
| 153 | ASSERT (d >= 0); |
| 154 | *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3); |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | #define CHOOSE_P(n) (2*(n) / 3) |
| 159 | |
| 160 | int |
| 161 | mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits) |
| 162 | { |
| 163 | mp_size_t scratch; |
| 164 | mp_size_t matrix_scratch; |
| 165 | mp_ptr tp; |
| 166 | |
| 167 | TMP_DECL; |
| 168 | |
| 169 | ASSERT (n > 0); |
| 170 | ASSERT ( (ap[n-1] | bp[n-1]) > 0); |
| 171 | ASSERT ( (bp[0] | ap[0]) & 1); |
| 172 | |
| 173 | /* FIXME: Check for small sizes first, before setting up temporary |
| 174 | storage etc. */ |
| 175 | scratch = MPN_GCD_SUBDIV_STEP_ITCH(n); |
| 176 | |
| 177 | if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) |
| 178 | { |
| 179 | mp_size_t hgcd_scratch; |
| 180 | mp_size_t update_scratch; |
| 181 | mp_size_t p = CHOOSE_P (n); |
| 182 | mp_size_t dc_scratch; |
| 183 | |
| 184 | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
| 185 | hgcd_scratch = mpn_hgcd_itch (n - p); |
| 186 | update_scratch = p + n - 1; |
| 187 | |
| 188 | dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); |
| 189 | if (dc_scratch > scratch) |
| 190 | scratch = dc_scratch; |
| 191 | } |
| 192 | |
| 193 | TMP_MARK; |
| 194 | tp = TMP_ALLOC_LIMBS(scratch); |
| 195 | |
| 196 | while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) |
| 197 | { |
| 198 | struct hgcd_matrix M; |
| 199 | mp_size_t p = 2*n/3; |
| 200 | mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
| 201 | mp_size_t nn; |
| 202 | mpn_hgcd_matrix_init (&M, n - p, tp); |
| 203 | |
| 204 | nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits, |
| 205 | tp + matrix_scratch); |
| 206 | if (nn > 0) |
| 207 | { |
| 208 | ASSERT (M.n <= (n - p - 1)/2); |
| 209 | ASSERT (M.n + p <= (p + n - 1) / 2); |
| 210 | /* Temporary storage 2 (p + M->n) <= p + n - 1. */ |
| 211 | n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch); |
| 212 | } |
| 213 | else |
| 214 | { |
| 215 | /* Temporary storage n */ |
| 216 | n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp); |
| 217 | if (!n) |
| 218 | { |
| 219 | TMP_FREE; |
| 220 | return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); |
| 221 | } |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | while (n > 2) |
| 226 | { |
| 227 | struct hgcd_matrix1 M; |
| 228 | mp_limb_t ah, al, bh, bl; |
| 229 | mp_limb_t mask; |
| 230 | |
| 231 | mask = ap[n-1] | bp[n-1]; |
| 232 | ASSERT (mask > 0); |
| 233 | |
| 234 | if (mask & GMP_NUMB_HIGHBIT) |
| 235 | { |
| 236 | ah = ap[n-1]; al = ap[n-2]; |
| 237 | bh = bp[n-1]; bl = bp[n-2]; |
| 238 | } |
| 239 | else |
| 240 | { |
| 241 | int shift; |
| 242 | |
| 243 | count_leading_zeros (shift, mask); |
| 244 | ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); |
| 245 | al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); |
| 246 | bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); |
| 247 | bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); |
| 248 | } |
| 249 | |
| 250 | /* Try an mpn_nhgcd2 step */ |
| 251 | if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits)) |
| 252 | { |
| 253 | n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n); |
| 254 | MP_PTR_SWAP (ap, tp); |
| 255 | } |
| 256 | else |
| 257 | { |
| 258 | /* mpn_hgcd2 has failed. Then either one of a or b is very |
| 259 | small, or the difference is very small. Perform one |
| 260 | subtraction followed by one division. */ |
| 261 | n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp); |
| 262 | if (!n) |
| 263 | { |
| 264 | TMP_FREE; |
| 265 | return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | if (bits >= 16) |
| 271 | MP_PTR_SWAP (ap, bp); |
| 272 | |
| 273 | ASSERT (bp[0] & 1); |
| 274 | |
| 275 | if (n == 1) |
| 276 | { |
| 277 | mp_limb_t al, bl; |
| 278 | al = ap[0]; |
| 279 | bl = bp[0]; |
| 280 | |
| 281 | TMP_FREE; |
| 282 | if (bl == 1) |
| 283 | return 1 - 2*(bits & 1); |
| 284 | else |
| 285 | return mpn_jacobi_base (al, bl, bits << 1); |
| 286 | } |
| 287 | |
| 288 | else |
| 289 | { |
| 290 | int res = mpn_jacobi_2 (ap, bp, bits & 1); |
| 291 | TMP_FREE; |
| 292 | return res; |
| 293 | } |
| 294 | } |