Austin Schuh | bb1338c | 2024-06-15 19:31:16 -0700 | [diff] [blame] | 1 | /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. |
| 2 | |
| 3 | Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software |
| 4 | Foundation, Inc. |
| 5 | |
| 6 | This file is part of the GNU MP Library. |
| 7 | |
| 8 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 9 | it under the terms of either: |
| 10 | |
| 11 | * the GNU Lesser General Public License as published by the Free |
| 12 | Software Foundation; either version 3 of the License, or (at your |
| 13 | option) any later version. |
| 14 | |
| 15 | or |
| 16 | |
| 17 | * the GNU General Public License as published by the Free Software |
| 18 | Foundation; either version 2 of the License, or (at your option) any |
| 19 | later version. |
| 20 | |
| 21 | or both in parallel, as here. |
| 22 | |
| 23 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 24 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 25 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 26 | for more details. |
| 27 | |
| 28 | You should have received copies of the GNU General Public License and the |
| 29 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 30 | see https://www.gnu.org/licenses/. */ |
| 31 | |
| 32 | #include "gmp-impl.h" |
| 33 | #include "longlong.h" |
| 34 | |
| 35 | /* Uses the HGCD operation described in |
| 36 | |
| 37 | N. Möller, On Schönhage's algorithm and subquadratic integer gcd |
| 38 | computation, Math. Comp. 77 (2008), 589-607. |
| 39 | |
| 40 | to reduce inputs until they are of size below GCD_DC_THRESHOLD, and |
| 41 | then uses Lehmer's algorithm. |
| 42 | */ |
| 43 | |
| 44 | /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + |
| 45 | * 2)/3, which gives a balanced multiplication in |
| 46 | * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better |
| 47 | * performance. The matrix-vector multiplication is then |
| 48 | * 4:1-unbalanced, with matrix elements of size n/6, and vector |
| 49 | * elements of size p = 2n/3. */ |
| 50 | |
| 51 | /* From analysis of the theoretical running time, it appears that when |
| 52 | * multiplication takes time O(n^alpha), p should be chosen so that |
| 53 | * the ratio of the time for the mpn_hgcd call, and the time for the |
| 54 | * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - |
| 55 | * 1). */ |
| 56 | #ifdef TUNE_GCD_P |
| 57 | #define P_TABLE_SIZE 10000 |
| 58 | mp_size_t p_table[P_TABLE_SIZE]; |
| 59 | #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) |
| 60 | #else |
| 61 | #define CHOOSE_P(n) (2*(n) / 3) |
| 62 | #endif |
| 63 | |
| 64 | struct gcd_ctx |
| 65 | { |
| 66 | mp_ptr gp; |
| 67 | mp_size_t gn; |
| 68 | }; |
| 69 | |
| 70 | static void |
| 71 | gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, |
| 72 | mp_srcptr qp, mp_size_t qn, int d) |
| 73 | { |
| 74 | struct gcd_ctx *ctx = (struct gcd_ctx *) p; |
| 75 | MPN_COPY (ctx->gp, gp, gn); |
| 76 | ctx->gn = gn; |
| 77 | } |
| 78 | |
| 79 | mp_size_t |
| 80 | mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) |
| 81 | { |
| 82 | mp_size_t talloc; |
| 83 | mp_size_t scratch; |
| 84 | mp_size_t matrix_scratch; |
| 85 | |
| 86 | struct gcd_ctx ctx; |
| 87 | mp_ptr tp; |
| 88 | TMP_DECL; |
| 89 | |
| 90 | ASSERT (usize >= n); |
| 91 | ASSERT (n > 0); |
| 92 | ASSERT (vp[n-1] > 0); |
| 93 | |
| 94 | /* FIXME: Check for small sizes first, before setting up temporary |
| 95 | storage etc. */ |
| 96 | talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); |
| 97 | |
| 98 | /* For initial division */ |
| 99 | scratch = usize - n + 1; |
| 100 | if (scratch > talloc) |
| 101 | talloc = scratch; |
| 102 | |
| 103 | #if TUNE_GCD_P |
| 104 | if (CHOOSE_P (n) > 0) |
| 105 | #else |
| 106 | if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) |
| 107 | #endif |
| 108 | { |
| 109 | mp_size_t hgcd_scratch; |
| 110 | mp_size_t update_scratch; |
| 111 | mp_size_t p = CHOOSE_P (n); |
| 112 | mp_size_t scratch; |
| 113 | #if TUNE_GCD_P |
| 114 | /* Worst case, since we don't guarantee that n - CHOOSE_P(n) |
| 115 | is increasing */ |
| 116 | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); |
| 117 | hgcd_scratch = mpn_hgcd_itch (n); |
| 118 | update_scratch = 2*(n - 1); |
| 119 | #else |
| 120 | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
| 121 | hgcd_scratch = mpn_hgcd_itch (n - p); |
| 122 | update_scratch = p + n - 1; |
| 123 | #endif |
| 124 | scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); |
| 125 | if (scratch > talloc) |
| 126 | talloc = scratch; |
| 127 | } |
| 128 | |
| 129 | TMP_MARK; |
| 130 | tp = TMP_ALLOC_LIMBS(talloc); |
| 131 | |
| 132 | if (usize > n) |
| 133 | { |
| 134 | mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); |
| 135 | |
| 136 | if (mpn_zero_p (up, n)) |
| 137 | { |
| 138 | MPN_COPY (gp, vp, n); |
| 139 | ctx.gn = n; |
| 140 | goto done; |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | ctx.gp = gp; |
| 145 | |
| 146 | #if TUNE_GCD_P |
| 147 | while (CHOOSE_P (n) > 0) |
| 148 | #else |
| 149 | while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) |
| 150 | #endif |
| 151 | { |
| 152 | struct hgcd_matrix M; |
| 153 | mp_size_t p = CHOOSE_P (n); |
| 154 | mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
| 155 | mp_size_t nn; |
| 156 | mpn_hgcd_matrix_init (&M, n - p, tp); |
| 157 | nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); |
| 158 | if (nn > 0) |
| 159 | { |
| 160 | ASSERT (M.n <= (n - p - 1)/2); |
| 161 | ASSERT (M.n + p <= (p + n - 1) / 2); |
| 162 | /* Temporary storage 2 (p + M->n) <= p + n - 1. */ |
| 163 | n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); |
| 164 | } |
| 165 | else |
| 166 | { |
| 167 | /* Temporary storage n */ |
| 168 | n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); |
| 169 | if (n == 0) |
| 170 | goto done; |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | while (n > 2) |
| 175 | { |
| 176 | struct hgcd_matrix1 M; |
| 177 | mp_limb_t uh, ul, vh, vl; |
| 178 | mp_limb_t mask; |
| 179 | |
| 180 | mask = up[n-1] | vp[n-1]; |
| 181 | ASSERT (mask > 0); |
| 182 | |
| 183 | if (mask & GMP_NUMB_HIGHBIT) |
| 184 | { |
| 185 | uh = up[n-1]; ul = up[n-2]; |
| 186 | vh = vp[n-1]; vl = vp[n-2]; |
| 187 | } |
| 188 | else |
| 189 | { |
| 190 | int shift; |
| 191 | |
| 192 | count_leading_zeros (shift, mask); |
| 193 | uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); |
| 194 | ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); |
| 195 | vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); |
| 196 | vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); |
| 197 | } |
| 198 | |
| 199 | /* Try an mpn_hgcd2 step */ |
| 200 | if (mpn_hgcd2 (uh, ul, vh, vl, &M)) |
| 201 | { |
| 202 | n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); |
| 203 | MP_PTR_SWAP (up, tp); |
| 204 | } |
| 205 | else |
| 206 | { |
| 207 | /* mpn_hgcd2 has failed. Then either one of a or b is very |
| 208 | small, or the difference is very small. Perform one |
| 209 | subtraction followed by one division. */ |
| 210 | |
| 211 | /* Temporary storage n */ |
| 212 | n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); |
| 213 | if (n == 0) |
| 214 | goto done; |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | ASSERT(up[n-1] | vp[n-1]); |
| 219 | |
| 220 | /* Due to the calling convention for mpn_gcd, at most one can be even. */ |
| 221 | if ((up[0] & 1) == 0) |
| 222 | MP_PTR_SWAP (up, vp); |
| 223 | ASSERT ((up[0] & 1) != 0); |
| 224 | |
| 225 | { |
| 226 | mp_limb_t u0, u1, v0, v1; |
| 227 | mp_double_limb_t g; |
| 228 | |
| 229 | u0 = up[0]; |
| 230 | v0 = vp[0]; |
| 231 | |
| 232 | if (n == 1) |
| 233 | { |
| 234 | int cnt; |
| 235 | count_trailing_zeros (cnt, v0); |
| 236 | *gp = mpn_gcd_11 (u0, v0 >> cnt); |
| 237 | ctx.gn = 1; |
| 238 | goto done; |
| 239 | } |
| 240 | |
| 241 | v1 = vp[1]; |
| 242 | if (UNLIKELY (v0 == 0)) |
| 243 | { |
| 244 | v0 = v1; |
| 245 | v1 = 0; |
| 246 | /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could |
| 247 | when this situation occurs internally. */ |
| 248 | } |
| 249 | if ((v0 & 1) == 0) |
| 250 | { |
| 251 | int cnt; |
| 252 | count_trailing_zeros (cnt, v0); |
| 253 | v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt); |
| 254 | v1 >>= cnt; |
| 255 | } |
| 256 | |
| 257 | u1 = up[1]; |
| 258 | g = mpn_gcd_22 (u1, u0, v1, v0); |
| 259 | gp[0] = g.d0; |
| 260 | gp[1] = g.d1; |
| 261 | ctx.gn = 1 + (g.d1 > 0); |
| 262 | } |
| 263 | done: |
| 264 | TMP_FREE; |
| 265 | return ctx.gn; |
| 266 | } |