blob: 3f92cbf0d1f39786191d7be883cbcf38ade0ca07 [file] [log] [blame]
Austin Schuhbb1338c2024-06-15 19:31:16 -07001/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2
3Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
4Foundation, Inc.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of either:
10
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14
15or
16
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
20
21or both in parallel, as here.
22
23The GNU MP Library is distributed in the hope that it will be useful, but
24WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26for more details.
27
28You should have received copies of the GNU General Public License and the
29GNU Lesser General Public License along with the GNU MP Library. If not,
30see https://www.gnu.org/licenses/. */
31
32#include "gmp-impl.h"
33#include "longlong.h"
34
35/* Uses the HGCD operation described in
36
37 N. Möller, On Schönhage's algorithm and subquadratic integer gcd
38 computation, Math. Comp. 77 (2008), 589-607.
39
40 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
41 then uses Lehmer's algorithm.
42*/
43
44/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
45 * 2)/3, which gives a balanced multiplication in
46 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
47 * performance. The matrix-vector multiplication is then
48 * 4:1-unbalanced, with matrix elements of size n/6, and vector
49 * elements of size p = 2n/3. */
50
51/* From analysis of the theoretical running time, it appears that when
52 * multiplication takes time O(n^alpha), p should be chosen so that
53 * the ratio of the time for the mpn_hgcd call, and the time for the
54 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
55 * 1). */
56#ifdef TUNE_GCD_P
57#define P_TABLE_SIZE 10000
58mp_size_t p_table[P_TABLE_SIZE];
59#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
60#else
61#define CHOOSE_P(n) (2*(n) / 3)
62#endif
63
64struct gcd_ctx
65{
66 mp_ptr gp;
67 mp_size_t gn;
68};
69
70static void
71gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
72 mp_srcptr qp, mp_size_t qn, int d)
73{
74 struct gcd_ctx *ctx = (struct gcd_ctx *) p;
75 MPN_COPY (ctx->gp, gp, gn);
76 ctx->gn = gn;
77}
78
79mp_size_t
80mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
81{
82 mp_size_t talloc;
83 mp_size_t scratch;
84 mp_size_t matrix_scratch;
85
86 struct gcd_ctx ctx;
87 mp_ptr tp;
88 TMP_DECL;
89
90 ASSERT (usize >= n);
91 ASSERT (n > 0);
92 ASSERT (vp[n-1] > 0);
93
94 /* FIXME: Check for small sizes first, before setting up temporary
95 storage etc. */
96 talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
97
98 /* For initial division */
99 scratch = usize - n + 1;
100 if (scratch > talloc)
101 talloc = scratch;
102
103#if TUNE_GCD_P
104 if (CHOOSE_P (n) > 0)
105#else
106 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
107#endif
108 {
109 mp_size_t hgcd_scratch;
110 mp_size_t update_scratch;
111 mp_size_t p = CHOOSE_P (n);
112 mp_size_t scratch;
113#if TUNE_GCD_P
114 /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
115 is increasing */
116 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
117 hgcd_scratch = mpn_hgcd_itch (n);
118 update_scratch = 2*(n - 1);
119#else
120 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
121 hgcd_scratch = mpn_hgcd_itch (n - p);
122 update_scratch = p + n - 1;
123#endif
124 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
125 if (scratch > talloc)
126 talloc = scratch;
127 }
128
129 TMP_MARK;
130 tp = TMP_ALLOC_LIMBS(talloc);
131
132 if (usize > n)
133 {
134 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
135
136 if (mpn_zero_p (up, n))
137 {
138 MPN_COPY (gp, vp, n);
139 ctx.gn = n;
140 goto done;
141 }
142 }
143
144 ctx.gp = gp;
145
146#if TUNE_GCD_P
147 while (CHOOSE_P (n) > 0)
148#else
149 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
150#endif
151 {
152 struct hgcd_matrix M;
153 mp_size_t p = CHOOSE_P (n);
154 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
155 mp_size_t nn;
156 mpn_hgcd_matrix_init (&M, n - p, tp);
157 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
158 if (nn > 0)
159 {
160 ASSERT (M.n <= (n - p - 1)/2);
161 ASSERT (M.n + p <= (p + n - 1) / 2);
162 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
163 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
164 }
165 else
166 {
167 /* Temporary storage n */
168 n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
169 if (n == 0)
170 goto done;
171 }
172 }
173
174 while (n > 2)
175 {
176 struct hgcd_matrix1 M;
177 mp_limb_t uh, ul, vh, vl;
178 mp_limb_t mask;
179
180 mask = up[n-1] | vp[n-1];
181 ASSERT (mask > 0);
182
183 if (mask & GMP_NUMB_HIGHBIT)
184 {
185 uh = up[n-1]; ul = up[n-2];
186 vh = vp[n-1]; vl = vp[n-2];
187 }
188 else
189 {
190 int shift;
191
192 count_leading_zeros (shift, mask);
193 uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
194 ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
195 vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
196 vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
197 }
198
199 /* Try an mpn_hgcd2 step */
200 if (mpn_hgcd2 (uh, ul, vh, vl, &M))
201 {
202 n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
203 MP_PTR_SWAP (up, tp);
204 }
205 else
206 {
207 /* mpn_hgcd2 has failed. Then either one of a or b is very
208 small, or the difference is very small. Perform one
209 subtraction followed by one division. */
210
211 /* Temporary storage n */
212 n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
213 if (n == 0)
214 goto done;
215 }
216 }
217
218 ASSERT(up[n-1] | vp[n-1]);
219
220 /* Due to the calling convention for mpn_gcd, at most one can be even. */
221 if ((up[0] & 1) == 0)
222 MP_PTR_SWAP (up, vp);
223 ASSERT ((up[0] & 1) != 0);
224
225 {
226 mp_limb_t u0, u1, v0, v1;
227 mp_double_limb_t g;
228
229 u0 = up[0];
230 v0 = vp[0];
231
232 if (n == 1)
233 {
234 int cnt;
235 count_trailing_zeros (cnt, v0);
236 *gp = mpn_gcd_11 (u0, v0 >> cnt);
237 ctx.gn = 1;
238 goto done;
239 }
240
241 v1 = vp[1];
242 if (UNLIKELY (v0 == 0))
243 {
244 v0 = v1;
245 v1 = 0;
246 /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
247 when this situation occurs internally. */
248 }
249 if ((v0 & 1) == 0)
250 {
251 int cnt;
252 count_trailing_zeros (cnt, v0);
253 v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
254 v1 >>= cnt;
255 }
256
257 u1 = up[1];
258 g = mpn_gcd_22 (u1, u0, v1, v0);
259 gp[0] = g.d0;
260 gp[1] = g.d1;
261 ctx.gn = 1 + (g.d1 > 0);
262 }
263done:
264 TMP_FREE;
265 return ctx.gn;
266}