blob: d98b126040cf72a15b6d691fe660feaabfbeb14c [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* jacobi.c
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
8Inc.
9
10This file is part of the GNU MP Library.
11
12The GNU MP Library is free software; you can redistribute it and/or modify
13it under the terms of either:
14
15 * the GNU Lesser General Public License as published by the Free
16 Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18
19or
20
21 * the GNU General Public License as published by the Free Software
22 Foundation; either version 2 of the License, or (at your option) any
23 later version.
24
25or both in parallel, as here.
26
27The GNU MP Library is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30for more details.
31
32You should have received copies of the GNU General Public License and the
33GNU Lesser General Public License along with the GNU MP Library. If not,
34see https://www.gnu.org/licenses/. */
35
36#include "gmp-impl.h"
37#include "longlong.h"
38
39#ifndef JACOBI_DC_THRESHOLD
40#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
41#endif
42
43/* Schönhage's rules:
44 *
45 * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
46 *
47 * If r1 is odd, then
48 *
49 * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
50 *
51 * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
52 *
53 * If r1 is even, r2 must be odd. We have
54 *
55 * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
56 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
57 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
58 *
59 * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
60 * q1 times gives
61 *
62 * (r1 | r0) = (r1 | r2) = (r3 | r2)
63 *
64 * On the other hand, if r1 = 2 (mod 4), the sign factor is
65 * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
66 *
67 * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
68 * = q1 (r0-1)/2 + q1 (q1-1)/2
69 *
70 * and we can summarize the even case as
71 *
72 * (r1 | r0) = t(r1, r0, q1) (r3 | r2)
73 *
74 * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
75 *
76 * What about termination? The remainder sequence ends with (0|1) = 1
77 * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
78 * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
79 * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
80 *
81 * Examples: (11|15) = - (15|11) = - (4|11)
82 * (4|11) = (4| 3) = (1| 3)
83 * (1| 3) = (3|1) = (0|1) = 1
84 *
85 * (2|7) = (2|1) = (0|1) = 1
86 *
87 * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
88 * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3)
89 * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
90 *
91 */
92
93/* In principle, the state consists of four variables: e (one bit), a,
94 b (two bits each), d (one bit). Collected factors are (-1)^e. a and
95 b are the least significant bits of the current remainders. d
96 (denominator) is 0 if we're currently subtracting multiplies of a
97 from b, and 1 if we're subtracting b from a.
98
99 e is stored in the least significant bit, while a, b and d are
100 coded as only 13 distinct values in bits 1-4, according to the
101 following table. For rows not mentioning d, the value is either
102 implied, or it doesn't matter. */
103
104#if WANT_ASSERT
105static const struct
106{
107 unsigned char a;
108 unsigned char b;
109} decode_table[13] = {
110 /* 0 */ { 0, 1 },
111 /* 1 */ { 0, 3 },
112 /* 2 */ { 1, 1 },
113 /* 3 */ { 1, 3 },
114 /* 4 */ { 2, 1 },
115 /* 5 */ { 2, 3 },
116 /* 6 */ { 3, 1 },
117 /* 7 */ { 3, 3 }, /* d = 1 */
118 /* 8 */ { 1, 0 },
119 /* 9 */ { 1, 2 },
120 /* 10 */ { 3, 0 },
121 /* 11 */ { 3, 2 },
122 /* 12 */ { 3, 3 }, /* d = 0 */
123};
124#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
125#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
126#endif /* WANT_ASSERT */
127
128const unsigned char jacobi_table[208] = {
129#include "jacobitab.h"
130};
131
132#define BITS_FAIL 31
133
134static void
135jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
136 mp_srcptr qp, mp_size_t qn, int d)
137{
138 unsigned *bitsp = (unsigned *) p;
139
140 if (gp)
141 {
142 ASSERT (gn > 0);
143 if (gn != 1 || gp[0] != 1)
144 {
145 *bitsp = BITS_FAIL;
146 return;
147 }
148 }
149
150 if (qp)
151 {
152 ASSERT (qn > 0);
153 ASSERT (d >= 0);
154 *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
155 }
156}
157
158#define CHOOSE_P(n) (2*(n) / 3)
159
160int
161mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
162{
163 mp_size_t scratch;
164 mp_size_t matrix_scratch;
165 mp_ptr tp;
166
167 TMP_DECL;
168
169 ASSERT (n > 0);
170 ASSERT ( (ap[n-1] | bp[n-1]) > 0);
171 ASSERT ( (bp[0] | ap[0]) & 1);
172
173 /* FIXME: Check for small sizes first, before setting up temporary
174 storage etc. */
175 scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
176
177 if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
178 {
179 mp_size_t hgcd_scratch;
180 mp_size_t update_scratch;
181 mp_size_t p = CHOOSE_P (n);
182 mp_size_t dc_scratch;
183
184 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
185 hgcd_scratch = mpn_hgcd_itch (n - p);
186 update_scratch = p + n - 1;
187
188 dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
189 if (dc_scratch > scratch)
190 scratch = dc_scratch;
191 }
192
193 TMP_MARK;
194 tp = TMP_ALLOC_LIMBS(scratch);
195
196 while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
197 {
198 struct hgcd_matrix M;
199 mp_size_t p = 2*n/3;
200 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
201 mp_size_t nn;
202 mpn_hgcd_matrix_init (&M, n - p, tp);
203
204 nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
205 tp + matrix_scratch);
206 if (nn > 0)
207 {
208 ASSERT (M.n <= (n - p - 1)/2);
209 ASSERT (M.n + p <= (p + n - 1) / 2);
210 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
211 n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
212 }
213 else
214 {
215 /* Temporary storage n */
216 n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
217 if (!n)
218 {
219 TMP_FREE;
220 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
221 }
222 }
223 }
224
225 while (n > 2)
226 {
227 struct hgcd_matrix1 M;
228 mp_limb_t ah, al, bh, bl;
229 mp_limb_t mask;
230
231 mask = ap[n-1] | bp[n-1];
232 ASSERT (mask > 0);
233
234 if (mask & GMP_NUMB_HIGHBIT)
235 {
236 ah = ap[n-1]; al = ap[n-2];
237 bh = bp[n-1]; bl = bp[n-2];
238 }
239 else
240 {
241 int shift;
242
243 count_leading_zeros (shift, mask);
244 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
245 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
246 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
247 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
248 }
249
250 /* Try an mpn_nhgcd2 step */
251 if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
252 {
253 n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
254 MP_PTR_SWAP (ap, tp);
255 }
256 else
257 {
258 /* mpn_hgcd2 has failed. Then either one of a or b is very
259 small, or the difference is very small. Perform one
260 subtraction followed by one division. */
261 n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
262 if (!n)
263 {
264 TMP_FREE;
265 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
266 }
267 }
268 }
269
270 if (bits >= 16)
271 MP_PTR_SWAP (ap, bp);
272
273 ASSERT (bp[0] & 1);
274
275 if (n == 1)
276 {
277 mp_limb_t al, bl;
278 al = ap[0];
279 bl = bp[0];
280
281 TMP_FREE;
282 if (bl == 1)
283 return 1 - 2*(bits & 1);
284 else
285 return mpn_jacobi_base (al, bl, bits << 1);
286 }
287
288 else
289 {
290 int res = mpn_jacobi_2 (ap, bp, bits & 1);
291 TMP_FREE;
292 return res;
293 }
294}