Blame - third_party/gmp/mpn/generic/jacobi.c - RealtimeRoboticsGroup/test

blob: d98b126040cf72a15b6d691fe660feaabfbeb14c [file] [log] [blame]

Austin Schuh	dace2a6	2020-08-18 10:56:48 -0700	[diff] [blame]	1	/* jacobi.c
				2
				3	THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
				4	SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
				5	GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
				6
				7	Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
				8	Inc.
				9
				10	This file is part of the GNU MP Library.
				11
				12	The GNU MP Library is free software; you can redistribute it and/or modify
				13	it under the terms of either:
				14
				15	* the GNU Lesser General Public License as published by the Free
				16	Software Foundation; either version 3 of the License, or (at your
				17	option) any later version.
				18
				19	or
				20
				21	* the GNU General Public License as published by the Free Software
				22	Foundation; either version 2 of the License, or (at your option) any
				23	later version.
				24
				25	or both in parallel, as here.
				26
				27	The GNU MP Library is distributed in the hope that it will be useful, but
				28	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				29	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				30	for more details.
				31
				32	You should have received copies of the GNU General Public License and the
				33	GNU Lesser General Public License along with the GNU MP Library. If not,
				34	see https://www.gnu.org/licenses/. */
				35
				36	#include "gmp-impl.h"
				37	#include "longlong.h"
				38
				39	#ifndef JACOBI_DC_THRESHOLD
				40	#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
				41	#endif
				42
				43	/* Schönhage's rules:
				44	*
				45	* Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
				46	*
				47	* If r1 is odd, then
				48	*
				49	* (r1 \| r0) = s(r1, r0) (r0 \| r1) = s(r1, r0) (r2, r1)
				50	*
				51	* where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
				52	*
				53	* If r1 is even, r2 must be odd. We have
				54	*
				55	* (r1 \| r0) = (r1 - r0 \| r0) = (-1)^(r0-1)/2 (r0 - r1 \| r0)
				56	* = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 \| r0 - r1)
				57	* = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 \| r0 - r1)
				58	*
				59	* Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
				60	* q1 times gives
				61	*
				62	* (r1 \| r0) = (r1 \| r2) = (r3 \| r2)
				63	*
				64	* On the other hand, if r1 = 2 (mod 4), the sign factor is
				65	* (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
				66	*
				67	* (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
				68	* = q1 (r0-1)/2 + q1 (q1-1)/2
				69	*
				70	* and we can summarize the even case as
				71	*
				72	* (r1 \| r0) = t(r1, r0, q1) (r3 \| r2)
				73	*
				74	* where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
				75	*
				76	* What about termination? The remainder sequence ends with (0\|1) = 1
				77	* (or (0 \| r) = 0 if r != 1). What are the possible cases? If r1 is
				78	* odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
				79	* hence non-zero. We may have r3 = r1 - q2 r2 = 0.
				80	*
				81	* Examples: (11\|15) = - (15\|11) = - (4\|11)
				82	* (4\|11) = (4\| 3) = (1\| 3)
				83	* (1\| 3) = (3\|1) = (0\|1) = 1
				84	*
				85	* (2\|7) = (2\|1) = (0\|1) = 1
				86	*
				87	* Detail: (2\|7) = (2-7\|7) = (-1\|7)(5\|7) = -(7\|5) = -(2\|5)
				88	* (2\|5) = (2-5\|5) = (-1\|5)(3\|5) = (5\|3) = (2\|3)
				89	* (2\|3) = (2-3\|3) = (-1\|3)(1\|3) = -(3\|1) = -(2\|1)
				90	*
				91	*/
				92
				93	/* In principle, the state consists of four variables: e (one bit), a,
				94	b (two bits each), d (one bit). Collected factors are (-1)^e. a and
				95	b are the least significant bits of the current remainders. d
				96	(denominator) is 0 if we're currently subtracting multiplies of a
				97	from b, and 1 if we're subtracting b from a.
				98
				99	e is stored in the least significant bit, while a, b and d are
				100	coded as only 13 distinct values in bits 1-4, according to the
				101	following table. For rows not mentioning d, the value is either
				102	implied, or it doesn't matter. */
				103
				104	#if WANT_ASSERT
				105	static const struct
				106	{
				107	unsigned char a;
				108	unsigned char b;
				109	} decode_table[13] = {
				110	/* 0 */ { 0, 1 },
				111	/* 1 */ { 0, 3 },
				112	/* 2 */ { 1, 1 },
				113	/* 3 */ { 1, 3 },
				114	/* 4 */ { 2, 1 },
				115	/* 5 */ { 2, 3 },
				116	/* 6 */ { 3, 1 },
				117	/* 7 / { 3, 3 }, / d = 1 */
				118	/* 8 */ { 1, 0 },
				119	/* 9 */ { 1, 2 },
				120	/* 10 */ { 3, 0 },
				121	/* 11 */ { 3, 2 },
				122	/* 12 / { 3, 3 }, / d = 0 */
				123	};
				124	#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
				125	#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
				126	#endif /* WANT_ASSERT */
				127
				128	const unsigned char jacobi_table[208] = {
				129	#include "jacobitab.h"
				130	};
				131
				132	#define BITS_FAIL 31
				133
				134	static void
				135	jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
				136	mp_srcptr qp, mp_size_t qn, int d)
				137	{
				138	unsigned bitsp = (unsigned ) p;
				139
				140	if (gp)
				141	{
				142	ASSERT (gn > 0);
				143	if (gn != 1 \|\| gp[0] != 1)
				144	{
				145	*bitsp = BITS_FAIL;
				146	return;
				147	}
				148	}
				149
				150	if (qp)
				151	{
				152	ASSERT (qn > 0);
				153	ASSERT (d >= 0);
				154	bitsp = mpn_jacobi_update (bitsp, d, qp[0] & 3);
				155	}
				156	}
				157
				158	#define CHOOSE_P(n) (2*(n) / 3)
				159
				160	int
				161	mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
				162	{
				163	mp_size_t scratch;
				164	mp_size_t matrix_scratch;
				165	mp_ptr tp;
				166
				167	TMP_DECL;
				168
				169	ASSERT (n > 0);
				170	ASSERT ( (ap[n-1] \| bp[n-1]) > 0);
				171	ASSERT ( (bp[0] \| ap[0]) & 1);
				172
				173	/* FIXME: Check for small sizes first, before setting up temporary
				174	storage etc. */
				175	scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
				176
				177	if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
				178	{
				179	mp_size_t hgcd_scratch;
				180	mp_size_t update_scratch;
				181	mp_size_t p = CHOOSE_P (n);
				182	mp_size_t dc_scratch;
				183
				184	matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
				185	hgcd_scratch = mpn_hgcd_itch (n - p);
				186	update_scratch = p + n - 1;
				187
				188	dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
				189	if (dc_scratch > scratch)
				190	scratch = dc_scratch;
				191	}
				192
				193	TMP_MARK;
				194	tp = TMP_ALLOC_LIMBS(scratch);
				195
				196	while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
				197	{
				198	struct hgcd_matrix M;
				199	mp_size_t p = 2*n/3;
				200	mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
				201	mp_size_t nn;
				202	mpn_hgcd_matrix_init (&M, n - p, tp);
				203
				204	nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
				205	tp + matrix_scratch);
				206	if (nn > 0)
				207	{
				208	ASSERT (M.n <= (n - p - 1)/2);
				209	ASSERT (M.n + p <= (p + n - 1) / 2);
				210	/* Temporary storage 2 (p + M->n) <= p + n - 1. */
				211	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
				212	}
				213	else
				214	{
				215	/* Temporary storage n */
				216	n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
				217	if (!n)
				218	{
				219	TMP_FREE;
				220	return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
				221	}
				222	}
				223	}
				224
				225	while (n > 2)
				226	{
				227	struct hgcd_matrix1 M;
				228	mp_limb_t ah, al, bh, bl;
				229	mp_limb_t mask;
				230
				231	mask = ap[n-1] \| bp[n-1];
				232	ASSERT (mask > 0);
				233
				234	if (mask & GMP_NUMB_HIGHBIT)
				235	{
				236	ah = ap[n-1]; al = ap[n-2];
				237	bh = bp[n-1]; bl = bp[n-2];
				238	}
				239	else
				240	{
				241	int shift;
				242
				243	count_leading_zeros (shift, mask);
				244	ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
				245	al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
				246	bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
				247	bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
				248	}
				249
				250	/* Try an mpn_nhgcd2 step */
				251	if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
				252	{
				253	n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
				254	MP_PTR_SWAP (ap, tp);
				255	}
				256	else
				257	{
				258	/* mpn_hgcd2 has failed. Then either one of a or b is very
				259	small, or the difference is very small. Perform one
				260	subtraction followed by one division. */
				261	n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
				262	if (!n)
				263	{
				264	TMP_FREE;
				265	return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
				266	}
				267	}
				268	}
				269
				270	if (bits >= 16)
				271	MP_PTR_SWAP (ap, bp);
				272
				273	ASSERT (bp[0] & 1);
				274
				275	if (n == 1)
				276	{
				277	mp_limb_t al, bl;
				278	al = ap[0];
				279	bl = bp[0];
				280
				281	TMP_FREE;
				282	if (bl == 1)
				283	return 1 - 2*(bits & 1);
				284	else
				285	return mpn_jacobi_base (al, bl, bits << 1);
				286	}
				287
				288	else
				289	{
				290	int res = mpn_jacobi_2 (ap, bp, bits & 1);
				291	TMP_FREE;
				292	return res;
				293	}
				294	}