Blame - third_party/gmp/mpz/bin_ui.c - RealtimeRoboticsGroup/test

blob: bab3e076989e6eb6f2fdb5980d6c4e3804ab55eb [file] [log] [blame]

Austin Schuh	bb1338c	2024-06-15 19:31:16 -0700	[diff] [blame]	1	/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
				2
				3	Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc.
				4
				5	This file is part of the GNU MP Library.
				6
				7	The GNU MP Library is free software; you can redistribute it and/or modify
				8	it under the terms of either:
				9
				10	* the GNU Lesser General Public License as published by the Free
				11	Software Foundation; either version 3 of the License, or (at your
				12	option) any later version.
				13
				14	or
				15
				16	* the GNU General Public License as published by the Free Software
				17	Foundation; either version 2 of the License, or (at your option) any
				18	later version.
				19
				20	or both in parallel, as here.
				21
				22	The GNU MP Library is distributed in the hope that it will be useful, but
				23	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				24	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				25	for more details.
				26
				27	You should have received copies of the GNU General Public License and the
				28	GNU Lesser General Public License along with the GNU MP Library. If not,
				29	see https://www.gnu.org/licenses/. */
				30
				31	#include "gmp-impl.h"
				32
				33	/* How many special cases? Minimum is 2: 0 and 1;
				34	* also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
				35	*/
				36	#define APARTAJ_KALKULOJ 2
				37
				38	/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
				39	* the operands fit.
				40	*/
				41	#define UZU_BIN_UIUI 0
				42
				43	/* Whether to use a shortcut to precompute the product of four
				44	* elements (1), or precompute only the product of a couple (0).
				45	*
				46	* In both cases the precomputed product is then updated with some
				47	* linear operations to obtain the product of the next four (1)
				48	* [or two (0)] operands.
				49	*/
				50	#define KVAROPE 1
				51
				52	static void
				53	posmpz_init (mpz_ptr r)
				54	{
				55	mp_ptr rp;
				56	ASSERT (SIZ (r) > 0);
				57	rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
				58	*rp = 0;
				59	*++rp = 0;
				60	}
				61
				62	/* Equivalent to mpz_add_ui (r, r, in), but faster when
				63	0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
				64	static void
				65	posmpz_inc_ui (mpz_ptr r, unsigned long in)
				66	{
				67	#if BITS_PER_ULONG > GMP_NUMB_BITS
				68	mpz_add_ui (r, r, in);
				69	#else
				70	ASSERT (SIZ (r) > 0);
				71	MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
				72	SIZ (r) += (PTR (r)[SIZ (r)] != 0);
				73	#endif
				74	}
				75
				76	/* Equivalent to mpz_sub_ui (r, r, in), but faster when
				77	0 < SIZ (r) and we know in advance that the result is positive. */
				78	static void
				79	posmpz_dec_ui (mpz_ptr r, unsigned long in)
				80	{
				81	#if BITS_PER_ULONG > GMP_NUMB_BITS
				82	mpz_sub_ui (r, r, in);
				83	#else
				84	ASSERT (mpz_cmp_ui (r, in) >= 0);
				85	MPN_DECR_U (PTR (r), SIZ (r), in);
				86	SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
				87	#endif
				88	}
				89
				90	/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
				91	0 < SIZ (r) and we know in advance that the result is positive. */
				92	static void
				93	posmpz_rsh1 (mpz_ptr r)
				94	{
				95	mp_ptr rp;
				96	mp_size_t rn;
				97
				98	rn = SIZ (r);
				99	rp = PTR (r);
				100	ASSERT (rn > 0);
				101	mpn_rshift (rp, rp, rn, 1);
				102	SIZ (r) -= rp[rn - 1] == 0;
				103	}
				104
				105	/* Computes r = n(n+(2*k-1))/2
				106	It uses a sqare instead of a product, computing
				107	r = ((n+k-1)^2 + n - (k-1)^2)/2
				108	As a side effect, sets t = n+k-1
				109	*/
				110	static void
				111	mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
				112	{
				113	ASSERT (k > 0 && SIZ(n) > 0);
				114	--k;
				115	mpz_add_ui (t, n, k);
				116	mpz_mul (r, t, t);
				117	mpz_add (r, r, n);
				118	posmpz_rsh1 (r);
				119	if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
				120	posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
				121	else
				122	{
				123	mpz_t tmp;
				124	mpz_init_set_ui (tmp, (k + (k & 1)));
				125	mpz_mul_ui (tmp, tmp, k >> 1);
				126	mpz_sub (r, r, tmp);
				127	mpz_clear (tmp);
				128	}
				129	}
				130
				131	#if KVAROPE
				132	static void
				133	rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
				134	{
				135	if (k - lk < 5)
				136	{
				137	do {
				138	posmpz_inc_ui (p, 4*k+2);
				139	mpz_addmul_ui (P, p, 4*k);
				140	posmpz_dec_ui (P, k);
				141	mpz_mul (r, r, P);
				142	} while (--k > lk);
				143	}
				144	else
				145	{
				146	mpz_t lt;
				147	unsigned long int m;
				148
				149	m = ((k + lk) >> 1) + 1;
				150	rek_raising_fac4 (r, p, P, k, m, t);
				151
				152	posmpz_inc_ui (p, 4*m+2);
				153	mpz_addmul_ui (P, p, 4*m);
				154	posmpz_dec_ui (P, m);
				155	if (t == NULL)
				156	{
				157	mpz_init_set (lt, P);
				158	t = lt;
				159	}
				160	else
				161	{
				162	ALLOC (lt) = 0;
				163	mpz_set (t, P);
				164	}
				165	rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
				166
				167	mpz_mul (r, r, t);
				168	mpz_clear (lt);
				169	}
				170	}
				171
				172	/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
				173	rek_raising_fac4, and exploiting an idea inspired by a piece of
				174	code that Fredrik Johansson wrote and by a comment by Niels Möller.
				175
				176	Assume k = 4i then compute:
				177	p = (n+1)(n+4i)/2 - i
				178	(n+1+1)(n+4i)/2 = p + i + (n+4i)/2
				179	(n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
				180	P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
				181	n' = n + 2
				182	i' = i - 1
				183	(n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
				184	(n'-1)(n'+4i'+2)/2 - i' - 1 = p
				185	(n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
				186	(n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1
				187	(n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
				188	p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
				189	p' - 4i' - 2 = p
				190	(p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
				191	(p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
				192	(p' - 3i' - 1)*(p' - i')/2 = P
				193	(p' - 3i' - 1 + 4i' + 1)(p' - i' + 4i' - 1)/2 = P + (4i' + 1)(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
				194	(p' + i')(p' + 3i' - 1)/2 = P + (4i')(p' + p')/2 + (p' - i' - (p' + i'))/2
				195	(p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
				196	(p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
				197	P' = P + 4i'p' - i'
				198
				199	And compute the product P * P' * P" ...
				200	*/
				201
				202	static void
				203	mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
				204	{
				205	ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
				206	posmpz_init (n);
				207	posmpz_inc_ui (n, 1);
				208	SIZ (r) = 0;
				209	if (k & 1)
				210	{
				211	mpz_set (r, n);
				212	posmpz_inc_ui (n, 1);
				213	}
				214	k >>= 1;
				215	if (APARTAJ_KALKULOJ < 2 && k == 0)
				216	return;
				217
				218	mpz_hmul_nbnpk (p, n, k, t);
				219	posmpz_init (p);
				220
				221	if (k & 1)
				222	{
				223	if (SIZ (r))
				224	mpz_mul (r, r, p);
				225	else
				226	mpz_set (r, p);
				227	posmpz_inc_ui (p, k - 1);
				228	}
				229	k >>= 1;
				230	if (APARTAJ_KALKULOJ < 4 && k == 0)
				231	return;
				232
				233	mpz_hmul_nbnpk (t, p, k, n);
				234	if (SIZ (r))
				235	mpz_mul (r, r, t);
				236	else
				237	mpz_set (r, t);
				238
				239	if (APARTAJ_KALKULOJ > 8 \|\| k > 1)
				240	{
				241	posmpz_dec_ui (p, k);
				242	rek_raising_fac4 (r, p, t, k - 1, 0, n);
				243	}
				244	}
				245
				246	#else /* KVAROPE */
				247
				248	static void
				249	rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
				250	{
				251	/* Should the threshold depend on SIZ (n) ? */
				252	if (k - lk < 10)
				253	{
				254	do {
				255	posmpz_inc_ui (n, k);
				256	mpz_mul (r, r, n);
				257	--k;
				258	} while (k > lk);
				259	}
				260	else
				261	{
				262	mpz_t t3;
				263	unsigned long int m;
				264
				265	m = ((k + lk) >> 1) + 1;
				266	rek_raising_fac (r, n, k, m, t1, t2);
				267
				268	posmpz_inc_ui (n, m);
				269	if (t1 == NULL)
				270	{
				271	mpz_init_set (t3, n);
				272	t1 = t3;
				273	}
				274	else
				275	{
				276	ALLOC (t3) = 0;
				277	mpz_set (t1, n);
				278	}
				279	rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
				280
				281	mpz_mul (r, r, t1);
				282	mpz_clear (t3);
				283	}
				284	}
				285
				286	/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
				287	rek_raising_fac, and exploiting an idea inspired by a piece of
				288	code that Fredrik Johansson wrote.
				289
				290	Force an even k = 2i then compute:
				291	p = (n+1)(n+2i)/2
				292	i' = i - 1
				293	p == (n+1)(n+2i'+2)/2
				294	p' = p + i' == (n+2)(n+2i'+1)/2
				295	n' = n + 1
				296	p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
				297
				298	And compute the product p * p' * p" ...
				299	*/
				300
				301	static void
				302	mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
				303	{
				304	unsigned long int hk;
				305	ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
				306	mpz_add_ui (n, n, 1);
				307	hk = k >> 1;
				308	mpz_hmul_nbnpk (p, n, hk, t);
				309
				310	if ((k & 1) != 0)
				311	{
				312	mpz_add_ui (t, t, hk + 1);
				313	mpz_mul (r, t, p);
				314	}
				315	else
				316	{
				317	mpz_set (r, p);
				318	}
				319
				320	if ((APARTAJ_KALKULOJ > 3) \|\| (hk > 1))
				321	{
				322	posmpz_init (p);
				323	rek_raising_fac (r, p, hk - 1, 0, t, n);
				324	}
				325	}
				326	#endif /* KVAROPE */
				327
				328	/* This is a poor implementation. Look at bin_uiui.c for improvement ideas.
				329	In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
				330	the code here only for big n.
				331
				332	The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
				333	1 section 1.2.6 part G. */
				334
				335	void
				336	mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
				337	{
				338	mpz_t ni;
				339	mp_size_t negate;
				340
				341	if (SIZ (n) < 0)
				342	{
				343	/* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
				344	mpz_init (ni);
				345	mpz_add_ui (ni, n, 1L);
				346	mpz_neg (ni, ni);
				347	negate = (k & 1); /* (-1)^k */
				348	}
				349	else
				350	{
				351	/* bin(n,k) == 0 if k>n
				352	(no test for this under the n<0 case, since -n+k-1 >= k there) */
				353	if (mpz_cmp_ui (n, k) < 0)
				354	{
				355	SIZ (r) = 0;
				356	return;
				357	}
				358
				359	/* set ni = n-k */
				360	mpz_init (ni);
				361	mpz_sub_ui (ni, n, k);
				362	negate = 0;
				363	}
				364
				365	/* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
				366	for positive, 1 for negative). */
				367
				368	/* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
				369	whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
				370	= ni, and new ni of ni+k-ni = k. */
				371	if (mpz_cmp_ui (ni, k) < 0)
				372	{
				373	unsigned long tmp;
				374	tmp = k;
				375	k = mpz_get_ui (ni);
				376	mpz_set_ui (ni, tmp);
				377	}
				378
				379	if (k < APARTAJ_KALKULOJ)
				380	{
				381	if (k == 0)
				382	{
				383	SIZ (r) = 1;
				384	MPZ_NEWALLOC (r, 1)[0] = 1;
				385	}
				386	#if APARTAJ_KALKULOJ > 2
				387	else if (k == 2)
				388	{
				389	mpz_add_ui (ni, ni, 1);
				390	mpz_mul (r, ni, ni);
				391	mpz_add (r, r, ni);
				392	posmpz_rsh1 (r);
				393	}
				394	#endif
				395	#if APARTAJ_KALKULOJ > 3
				396	else if (k > 2)
				397	{ /* k = 3, 4 */
				398	mpz_add_ui (ni, ni, 2); /* n+1 */
				399	mpz_mul (r, ni, ni); /* (n+1)^2 */
				400	mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
				401	if (k == 3)
				402	{
				403	mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
				404	/* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? \/ /
				405	mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1);
				406	MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
				407	}
				408	else /* k = 4 */
				409	{
				410	mpz_add (ni, ni, r); /* (n+1)^2+n */
				411	mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
				412	mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */
				413	/* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? \/ /
				414	mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3);
				415	MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
				416	}
				417	}
				418	#endif
				419	else
				420	{ /* k = 1 */
				421	mpz_add_ui (r, ni, 1);
				422	}
				423	}
				424	#if UZU_BIN_UIUI
				425	else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
				426	{
				427	mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
				428	}
				429	#endif
				430	else
				431	{
				432	mp_limb_t count;
				433	mpz_t num, den;
				434
				435	mpz_init (num);
				436	mpz_init (den);
				437
				438	#if KVAROPE
				439	mpz_raising_fac4 (num, ni, k, den, r);
				440	popc_limb (count, k);
				441	ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
				442	mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
				443	#else
				444	mpz_raising_fac (num, ni, k, den, r);
				445	popc_limb (count, k);
				446	ASSERT (k - (k >> 1) - count >= 0);
				447	mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
				448	#endif
				449
				450	mpz_oddfac_1(den, k, 0);
				451
				452	mpz_divexact(r, num, den);
				453	mpz_clear (num);
				454	mpz_clear (den);
				455	}
				456	mpz_clear (ni);
				457
				458	SIZ(r) = (SIZ(r) ^ -negate) + negate;
				459	}