/* mpz_powm_ui(res,base,exp,mod) -- Set R to (B^E) mod M.

   Contributed to the GNU project by Torbjörn Granlund.

Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009,
2011-2013, 2015 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"
#include "longlong.h"


/* This code is very old, and should be rewritten to current GMP standard.  It
   is slower than mpz_powm for large exponents, but also for small exponents
   when the mod argument is small.

   As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
*/

/*
  b ^ e mod m   res
  0   0     0    ?
  0   e     0    ?
  0   0     m    ?
  0   e     m    0
  b   0     0    ?
  b   e     0    ?
  b   0     m    1 mod m
  b   e     m    b^e mod m
*/

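/* Helper: reduce (np,nn) modulo (dp,dn) in place, leaving the remainder in
   the dn low limbs of np.  dp is expected to be normalized (most significant
   bit set) and dinv to hold its precomputed inverse.  The quotient goes to
   the scratch area tp and is discarded; 1- and 2-limb divisors are special
   cased, and larger ones pick between mpn_sbpi1_div_qr, mpn_dcpi1_div_qr and
   mpn_mu_div_qr based on the thresholds below.  */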
static void
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_ptr qp = tp;

  if (dn == 1)
    {
      np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
    }
  else if (dn == 2)
    {
      mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
           BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
    {
      mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
           BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
           (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
           + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)         /* ...condition */
    {
      mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
    }
  else
    {
      /* We need to allocate a separate remainder area, since mpn_mu_div_qr
	 does not handle overlap between the numerator and remainder areas.
	 FIXME: Make it handle such overlap.  */
      mp_ptr rp, scratch;
      mp_size_t itch;
      TMP_DECL;
      TMP_MARK;

      itch = mpn_mu_div_qr_itch (nn, dn, 0);
      rp = TMP_BALLOC_LIMBS (dn);
      scratch = TMP_BALLOC_LIMBS (itch);

      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
      MPN_COPY (np, rp, dn);

      TMP_FREE;
    }
}

/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
   t is defined by (tp,mn).  */
static void
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
{
  mp_ptr rp, scratch;
  TMP_DECL;
  TMP_MARK;

  TMP_ALLOC_LIMBS_2 (rp, an, scratch, an - mn + 1);
  MPN_COPY (rp, ap, an);
  mod (rp, an, mp, mn, dinv, scratch);
  MPN_COPY (tp, rp, mn);

  TMP_FREE;
}

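/* Set R to B^EL mod M.  An illustrative usage sketch (not part of the
   library source):

     mpz_t r, b, m;
     mpz_inits (r, b, m, NULL);
     mpz_set_ui (b, 7);
     mpz_set_ui (m, 13);
     mpz_powm_ui (r, b, 5UL, m);	-- sets r to 7^5 mod 13 = 11
     mpz_clears (r, b, m, NULL);
*/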
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  if (el < 20)
    {
      mp_ptr xp, tp, mp, bp, scratch;
      mp_size_t xn, tn, mn, bn;
      int m_zero_cnt;
      int c;
      mp_limb_t e, m2;
      gmp_pi1_t dinv;
      TMP_DECL;

      mp = PTR(m);
      mn = ABSIZ(m);
      if (UNLIKELY (mn == 0))
	DIVIDE_BY_ZERO;

      if (el <= 1)
	{
	  if (el == 1)
	    {
	      mpz_mod (r, b, m);
	      return;
	    }
	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on
	     whether M equals 1.  */
	  SIZ(r) = mn != 1 || mp[0] != 1;
	  MPZ_NEWALLOC (r, 1)[0] = 1;
	  return;
	}

      TMP_MARK;

      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}

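      /* Precompute a division inverse from the two high limbs of the
	 normalized modulus; it is reused by every reduction below.  */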
      m2 = mn == 1 ? 0 : mp[mn - 2];
      invert_pi1 (dinv, mp[mn - 1], m2);

      bn = ABSIZ(b);
      bp = PTR(b);
      if (bn > mn)
	{
	  /* Reduce possibly huge base.  Use a function call to reduce, since
	     we don't want the quotient allocation to live until function
	     return.  */
	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
	  reduce (new_bp, bp, bn, mp, mn, &dinv);
	  bp = new_bp;
	  bn = mn;
	  /* Canonicalize the base, since we are potentially going to multiply
	     by it quite a few times.  */
	  MPN_NORMALIZE (bp, bn);
	}

      if (bn == 0)
	{
	  SIZ(r) = 0;
	  TMP_FREE;
	  return;
	}

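      /* xp holds the running residue (at most mn limbs), tp receives products
	 of up to 2*mn limbs before reduction, and scratch provides quotient
	 space for mod().  */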
      TMP_ALLOC_LIMBS_3 (xp, mn, scratch, mn + 1, tp, 2 * mn + 1);

      MPN_COPY (xp, bp, bn);
      xn = bn;

      e = el;
      count_leading_zeros (c, e);
      e = (e << c) << 1;	/* shift the exp bits to the left, lose msb */
      c = GMP_LIMB_BITS - 1 - c;

      ASSERT (c != 0);		/* el > 1 */
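
      /* Left-to-right binary exponentiation (square-and-multiply).  x starts
	 out as the base, which consumes the exponent's leading 1 bit; each
	 iteration squares x and, when the next exponent bit (now the top bit
	 of e) is set, multiplies by the base.  Intermediate results are
	 reduced mod m as soon as they reach mn limbs.  */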
      {
	/* Main loop. */
	do
	  {
	    mpn_sqr (tp, xp, xn);
	    tn = 2 * xn; tn -= tp[tn - 1] == 0;
	    if (tn < mn)
	      {
		MPN_COPY (xp, tp, tn);
		xn = tn;
	      }
	    else
	      {
		mod (tp, tn, mp, mn, &dinv, scratch);
		MPN_COPY (xp, tp, mn);
		xn = mn;
	      }

	    if ((mp_limb_signed_t) e < 0)
	      {
		mpn_mul (tp, xp, xn, bp, bn);
		tn = xn + bn; tn -= tp[tn - 1] == 0;
		if (tn < mn)
		  {
		    MPN_COPY (xp, tp, tn);
		    xn = tn;
		  }
		else
		  {
		    mod (tp, tn, mp, mn, &dinv, scratch);
		    MPN_COPY (xp, tp, mn);
		    xn = mn;
		  }
	      }
	    e <<= 1;
	    c--;
	  }
	while (c != 0);
      }

      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
	 with the original M.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
	  tp[xn] = cy; xn += cy != 0;

	  if (xn >= mn)
	    {
	      mod (tp, xn, mp, mn, &dinv, scratch);
	      xn = mn;
	    }
	  mpn_rshift (xp, tp, xn, m_zero_cnt);
	}
      MPN_NORMALIZE (xp, xn);

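      /* When the base was negative and the exponent is odd, B^E is negative;
	 fold the magnitude back to the canonical non-negative residue
	 M - (|B|^E mod M), using the original, unshifted M.  */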
      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
	{
	  mp = PTR(m);		/* want original, unnormalized m */
	  mpn_sub (xp, mp, mn, xp, xn);
	  xn = mn;
	  MPN_NORMALIZE (xp, xn);
	}
      MPZ_NEWALLOC (r, xn);
      SIZ (r) = xn;
      MPN_COPY (PTR(r), xp, xn);

      TMP_FREE;
    }
  else
    {
      /* For large exponents, fake an mpz_t exponent and deflect to the more
	 sophisticated mpz_powm.  */
      mpz_t e;
      mp_limb_t ep[LIMBS_PER_ULONG];
      MPZ_FAKE_UI (e, ep, el);
      mpz_powm (r, b, e, m);
    }
}