/* mpn_powm -- Compute R = U^E mod M.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2007-2012, 2019 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.

  1. W <- U

  2. T <- (B^n * U) mod M                Convert to REDC form

  3. Compute table U^1, U^3, U^5... of E-dependent size

  4. While there are more bits in E
       W <- power left-to-right base-k


  TODO:

   * Make getbits a macro, thereby allowing it to update the index operand.
     That will simplify the code using getbits.  (Perhaps then give getbits'
     sibling getbit a similar form, for symmetry.)

   * Write an itch function.  Or perhaps get rid of the tp parameter, since
     the huge pp area is allocated locally anyway?

   * Choose window size without looping.  (Superoptimize or think(tm).)

   * Handle small bases with initial, reduction-free exponentiation.

   * Call new division functions, not mpn_tdiv_qr.

   * Consider special code for one-limb M.

   * How should we handle the redc1/redc2/redc_n choice?
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
     This disregards the addmul_N constant term, but we could think of
     that as part of the respective mullo.

   * When U (the base) is small, we should start the exponentiation with
     plain operations, then convert that partial result to REDC form.

   * When U is just one limb, should it be handled without the k-ary tricks?
     We could keep a factor of B^n in W, but use U' = BU as base.  After
     multiplying by this (pseudo two-limb) number, we need to multiply by
     1/B mod M.
*/

#include "gmp-impl.h"
#include "longlong.h"
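
/* Illustrative sketch, not part of the GMP code: a single-word model of the
   left-to-right windowed method outlined above, using a fixed window and a
   full power table instead of the odd-powers table and REDC arithmetic used
   below.  The function name and the <stdint.h> types are ours, chosen for
   the sketch only; it is kept under #if 0 like the other sketches here.  */
#if 0
#include <stdint.h>

static uint32_t
powm_fixed_window_model (uint32_t u, uint32_t e, uint32_t m)
{
  enum { K = 4 };               /* window size in bits; K divides 32 */
  uint32_t tab[1u << K];        /* tab[j] = u^j mod m */
  uint32_t w = 1 % m;
  int i, j;

  tab[0] = 1 % m;
  for (j = 1; j < (1 << K); j++)
    tab[j] = (uint32_t) (((uint64_t) tab[j - 1] * u) % m);

  /* Scan the exponent in K-bit windows, most significant window first:
     square K times, then multiply by the table entry for the window.  */
  for (i = 32 / K - 1; i >= 0; i--)
    {
      for (j = 0; j < K; j++)
        w = (uint32_t) (((uint64_t) w * w) % m);
      w = (uint32_t) (((uint64_t) w * tab[(e >> (i * K)) & ((1u << K) - 1)]) % m);
    }
  return w;                     /* w = u^e mod m */
}
#endif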

/* One-limb Montgomery reduction: with B = 2^GMP_NUMB_BITS and
   invm = -1/(*mp) mod B, store at *rp a single limb congruent to
   {up,2} * B^-1 modulo *mp.  The result is not necessarily fully
   reduced below *mp.  */
#undef MPN_REDC_0
#define MPN_REDC_0(rp, up, mp, invm)                                    \
  do {                                                                  \
    mp_limb_t p1, r0, u0, _dummy;                                       \
    u0 = *(up);                                                         \
    umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);       \
    ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);                      \
    p1 += (u0 != 0);                                                    \
    r0 = (up)[1] + p1;                                                  \
    if (p1 > r0)                                                        \
      r0 -= *(mp);                                                      \
    *(rp) = r0;                                                         \
  } while (0)

#undef MPN_REDC_1
#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
#define MPN_REDC_1(rp, up, mp, n, invm)                                 \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);                     \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, up + n, mp, n);                                    \
    else                                                                \
      MPN_COPY (rp, up + n, n);                                         \
  } while (0)
#else
#define MPN_REDC_1(rp, up, mp, n, invm)                                 \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_redc_1 (rp, up, mp, n, invm);                              \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, rp, mp, n);                                        \
  } while (0)
#endif

#undef MPN_REDC_2
#define MPN_REDC_2(rp, up, mp, n, mip)                                  \
  do {                                                                  \
    mp_limb_t cy;                                                       \
    cy = mpn_redc_2 (rp, up, mp, n, mip);                               \
    if (cy != 0)                                                        \
      mpn_sub_n (rp, rp, mp, n);                                        \
  } while (0)

#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
#define WANT_REDC_2 1
#endif

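/* getbit and getbits index bits with a "one past the top" convention that
   matches the bit count produced by MPN_SIZEINBASE_2EXP: getbit (p, bi)
   reads bit number bi - 1 of p, and getbits (p, bi, nbits) returns the
   nbits bits just below bit position bi (or the low bi bits when
   bi < nbits).  For example, with p[0] = 0xABCD, getbits (p, 8, 4)
   returns 0xC.  */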
#define getbit(p,bi) \
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)

static inline mp_limb_t
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
{
  int nbits_in_r;
  mp_limb_t r;
  mp_size_t i;

  if (bi < nbits)
    {
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
    }
  else
    {
      bi -= nbits;                      /* bit index of low bit to extract */
      i = bi / GMP_NUMB_BITS;           /* word index of low bit to extract */
      bi %= GMP_NUMB_BITS;              /* bit index in low word */
      r = p[i] >> bi;                   /* extract (low) bits */
      nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
      if (nbits_in_r < nbits)           /* did we get enough bits? */
        r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
      return r & (((mp_limb_t) 1 << nbits) - 1);
    }
}

/* Pick the sliding-window size in bits for an eb-bit exponent: x[k] is the
   largest exponent bit count that still selects a k-bit window, so e.g. a
   512-bit exponent gives k = 5.  */
static inline int
win_size (mp_bitcnt_t eb)
{
  int k;
  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
  for (k = 1; eb > x[k]; k++)
    ;
  return k;
}

/* Convert U to REDC form, U_r = B^n * U mod M */
static void
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
{
  mp_ptr tp, qp;
  TMP_DECL;
  TMP_MARK;

  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);

  MPN_ZERO (tp, n);
  MPN_COPY (tp + n, up, un);
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
  TMP_FREE;
}
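
/* For reference only, not used here: at the mpz level the same conversion is
   r = (u * 2^(n*GMP_NUMB_BITS)) mod m, i.e. with mpz_t operands holding the
   values of {up,un} and {mp,n} one could write

     mpz_mul_2exp (r, u, (mp_bitcnt_t) n * GMP_NUMB_BITS);
     mpz_mod (r, r, m);
*/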

/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
   Requires that mp[n-1..0] is odd.
   Requires that ep[en-1..0] is > 1.
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
void
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
          mp_srcptr ep, mp_size_t en,
          mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t ip[2], *mip;
  int cnt;
  mp_bitcnt_t ebi;
  int windowsize, this_windowsize;
  mp_limb_t expbits;
  mp_ptr pp, this_pp;
  long i;
  TMP_DECL;

  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));

  TMP_MARK;

  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);

#if 0
  if (bn < n)
    {
      /* Do the first few exponent bits without mod reductions,
         until the result is greater than the mod argument.  */
      for (;;)
        {
          mpn_sqr (tp, this_pp, tn);
          tn = tn * 2 - 1, tn += tp[tn] != 0;
          if (getbit (ep, ebi) != 0)
            mpn_mul (..., tp, tn, bp, bn);
          ebi--;
        }
    }
#endif

  windowsize = win_size (ebi);

#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    {
      mip = ip;
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
    }
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    {
      mip = ip;
      mpn_binvert (mip, mp, 2, tp);
      mip[0] = -mip[0]; mip[1] = ~mip[1];
    }
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    {
      mip = ip;
      binvert_limb (mip[0], mp[0]);
      mip[0] = -mip[0];
    }
#endif
  else
    {
      mip = TMP_ALLOC_LIMBS (n);
      mpn_binvert (mip, mp, n, tp);
    }

  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));

  this_pp = pp;
  redcify (this_pp, bp, bn, mp, n);

  /* Store b^2 at rp.  */
  mpn_sqr (tp, this_pp, n);
#if 0
  if (n == 1) {
    MPN_REDC_0 (rp, tp, mp, mip[0]);
  } else
#endif
#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    MPN_REDC_2 (rp, tp, mp, n, mip);
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
  else
    mpn_redc_n (rp, tp, mp, n, mip);

  /* Precompute odd powers of b and put them in the temporary area at pp.  */
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
#if 1
    if (n == 1) {
      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
      ++this_pp;
      MPN_REDC_0 (this_pp, tp, mp, mip[0]);
    } else
#endif
    {
      mpn_mul_n (tp, this_pp, rp, n);
      this_pp += n;
#if WANT_REDC_2
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
        MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
        MPN_REDC_2 (this_pp, tp, mp, n, mip);
#else
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
        MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
#endif
      else
        mpn_redc_n (this_pp, tp, mp, n, mip);
    }

  expbits = getbits (ep, ebi, windowsize);
  if (ebi < windowsize)
    ebi = 0;
  else
    ebi -= windowsize;

  count_trailing_zeros (cnt, expbits);
  ebi += cnt;
  expbits >>= cnt;

  MPN_COPY (rp, pp + n * (expbits >> 1), n);

#define INNERLOOP                                                       \
  while (ebi != 0)                                                      \
    {                                                                   \
      while (getbit (ep, ebi) == 0)                                     \
        {                                                               \
          MPN_SQR (tp, rp, n);                                          \
          MPN_REDUCE (rp, tp, mp, n, mip);                              \
          if (--ebi == 0)                                               \
            goto done;                                                  \
        }                                                               \
                                                                        \
      /* The next bit of the exponent is 1.  Now extract the largest   \
         block of bits <= windowsize, and such that the least          \
         significant bit is 1.  */                                     \
                                                                        \
      expbits = getbits (ep, ebi, windowsize);                          \
      this_windowsize = windowsize;                                     \
      if (ebi < windowsize)                                             \
        {                                                               \
          this_windowsize -= windowsize - ebi;                          \
          ebi = 0;                                                      \
        }                                                               \
      else                                                              \
        ebi -= windowsize;                                              \
                                                                        \
      count_trailing_zeros (cnt, expbits);                              \
      this_windowsize -= cnt;                                           \
      ebi += cnt;                                                       \
      expbits >>= cnt;                                                  \
                                                                        \
      do                                                                \
        {                                                               \
          MPN_SQR (tp, rp, n);                                          \
          MPN_REDUCE (rp, tp, mp, n, mip);                              \
        }                                                               \
      while (--this_windowsize != 0);                                   \
                                                                        \
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);                   \
      MPN_REDUCE (rp, tp, mp, n, mip);                                  \
    }


  if (n == 1)
    {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  umul_ppmm((r)[1], *(r), *(a), *(b))
#define MPN_SQR(r,a,n)  umul_ppmm((r)[1], *(r), *(a), *(a))
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_0(rp, tp, mp, mip[0])
      INNERLOOP;
    }
  else
#if WANT_REDC_2
    if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
          {
            if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
    else
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
            INNERLOOP;
          }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }

#else  /* WANT_REDC_2 */

    if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
          {
            if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
                INNERLOOP;
              }
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
    else
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
          {
            if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
                || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_mul_basecase (r,a,n,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
            else
              {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
                INNERLOOP;
              }
          }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
            INNERLOOP;
          }
        else
          {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n)  mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n)  mpn_sqr (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
            INNERLOOP;
          }
      }
#endif  /* WANT_REDC_2 */

 done:

  MPN_COPY (tp, rp, n);
  MPN_ZERO (tp + n, n);

#if WANT_REDC_2
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
    MPN_REDC_2 (rp, tp, mp, n, mip);
#else
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
  else
    mpn_redc_n (rp, tp, mp, n, mip);

  if (mpn_cmp (rp, mp, n) >= 0)
    mpn_sub_n (rp, rp, mp, n);

  TMP_FREE;
}
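
/* Usage sketch, not part of this file: the documented way to reach this code
   is through mpz_powm; mpn_powm above is internal and its interface may
   change.  A direct mpn-level call would also have to supply scratch space
   of MAX (mpn_binvert_itch (n), 2*n) limbs, per the comment above mpn_powm.  */
#if 0
#include <gmp.h>

static void
example (void)
{
  mpz_t r, b, e, m;

  mpz_inits (r, b, e, m, NULL);
  mpz_set_ui (b, 2);
  mpz_set_ui (e, 13);
  mpz_set_ui (m, 9);            /* odd modulus, exponent > 1 */
  mpz_powm (r, b, e, m);        /* r = 2^13 mod 9 = 2 */
  mpz_clears (r, b, e, m, NULL);
}
#endif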