/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   store the truncated integer part at rootp and the remainder at remp.

   Contributed by Paul Zimmermann (algorithm) and
   Paul Zimmermann and Torbjorn Granlund (implementation).
   Marco Bodrato wrote logbased_root to seed the loop.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

   Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.

   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free Software
       Foundation; either version 2 of the License, or (at your option) any
       later version.

   or both in parallel, as here.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received copies of the GNU General Public License and the
   GNU Lesser General Public License along with the GNU MP Library.  If not,
   see https://www.gnu.org/licenses/.  */

/* FIXME:
     This implementation is not optimal when remp == NULL, since the complexity
     is M(n), whereas it should be M(n/k) on average.
*/
| 44 | |
| 45 | #include <stdio.h> /* for NULL */ |
| 46 | |
| 47 | #include "gmp-impl.h" |
| 48 | #include "longlong.h" |
| 49 | |
/* Core Newton-iteration kth-root routine, shared by the padded
   (approximate) and exact paths of mpn_rootrem below.  */
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
				       mp_limb_t, int);
| 52 | |
/* {rp,un} <- {up,un} >> cnt.  mpn_rshift requires a non-zero shift
   count, so a zero cnt degenerates to a plain forward copy.  */
#define MPN_RSHIFT(rp,up,un,cnt)					\
  do {									\
    if ((cnt) == 0)							\
      MPN_COPY_INCR (rp, up, un);					\
    else								\
      mpn_rshift (rp, up, un, cnt);					\
  } while (0)
| 62 | |
/* cy <- carry out of ({rp,un} <- {up,un} << cnt).  As above, a zero
   shift count is not allowed by mpn_lshift and becomes a backward copy
   with no carry out.  */
#define MPN_LSHIFT(cy,rp,up,un,cnt)					\
  do {									\
    if ((cnt) == 0)							\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	(cy) = 0;							\
      }									\
    else								\
      (cy) = mpn_lshift (rp, up, un, cnt);				\
  } while (0)
| 73 | |
| 74 | |
| 75 | /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero. |
| 76 | If remp <> NULL, put in {remp, un} the remainder. |
| 77 | Return the size (in limbs) of the remainder if remp <> NULL, |
| 78 | or a non-zero value iff the remainder is non-zero when remp = NULL. |
| 79 | Assumes: |
| 80 | (a) up[un-1] is not zero |
| 81 | (b) rootp has at least space for ceil(un/k) limbs |
| 82 | (c) remp has at least space for un limbs (in case remp <> NULL) |
| 83 | (d) the operands do not overlap. |
| 84 | |
| 85 | The auxiliary memory usage is 3*un+2 if remp = NULL, |
| 86 | and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment. |
| 87 | */ |
| 88 | mp_size_t |
| 89 | mpn_rootrem (mp_ptr rootp, mp_ptr remp, |
| 90 | mp_srcptr up, mp_size_t un, mp_limb_t k) |
| 91 | { |
| 92 | ASSERT (un > 0); |
| 93 | ASSERT (up[un - 1] != 0); |
| 94 | ASSERT (k > 1); |
| 95 | |
| 96 | if (UNLIKELY (k == 2)) |
| 97 | return mpn_sqrtrem (rootp, remp, up, un); |
| 98 | /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */ |
| 99 | if (remp == NULL && (un + 2) / 3 > k) |
| 100 | /* Pad {up,un} with k zero limbs. This will produce an approximate root |
| 101 | with one more limb, allowing us to compute the exact integral result. */ |
| 102 | { |
| 103 | mp_ptr sp, wp; |
| 104 | mp_size_t rn, sn, wn; |
| 105 | TMP_DECL; |
| 106 | TMP_MARK; |
| 107 | wn = un + k; |
| 108 | sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */ |
| 109 | TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */ |
| 110 | sp, sn); /* approximate root of padded input */ |
| 111 | MPN_COPY (wp + k, up, un); |
| 112 | MPN_FILL (wp, k, 0); |
| 113 | rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1); |
| 114 | /* The approximate root S = {sp,sn} is either the correct root of |
| 115 | {sp,sn}, or 1 too large. Thus unless the least significant limb of |
| 116 | S is 0 or 1, we can deduce the root of {up,un} is S truncated by one |
| 117 | limb. (In case sp[0]=1, we can deduce the root, but not decide |
| 118 | whether it is exact or not.) */ |
| 119 | MPN_COPY (rootp, sp + 1, sn - 1); |
| 120 | TMP_FREE; |
| 121 | return rn; |
| 122 | } |
| 123 | else |
| 124 | { |
| 125 | return mpn_rootrem_internal (rootp, remp, up, un, k, 0); |
| 126 | } |
| 127 | } |
| 128 | |
#define LOGROOT_USED_BITS 8
#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
/* Puts in *rootp some bits of the kth root of the number
   2^bitn * 1.op ; where op represents the "fractional" bits.

   The returned value is the number of bits of the root minus one;
   i.e. an approximation of the root will be
   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).

   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
   one is not counted).

   The approximation works via table-based log2/exp2: take the log of
   the input, divide it by k, then exponentiate the result back.
 */
static unsigned
logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
{
  /* Fixed-point fractional log2 table, 8.8 format.
     vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
  static const
  unsigned char vlog[] = {1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 18, 19, 21, 22,
			  23, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43,
			  44, 46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63,
			  64, 65, 67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 80, 81, 82,
			  83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100,
			  101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
			  118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
			  135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
			  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
			  165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
			  180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
			  194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
			  207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
			  220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
			  232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
			  245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};

  /* Inverse (fractional exp2) table, same 8.8 format.
     vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
  static const
  unsigned char vexp[] = {0, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 10, 11,
			  12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23,
			  23, 24, 25, 26, 26, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35,
			  36, 37, 37, 38, 39, 40, 41, 41, 42, 43, 44, 45, 45, 46, 47, 48,
			  49, 50, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 61,
			  62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75,
			  76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90,
			  91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
			  107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
			  123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
			  139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
			  157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
			  175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
			  194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
			  214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
			  235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
  mp_bitcnt_t retval;

  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
    {
      /* In the unlikely case (bitn would overflow when shifted left by
	 LOGROOT_USED_BITS), we use two divisions and a modulo.  */
      retval = bitn / k;
      bitn %= k;
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
    }
  else
    {
      /* Common case: divide the whole 8.8 fixed-point logarithm by k
	 in one go, then split integer and fractional parts.  */
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
      retval = bitn >> LOGROOT_USED_BITS;
      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
    }
  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
  /* Exponentiate the fractional part back: implicit leading 1 bit,
     followed by the table value (one bit dropped unless two
     corrections are tolerated by the caller).  */
  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
  return retval;
}
| 204 | |
/* Compute the truncated kth root of {up,un} into rootp, and the
   remainder into remp (if non-NULL), by a Newton iteration seeded by
   logbased_root and working on a doubling number of leading root bits.
   If approx is non-zero, does not compute the final remainder; the
   result may then be one too large unless its low limb is 0 or 1 (the
   caller, mpn_rootrem, accounts for this).
   Returns the remainder size when remp != NULL, otherwise a non-zero
   value iff the remainder is non-zero.  */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy, uh;
  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
  mp_bitcnt_t xnb; /* number of significant bits of the result */
  mp_bitcnt_t b, kk;
  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
  int ni;
  int perf_pow;
  unsigned ulz, snb, c, logk;
  TMP_DECL;

  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
  uh = up[un - 1];
  count_leading_zeros (ulz, uh);
  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
  /* unb is the (truncated) logarithm of the input U in base 2*/

  if (unb < k) /* root is 1 */
    {
      rootp[0] = 1;
      if (remp == NULL)
	un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
      else
	{
	  /* Remainder is U - 1.  */
	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
	  un -= (remp [un - 1] == 0);	/* There should be at most one zero limb,
				   if we demand u to be normalized  */
	}
      return un;
    }
  /* if (unb - k < k/2 + k/16) // root is 2 */

  /* Extract the top GMP_NUMB_BITS of U after the leading 1 bit, to
     seed logbased_root.  */
  if (ulz == GMP_NUMB_BITS)
    uh = up[un - 2];
  else
    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);

  xnb = logbased_root (rootp, uh, unb, k);
  snb = LOGROOT_RETURNED_BITS - 1;
  /* xnb+1 is the number of bits of the root R */
  /* snb+1 is the number of bits of the current approximation S */

  kk = k * xnb;		/* number of truncated bits in the input */

  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk)
    ;
  /* logk = ceil(log(k)/log(2)) + 1 */

  /* Precompute the schedule of root sizes for the Newton iterations,
     from the full size down to what the seed already provides.  */
  /* xnb is the number of remaining bits to determine in the kth root */
  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
    {
      /* invariant: here we want xnb+1 total bits for the kth root */

      /* if c is the new value of xnb, this means that we'll go from a
	 root of c+1 bits (say s') to a root of xnb+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" by Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((xnb + log2(k))/2). */
      if (xnb > logk)
	xnb = (xnb + logk) / 2;
      else
	--xnb;	/* add just one bit at a time */
    }

  /* Drop the seed's excess bits so it has exactly xnb+1 bits.  */
  *rootp >>= snb - xnb;
  kk -= xnb;

  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  TMP_MARK;
  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
  /* THINK: with the use of logbased_root, maybe the constant is
     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
		     qp, un + EXTRA,  /* will contain quotient and remainder
					 of R/(k*S^(k-1)), and S^k */
		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					 and temporary for mpn_pow_1 */

  if (remp == NULL)
    rp = scratch;	/* will contain the remainder */
  else
    rp = remp;
  sp = rootp;

  sn = 1;		/* Initial approximation has one limb */

  for (b = xnb; ni != 0; --ni)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      rn = un - kk / GMP_NUMB_BITS;
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      /* Correct the approximation downward until S^k <= floor(U/2^kk).  */
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* W <- S^(k-1) for the next iteration,
	     and S^k = W * S. */
	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	  mpn_mul (qp, wp, wn, sp, sn);
	  qn = wn + sn;
	  qn -= qp[qn - 1] == 0;

	  perf_pow = 1;
	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      /* sometimes two corrections are needed with logbased_root*/
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
      ASSERT_ALWAYS (rn >= qn);

      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
					next iteration */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */

      kk = kk - b;
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
	     - floor(kk / GMP_NUMB_BITS)
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (perf_pow != 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE_NOT_ZERO (rp, rn);

	  /* first multiply the remainder by 2^b */
	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
	  rn = rn + bn;
	  if (cy != 0)
	    {
	      rp[rn] = cy;
	      rn++;
	    }

	  save = rp[bn];
	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
	  if (nl - 1 > bn)
	    save2 = rp[bn + 1];
	}
      else
	{
	  /* S^k was exactly floor(U/2^kk): the shifted remainder is
	     zero, so only the freshly inserted input bits matter.  */
	  rn = bn;
	  save2 = save = 0;
	}
      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */
      /* FIXME: Should we normalise {rp,rn} here ?*/

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      save = sp[b / GMP_NUMB_BITS];

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (UNLIKELY (rn < wn))
	{
	  /* Quotient is zero: low part of the root is all zero bits.  */
	  MPN_FILL (sp, bn, 0);
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  if (qn <= bn) { /* Divide only if result is not too big. */
	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
	    qn += qp[qn] != 0;
	  }

	  /* 5: current buffers: {sp,sn}, {qp,qn}.
	     Note: {rp,rn} is not needed any more since we'll compute it from
	     scratch at the end of the loop. */

	  /* the quotient should be smaller than 2^b, since the previous
	     approximation was correctly rounded toward zero */
	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
	    {
	      /* Clamp the low part of the root to 2^b - 1.  */
	      for (qn = 1; qn < bn; ++qn)
		sp[qn - 1] = GMP_NUMB_MAX;
	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
	    }
	  else
	    {
	      /* 7: current buffers: {sp,sn}, {qp,qn} */

	      /* Combine sB and q to form sB + q. */
	      MPN_COPY (sp, qp, qn);
	      MPN_ZERO (sp + qn, bn - qn);
	    }
	}
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

    }

  /* otherwise we have rn > 0, thus the return value is ok */
  if (!approx || sp[0] <= CNST_LIMB (1))
    {
      /* Final correction loop: decrement S until S^k <= U.  */
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* Last iteration: we don't need W anymore. */
	  /* mpn_pow_1 requires that both qp and wp have enough
	     space to store the result {sp,sn}^k + 1 limb */
	  qn = mpn_pow_1 (qp, sp, sn, k, wp);

	  perf_pow = 1;
	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	};

      /* sometimes two corrections are needed with logbased_root*/
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);

      rn = perf_pow != 0;	/* non-zero iff U - S^k != 0 */
      if (rn != 0 && remp != NULL)
	{
	  /* Remainder is U - S^k, normalized.  */
	  mpn_sub (remp, up, un, qp, qn);
	  rn = un;
	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
	}
    }

  TMP_FREE;
  return rn;
}