Blame - third_party/gmp/mpn/sparc64/sparc64.h - RealtimeRoboticsGroup/test

blob: 8698a82a72ccab68ea028523016c8bf7f8aa6bd9 [file] [log] [blame]

Austin Schuh	dace2a6	2020-08-18 10:56:48 -0700	[diff] [blame]	1	/* UltraSPARC 64 support macros.
				2
				3	THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
				4	CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
				5	FUTURE GNU MP RELEASES.
				6
				7	Copyright 2003 Free Software Foundation, Inc.
				8
				9	This file is part of the GNU MP Library.
				10
				11	The GNU MP Library is free software; you can redistribute it and/or modify
				12	it under the terms of either:
				13
				14	* the GNU Lesser General Public License as published by the Free
				15	Software Foundation; either version 3 of the License, or (at your
				16	option) any later version.
				17
				18	or
				19
				20	* the GNU General Public License as published by the Free Software
				21	Foundation; either version 2 of the License, or (at your option) any
				22	later version.
				23
				24	or both in parallel, as here.
				25
				26	The GNU MP Library is distributed in the hope that it will be useful, but
				27	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				28	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				29	for more details.
				30
				31	You should have received copies of the GNU General Public License and the
				32	GNU Lesser General Public License along with the GNU MP Library. If not,
				33	see https://www.gnu.org/licenses/. */
				34
				35
				36	#define LOW32(x) ((x) & 0xFFFFFFFF)
				37	#define HIGH32(x) ((x) >> 32)
				38
				39
				40	/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
				41	Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
				42	effect of swapping the two halves in this case. */
				43	#if HAVE_LIMB_BIG_ENDIAN
				44	#define HALF_ENDIAN_ADJ(i) (1 - (((i) & 1) << 1)) /* +1 even, -1 odd */
				45	#endif
				46	#if HAVE_LIMB_LITTLE_ENDIAN
				47	#define HALF_ENDIAN_ADJ(i) 0 /* no adjust */
				48	#endif
				49	#ifndef HALF_ENDIAN_ADJ
				50	Error, error, unknown limb endianness;
				51	#endif
				52
				53
				54	/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
				55	of that product is equal to l. dh and dl are the 32-bit halves of d.
				56
				57	\|-----high----\|\|----low-----\|
				58	+------+------+
				59	\| \| ph = qh * dh
				60	+------+------+
				61	+------+------+
				62	\| \| pm1 = ql * dh
				63	+------+------+
				64	+------+------+
				65	\| \| pm2 = qh * dl
				66	+------+------+
				67	+------+------+
				68	\| \| pl = ql * dl (not calculated)
				69	+------+------+
				70
				71	Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
				72	+ HIGH(pl) == HIGH(l). The only thing we need from those product parts
				73	is whether they produce a carry into the high.
				74
				75	pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
				76	time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
				77	HIGH(l). pl is never actually calculated. */
				78
				79	#define umul_ppmm_lowequal(h, q, d, dh, dl, l) \
				80	do { \
				81	mp_limb_t ql, qh, ph, pm1, pm2, pm_l; \
				82	ASSERT (dh == HIGH32(d)); \
				83	ASSERT (dl == LOW32(d)); \
				84	ASSERT (q*d == l); \
				85	\
				86	ql = LOW32 (q); \
				87	qh = HIGH32 (q); \
				88	\
				89	pm1 = ql * dh; \
				90	pm2 = qh * dl; \
				91	ph = qh * dh; \
				92	\
				93	pm_l = LOW32 (pm1) + LOW32 (pm2); \
				94	\
				95	(h) = ph + HIGH32 (pm1) + HIGH32 (pm2) \
				96	+ HIGH32 (pm_l) + ((pm_l << 32) > l); \
				97	\
				98	ASSERT_HIGH_PRODUCT (h, q, d); \
				99	} while (0)
				100
				101
				102	/* Set h to the high of q*d, assuming the low limb of that product is equal
				103	to l, and that d fits in 32-bits.
				104
				105	\|-----high----\|\|----low-----\|
				106	+------+------+
				107	\| \| pm = qh * dl
				108	+------+------+
				109	+------+------+
				110	\| \| pl = ql * dl (not calculated)
				111	+------+------+
				112
				113	Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
				114	time there's a carry from that sum is when LOW(pm) > HIGH(l). There's no
				115	need to calculate pl to determine this. */
				116
				117	#define umul_ppmm_half_lowequal(h, q, d, l) \
				118	do { \
				119	mp_limb_t pm; \
				120	ASSERT (q*d == l); \
				121	ASSERT (HIGH32(d) == 0); \
				122	\
				123	pm = HIGH32(q) * d; \
				124	(h) = HIGH32(pm) + ((pm << 32) > l); \
				125	ASSERT_HIGH_PRODUCT (h, q, d); \
				126	} while (0)
				127
				128
				129	/* check that h is the high limb of xy /
				130	#if WANT_ASSERT
				131	#define ASSERT_HIGH_PRODUCT(h, x, y) \
				132	do { \
				133	mp_limb_t want_h, dummy; \
				134	umul_ppmm (want_h, dummy, x, y); \
				135	ASSERT (h == want_h); \
				136	} while (0)
				137	#else
				138	#define ASSERT_HIGH_PRODUCT(h, q, d) \
				139	do { } while (0)
				140	#endif
				141
				142
				143	/* Multiply u anv v, where v < 2^32. */
				144	#define umul_ppmm_s(w1, w0, u, v) \
				145	do { \
				146	UWtype __x0, __x2; \
				147	UWtype __ul, __vl, __uh; \
				148	UWtype __u = (u), __v = (v); \
				149	\
				150	__ul = __ll_lowpart (__u); \
				151	__uh = __ll_highpart (__u); \
				152	__vl = __ll_lowpart (__v); \
				153	\
				154	__x0 = (UWtype) __ul * __vl; \
				155	__x2 = (UWtype) __uh * __vl; \
				156	\
				157	(w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2; \
				158	(w0) = (__x2 << W_TYPE_SIZE/2) + __x0; \
				159	} while (0)
				160
				161	/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
				162	The count returned will be in the range 32 to 63.
				163	This is the 32-bit generic C count_leading_zeros from longlong.h. */
				164	#define count_leading_zeros_32(count, x) \
				165	do { \
				166	mp_limb_t __xr = (x); \
				167	unsigned __a; \
				168	ASSERT ((x) != 0); \
				169	ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF)); \
				170	__a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1) \
				171	: (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1); \
				172	\
				173	(count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
				174	} while (0)
				175
				176
				177	/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
				178	32 bits and is normalized (high bit set). */
				179	#define invert_half_limb(inv, d) \
				180	do { \
				181	mp_limb_t _n; \
				182	ASSERT ((d) <= 0xFFFFFFFF); \
				183	ASSERT ((d) & 0x80000000); \
				184	_n = (((mp_limb_t) -(d)) << 32) - 1; \
				185	(inv) = (mp_limb_t) (unsigned) (_n / (d)); \
				186	} while (0)
				187
				188
				189	/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
				190	q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
				191	dinv_limb is similarly a 32-bit inverse but in an mp_limb_t. */
				192
				193	#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb) \
				194	do { \
				195	unsigned _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q; \
				196	mp_limb_t _n, _x; \
				197	ASSERT (d_limb <= 0xFFFFFFFF); \
				198	ASSERT (dinv_limb <= 0xFFFFFFFF); \
				199	ASSERT (d_limb & 0x80000000); \
				200	ASSERT (nh < d_limb); \
				201	_n10 = (nl); \
				202	_n2 = (nh); \
				203	_n1 = (int) _n10 >> 31; \
				204	_nadj = _n10 + (_n1 & d_limb); \
				205	_x = dinv_limb * (_n2 - _n1) + _nadj; \
				206	_q11n = ~(_n2 + HIGH32 (_x)); /* -q1-1 */ \
				207	_n = ((mp_limb_t) _n2 << 32) + _n10; \
				208	_x = _n + d_limb * _q11n; /* n-q1d-d / \
				209	_xh = HIGH32 (_x) - d_limb; /* high(n-q1d-d) / \
				210	ASSERT (_xh == 0 \|\| _xh == ~0); \
				211	_r = _x + (d_limb & _xh); /* addback */ \
				212	_q = _xh - _q11n; /* q1+1-addback */ \
				213	ASSERT (_r < d_limb); \
				214	ASSERT (d_limb * _q + _r == _n); \
				215	(r) = _r; \
				216	(q) = _q; \
				217	} while (0)