Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* UltraSPARC 64 support macros. |
| 2 | |
| 3 | THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST |
| 4 | CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN |
| 5 | FUTURE GNU MP RELEASES. |
| 6 | |
| 7 | Copyright 2003 Free Software Foundation, Inc. |
| 8 | |
| 9 | This file is part of the GNU MP Library. |
| 10 | |
| 11 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 12 | it under the terms of either: |
| 13 | |
| 14 | * the GNU Lesser General Public License as published by the Free |
| 15 | Software Foundation; either version 3 of the License, or (at your |
| 16 | option) any later version. |
| 17 | |
| 18 | or |
| 19 | |
| 20 | * the GNU General Public License as published by the Free Software |
| 21 | Foundation; either version 2 of the License, or (at your option) any |
| 22 | later version. |
| 23 | |
| 24 | or both in parallel, as here. |
| 25 | |
| 26 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 27 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 28 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 29 | for more details. |
| 30 | |
| 31 | You should have received copies of the GNU General Public License and the |
| 32 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 33 | see https://www.gnu.org/licenses/. */ |
| 34 | |
| 35 | |
| 36 | #define LOW32(x) ((x) & 0xFFFFFFFF) |
| 37 | #define HIGH32(x) ((x) >> 32) |
| 38 | |
| 39 | |
| 40 | /* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)]. |
| 41 | Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the |
| 42 | effect of swapping the two halves in this case. */ |
| 43 | #if HAVE_LIMB_BIG_ENDIAN |
| 44 | #define HALF_ENDIAN_ADJ(i) (1 - (((i) & 1) << 1)) /* +1 even, -1 odd */ |
| 45 | #endif |
| 46 | #if HAVE_LIMB_LITTLE_ENDIAN |
| 47 | #define HALF_ENDIAN_ADJ(i) 0 /* no adjust */ |
| 48 | #endif |
| 49 | #ifndef HALF_ENDIAN_ADJ |
| 50 | Error, error, unknown limb endianness; |
| 51 | #endif |
| 52 | |
| 53 | |
| 54 | /* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb |
| 55 | of that product is equal to l. dh and dl are the 32-bit halves of d. |
| 56 | |
| 57 | |-----high----||----low-----| |
| 58 | +------+------+ |
| 59 | | | ph = qh * dh |
| 60 | +------+------+ |
| 61 | +------+------+ |
| 62 | | | pm1 = ql * dh |
| 63 | +------+------+ |
| 64 | +------+------+ |
| 65 | | | pm2 = qh * dl |
| 66 | +------+------+ |
| 67 | +------+------+ |
| 68 | | | pl = ql * dl (not calculated) |
| 69 | +------+------+ |
| 70 | |
| 71 | Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2) |
| 72 | + HIGH(pl) == HIGH(l). The only thing we need from those product parts |
| 73 | is whether they produce a carry into the high. |
| 74 | |
| 75 | pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only |
| 76 | time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) > |
| 77 | HIGH(l). pl is never actually calculated. */ |
| 78 | |
| 79 | #define umul_ppmm_lowequal(h, q, d, dh, dl, l) \ |
| 80 | do { \ |
| 81 | mp_limb_t ql, qh, ph, pm1, pm2, pm_l; \ |
| 82 | ASSERT (dh == HIGH32(d)); \ |
| 83 | ASSERT (dl == LOW32(d)); \ |
| 84 | ASSERT (q*d == l); \ |
| 85 | \ |
| 86 | ql = LOW32 (q); \ |
| 87 | qh = HIGH32 (q); \ |
| 88 | \ |
| 89 | pm1 = ql * dh; \ |
| 90 | pm2 = qh * dl; \ |
| 91 | ph = qh * dh; \ |
| 92 | \ |
| 93 | pm_l = LOW32 (pm1) + LOW32 (pm2); \ |
| 94 | \ |
| 95 | (h) = ph + HIGH32 (pm1) + HIGH32 (pm2) \ |
| 96 | + HIGH32 (pm_l) + ((pm_l << 32) > l); \ |
| 97 | \ |
| 98 | ASSERT_HIGH_PRODUCT (h, q, d); \ |
| 99 | } while (0) |
| 100 | |
| 101 | |
| 102 | /* Set h to the high of q*d, assuming the low limb of that product is equal |
| 103 | to l, and that d fits in 32-bits. |
| 104 | |
| 105 | |-----high----||----low-----| |
| 106 | +------+------+ |
| 107 | | | pm = qh * dl |
| 108 | +------+------+ |
| 109 | +------+------+ |
| 110 | | | pl = ql * dl (not calculated) |
| 111 | +------+------+ |
| 112 | |
| 113 | Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only |
| 114 | time there's a carry from that sum is when LOW(pm) > HIGH(l). There's no |
| 115 | need to calculate pl to determine this. */ |
| 116 | |
| 117 | #define umul_ppmm_half_lowequal(h, q, d, l) \ |
| 118 | do { \ |
| 119 | mp_limb_t pm; \ |
| 120 | ASSERT (q*d == l); \ |
| 121 | ASSERT (HIGH32(d) == 0); \ |
| 122 | \ |
| 123 | pm = HIGH32(q) * d; \ |
| 124 | (h) = HIGH32(pm) + ((pm << 32) > l); \ |
| 125 | ASSERT_HIGH_PRODUCT (h, q, d); \ |
| 126 | } while (0) |
| 127 | |
| 128 | |
| 129 | /* check that h is the high limb of x*y */ |
| 130 | #if WANT_ASSERT |
| 131 | #define ASSERT_HIGH_PRODUCT(h, x, y) \ |
| 132 | do { \ |
| 133 | mp_limb_t want_h, dummy; \ |
| 134 | umul_ppmm (want_h, dummy, x, y); \ |
| 135 | ASSERT (h == want_h); \ |
| 136 | } while (0) |
| 137 | #else |
| 138 | #define ASSERT_HIGH_PRODUCT(h, q, d) \ |
| 139 | do { } while (0) |
| 140 | #endif |
| 141 | |
| 142 | |
| 143 | /* Multiply u anv v, where v < 2^32. */ |
| 144 | #define umul_ppmm_s(w1, w0, u, v) \ |
| 145 | do { \ |
| 146 | UWtype __x0, __x2; \ |
| 147 | UWtype __ul, __vl, __uh; \ |
| 148 | UWtype __u = (u), __v = (v); \ |
| 149 | \ |
| 150 | __ul = __ll_lowpart (__u); \ |
| 151 | __uh = __ll_highpart (__u); \ |
| 152 | __vl = __ll_lowpart (__v); \ |
| 153 | \ |
| 154 | __x0 = (UWtype) __ul * __vl; \ |
| 155 | __x2 = (UWtype) __uh * __vl; \ |
| 156 | \ |
| 157 | (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2; \ |
| 158 | (w0) = (__x2 << W_TYPE_SIZE/2) + __x0; \ |
| 159 | } while (0) |
| 160 | |
| 161 | /* Count the leading zeros on a limb, but assuming it fits in 32 bits. |
| 162 | The count returned will be in the range 32 to 63. |
| 163 | This is the 32-bit generic C count_leading_zeros from longlong.h. */ |
| 164 | #define count_leading_zeros_32(count, x) \ |
| 165 | do { \ |
| 166 | mp_limb_t __xr = (x); \ |
| 167 | unsigned __a; \ |
| 168 | ASSERT ((x) != 0); \ |
| 169 | ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF)); \ |
| 170 | __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1) \ |
| 171 | : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1); \ |
| 172 | \ |
| 173 | (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \ |
| 174 | } while (0) |
| 175 | |
| 176 | |
| 177 | /* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits |
| 178 | 32 bits and is normalized (high bit set). */ |
| 179 | #define invert_half_limb(inv, d) \ |
| 180 | do { \ |
| 181 | mp_limb_t _n; \ |
| 182 | ASSERT ((d) <= 0xFFFFFFFF); \ |
| 183 | ASSERT ((d) & 0x80000000); \ |
| 184 | _n = (((mp_limb_t) -(d)) << 32) - 1; \ |
| 185 | (inv) = (mp_limb_t) (unsigned) (_n / (d)); \ |
| 186 | } while (0) |
| 187 | |
| 188 | |
| 189 | /* Divide nh:nl by d, setting q to the quotient and r to the remainder. |
| 190 | q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t, |
| 191 | dinv_limb is similarly a 32-bit inverse but in an mp_limb_t. */ |
| 192 | |
| 193 | #define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb) \ |
| 194 | do { \ |
| 195 | unsigned _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q; \ |
| 196 | mp_limb_t _n, _x; \ |
| 197 | ASSERT (d_limb <= 0xFFFFFFFF); \ |
| 198 | ASSERT (dinv_limb <= 0xFFFFFFFF); \ |
| 199 | ASSERT (d_limb & 0x80000000); \ |
| 200 | ASSERT (nh < d_limb); \ |
| 201 | _n10 = (nl); \ |
| 202 | _n2 = (nh); \ |
| 203 | _n1 = (int) _n10 >> 31; \ |
| 204 | _nadj = _n10 + (_n1 & d_limb); \ |
| 205 | _x = dinv_limb * (_n2 - _n1) + _nadj; \ |
| 206 | _q11n = ~(_n2 + HIGH32 (_x)); /* -q1-1 */ \ |
| 207 | _n = ((mp_limb_t) _n2 << 32) + _n10; \ |
| 208 | _x = _n + d_limb * _q11n; /* n-q1*d-d */ \ |
| 209 | _xh = HIGH32 (_x) - d_limb; /* high(n-q1*d-d) */ \ |
| 210 | ASSERT (_xh == 0 || _xh == ~0); \ |
| 211 | _r = _x + (d_limb & _xh); /* addback */ \ |
| 212 | _q = _xh - _q11n; /* q1+1-addback */ \ |
| 213 | ASSERT (_r < d_limb); \ |
| 214 | ASSERT (d_limb * _q + _r == _n); \ |
| 215 | (r) = _r; \ |
| 216 | (q) = _q; \ |
| 217 | } while (0) |