/* mpf_mul_ui -- Multiply a float and an unsigned integer.

Copyright 1993, 1994, 1996, 2001, 2003, 2004 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* The core operation is a multiply of PREC(r) limbs from u by v, producing
   either PREC(r) or PREC(r)+1 result limbs.  If u is shorter than PREC(r),
   then we take only as much as it has.  If u is longer we incorporate a
   carry from the lower limbs.

   If u has just 1 extra limb, then the carry to add is high(up[0]*v).  That
   is of course what mpn_mul_1 would do if it were called with PREC(r)+1
   limbs of input.

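   For instance (an illustrative sketch, ignoring nails; the names u0, u1,
   u2 and cy are hypothetical), if PREC(r) is 2 and u has 3 limbs
   {u0,u1,u2}, the retained product is {u1,u2}*v plus a carry-in of
   high(u0*v):

       umul_ppmm (cin, lo, u0, v);
       cy = mpn_mul_1c (rp, up+1, 2, v, cin);

   leaving in rp[] exactly the top two limbs that mpn_mul_1 would have
   produced from all 3 limbs of u.
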
   If u has more than 1 extra limb, then there can be a further carry bit
   out of the lower uncalculated limbs (the way the low of one product adds
   to the high of the product below it).  This is of course what an
   mpn_mul_1 would do if it were called with the full u operand.  But we
   instead work downwards explicitly, until a carry occurs or until a value
   other than GMP_NUMB_MAX occurs (that being the only value a carry bit
   can propagate across).

   The carry determination normally requires two umul_ppmm's; only rarely
   will GMP_NUMB_MAX occur and require further products.

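   A worked example (illustrative, with 8-bit limbs and no nails), for
   excess = 3 discarded limbs:

       up[2]*v = 0x1234   so cin = 0x12, lo = 0x34
       up[1]*v = 0xCB02   hi+lo = 0xCB+0x34 = 0xFF = GMP_NUMB_MAX; no
                          carry yet, but a carry from below could still
                          propagate across, so keep scanning (lo = 0x02)
       up[0]*v = 0xFE80   hi+lo = 0xFE+0x02 = 0x100, so cbit = 1 and it
                          ripples through the 0xFF limb above, giving
                          cin = 0x13; the sum 0x00 != GMP_NUMB_MAX stops
                          the scan

   and indeed 0x1234*0x10000 + 0xCB02*0x100 + 0xFE80 = 0x13000080, whose
   limb above the three discarded positions is the carry 0x13.
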
   The carry limb is conveniently added into the mul_1 using mpn_mul_1c
   when that function exists, otherwise a subsequent mpn_add_1 is needed.

   Clearly when mpn_mul_1c is used the carry must be calculated first.  But
   this is also the case when add_1 is used, since if r==u and ABSIZ(r) >
   PREC(r) then the mpn_mul_1 overwrites the low part of the input.

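   Concretely (an illustrative case): if r==u and ABSIZ(r) = PREC(r)+1,
   then after the shortening below up = PTR(r)+1 while rp = PTR(r), so
   mpn_mul_1 stores into rp[0], the very limb from which the carry
   umul_ppmm was formed.  Hence the carry must be computed first.
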
   A reuse r==u with size > prec can occur from a size PREC(r)+1 in the
   usual way, or it can occur from an mpf_set_prec_raw leaving a bigger
   sized value.  In both cases we can end up calling mpn_mul_1 with
   overlapping src and dst regions, but this will be with dst < src and
   such an overlap is permitted.

   Not done:

   No attempt is made to determine in advance whether the result will be
   PREC(r) or PREC(r)+1 limbs.  If it's going to be PREC(r)+1 then we could
   take one less limb from u and generate just PREC(r), which of course
   would still satisfy the application's requested precision.  But any test
   counting bits or forming the high product would almost certainly take
   longer than the incremental cost of an extra limb in mpn_mul_1.

   Enhancements:

   Repeated mpf_mul_ui's with an even v will accumulate low zero bits on
   the result, leaving low zero limbs after a while, which it might be nice
   to strip to save work in subsequent operations.  Calculating the low
   limb explicitly would let us direct mpn_mul_1 to put the balance at rp
   when the low is zero (instead of the normal rp+1).  But it's not clear
   whether this would be worthwhile.  Explicit code for the low limb will
   probably be slower than having it done in mpn_mul_1, so we need to
   consider how often a zero will be stripped and how much that's likely
   to save later.  */

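/* Example use from application code (an illustrative sketch against the
   public GMP interface; not part of this file):

       #include <gmp.h>

       mpf_t x;
       mpf_init2 (x, 128);        128-bit precision requested
       mpf_set_ui (x, 7);
       mpf_mul_ui (x, x, 6);      x = 42, computed exactly
       gmp_printf ("%Ff\n", x);
       mpf_clear (x);
*/
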
void
mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
{
  mp_srcptr up;
  mp_size_t usize;
  mp_size_t size;
  mp_size_t prec, excess;
  mp_limb_t cy_limb, vl, cbit, cin;
  mp_ptr rp;

  usize = u->_mp_size;
  if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))
    {
      r->_mp_size = 0;
      r->_mp_exp = 0;
      return;
    }

#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
  if (v > GMP_NUMB_MAX)
    {
      mpf_t vf;
      mp_limb_t vp[2];
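
      /* Split v into two limbs and treat them as a short-lived float:
         with SIZ = EXP = 2 the value of vf is the 2-limb integer
         vp[1]*2^GMP_NUMB_BITS + vp[0], ie. exactly v.  */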
      vp[0] = v & GMP_NUMB_MASK;
      vp[1] = v >> GMP_NUMB_BITS;
      PTR(vf) = vp;
      SIZ(vf) = 2;
      ASSERT_CODE (PREC(vf) = 2);
      EXP(vf) = 2;
      mpf_mul (r, u, vf);
      return;
    }
#endif

  size = ABS (usize);
  prec = r->_mp_prec;
  up = u->_mp_d;
  vl = v;
  excess = size - prec;
  cin = 0;

  if (excess > 0)
    {
      /* up is bigger than desired rp, shorten it to prec limbs and
         determine a carry-in */

      mp_limb_t vl_shifted = vl << GMP_NAIL_BITS;
      mp_limb_t hi, lo, next_lo, sum;
      mp_size_t i;

      /* high limb of top product */
      i = excess - 1;
      umul_ppmm (cin, lo, up[i], vl_shifted);

      /* and carry bit out of products below that, if any */
      for (;;)
        {
          i--;
          if (i < 0)
            break;

          umul_ppmm (hi, next_lo, up[i], vl_shifted);
          lo >>= GMP_NAIL_BITS;
          ADDC_LIMB (cbit, sum, hi, lo);
          cin += cbit;
          lo = next_lo;

          /* Continue only if the sum is GMP_NUMB_MAX.  GMP_NUMB_MAX is the
             only value a carry from below can propagate across.  If we've
             just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX,
             so this test stops us for that case too.  */
          if (LIKELY (sum != GMP_NUMB_MAX))
            break;
        }

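      /* Keep only the high prec limbs of u; cin now accounts for the
         discarded low limbs.  */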
      up += excess;
      size = prec;
    }

  rp = r->_mp_d;
#if HAVE_NATIVE_mpn_mul_1c
  cy_limb = mpn_mul_1c (rp, up, size, vl, cin);
#else
  cy_limb = mpn_mul_1 (rp, up, size, vl);
  __GMPN_ADD_1 (cbit, rp, rp, size, cin);
  cy_limb += cbit;
#endif
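  /* Store the possible high limb of the product; if it's non-zero the
     result is size+1 limbs and the exponent goes up by one.  */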
  rp[size] = cy_limb;
  cy_limb = cy_limb != 0;
  r->_mp_exp = u->_mp_exp + cy_limb;
  size += cy_limb;
  r->_mp_size = usize >= 0 ? size : -size;
}