Austin Schuh | bb1338c | 2024-06-15 19:31:16 -0700 | [diff] [blame] | 1 | /* An example of extending the speed program to measure routines not in GMP. |
| 2 | |
| 3 | Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU MP Library. |
| 6 | |
| 7 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | it under the terms of either: |
| 9 | |
| 10 | * the GNU Lesser General Public License as published by the Free |
| 11 | Software Foundation; either version 3 of the License, or (at your |
| 12 | option) any later version. |
| 13 | |
| 14 | or |
| 15 | |
| 16 | * the GNU General Public License as published by the Free Software |
| 17 | Foundation; either version 2 of the License, or (at your option) any |
| 18 | later version. |
| 19 | |
| 20 | or both in parallel, as here. |
| 21 | |
| 22 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | for more details. |
| 26 | |
| 27 | You should have received copies of the GNU General Public License and the |
| 28 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | see https://www.gnu.org/licenses/. */ |
| 30 | |
| 31 | |
| 32 | /* The extension here is three versions of an mpn arithmetic mean. These |
| 33 | aren't meant to be particularly useful, just examples. |
| 34 | |
| 35 | You can run something like the following to compare their speeds. |
| 36 | |
| 37 | ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2 |
| 38 | |
| 39 | On RISC chips, mean_open() might be fastest if the compiler is doing a |
| 40 | good job. On the register starved x86s, mean_calls will be fastest. |
| 41 | |
| 42 | |
| 43 | Notes: |
| 44 | |
| 45 | SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded |
| 46 | by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header |
| 47 | files, and SPEED_EXTRA_ROUTINES goes in the array of available routines. |
| 48 | |
| 49 | The advantage of this #include "speed.c" scheme is that there's no |
| 50 | editing of a copy of that file, and new features in new versions of it |
| 51 | will be immediately available. |
| 52 | |
| 53 | In a real program the routines mean_calls() etc would probably be in |
| 54 | separate C or assembler source files, and just the measuring |
| 55 | speed_mean_calls() etc would be here. Linking against other libraries |
| 56 | for things to measure is perfectly possible too. |
| 57 | |
| 58 | When attempting to compare two versions of the same named routine, say |
| 59 | like the generic and assembler versions of mpn_add_n(), creative use of |
| 60 | cc -D or #define is suggested, so one or both can be renamed and linked |
| 61 | into the same program. It'll be much easier to compare them side by side |
| 62 | than with separate programs for each. |
| 63 | |
| 64 | common.c has notes on writing speed measuring routines. |
| 65 | |
| 66 | Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if |
| 67 | not using libtool) to get common.o and other objects needed by speed.c. */ |
| 68 | |
| 69 | |
| 70 | #define SPEED_EXTRA_PROTOS \ |
| 71 | double speed_mean_calls (struct speed_params *s); \ |
| 72 | double speed_mean_open (struct speed_params *s); \ |
| 73 | double speed_mean_open2 (struct speed_params *s); |
| 74 | |
| 75 | #define SPEED_EXTRA_ROUTINES \ |
| 76 | { "mean_calls", speed_mean_calls }, \ |
| 77 | { "mean_open", speed_mean_open }, \ |
| 78 | { "mean_open2", speed_mean_open2 }, |
| 79 | |
| 80 | #include "speed.c" |
| 81 | |
| 82 | |
| 83 | /* A straightforward implementation calling mpn subroutines. |
| 84 | |
| 85 | wp,size is set to (xp,size + yp,size) / 2. The return value is the |
| 86 | remainder from the division. The other versions are the same. */ |
| 87 | |
| 88 | mp_limb_t |
| 89 | mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) |
| 90 | { |
| 91 | mp_limb_t c, ret; |
| 92 | |
| 93 | ASSERT (size >= 1); |
| 94 | |
| 95 | c = mpn_add_n (wp, xp, yp, size); |
| 96 | ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1); |
| 97 | wp[size-1] |= (c << (GMP_LIMB_BITS-1)); |
| 98 | return ret; |
| 99 | } |
| 100 | |
| 101 | |
| 102 | /* An open-coded version, making one pass over the data. The right shift is |
| 103 | done as the added limbs are produced. The addition code follows |
| 104 | mpn/generic/add_n.c. */ |
| 105 | |
| 106 | mp_limb_t |
| 107 | mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) |
| 108 | { |
| 109 | mp_limb_t w, wprev, x, y, c, ret; |
| 110 | mp_size_t i; |
| 111 | |
| 112 | ASSERT (size >= 1); |
| 113 | |
| 114 | x = xp[0]; |
| 115 | y = yp[0]; |
| 116 | |
| 117 | wprev = x + y; |
| 118 | c = (wprev < x); |
| 119 | ret = (wprev & 1); |
| 120 | |
| 121 | #define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1))) |
| 122 | |
| 123 | for (i = 1; i < size; i++) |
| 124 | { |
| 125 | x = xp[i]; |
| 126 | y = yp[i]; |
| 127 | |
| 128 | w = x + c; |
| 129 | c = (w < x); |
| 130 | w += y; |
| 131 | c += (w < y); |
| 132 | |
| 133 | wp[i-1] = RSHIFT (w, wprev); |
| 134 | wprev = w; |
| 135 | } |
| 136 | |
| 137 | wp[i-1] = RSHIFT (c, wprev); |
| 138 | |
| 139 | return ret; |
| 140 | } |
| 141 | |
| 142 | |
| 143 | /* Another one-pass version, but right shifting the source limbs rather than |
| 144 | the result limbs. There's not much chance of this being better than the |
| 145 | above, but it's an alternative at least. */ |
| 146 | |
| 147 | mp_limb_t |
| 148 | mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) |
| 149 | { |
| 150 | mp_limb_t w, x, y, xnext, ynext, c, ret; |
| 151 | mp_size_t i; |
| 152 | |
| 153 | ASSERT (size >= 1); |
| 154 | |
| 155 | x = xp[0]; |
| 156 | y = yp[0]; |
| 157 | |
| 158 | /* ret is the low bit of x+y, c is the carry out of that low bit add */ |
| 159 | ret = (x ^ y) & 1; |
| 160 | c = (x & y) & 1; |
| 161 | |
| 162 | for (i = 0; i < size-1; i++) |
| 163 | { |
| 164 | xnext = xp[i+1]; |
| 165 | ynext = yp[i+1]; |
| 166 | x = RSHIFT (xnext, x); |
| 167 | y = RSHIFT (ynext, y); |
| 168 | |
| 169 | w = x + c; |
| 170 | c = (w < x); |
| 171 | w += y; |
| 172 | c += (w < y); |
| 173 | wp[i] = w; |
| 174 | |
| 175 | x = xnext; |
| 176 | y = ynext; |
| 177 | } |
| 178 | |
| 179 | wp[i] = (x >> 1) + (y >> 1) + c; |
| 180 | |
| 181 | return ret; |
| 182 | } |
| 183 | |
| 184 | |
| 185 | /* The speed measuring routines are the same apart from which function they |
| 186 | run, so a macro is used. Actually this macro is the same as |
| 187 | SPEED_ROUTINE_MPN_BINARY_N. */ |
| 188 | |
| 189 | #define SPEED_ROUTINE_MEAN(mean_fun) \ |
| 190 | { \ |
| 191 | unsigned i; \ |
| 192 | mp_ptr wp; \ |
| 193 | double t; \ |
| 194 | TMP_DECL; \ |
| 195 | \ |
| 196 | SPEED_RESTRICT_COND (s->size >= 1); \ |
| 197 | \ |
| 198 | TMP_MARK; \ |
| 199 | SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ |
| 200 | \ |
| 201 | speed_operand_src (s, s->xp, s->size); \ |
| 202 | speed_operand_src (s, s->yp, s->size); \ |
| 203 | speed_operand_dst (s, wp, s->size); \ |
| 204 | speed_cache_fill (s); \ |
| 205 | \ |
| 206 | speed_starttime (); \ |
| 207 | i = s->reps; \ |
| 208 | do \ |
| 209 | mean_fun (wp, s->xp, s->yp, s->size); \ |
| 210 | while (--i != 0); \ |
| 211 | t = speed_endtime (); \ |
| 212 | \ |
| 213 | TMP_FREE; \ |
| 214 | return t; \ |
| 215 | } |
| 216 | |
| 217 | double |
| 218 | speed_mean_calls (struct speed_params *s) |
| 219 | { |
| 220 | SPEED_ROUTINE_MEAN (mean_calls); |
| 221 | } |
| 222 | |
| 223 | double |
| 224 | speed_mean_open (struct speed_params *s) |
| 225 | { |
| 226 | SPEED_ROUTINE_MEAN (mean_open); |
| 227 | } |
| 228 | |
| 229 | double |
| 230 | speed_mean_open2 (struct speed_params *s) |
| 231 | { |
| 232 | SPEED_ROUTINE_MEAN (mean_open2); |
| 233 | } |