blob: e7fb8b9f60d98713a235736fcc03bb5c83c39dcc [file] [log] [blame]
Austin Schuhbb1338c2024-06-15 19:31:16 -07001/* An example of extending the speed program to measure routines not in GMP.
2
3Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library. If not,
29see https://www.gnu.org/licenses/. */
30
31
32/* The extension here is three versions of an mpn arithmetic mean. These
33 aren't meant to be particularly useful, just examples.
34
35 You can run something like the following to compare their speeds.
36
37 ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
38
39 On RISC chips, mean_open() might be fastest if the compiler is doing a
40 good job. On the register starved x86s, mean_calls will be fastest.
41
42
43 Notes:
44
45 SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
46 by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
47 files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
48
49 The advantage of this #include "speed.c" scheme is that there's no
50 editing of a copy of that file, and new features in new versions of it
51 will be immediately available.
52
53 In a real program the routines mean_calls() etc would probably be in
54 separate C or assembler source files, and just the measuring
55 speed_mean_calls() etc would be here. Linking against other libraries
56 for things to measure is perfectly possible too.
57
58 When attempting to compare two versions of the same named routine, say
59 like the generic and assembler versions of mpn_add_n(), creative use of
60 cc -D or #define is suggested, so one or both can be renamed and linked
61 into the same program. It'll be much easier to compare them side by side
62 than with separate programs for each.
63
64 common.c has notes on writing speed measuring routines.
65
66 Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
67 not using libtool) to get common.o and other objects needed by speed.c. */
68
69
70#define SPEED_EXTRA_PROTOS \
71 double speed_mean_calls (struct speed_params *s); \
72 double speed_mean_open (struct speed_params *s); \
73 double speed_mean_open2 (struct speed_params *s);
74
75#define SPEED_EXTRA_ROUTINES \
76 { "mean_calls", speed_mean_calls }, \
77 { "mean_open", speed_mean_open }, \
78 { "mean_open2", speed_mean_open2 },
79
80#include "speed.c"
81
82
83/* A straightforward implementation calling mpn subroutines.
84
85 wp,size is set to (xp,size + yp,size) / 2. The return value is the
86 remainder from the division. The other versions are the same. */
87
88mp_limb_t
89mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
90{
91 mp_limb_t c, ret;
92
93 ASSERT (size >= 1);
94
95 c = mpn_add_n (wp, xp, yp, size);
96 ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
97 wp[size-1] |= (c << (GMP_LIMB_BITS-1));
98 return ret;
99}
100
101
102/* An open-coded version, making one pass over the data. The right shift is
103 done as the added limbs are produced. The addition code follows
104 mpn/generic/add_n.c. */
105
106mp_limb_t
107mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
108{
109 mp_limb_t w, wprev, x, y, c, ret;
110 mp_size_t i;
111
112 ASSERT (size >= 1);
113
114 x = xp[0];
115 y = yp[0];
116
117 wprev = x + y;
118 c = (wprev < x);
119 ret = (wprev & 1);
120
121#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
122
123 for (i = 1; i < size; i++)
124 {
125 x = xp[i];
126 y = yp[i];
127
128 w = x + c;
129 c = (w < x);
130 w += y;
131 c += (w < y);
132
133 wp[i-1] = RSHIFT (w, wprev);
134 wprev = w;
135 }
136
137 wp[i-1] = RSHIFT (c, wprev);
138
139 return ret;
140}
141
142
143/* Another one-pass version, but right shifting the source limbs rather than
144 the result limbs. There's not much chance of this being better than the
145 above, but it's an alternative at least. */
146
147mp_limb_t
148mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
149{
150 mp_limb_t w, x, y, xnext, ynext, c, ret;
151 mp_size_t i;
152
153 ASSERT (size >= 1);
154
155 x = xp[0];
156 y = yp[0];
157
158 /* ret is the low bit of x+y, c is the carry out of that low bit add */
159 ret = (x ^ y) & 1;
160 c = (x & y) & 1;
161
162 for (i = 0; i < size-1; i++)
163 {
164 xnext = xp[i+1];
165 ynext = yp[i+1];
166 x = RSHIFT (xnext, x);
167 y = RSHIFT (ynext, y);
168
169 w = x + c;
170 c = (w < x);
171 w += y;
172 c += (w < y);
173 wp[i] = w;
174
175 x = xnext;
176 y = ynext;
177 }
178
179 wp[i] = (x >> 1) + (y >> 1) + c;
180
181 return ret;
182}
183
184
185/* The speed measuring routines are the same apart from which function they
186 run, so a macro is used. Actually this macro is the same as
187 SPEED_ROUTINE_MPN_BINARY_N. */
188
189#define SPEED_ROUTINE_MEAN(mean_fun) \
190 { \
191 unsigned i; \
192 mp_ptr wp; \
193 double t; \
194 TMP_DECL; \
195 \
196 SPEED_RESTRICT_COND (s->size >= 1); \
197 \
198 TMP_MARK; \
199 SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
200 \
201 speed_operand_src (s, s->xp, s->size); \
202 speed_operand_src (s, s->yp, s->size); \
203 speed_operand_dst (s, wp, s->size); \
204 speed_cache_fill (s); \
205 \
206 speed_starttime (); \
207 i = s->reps; \
208 do \
209 mean_fun (wp, s->xp, s->yp, s->size); \
210 while (--i != 0); \
211 t = speed_endtime (); \
212 \
213 TMP_FREE; \
214 return t; \
215 }
216
217double
218speed_mean_calls (struct speed_params *s)
219{
220 SPEED_ROUTINE_MEAN (mean_calls);
221}
222
223double
224speed_mean_open (struct speed_params *s)
225{
226 SPEED_ROUTINE_MEAN (mean_open);
227}
228
229double
230speed_mean_open2 (struct speed_params *s)
231{
232 SPEED_ROUTINE_MEAN (mean_open2);
233}