blob: 1708e4ca1968c55cd7704fc5437879272f1775ae [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* Speed measuring program.
2
3Copyright 1999-2003, 2005, 2006, 2008-2019 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library. If not,
29see https://www.gnu.org/licenses/. */
30
31/* Usage message is in the code below, run with no arguments to print it.
32 See README for interesting applications.
33
34 To add a new routine foo(), create a speed_foo() function in the style of
35 the existing ones and add an entry in the routine[] array. Put FLAG_R if
36 speed_foo() wants an "r" parameter.
37
38 The routines don't have help messages or descriptions, but most have
39 suggestive names. See the source code for full details.
40
41*/
42
43#include "config.h"
44
45#include <limits.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49
50#if HAVE_UNISTD_H
51#include <unistd.h> /* for getpid, R_OK */
52#endif
53
54#if TIME_WITH_SYS_TIME
55# include <sys/time.h> /* for struct timeval */
56# include <time.h>
57#else
58# if HAVE_SYS_TIME_H
59# include <sys/time.h>
60# else
61# include <time.h>
62# endif
63#endif
64
65#if HAVE_SYS_RESOURCE_H
66#include <sys/resource.h> /* for getrusage() */
67#endif
68
69
70#include "gmp-impl.h"
71#include "longlong.h" /* for the benefit of speed-many.c */
72#include "tests.h"
73#include "speed.h"
74
75
76#if !HAVE_DECL_OPTARG
77extern char *optarg;
78extern int optind, opterr;
79#endif
80
81#if !HAVE_STRTOUL
82#define strtoul(p,e,b) (unsigned long) strtol(p,e,b)
83#endif
84
85#ifdef SPEED_EXTRA_PROTOS
86SPEED_EXTRA_PROTOS
87#endif
88#ifdef SPEED_EXTRA_PROTOS2
89SPEED_EXTRA_PROTOS2
90#endif
91
92
/* GMP_NUMB_0xAA is a limb carrying the repeating bit pattern 1010...10,
   restricted by GMP_NUMB_MASK.  It is only defined for 32-bit and 64-bit
   limbs.  */
#if GMP_LIMB_BITS == 32
#  define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
#elif GMP_LIMB_BITS == 64
#  define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
#endif
99
100
101#define CMP_ABSOLUTE 1
102#define CMP_RATIO 2
103#define CMP_DIFFERENCE 3
104#define CMP_DIFFPREV 4
105int option_cmp = CMP_ABSOLUTE;
106
107#define UNIT_SECONDS 1
108#define UNIT_CYCLES 2
109#define UNIT_CYCLESPERLIMB 3
110int option_unit = UNIT_SECONDS;
111
112#define DATA_RANDOM 1
113#define DATA_RANDOM2 2
114#define DATA_ZEROS 3
115#define DATA_AAS 4
116#define DATA_FFS 5
117#define DATA_2FD 6
118int option_data = DATA_RANDOM;
119
120int option_square = 0;
121double option_factor = 0.0;
122mp_size_t option_step = 1;
123int option_gnuplot = 0;
124char *option_gnuplot_basename;
125struct size_array_t {
126 mp_size_t start, end;
127} *size_array = NULL;
128mp_size_t size_num = 0;
129mp_size_t size_allocnum = 0;
130int option_resource_usage = 0;
131long option_seed = 123456789;
132
133struct speed_params sp;
134
135#define COLUMN_WIDTH 13 /* for the free-form output */
136
137#define FLAG_R (1<<0) /* require ".r" */
138#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */
139#define FLAG_RSIZE (1<<2)
140#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */
141
142const struct routine_t {
143 /* constants */
144 const char *name;
145 speed_function_t fun;
146 int flag;
147} routine[] = {
148
149 { "noop", speed_noop },
150 { "noop_wxs", speed_noop_wxs },
151 { "noop_wxys", speed_noop_wxys },
152
153 { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL },
154 { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL },
155 { "mpn_add_1", speed_mpn_add_1, FLAG_R },
156 { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
157 { "mpn_sub_1", speed_mpn_sub_1, FLAG_R },
158 { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },
159
160 { "mpn_add_err1_n", speed_mpn_add_err1_n },
161 { "mpn_add_err2_n", speed_mpn_add_err2_n },
162 { "mpn_add_err3_n", speed_mpn_add_err3_n },
163 { "mpn_sub_err1_n", speed_mpn_sub_err1_n },
164 { "mpn_sub_err2_n", speed_mpn_sub_err2_n },
165 { "mpn_sub_err3_n", speed_mpn_sub_err3_n },
166
167#if HAVE_NATIVE_mpn_add_n_sub_n
168 { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL },
169#endif
170
171 { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
172 { "mpn_submul_1", speed_mpn_submul_1, FLAG_R },
173#if HAVE_NATIVE_mpn_addmul_2
174 { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL },
175#endif
176#if HAVE_NATIVE_mpn_addmul_3
177 { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL },
178#endif
179#if HAVE_NATIVE_mpn_addmul_4
180 { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL },
181#endif
182#if HAVE_NATIVE_mpn_addmul_5
183 { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL },
184#endif
185#if HAVE_NATIVE_mpn_addmul_6
186 { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL },
187#endif
188#if HAVE_NATIVE_mpn_addmul_7
189 { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL },
190#endif
191#if HAVE_NATIVE_mpn_addmul_8
192 { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL },
193#endif
194 { "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
195 { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
196#if HAVE_NATIVE_mpn_mul_2
197 { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL },
198#endif
199#if HAVE_NATIVE_mpn_mul_3
200 { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL },
201#endif
202#if HAVE_NATIVE_mpn_mul_4
203 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL },
204#endif
205#if HAVE_NATIVE_mpn_mul_5
206 { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL },
207#endif
208#if HAVE_NATIVE_mpn_mul_6
209 { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL },
210#endif
211
212 { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
213 { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
214#if HAVE_NATIVE_mpn_divrem_1c
215 { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
216 { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
217#endif
218 { "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
219#if HAVE_NATIVE_mpn_mod_1c
220 { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
221#endif
222 { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R },
223 { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
224 { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R },
225
226 { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R },
227 { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R },
228 { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R },
229 { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R },
230 { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R },
231 { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R },
232
233 { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R },
234 { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R },
235 { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
236 { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
237 { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R },
238 { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R },
239
240 { "mpn_divrem_2", speed_mpn_divrem_2, },
241 { "mpn_divrem_2_div", speed_mpn_divrem_2_div, },
242 { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, },
243
244 { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R },
245 { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R },
246 { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R },
247 { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R },
248
249 { "mpn_div_qr_2n", speed_mpn_div_qr_2n, },
250 { "mpn_div_qr_2u", speed_mpn_div_qr_2u, },
251
252 { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R },
253 { "mpn_divexact_by3", speed_mpn_divexact_by3 },
254
255 { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R },
256 { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL },
257 { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL },
258
259#if HAVE_NATIVE_mpn_modexact_1_odd
260 { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R },
261#endif
262 { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
263
264#if GMP_NUMB_BITS % 4 == 0
265 { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 },
266#endif
267
268 { "mpn_lshift", speed_mpn_lshift, FLAG_R },
269 { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R },
270 { "mpn_rshift", speed_mpn_rshift, FLAG_R },
271
272 { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL },
273 { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL },
274 { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL },
275 { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL },
276 { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL },
277 { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL },
278 { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL },
279 { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL },
280 { "mpn_com", speed_mpn_com },
281 { "mpn_neg", speed_mpn_neg },
282
283 { "mpn_popcount", speed_mpn_popcount },
284 { "mpn_hamdist", speed_mpn_hamdist },
285
286 { "mpn_matrix22_mul", speed_mpn_matrix22_mul },
287
288 { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA },
289 { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA },
290 { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA },
291 { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA },
292 { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA },
293 { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA },
294 { "mpn_hgcd", speed_mpn_hgcd },
295 { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
296 { "mpn_hgcd_appr", speed_mpn_hgcd_appr },
297 { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
298
299 { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
300 { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
301 { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
302
303 { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
304 { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL },
305 { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
306 { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL },
307
308 { "mpn_gcd", speed_mpn_gcd },
309
310 { "mpn_gcdext", speed_mpn_gcdext },
311 { "mpn_gcdext_single", speed_mpn_gcdext_single },
312 { "mpn_gcdext_double", speed_mpn_gcdext_double },
313 { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
314 { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
315#if 0
316 { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer },
317#endif
318
319 { "mpz_nextprime", speed_mpz_nextprime },
320
321 { "mpz_jacobi", speed_mpz_jacobi },
322 { "mpn_jacobi_base", speed_mpn_jacobi_base },
323 { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 },
324 { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 },
325 { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
326 { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
327
328 { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
329 { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
330 { "mpn_sqr_basecase", speed_mpn_sqr_basecase },
331#if HAVE_NATIVE_mpn_sqr_diagonal
332 { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
333#endif
334#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
335 { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
336#endif
337
338 { "mpn_mul_n", speed_mpn_mul_n },
339 { "mpn_sqr", speed_mpn_sqr },
340
341 { "mpn_toom2_sqr", speed_mpn_toom2_sqr },
342 { "mpn_toom3_sqr", speed_mpn_toom3_sqr },
343 { "mpn_toom4_sqr", speed_mpn_toom4_sqr },
344 { "mpn_toom6_sqr", speed_mpn_toom6_sqr },
345 { "mpn_toom8_sqr", speed_mpn_toom8_sqr },
346 { "mpn_toom22_mul", speed_mpn_toom22_mul },
347 { "mpn_toom33_mul", speed_mpn_toom33_mul },
348 { "mpn_toom44_mul", speed_mpn_toom44_mul },
349 { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
350 { "mpn_toom8h_mul", speed_mpn_toom8h_mul },
351 { "mpn_toom32_mul", speed_mpn_toom32_mul },
352 { "mpn_toom42_mul", speed_mpn_toom42_mul },
353 { "mpn_toom43_mul", speed_mpn_toom43_mul },
354 { "mpn_toom63_mul", speed_mpn_toom63_mul },
355 { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
356 { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
357#if WANT_OLD_FFT_FULL
358 { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
359 { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
360#endif
361 { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
362 { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
363
364 { "mpn_sqrlo", speed_mpn_sqrlo },
365 { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase },
366 { "mpn_mullo_n", speed_mpn_mullo_n },
367 { "mpn_mullo_basecase", speed_mpn_mullo_basecase },
368
369 { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
370 { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid },
371 { "mpn_mulmid_n", speed_mpn_mulmid_n },
372 { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL },
373
374 { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 },
375 { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 },
376 { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
377 { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 },
378
379 { "mpn_invert", speed_mpn_invert },
380 { "mpn_invertappr", speed_mpn_invertappr },
381 { "mpn_ni_invertappr", speed_mpn_ni_invertappr },
382 { "mpn_binvert", speed_mpn_binvert },
383 { "mpn_sec_invert", speed_mpn_sec_invert },
384
385 { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL},
386 { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL},
387 { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL},
388 { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL},
389 { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
390 { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
391
392 { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr },
393 { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr },
394 { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
395 { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
396 { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r },
397
398 { "mpn_broot", speed_mpn_broot, FLAG_R },
399 { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
400 { "mpn_brootinv", speed_mpn_brootinv, FLAG_R },
401
402 { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
403 { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
404 { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
405
406 { "mpn_sqrtrem", speed_mpn_sqrtrem },
407 { "mpn_rootrem", speed_mpn_rootrem, FLAG_R },
408 { "mpn_sqrt", speed_mpn_sqrt },
409 { "mpn_root", speed_mpn_root, FLAG_R },
410
411 { "mpn_perfect_power_p", speed_mpn_perfect_power_p, },
412 { "mpn_perfect_square_p", speed_mpn_perfect_square_p, },
413
414 { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA },
415 { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA },
416 { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA },
417 { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA },
418 { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA },
419
420 { "mpz_add", speed_mpz_add },
421 { "mpz_invert", speed_mpz_invert, FLAG_R_OPTIONAL },
422 { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
423 { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL },
424 { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA },
425 { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA },
426 { "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
427 { "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA },
428 { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL },
429 { "mpz_powm_mod", speed_mpz_powm_mod },
430 { "mpz_powm_redc", speed_mpz_powm_redc },
431 { "mpz_powm_sec", speed_mpz_powm_sec },
432 { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL },
433
434 { "mpz_mod", speed_mpz_mod },
435 { "mpn_redc_1", speed_mpn_redc_1 },
436 { "mpn_redc_2", speed_mpn_redc_2 },
437 { "mpn_redc_n", speed_mpn_redc_n },
438
439 { "MPN_COPY", speed_MPN_COPY },
440 { "MPN_COPY_INCR", speed_MPN_COPY_INCR },
441 { "MPN_COPY_DECR", speed_MPN_COPY_DECR },
442 { "memcpy", speed_memcpy },
443#if HAVE_NATIVE_mpn_copyi
444 { "mpn_copyi", speed_mpn_copyi },
445#endif
446#if HAVE_NATIVE_mpn_copyd
447 { "mpn_copyd", speed_mpn_copyd },
448#endif
449 { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
450#if HAVE_NATIVE_mpn_addlsh1_n == 1
451 { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
452#endif
453#if HAVE_NATIVE_mpn_sublsh1_n == 1
454 { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
455#endif
456#if HAVE_NATIVE_mpn_addlsh1_n_ip1
457 { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 },
458#endif
459#if HAVE_NATIVE_mpn_addlsh1_n_ip2
460 { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 },
461#endif
462#if HAVE_NATIVE_mpn_sublsh1_n_ip1
463 { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 },
464#endif
465#if HAVE_NATIVE_mpn_rsblsh1_n == 1
466 { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
467#endif
468#if HAVE_NATIVE_mpn_addlsh2_n == 1
469 { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
470#endif
471#if HAVE_NATIVE_mpn_sublsh2_n == 1
472 { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
473#endif
474#if HAVE_NATIVE_mpn_addlsh2_n_ip1
475 { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 },
476#endif
477#if HAVE_NATIVE_mpn_addlsh2_n_ip2
478 { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 },
479#endif
480#if HAVE_NATIVE_mpn_sublsh2_n_ip1
481 { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 },
482#endif
483#if HAVE_NATIVE_mpn_rsblsh2_n == 1
484 { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
485#endif
486#if HAVE_NATIVE_mpn_addlsh_n
487 { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
488#endif
489#if HAVE_NATIVE_mpn_sublsh_n
490 { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
491#endif
492#if HAVE_NATIVE_mpn_addlsh_n_ip1
493 { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 },
494#endif
495#if HAVE_NATIVE_mpn_addlsh_n_ip2
496 { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 },
497#endif
498#if HAVE_NATIVE_mpn_sublsh_n_ip1
499 { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 },
500#endif
501#if HAVE_NATIVE_mpn_rsblsh_n
502 { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
503#endif
504#if HAVE_NATIVE_mpn_rsh1add_n
505 { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
506#endif
507#if HAVE_NATIVE_mpn_rsh1sub_n
508 { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
509#endif
510
511 { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
512 { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
513
514 { "MPN_ZERO", speed_MPN_ZERO },
515
516 { "binvert_limb", speed_binvert_limb, FLAG_NODATA },
517 { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA },
518 { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA },
519 { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA },
520 { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
521
522 { "malloc_free", speed_malloc_free },
523 { "malloc_realloc_free", speed_malloc_realloc_free },
524 { "gmp_allocate_free", speed_gmp_allocate_free },
525 { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
526 { "mpz_init_clear", speed_mpz_init_clear },
527 { "mpq_init_clear", speed_mpq_init_clear },
528 { "mpf_init_clear", speed_mpf_init_clear },
529 { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear },
530
531 { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL },
532#if HAVE_NATIVE_mpn_umul_ppmm
533 { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
534#endif
535#if HAVE_NATIVE_mpn_umul_ppmm_r
536 { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
537#endif
538
539 { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
540 { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
541
542 { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
543 { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL },
544#if HAVE_NATIVE_mpn_udiv_qrnnd
545 { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
546#endif
547#if HAVE_NATIVE_mpn_udiv_qrnnd_r
548 { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL },
549#endif
550 { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL },
551
552 { "operator_div", speed_operator_div, FLAG_R_OPTIONAL },
553 { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL },
554
555 { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL },
556 { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
557 { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA },
558
559#ifdef SPEED_EXTRA_ROUTINES
560 SPEED_EXTRA_ROUTINES
561#endif
562#ifdef SPEED_EXTRA_ROUTINES2
563 SPEED_EXTRA_ROUTINES2
564#endif
565};
566
567
568struct choice_t {
569 const struct routine_t *p;
570 mp_limb_t r;
571 double scale;
572 double time;
573 int no_time;
574 double prev_time;
575 const char *name;
576};
577struct choice_t *choice;
578int num_choices = 0;
579
580
581void
582data_fill (mp_ptr ptr, mp_size_t size)
583{
584 switch (option_data) {
585 case DATA_RANDOM:
586 mpn_random (ptr, size);
587 break;
588 case DATA_RANDOM2:
589 mpn_random2 (ptr, size);
590 break;
591 case DATA_ZEROS:
592 MPN_ZERO (ptr, size);
593 break;
594 case DATA_AAS:
595 MPN_FILL (ptr, size, GMP_NUMB_0xAA);
596 break;
597 case DATA_FFS:
598 MPN_FILL (ptr, size, GMP_NUMB_MAX);
599 break;
600 case DATA_2FD:
601 MPN_FILL (ptr, size, GMP_NUMB_MAX);
602 ptr[0] -= 2;
603 break;
604 default:
605 abort();
606 /*NOTREACHED*/
607 }
608}
609
610/* The code here handling the various combinations of output options isn't
611 too attractive, but it works and is fairly clean. */
612
613#define SIZE_TO_DIVISOR(n) \
614 (option_square == 1 ? (n)*(n) \
615 : option_square == 2 ? (n)*((n)+1)/2 \
616 : (n))
617
618void
619run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
620{
621 const char *first_open_fastest, *first_open_notfastest, *first_close;
622 int i, fastest, want_data;
623 double fastest_time;
624 TMP_DECL;
625
626 TMP_MARK;
627
628 /* allocate data, unless all routines are NODATA */
629 want_data = 0;
630 for (i = 0; i < num_choices; i++)
631 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
632
633 if (want_data)
634 {
635 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
636 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
637
638 data_fill (s->xp, s->size);
639 data_fill (s->yp, s->size);
640 }
641 else
642 {
643 sp.xp = NULL;
644 sp.yp = NULL;
645 }
646
647 if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
648 {
649 first_open_fastest = "(#";
650 first_open_notfastest = " (";
651 first_close = ")";
652 }
653 else
654 {
655 first_open_fastest = "#";
656 first_open_notfastest = " ";
657 first_close = "";
658 }
659
660 fastest = -1;
661 fastest_time = -1.0;
662 for (i = 0; i < num_choices; i++)
663 {
664 s->r = choice[i].r;
665 choice[i].time = speed_measure (choice[i].p->fun, s);
666 choice[i].no_time = (choice[i].time == -1.0);
667 if (! choice[i].no_time)
668 choice[i].time *= choice[i].scale;
669
670 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
671 is before any differences. */
672 {
673 double t;
674 t = choice[i].time;
675 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
676 {
677 if (choice[i].prev_time == -1.0)
678 choice[i].no_time = 1;
679 else
680 choice[i].time = choice[i].time - choice[i].prev_time;
681 }
682 choice[i].prev_time = t;
683 }
684
685 if (choice[i].no_time)
686 continue;
687
688 /* Look for the fastest after CMP_DIFFPREV has been applied, but
689 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown
690 if there's more than one routine. */
691 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
692 {
693 fastest = i;
694 fastest_time = choice[i].time;
695 }
696
697 if (option_cmp == CMP_DIFFPREV)
698 {
699 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
700 if (option_unit == UNIT_CYCLES)
701 choice[i].time /= speed_cycletime;
702 else if (option_unit == UNIT_CYCLESPERLIMB)
703 {
704 if (prev_size == -1)
705 choice[i].time /= speed_cycletime;
706 else
707 choice[i].time /= (speed_cycletime
708 * (SIZE_TO_DIVISOR(s->size)
709 - SIZE_TO_DIVISOR(prev_size)));
710 }
711 }
712 else
713 {
714 if (option_unit == UNIT_CYCLES)
715 choice[i].time /= speed_cycletime;
716 else if (option_unit == UNIT_CYCLESPERLIMB)
717 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
718
719 if (option_cmp == CMP_RATIO && i > 0)
720 {
721 /* A ratio isn't affected by the units chosen. */
722 if (choice[0].no_time || choice[0].time == 0.0)
723 choice[i].no_time = 1;
724 else
725 choice[i].time /= choice[0].time;
726 }
727 else if (option_cmp == CMP_DIFFERENCE && i > 0)
728 {
729 if (choice[0].no_time)
730 {
731 choice[i].no_time = 1;
732 continue;
733 }
734 choice[i].time -= choice[0].time;
735 }
736 }
737 }
738
739 if (option_gnuplot)
740 {
741 /* In CMP_DIFFPREV, don't print anything for the first size, start
742 with the second where an actual difference is available.
743
744 In CMP_RATIO, print the first column as 1.0.
745
746 The 9 decimals printed is much more than the expected precision of
747 the measurements actually. */
748
749 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
750 {
751 fprintf (fp, "%-6ld ", s->size);
752 for (i = 0; i < num_choices; i++)
753 fprintf (fp, " %.9e",
754 choice[i].no_time ? 0.0
755 : (option_cmp == CMP_RATIO && i == 0) ? 1.0
756 : choice[i].time);
757 fprintf (fp, "\n");
758 }
759 }
760 else
761 {
762 fprintf (fp, "%-6ld ", s->size);
763 for (i = 0; i < num_choices; i++)
764 {
765 char buf[128];
766 int decimals;
767
768 if (choice[i].no_time)
769 {
770 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
771 }
772 else
773 {if (option_unit == UNIT_CYCLESPERLIMB
774 || (option_cmp == CMP_RATIO && i > 0))
775 decimals = 4;
776 else if (option_unit == UNIT_CYCLES)
777 decimals = 2;
778 else
779 decimals = 9;
780
781 sprintf (buf, "%s%.*f%s",
782 i == fastest ? first_open_fastest : first_open_notfastest,
783 decimals, choice[i].time, first_close);
784 fprintf (fp, " %*s", COLUMN_WIDTH, buf);
785 }
786 }
787 fprintf (fp, "\n");
788 }
789
790 TMP_FREE;
791}
792
793void
794run_all (FILE *fp)
795{
796 mp_size_t prev_size;
797 int i;
798 TMP_DECL;
799
800 TMP_MARK;
801 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
802 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
803
804 data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
805 data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
806
807 for (i = 0; i < size_num; i++)
808 {
809 sp.size = size_array[i].start;
810 prev_size = -1;
811 for (;;)
812 {
813 mp_size_t step;
814
815 if (option_data == DATA_2FD && sp.size >= 2)
816 sp.xp[sp.size-1] = 2;
817
818 run_one (fp, &sp, prev_size);
819 prev_size = sp.size;
820
821 if (option_data == DATA_2FD && sp.size >= 2)
822 sp.xp[sp.size-1] = MP_LIMB_T_MAX;
823
824 if (option_factor != 0.0)
825 {
826 step = (mp_size_t) (sp.size * option_factor - sp.size);
827 if (step < 1)
828 step = 1;
829 }
830 else
831 step = 1;
832 if (step < option_step)
833 step = option_step;
834
835 sp.size += step;
836 if (sp.size > size_array[i].end)
837 break;
838 }
839 }
840
841 TMP_FREE;
842}
843
844
/* Open filename for writing and return the stream.  On failure a message
   is printed to stderr and the program exits with status 1, so the result
   is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream == NULL)
    {
      fprintf (stderr, "Cannot create %s\n", filename);
      exit(1);
    }

  return stream;
}
856
/* Close fp, a stream that was written to filename.  If the stream has its
   error indicator set, or the close itself fails, a message is printed to
   stderr and the program exits with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* The error indicator has to be sampled before the stream is closed.
     The close is attempted in either case.  */
  int  write_failed = (ferror (fp) != 0);
  int  close_failed = (fclose (fp) != 0);

  if (write_failed || close_failed)
    {
      fprintf (stderr, "Error writing %s\n", filename);
      exit(1);
    }
}
871
872
873void
874run_gnuplot (int argc, char *argv[])
875{
876 char *plot_filename;
877 char *data_filename;
878 FILE *fp;
879 int i;
880
881 plot_filename = (char *) (*__gmp_allocate_func)
882 (strlen (option_gnuplot_basename) + 20);
883 data_filename = (char *) (*__gmp_allocate_func)
884 (strlen (option_gnuplot_basename) + 20);
885
886 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
887 sprintf (data_filename, "%s.data", option_gnuplot_basename);
888
889 fp = fopen_for_write (plot_filename);
890
891 fprintf (fp, "# Generated with:\n");
892 fprintf (fp, "#");
893 for (i = 0; i < argc; i++)
894 fprintf (fp, " %s", argv[i]);
895 fprintf (fp, "\n");
896 fprintf (fp, "\n");
897
898 fprintf (fp, "reset\n");
899
900 /* Putting the key at the top left is usually good, and you can change it
901 interactively if it's not. */
902 fprintf (fp, "set key left\n");
903
904 /* write underscores, not subscripts */
905 fprintf (fp, "set termoption noenhanced\n");
906
907 /* designed to make it possible to see crossovers easily */
908 fprintf (fp, "set style data lines\n");
909
910 fprintf (fp, "plot ");
911 for (i = 0; i < num_choices; i++)
912 {
913 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
914 fprintf (fp, " title \"%s\"", choice[i].name);
915
916 if (i != num_choices-1)
917 fprintf (fp, ", \\");
918 fprintf (fp, "\n");
919 }
920
921 fprintf (fp, "load \"-\"\n");
922 fclose_written (fp, plot_filename);
923
924 fp = fopen_for_write (data_filename);
925
926 /* Unbuffered so you can see where the program was up to if it crashes or
927 you kill it. */
928 setbuf (fp, NULL);
929
930 run_all (fp);
931 fclose_written (fp, data_filename);
932}
933
934
935/* Return a limb with n many one bits (starting from the least significant) */
936
937#define LIMB_ONES(n) \
938 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \
939 : (n) == 0 ? CNST_LIMB(0) \
940 : (CNST_LIMB(1) << (n)) - 1)
941
942mp_limb_t
943r_string (const char *s)
944{
945 const char *s_orig = s;
946 long n;
947
948 if (strcmp (s, "aas") == 0)
949 return GMP_NUMB_0xAA;
950
951 {
952 mpz_t z;
953 mp_limb_t l;
954 int set, siz;
955
956 mpz_init (z);
957 set = mpz_set_str (z, s, 0);
958 siz = SIZ(z);
959 l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
960 mpz_clear (z);
961 if (set == 0)
962 {
963 if (siz > 1 || siz < -1)
964 printf ("Warning, r parameter %s truncated to %d bits\n",
965 s_orig, GMP_LIMB_BITS);
966 return l;
967 }
968 }
969
970 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
971 n = strtoul (s+2, (char **) &s, 16);
972 else
973 n = strtol (s, (char **) &s, 10);
974
975 if (strcmp (s, "bits") == 0)
976 {
977 mp_limb_t l;
978 if (n > GMP_LIMB_BITS)
979 {
980 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
981 n, GMP_LIMB_BITS);
982 exit (1);
983 }
984 mpn_random (&l, 1);
985 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
986 }
987 else if (strcmp (s, "ones") == 0)
988 {
989 if (n > GMP_LIMB_BITS)
990 {
991 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
992 n, GMP_LIMB_BITS);
993 exit (1);
994 }
995 return LIMB_ONES (n);
996 }
997 else if (*s != '\0')
998 {
999 fprintf (stderr, "invalid r parameter: %s\n", s_orig);
1000 exit (1);
1001 }
1002
1003 return n;
1004}
1005
1006
1007void
1008routine_find (struct choice_t *c, const char *s_orig)
1009{
1010 const char *s;
1011 int i;
1012 size_t nlen;
1013
1014 c->name = s_orig;
1015 s = strchr (s_orig, '*');
1016 if (s != NULL)
1017 {
1018 c->scale = atof(s_orig);
1019 s++;
1020 }
1021 else
1022 {
1023 c->scale = 1.0;
1024 s = s_orig;
1025 }
1026
1027 for (i = 0; i < numberof (routine); i++)
1028 {
1029 nlen = strlen (routine[i].name);
1030 if (memcmp (s, routine[i].name, nlen) != 0)
1031 continue;
1032
1033 if (s[nlen] == '.')
1034 {
1035 /* match, with a .r parameter */
1036
1037 if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
1038 {
1039 fprintf (stderr,
1040 "Choice %s bad: doesn't take a \".<r>\" parameter\n",
1041 s_orig);
1042 exit (1);
1043 }
1044
1045 c->p = &routine[i];
1046 c->r = r_string (s + nlen + 1);
1047 return;
1048 }
1049
1050 if (s[nlen] == '\0')
1051 {
1052 /* match, with no parameter */
1053
1054 if (routine[i].flag & FLAG_R)
1055 {
1056 fprintf (stderr,
1057 "Choice %s bad: needs a \".<r>\" parameter\n",
1058 s_orig);
1059 exit (1);
1060 }
1061
1062 c->p = &routine[i];
1063 c->r = 0;
1064 return;
1065 }
1066 }
1067
1068 fprintf (stderr, "Choice %s unrecognised\n", s_orig);
1069 exit (1);
1070}
1071
1072
1073void
1074usage (void)
1075{
1076 int i;
1077
1078 speed_time_init ();
1079
1080 printf ("Usage: speed [-options] -s size <routine>...\n");
1081 printf ("Measure the speed of some routines.\n");
1082 printf ("Times are in seconds, accuracy is shown.\n");
1083 printf ("\n");
1084 printf (" -p num set precision as number of time units each routine must run\n");
1085 printf (" -s size[-end][,size[-end]]... sizes to measure\n");
1086 printf (" single sizes or ranges, sep with comma or use multiple -s\n");
1087 printf (" -t step step through sizes by given amount\n");
1088 printf (" -f factor step through sizes by given factor (eg. 1.05)\n");
1089 printf (" -r show times as ratios of the first routine\n");
1090 printf (" -d show times as difference from the first routine\n");
1091 printf (" -D show times as difference from previous size shown\n");
1092 printf (" -c show times in CPU cycles\n");
1093 printf (" -C show times in cycles per limb\n");
1094 printf (" -u print resource usage (memory) at end\n");
1095 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n");
1096 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
1097 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n");
1098 printf (" -o addrs print addresses of data blocks\n");
1099 printf ("\n");
1100 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
1101 printf ("is greater.\n");
1102 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
1103 printf ("size and the previous size.\n");
1104 printf ("\n");
1105 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
1106 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
1107 printf ("a log/log plot).\n");
1108 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
1109 printf ("when viewing more than one routine, it means same axis scales for all data).\n");
1110 printf ("\n");
1111 printf ("The available routines are as follows.\n");
1112 printf ("\n");
1113
1114 for (i = 0; i < numberof (routine); i++)
1115 {
1116 if (routine[i].flag & FLAG_R)
1117 printf ("\t%s.r\n", routine[i].name);
1118 else if (routine[i].flag & FLAG_R_OPTIONAL)
1119 printf ("\t%s (optional .r)\n", routine[i].name);
1120 else
1121 printf ("\t%s\n", routine[i].name);
1122 }
1123 printf ("\n");
1124 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
1125 printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
1126 printf ("\n");
1127 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
1128 printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
1129 printf ("\n");
1130 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
1131 printf ("The fastest routine at each size is marked with a # (free form output only).\n");
1132 printf ("\n");
1133 printf ("%s", speed_time_string);
1134 printf ("\n");
1135 printf ("Gnuplot home page http://www.gnuplot.info/\n");
1136 printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
1137}
1138
1139void
1140check_align_option (const char *name, mp_size_t align)
1141{
1142 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
1143 {
1144 fprintf (stderr, "Alignment request out of range: %s %ld\n",
1145 name, (long) align);
1146 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n",
1147 SPEED_TMP_ALLOC_ADJUST_MASK);
1148 exit (1);
1149 }
1150}
1151
1152int
1153main (int argc, char *argv[])
1154{
1155 int i;
1156 int opt;
1157
1158 /* Unbuffered so output goes straight out when directed to a pipe or file
1159 and isn't lost on killing the program half way. */
1160 setbuf (stdout, NULL);
1161
1162 for (;;)
1163 {
1164 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
1165 if (opt == EOF)
1166 break;
1167
1168 switch (opt) {
1169 case 'a':
1170 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM;
1171 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
1172 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
1173 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS;
1174 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
1175 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
1176 else
1177 {
1178 fprintf (stderr, "unrecognised data option: %s\n", optarg);
1179 exit (1);
1180 }
1181 break;
1182 case 'C':
1183 if (option_unit != UNIT_SECONDS) goto bad_unit;
1184 option_unit = UNIT_CYCLESPERLIMB;
1185 break;
1186 case 'c':
1187 if (option_unit != UNIT_SECONDS)
1188 {
1189 bad_unit:
1190 fprintf (stderr, "cannot use more than one of -c, -C\n");
1191 exit (1);
1192 }
1193 option_unit = UNIT_CYCLES;
1194 break;
1195 case 'D':
1196 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
1197 option_cmp = CMP_DIFFPREV;
1198 break;
1199 case 'd':
1200 if (option_cmp != CMP_ABSOLUTE)
1201 {
1202 bad_cmp:
1203 fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
1204 exit (1);
1205 }
1206 option_cmp = CMP_DIFFERENCE;
1207 break;
1208 case 'E':
1209 option_square = 1;
1210 break;
1211 case 'F':
1212 option_square = 2;
1213 break;
1214 case 'f':
1215 option_factor = atof (optarg);
1216 if (option_factor <= 1.0)
1217 {
1218 fprintf (stderr, "-f factor must be > 1.0\n");
1219 exit (1);
1220 }
1221 break;
1222 case 'o':
1223 speed_option_set (optarg);
1224 break;
1225 case 'P':
1226 option_gnuplot = 1;
1227 option_gnuplot_basename = optarg;
1228 break;
1229 case 'p':
1230 speed_precision = atoi (optarg);
1231 break;
1232 case 'R':
1233 option_seed = time (NULL);
1234 break;
1235 case 'r':
1236 if (option_cmp != CMP_ABSOLUTE)
1237 goto bad_cmp;
1238 option_cmp = CMP_RATIO;
1239 break;
1240 case 's':
1241 {
1242 char *s;
1243 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1244 {
1245 if (size_num == size_allocnum)
1246 {
1247 size_array = (struct size_array_t *)
1248 __gmp_allocate_or_reallocate
1249 (size_array,
1250 size_allocnum * sizeof(size_array[0]),
1251 (size_allocnum+10) * sizeof(size_array[0]));
1252 size_allocnum += 10;
1253 }
1254 if (sscanf (s, "%ld-%ld",
1255 &size_array[size_num].start,
1256 &size_array[size_num].end) != 2)
1257 {
1258 size_array[size_num].start = size_array[size_num].end
1259 = atol (s);
1260 }
1261
1262 if (size_array[size_num].start < 0
1263 || size_array[size_num].end < 0
1264 || size_array[size_num].start > size_array[size_num].end)
1265 {
1266 fprintf (stderr, "invalid size parameter: %s\n", s);
1267 exit (1);
1268 }
1269
1270 size_num++;
1271 }
1272 }
1273 break;
1274 case 't':
1275 option_step = atol (optarg);
1276 if (option_step < 1)
1277 {
1278 fprintf (stderr, "-t step must be >= 1\n");
1279 exit (1);
1280 }
1281 break;
1282 case 'u':
1283 option_resource_usage = 1;
1284 break;
1285 case 'z':
1286 sp.cache = 1;
1287 break;
1288 case 'x':
1289 sp.align_xp = atol (optarg);
1290 check_align_option ("-x", sp.align_xp);
1291 break;
1292 case 'y':
1293 sp.align_yp = atol (optarg);
1294 check_align_option ("-y", sp.align_yp);
1295 break;
1296 case 'w':
1297 sp.align_wp = atol (optarg);
1298 check_align_option ("-w", sp.align_wp);
1299 break;
1300 case 'W':
1301 sp.align_wp2 = atol (optarg);
1302 check_align_option ("-W", sp.align_wp2);
1303 break;
1304 case '?':
1305 exit(1);
1306 }
1307 }
1308
1309 if (optind >= argc)
1310 {
1311 usage ();
1312 exit (1);
1313 }
1314
1315 if (size_num == 0)
1316 {
1317 fprintf (stderr, "-s <size> must be specified\n");
1318 exit (1);
1319 }
1320
1321 gmp_randinit_default (__gmp_rands);
1322 __gmp_rands_initialized = 1;
1323 gmp_randseed_ui (__gmp_rands, option_seed);
1324
1325 choice = (struct choice_t *) (*__gmp_allocate_func)
1326 ((argc - optind) * sizeof(choice[0]));
1327 for ( ; optind < argc; optind++)
1328 {
1329 struct choice_t c;
1330 routine_find (&c, argv[optind]);
1331 choice[num_choices] = c;
1332 num_choices++;
1333 }
1334
1335 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1336 num_choices < 2)
1337 {
1338 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1339 }
1340
1341 speed_time_init ();
1342 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1343 speed_cycletime_need_cycles ();
1344 else
1345 speed_cycletime_need_seconds ();
1346
1347 if (option_gnuplot)
1348 {
1349 run_gnuplot (argc, argv);
1350 }
1351 else
1352 {
1353 if (option_unit == UNIT_SECONDS)
1354 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1355 else
1356 printf ("overhead %.2f cycles",
1357 speed_measure (speed_noop, NULL) / speed_cycletime);
1358 printf (", precision %d units of %.2e secs",
1359 speed_precision, speed_unittime);
1360
1361 if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1362 printf (", CPU freq unknown\n");
1363 else
1364 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1365
1366 printf (" ");
1367 for (i = 0; i < num_choices; i++)
1368 printf (" %*s", COLUMN_WIDTH, choice[i].name);
1369 printf ("\n");
1370
1371 run_all (stdout);
1372 }
1373
1374 if (option_resource_usage)
1375 {
1376#if HAVE_GETRUSAGE
1377 {
1378 /* This doesn't give data sizes on linux 2.0.x, only utime. */
1379 struct rusage r;
1380 if (getrusage (RUSAGE_SELF, &r) != 0)
1381 perror ("getrusage");
1382 else
1383 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1384 r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1385 r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1386 }
1387#else
1388 printf ("getrusage() not available\n");
1389#endif
1390
1391 /* Linux kernel. */
1392 {
1393 char buf[128];
1394 sprintf (buf, "/proc/%d/status", getpid());
1395 if (access (buf, R_OK) == 0)
1396 {
1397 sprintf (buf, "cat /proc/%d/status", getpid());
1398 system (buf);
1399 }
1400
1401 }
1402 }
1403
1404 return 0;
1405}