Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* Speed measuring program. |
| 2 | |
| 3 | Copyright 1999-2003, 2005, 2006, 2008-2019 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU MP Library. |
| 6 | |
| 7 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 8 | it under the terms of either: |
| 9 | |
| 10 | * the GNU Lesser General Public License as published by the Free |
| 11 | Software Foundation; either version 3 of the License, or (at your |
| 12 | option) any later version. |
| 13 | |
| 14 | or |
| 15 | |
| 16 | * the GNU General Public License as published by the Free Software |
| 17 | Foundation; either version 2 of the License, or (at your option) any |
| 18 | later version. |
| 19 | |
| 20 | or both in parallel, as here. |
| 21 | |
| 22 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 23 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 24 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 25 | for more details. |
| 26 | |
| 27 | You should have received copies of the GNU General Public License and the |
| 28 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 29 | see https://www.gnu.org/licenses/. */ |
| 30 | |
| 31 | /* Usage message is in the code below, run with no arguments to print it. |
| 32 | See README for interesting applications. |
| 33 | |
| 34 | To add a new routine foo(), create a speed_foo() function in the style of |
| 35 | the existing ones and add an entry in the routine[] array. Put FLAG_R if |
| 36 | speed_foo() wants an "r" parameter. |
| 37 | |
| 38 | The routines don't have help messages or descriptions, but most have |
| 39 | suggestive names. See the source code for full details. |
| 40 | |
| 41 | */ |
| 42 | |
| 43 | #include "config.h" |
| 44 | |
| 45 | #include <limits.h> |
| 46 | #include <stdio.h> |
| 47 | #include <stdlib.h> |
| 48 | #include <string.h> |
| 49 | |
| 50 | #if HAVE_UNISTD_H |
| 51 | #include <unistd.h> /* for getpid, R_OK */ |
| 52 | #endif |
| 53 | |
| 54 | #if TIME_WITH_SYS_TIME |
| 55 | # include <sys/time.h> /* for struct timeval */ |
| 56 | # include <time.h> |
| 57 | #else |
| 58 | # if HAVE_SYS_TIME_H |
| 59 | # include <sys/time.h> |
| 60 | # else |
| 61 | # include <time.h> |
| 62 | # endif |
| 63 | #endif |
| 64 | |
| 65 | #if HAVE_SYS_RESOURCE_H |
| 66 | #include <sys/resource.h> /* for getrusage() */ |
| 67 | #endif |
| 68 | |
| 69 | |
| 70 | #include "gmp-impl.h" |
| 71 | #include "longlong.h" /* for the benefit of speed-many.c */ |
| 72 | #include "tests.h" |
| 73 | #include "speed.h" |
| 74 | |
| 75 | |
| 76 | #if !HAVE_DECL_OPTARG |
| 77 | extern char *optarg; |
| 78 | extern int optind, opterr; |
| 79 | #endif |
| 80 | |
/* Fallback for C libraries without strtoul(): parse with strtol() and
   convert the result to unsigned long.  The whole expansion and each
   argument are parenthesized so the macro can be used anywhere a function
   call could, e.g. as the operand of sizeof or another unary operator.  */
#if !HAVE_STRTOUL
#define strtoul(p,e,b)  ((unsigned long) strtol ((p), (e), (b)))
#endif
| 84 | |
| 85 | #ifdef SPEED_EXTRA_PROTOS |
| 86 | SPEED_EXTRA_PROTOS |
| 87 | #endif |
| 88 | #ifdef SPEED_EXTRA_PROTOS2 |
| 89 | SPEED_EXTRA_PROTOS2 |
| 90 | #endif |
| 91 | |
| 92 | |
/* Limb constant 0xAA..AA masked with GMP_NUMB_MASK; data_fill() stores it in
   every limb for DATA_AAS.  Defined only for 32-bit and 64-bit limbs.  */
#if GMP_LIMB_BITS == 32
#  define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
#elif GMP_LIMB_BITS == 64
#  define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
#endif
| 99 | |
| 100 | |
| 101 | #define CMP_ABSOLUTE 1 |
| 102 | #define CMP_RATIO 2 |
| 103 | #define CMP_DIFFERENCE 3 |
| 104 | #define CMP_DIFFPREV 4 |
| 105 | int option_cmp = CMP_ABSOLUTE; |
| 106 | |
| 107 | #define UNIT_SECONDS 1 |
| 108 | #define UNIT_CYCLES 2 |
| 109 | #define UNIT_CYCLESPERLIMB 3 |
| 110 | int option_unit = UNIT_SECONDS; |
| 111 | |
| 112 | #define DATA_RANDOM 1 |
| 113 | #define DATA_RANDOM2 2 |
| 114 | #define DATA_ZEROS 3 |
| 115 | #define DATA_AAS 4 |
| 116 | #define DATA_FFS 5 |
| 117 | #define DATA_2FD 6 |
| 118 | int option_data = DATA_RANDOM; |
| 119 | |
| 120 | int option_square = 0; |
| 121 | double option_factor = 0.0; |
| 122 | mp_size_t option_step = 1; |
| 123 | int option_gnuplot = 0; |
| 124 | char *option_gnuplot_basename; |
| 125 | struct size_array_t { |
| 126 | mp_size_t start, end; |
| 127 | } *size_array = NULL; |
| 128 | mp_size_t size_num = 0; |
| 129 | mp_size_t size_allocnum = 0; |
| 130 | int option_resource_usage = 0; |
| 131 | long option_seed = 123456789; |
| 132 | |
| 133 | struct speed_params sp; |
| 134 | |
/* Width of one result column in the free-form (non-gnuplot) output.  */
#define COLUMN_WIDTH  13

/* Bit values for the flag field of a routine[] entry.  */
#define FLAG_R           (1 << 0)   /* require ".r" */
#define FLAG_R_OPTIONAL  (1 << 1)   /* optional ".r" */
#define FLAG_RSIZE       (1 << 2)
#define FLAG_NODATA      (1 << 3)   /* don't alloc xp, yp */
| 141 | |
/* routine[] lists every routine this program can measure.

   name  - the routine's name string.
   fun   - the speed_xxx() function that measures it; run_one() passes it to
           speed_measure().
   flag  - bitwise OR of FLAG_xxx values; an entry written without a third
           initializer gets flag 0.

   Entries inside a preprocessor conditional (HAVE_NATIVE_xxx,
   WANT_OLD_FFT_FULL, GMP_NUMB_BITS % 4 == 0) are compiled in only when that
   condition holds.  SPEED_EXTRA_ROUTINES and SPEED_EXTRA_ROUTINES2, when
   defined, are expanded at the end of the table.  */
| 142 | const struct routine_t { |
| 143 | /* constants */ |
| 144 | const char *name; |
| 145 | speed_function_t fun; |
| 146 | int flag; |
| 147 | } routine[] = { |
| 148 | |
| 149 | { "noop", speed_noop }, |
| 150 | { "noop_wxs", speed_noop_wxs }, |
| 151 | { "noop_wxys", speed_noop_wxys }, |
| 152 | |
| 153 | { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, |
| 154 | { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, |
| 155 | { "mpn_add_1", speed_mpn_add_1, FLAG_R }, |
| 156 | { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R }, |
| 157 | { "mpn_sub_1", speed_mpn_sub_1, FLAG_R }, |
| 158 | { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R }, |
| 159 | |
| 160 | { "mpn_add_err1_n", speed_mpn_add_err1_n }, |
| 161 | { "mpn_add_err2_n", speed_mpn_add_err2_n }, |
| 162 | { "mpn_add_err3_n", speed_mpn_add_err3_n }, |
| 163 | { "mpn_sub_err1_n", speed_mpn_sub_err1_n }, |
| 164 | { "mpn_sub_err2_n", speed_mpn_sub_err2_n }, |
| 165 | { "mpn_sub_err3_n", speed_mpn_sub_err3_n }, |
| 166 | |
| 167 | #if HAVE_NATIVE_mpn_add_n_sub_n |
| 168 | { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, |
| 169 | #endif |
| 170 | |
| 171 | { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, |
| 172 | { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, |
| 173 | #if HAVE_NATIVE_mpn_addmul_2 |
| 174 | { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, |
| 175 | #endif |
| 176 | #if HAVE_NATIVE_mpn_addmul_3 |
| 177 | { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, |
| 178 | #endif |
| 179 | #if HAVE_NATIVE_mpn_addmul_4 |
| 180 | { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, |
| 181 | #endif |
| 182 | #if HAVE_NATIVE_mpn_addmul_5 |
| 183 | { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, |
| 184 | #endif |
| 185 | #if HAVE_NATIVE_mpn_addmul_6 |
| 186 | { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, |
| 187 | #endif |
| 188 | #if HAVE_NATIVE_mpn_addmul_7 |
| 189 | { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, |
| 190 | #endif |
| 191 | #if HAVE_NATIVE_mpn_addmul_8 |
| 192 | { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, |
| 193 | #endif |
| 194 | { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, |
| 195 | { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, |
| 196 | #if HAVE_NATIVE_mpn_mul_2 |
| 197 | { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, |
| 198 | #endif |
| 199 | #if HAVE_NATIVE_mpn_mul_3 |
| 200 | { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, |
| 201 | #endif |
| 202 | #if HAVE_NATIVE_mpn_mul_4 |
| 203 | { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, |
| 204 | #endif |
| 205 | #if HAVE_NATIVE_mpn_mul_5 |
| 206 | { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL }, |
| 207 | #endif |
| 208 | #if HAVE_NATIVE_mpn_mul_6 |
| 209 | { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL }, |
| 210 | #endif |
| 211 | |
| 212 | { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, |
| 213 | { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, |
| 214 | #if HAVE_NATIVE_mpn_divrem_1c |
| 215 | { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, |
| 216 | { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, |
| 217 | #endif |
| 218 | { "mpn_mod_1", speed_mpn_mod_1, FLAG_R }, |
| 219 | #if HAVE_NATIVE_mpn_mod_1c |
| 220 | { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R }, |
| 221 | #endif |
| 222 | { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, |
| 223 | { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, |
| 224 | { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, |
| 225 | |
| 226 | { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R }, |
| 227 | { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R }, |
| 228 | { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R }, |
| 229 | { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R }, |
| 230 | { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R }, |
| 231 | { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R }, |
| 232 | |
| 233 | { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, |
| 234 | { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, |
| 235 | { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, |
| 236 | { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, |
| 237 | { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, |
| 238 | { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, |
| 239 | |
| 240 | { "mpn_divrem_2", speed_mpn_divrem_2, }, |
| 241 | { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, |
| 242 | { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, |
| 243 | |
| 244 | { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R }, |
| 245 | { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R }, |
| 246 | { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R }, |
| 247 | { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R }, |
| 248 | |
| 249 | { "mpn_div_qr_2n", speed_mpn_div_qr_2n, }, |
| 250 | { "mpn_div_qr_2u", speed_mpn_div_qr_2u, }, |
| 251 | |
| 252 | { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, |
| 253 | { "mpn_divexact_by3", speed_mpn_divexact_by3 }, |
| 254 | |
| 255 | { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R }, |
| 256 | { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, |
| 257 | { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, |
| 258 | |
| 259 | #if HAVE_NATIVE_mpn_modexact_1_odd |
| 260 | { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, |
| 261 | #endif |
| 262 | { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, |
| 263 | |
| 264 | #if GMP_NUMB_BITS % 4 == 0 |
| 265 | { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, |
| 266 | #endif |
| 267 | |
| 268 | { "mpn_lshift", speed_mpn_lshift, FLAG_R }, |
| 269 | { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, |
| 270 | { "mpn_rshift", speed_mpn_rshift, FLAG_R }, |
| 271 | |
| 272 | { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, |
| 273 | { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, |
| 274 | { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, |
| 275 | { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, |
| 276 | { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, |
| 277 | { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, |
| 278 | { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, |
| 279 | { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, |
| 280 | { "mpn_com", speed_mpn_com }, |
| 281 | { "mpn_neg", speed_mpn_neg }, |
| 282 | |
| 283 | { "mpn_popcount", speed_mpn_popcount }, |
| 284 | { "mpn_hamdist", speed_mpn_hamdist }, |
| 285 | |
| 286 | { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, |
| 287 | |
| 288 | { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, |
| 289 | { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, |
| 290 | { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, |
| 291 | { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA }, |
| 292 | { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA }, |
| 293 | { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA }, |
| 294 | { "mpn_hgcd", speed_mpn_hgcd }, |
| 295 | { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, |
| 296 | { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, |
| 297 | { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer }, |
| 298 | |
| 299 | { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, |
| 300 | { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, |
| 301 | { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, |
| 302 | |
| 303 | { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, |
| 304 | { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL }, |
| 305 | { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, |
| 306 | { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL }, |
| 307 | |
| 308 | { "mpn_gcd", speed_mpn_gcd }, |
| 309 | |
| 310 | { "mpn_gcdext", speed_mpn_gcdext }, |
| 311 | { "mpn_gcdext_single", speed_mpn_gcdext_single }, |
| 312 | { "mpn_gcdext_double", speed_mpn_gcdext_double }, |
| 313 | { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, |
| 314 | { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, |
| 315 | #if 0 |
| 316 | { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, |
| 317 | #endif |
| 318 | |
| 319 | { "mpz_nextprime", speed_mpz_nextprime }, |
| 320 | |
| 321 | { "mpz_jacobi", speed_mpz_jacobi }, |
| 322 | { "mpn_jacobi_base", speed_mpn_jacobi_base }, |
| 323 | { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, |
| 324 | { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, |
| 325 | { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, |
| 326 | { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, |
| 327 | |
| 328 | { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, |
| 329 | { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, |
| 330 | { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, |
| 331 | #if HAVE_NATIVE_mpn_sqr_diagonal |
| 332 | { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, |
| 333 | #endif |
| 334 | #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 |
| 335 | { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 }, |
| 336 | #endif |
| 337 | |
| 338 | { "mpn_mul_n", speed_mpn_mul_n }, |
| 339 | { "mpn_sqr", speed_mpn_sqr }, |
| 340 | |
| 341 | { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, |
| 342 | { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, |
| 343 | { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, |
| 344 | { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, |
| 345 | { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, |
| 346 | { "mpn_toom22_mul", speed_mpn_toom22_mul }, |
| 347 | { "mpn_toom33_mul", speed_mpn_toom33_mul }, |
| 348 | { "mpn_toom44_mul", speed_mpn_toom44_mul }, |
| 349 | { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, |
| 350 | { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, |
| 351 | { "mpn_toom32_mul", speed_mpn_toom32_mul }, |
| 352 | { "mpn_toom42_mul", speed_mpn_toom42_mul }, |
| 353 | { "mpn_toom43_mul", speed_mpn_toom43_mul }, |
| 354 | { "mpn_toom63_mul", speed_mpn_toom63_mul }, |
| 355 | { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, |
| 356 | { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, |
| 357 | #if WANT_OLD_FFT_FULL |
| 358 | { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, |
| 359 | { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, |
| 360 | #endif |
| 361 | { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, |
| 362 | { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, |
| 363 | |
| 364 | { "mpn_sqrlo", speed_mpn_sqrlo }, |
| 365 | { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase }, |
| 366 | { "mpn_mullo_n", speed_mpn_mullo_n }, |
| 367 | { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, |
| 368 | |
| 369 | { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL }, |
| 370 | { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid }, |
| 371 | { "mpn_mulmid_n", speed_mpn_mulmid_n }, |
| 372 | { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL }, |
| 373 | |
| 374 | { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, |
| 375 | { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, |
| 376 | { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, |
| 377 | { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, |
| 378 | |
| 379 | { "mpn_invert", speed_mpn_invert }, |
| 380 | { "mpn_invertappr", speed_mpn_invertappr }, |
| 381 | { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, |
| 382 | { "mpn_binvert", speed_mpn_binvert }, |
| 383 | { "mpn_sec_invert", speed_mpn_sec_invert }, |
| 384 | |
| 385 | { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, |
| 386 | { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, |
| 387 | { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, |
| 388 | { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, |
| 389 | { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, |
| 390 | { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, |
| 391 | |
| 392 | { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, |
| 393 | { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, |
| 394 | { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, |
| 395 | { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, |
| 396 | { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r }, |
| 397 | |
| 398 | { "mpn_broot", speed_mpn_broot, FLAG_R }, |
| 399 | { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R }, |
| 400 | { "mpn_brootinv", speed_mpn_brootinv, FLAG_R }, |
| 401 | |
| 402 | { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, |
| 403 | { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, |
| 404 | { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, |
| 405 | |
| 406 | { "mpn_sqrtrem", speed_mpn_sqrtrem }, |
| 407 | { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, |
| 408 | { "mpn_sqrt", speed_mpn_sqrt }, |
| 409 | { "mpn_root", speed_mpn_root, FLAG_R }, |
| 410 | |
| 411 | { "mpn_perfect_power_p", speed_mpn_perfect_power_p, }, |
| 412 | { "mpn_perfect_square_p", speed_mpn_perfect_square_p, }, |
| 413 | |
| 414 | { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, |
| 415 | { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, |
| 416 | { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, |
| 417 | { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, |
| 418 | { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, |
| 419 | |
| 420 | { "mpz_add", speed_mpz_add }, |
| 421 | { "mpz_invert", speed_mpz_invert, FLAG_R_OPTIONAL }, |
| 422 | { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, |
| 423 | { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL }, |
| 424 | { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, |
| 425 | { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA }, |
| 426 | { "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, |
| 427 | { "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA }, |
| 428 | { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL }, |
| 429 | { "mpz_powm_mod", speed_mpz_powm_mod }, |
| 430 | { "mpz_powm_redc", speed_mpz_powm_redc }, |
| 431 | { "mpz_powm_sec", speed_mpz_powm_sec }, |
| 432 | { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, |
| 433 | |
| 434 | { "mpz_mod", speed_mpz_mod }, |
| 435 | { "mpn_redc_1", speed_mpn_redc_1 }, |
| 436 | { "mpn_redc_2", speed_mpn_redc_2 }, |
| 437 | { "mpn_redc_n", speed_mpn_redc_n }, |
| 438 | |
| 439 | { "MPN_COPY", speed_MPN_COPY }, |
| 440 | { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, |
| 441 | { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, |
| 442 | { "memcpy", speed_memcpy }, |
| 443 | #if HAVE_NATIVE_mpn_copyi |
| 444 | { "mpn_copyi", speed_mpn_copyi }, |
| 445 | #endif |
| 446 | #if HAVE_NATIVE_mpn_copyd |
| 447 | { "mpn_copyd", speed_mpn_copyd }, |
| 448 | #endif |
| 449 | { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL }, |
| 450 | #if HAVE_NATIVE_mpn_addlsh1_n == 1 |
| 451 | { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL }, |
| 452 | #endif |
| 453 | #if HAVE_NATIVE_mpn_sublsh1_n == 1 |
| 454 | { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL }, |
| 455 | #endif |
| 456 | #if HAVE_NATIVE_mpn_addlsh1_n_ip1 |
| 457 | { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 }, |
| 458 | #endif |
| 459 | #if HAVE_NATIVE_mpn_addlsh1_n_ip2 |
| 460 | { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 }, |
| 461 | #endif |
| 462 | #if HAVE_NATIVE_mpn_sublsh1_n_ip1 |
| 463 | { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 }, |
| 464 | #endif |
| 465 | #if HAVE_NATIVE_mpn_rsblsh1_n == 1 |
| 466 | { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL }, |
| 467 | #endif |
| 468 | #if HAVE_NATIVE_mpn_addlsh2_n == 1 |
| 469 | { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL }, |
| 470 | #endif |
| 471 | #if HAVE_NATIVE_mpn_sublsh2_n == 1 |
| 472 | { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL }, |
| 473 | #endif |
| 474 | #if HAVE_NATIVE_mpn_addlsh2_n_ip1 |
| 475 | { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 }, |
| 476 | #endif |
| 477 | #if HAVE_NATIVE_mpn_addlsh2_n_ip2 |
| 478 | { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 }, |
| 479 | #endif |
| 480 | #if HAVE_NATIVE_mpn_sublsh2_n_ip1 |
| 481 | { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 }, |
| 482 | #endif |
| 483 | #if HAVE_NATIVE_mpn_rsblsh2_n == 1 |
| 484 | { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL }, |
| 485 | #endif |
| 486 | #if HAVE_NATIVE_mpn_addlsh_n |
| 487 | { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL }, |
| 488 | #endif |
| 489 | #if HAVE_NATIVE_mpn_sublsh_n |
| 490 | { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL }, |
| 491 | #endif |
| 492 | #if HAVE_NATIVE_mpn_addlsh_n_ip1 |
| 493 | { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 }, |
| 494 | #endif |
| 495 | #if HAVE_NATIVE_mpn_addlsh_n_ip2 |
| 496 | { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 }, |
| 497 | #endif |
| 498 | #if HAVE_NATIVE_mpn_sublsh_n_ip1 |
| 499 | { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 }, |
| 500 | #endif |
| 501 | #if HAVE_NATIVE_mpn_rsblsh_n |
| 502 | { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL }, |
| 503 | #endif |
| 504 | #if HAVE_NATIVE_mpn_rsh1add_n |
| 505 | { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL }, |
| 506 | #endif |
| 507 | #if HAVE_NATIVE_mpn_rsh1sub_n |
| 508 | { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL }, |
| 509 | #endif |
| 510 | |
| 511 | { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL }, |
| 512 | { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL }, |
| 513 | |
| 514 | { "MPN_ZERO", speed_MPN_ZERO }, |
| 515 | |
| 516 | { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, |
| 517 | { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, |
| 518 | { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, |
| 519 | { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, |
| 520 | { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, |
| 521 | |
| 522 | { "malloc_free", speed_malloc_free }, |
| 523 | { "malloc_realloc_free", speed_malloc_realloc_free }, |
| 524 | { "gmp_allocate_free", speed_gmp_allocate_free }, |
| 525 | { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, |
| 526 | { "mpz_init_clear", speed_mpz_init_clear }, |
| 527 | { "mpq_init_clear", speed_mpq_init_clear }, |
| 528 | { "mpf_init_clear", speed_mpf_init_clear }, |
| 529 | { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, |
| 530 | |
| 531 | { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, |
| 532 | #if HAVE_NATIVE_mpn_umul_ppmm |
| 533 | { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, |
| 534 | #endif |
| 535 | #if HAVE_NATIVE_mpn_umul_ppmm_r |
| 536 | { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, |
| 537 | #endif |
| 538 | |
| 539 | { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, |
| 540 | { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, |
| 541 | |
| 542 | { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, |
| 543 | { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, |
| 544 | #if HAVE_NATIVE_mpn_udiv_qrnnd |
| 545 | { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, |
| 546 | #endif |
| 547 | #if HAVE_NATIVE_mpn_udiv_qrnnd_r |
| 548 | { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, |
| 549 | #endif |
| 550 | { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, |
| 551 | |
| 552 | { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, |
| 553 | { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, |
| 554 | |
| 555 | { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, |
| 556 | { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, |
| 557 | { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, |
| 558 | |
| 559 | #ifdef SPEED_EXTRA_ROUTINES |
| 560 | SPEED_EXTRA_ROUTINES |
| 561 | #endif |
| 562 | #ifdef SPEED_EXTRA_ROUTINES2 |
| 563 | SPEED_EXTRA_ROUTINES2 |
| 564 | #endif |
| 565 | }; |
| 566 | |
| 567 | |
| 568 | struct choice_t { |
| 569 | const struct routine_t *p; |
| 570 | mp_limb_t r; |
| 571 | double scale; |
| 572 | double time; |
| 573 | int no_time; |
| 574 | double prev_time; |
| 575 | const char *name; |
| 576 | }; |
| 577 | struct choice_t *choice; |
| 578 | int num_choices = 0; |
| 579 | |
| 580 | |
| 581 | void |
| 582 | data_fill (mp_ptr ptr, mp_size_t size) |
| 583 | { |
| 584 | switch (option_data) { |
| 585 | case DATA_RANDOM: |
| 586 | mpn_random (ptr, size); |
| 587 | break; |
| 588 | case DATA_RANDOM2: |
| 589 | mpn_random2 (ptr, size); |
| 590 | break; |
| 591 | case DATA_ZEROS: |
| 592 | MPN_ZERO (ptr, size); |
| 593 | break; |
| 594 | case DATA_AAS: |
| 595 | MPN_FILL (ptr, size, GMP_NUMB_0xAA); |
| 596 | break; |
| 597 | case DATA_FFS: |
| 598 | MPN_FILL (ptr, size, GMP_NUMB_MAX); |
| 599 | break; |
| 600 | case DATA_2FD: |
| 601 | MPN_FILL (ptr, size, GMP_NUMB_MAX); |
| 602 | ptr[0] -= 2; |
| 603 | break; |
| 604 | default: |
| 605 | abort(); |
| 606 | /*NOTREACHED*/ |
| 607 | } |
| 608 | } |
| 609 | |
| 610 | /* The code here handling the various combinations of output options isn't |
| 611 | too attractive, but it works and is fairly clean. */ |
| 612 | |
/* Amount of work attributed to size n when converting a time to
   UNIT_CYCLESPERLIMB: n*n when option_square is 1, n*(n+1)/2 when it is 2,
   and plain n otherwise.

   The value is computed in double.  Squaring in the integer type of n
   overflows (undefined behaviour for a signed type) once n exceeds about
   46340 with a 32-bit mp_size_t, and the uses in run_one() are all factors
   in floating point expressions anyway.  Results are exact for every product
   below 2^53.

   n is expanded more than once, so it must not have side effects.  */
#define SIZE_TO_DIVISOR(n)                                              \
  (option_square == 1 ? (double) (n) * (double) (n)                     \
   : option_square == 2 ? (double) (n) * ((double) (n) + 1.0) / 2.0     \
   : (double) (n))
| 617 | |
| 618 | void |
| 619 | run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) |
| 620 | { |
| 621 | const char *first_open_fastest, *first_open_notfastest, *first_close; |
| 622 | int i, fastest, want_data; |
| 623 | double fastest_time; |
| 624 | TMP_DECL; |
| 625 | |
| 626 | TMP_MARK; |
| 627 | |
| 628 | /* allocate data, unless all routines are NODATA */ |
| 629 | want_data = 0; |
| 630 | for (i = 0; i < num_choices; i++) |
| 631 | want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); |
| 632 | |
| 633 | if (want_data) |
| 634 | { |
| 635 | SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); |
| 636 | SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); |
| 637 | |
| 638 | data_fill (s->xp, s->size); |
| 639 | data_fill (s->yp, s->size); |
| 640 | } |
| 641 | else |
| 642 | { |
| 643 | sp.xp = NULL; |
| 644 | sp.yp = NULL; |
| 645 | } |
| 646 | |
| 647 | if (prev_size == -1 && option_cmp == CMP_DIFFPREV) |
| 648 | { |
| 649 | first_open_fastest = "(#"; |
| 650 | first_open_notfastest = " ("; |
| 651 | first_close = ")"; |
| 652 | } |
| 653 | else |
| 654 | { |
| 655 | first_open_fastest = "#"; |
| 656 | first_open_notfastest = " "; |
| 657 | first_close = ""; |
| 658 | } |
| 659 | |
| 660 | fastest = -1; |
| 661 | fastest_time = -1.0; |
| 662 | for (i = 0; i < num_choices; i++) |
| 663 | { |
| 664 | s->r = choice[i].r; |
| 665 | choice[i].time = speed_measure (choice[i].p->fun, s); |
| 666 | choice[i].no_time = (choice[i].time == -1.0); |
| 667 | if (! choice[i].no_time) |
| 668 | choice[i].time *= choice[i].scale; |
| 669 | |
| 670 | /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time |
| 671 | is before any differences. */ |
| 672 | { |
| 673 | double t; |
| 674 | t = choice[i].time; |
| 675 | if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) |
| 676 | { |
| 677 | if (choice[i].prev_time == -1.0) |
| 678 | choice[i].no_time = 1; |
| 679 | else |
| 680 | choice[i].time = choice[i].time - choice[i].prev_time; |
| 681 | } |
| 682 | choice[i].prev_time = t; |
| 683 | } |
| 684 | |
| 685 | if (choice[i].no_time) |
| 686 | continue; |
| 687 | |
| 688 | /* Look for the fastest after CMP_DIFFPREV has been applied, but |
| 689 | before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown |
| 690 | if there's more than one routine. */ |
| 691 | if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) |
| 692 | { |
| 693 | fastest = i; |
| 694 | fastest_time = choice[i].time; |
| 695 | } |
| 696 | |
| 697 | if (option_cmp == CMP_DIFFPREV) |
| 698 | { |
| 699 | /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ |
| 700 | if (option_unit == UNIT_CYCLES) |
| 701 | choice[i].time /= speed_cycletime; |
| 702 | else if (option_unit == UNIT_CYCLESPERLIMB) |
| 703 | { |
| 704 | if (prev_size == -1) |
| 705 | choice[i].time /= speed_cycletime; |
| 706 | else |
| 707 | choice[i].time /= (speed_cycletime |
| 708 | * (SIZE_TO_DIVISOR(s->size) |
| 709 | - SIZE_TO_DIVISOR(prev_size))); |
| 710 | } |
| 711 | } |
| 712 | else |
| 713 | { |
| 714 | if (option_unit == UNIT_CYCLES) |
| 715 | choice[i].time /= speed_cycletime; |
| 716 | else if (option_unit == UNIT_CYCLESPERLIMB) |
| 717 | choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); |
| 718 | |
| 719 | if (option_cmp == CMP_RATIO && i > 0) |
| 720 | { |
| 721 | /* A ratio isn't affected by the units chosen. */ |
| 722 | if (choice[0].no_time || choice[0].time == 0.0) |
| 723 | choice[i].no_time = 1; |
| 724 | else |
| 725 | choice[i].time /= choice[0].time; |
| 726 | } |
| 727 | else if (option_cmp == CMP_DIFFERENCE && i > 0) |
| 728 | { |
| 729 | if (choice[0].no_time) |
| 730 | { |
| 731 | choice[i].no_time = 1; |
| 732 | continue; |
| 733 | } |
| 734 | choice[i].time -= choice[0].time; |
| 735 | } |
| 736 | } |
| 737 | } |
| 738 | |
| 739 | if (option_gnuplot) |
| 740 | { |
| 741 | /* In CMP_DIFFPREV, don't print anything for the first size, start |
| 742 | with the second where an actual difference is available. |
| 743 | |
| 744 | In CMP_RATIO, print the first column as 1.0. |
| 745 | |
| 746 | The 9 decimals printed is much more than the expected precision of |
| 747 | the measurements actually. */ |
| 748 | |
| 749 | if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) |
| 750 | { |
| 751 | fprintf (fp, "%-6ld ", s->size); |
| 752 | for (i = 0; i < num_choices; i++) |
| 753 | fprintf (fp, " %.9e", |
| 754 | choice[i].no_time ? 0.0 |
| 755 | : (option_cmp == CMP_RATIO && i == 0) ? 1.0 |
| 756 | : choice[i].time); |
| 757 | fprintf (fp, "\n"); |
| 758 | } |
| 759 | } |
| 760 | else |
| 761 | { |
| 762 | fprintf (fp, "%-6ld ", s->size); |
| 763 | for (i = 0; i < num_choices; i++) |
| 764 | { |
| 765 | char buf[128]; |
| 766 | int decimals; |
| 767 | |
| 768 | if (choice[i].no_time) |
| 769 | { |
| 770 | fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); |
| 771 | } |
| 772 | else |
| 773 | {if (option_unit == UNIT_CYCLESPERLIMB |
| 774 | || (option_cmp == CMP_RATIO && i > 0)) |
| 775 | decimals = 4; |
| 776 | else if (option_unit == UNIT_CYCLES) |
| 777 | decimals = 2; |
| 778 | else |
| 779 | decimals = 9; |
| 780 | |
| 781 | sprintf (buf, "%s%.*f%s", |
| 782 | i == fastest ? first_open_fastest : first_open_notfastest, |
| 783 | decimals, choice[i].time, first_close); |
| 784 | fprintf (fp, " %*s", COLUMN_WIDTH, buf); |
| 785 | } |
| 786 | } |
| 787 | fprintf (fp, "\n"); |
| 788 | } |
| 789 | |
| 790 | TMP_FREE; |
| 791 | } |
| 792 | |
| 793 | void |
| 794 | run_all (FILE *fp) |
| 795 | { |
| 796 | mp_size_t prev_size; |
| 797 | int i; |
| 798 | TMP_DECL; |
| 799 | |
| 800 | TMP_MARK; |
| 801 | SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); |
| 802 | SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); |
| 803 | |
| 804 | data_fill (sp.xp_block, SPEED_BLOCK_SIZE); |
| 805 | data_fill (sp.yp_block, SPEED_BLOCK_SIZE); |
| 806 | |
| 807 | for (i = 0; i < size_num; i++) |
| 808 | { |
| 809 | sp.size = size_array[i].start; |
| 810 | prev_size = -1; |
| 811 | for (;;) |
| 812 | { |
| 813 | mp_size_t step; |
| 814 | |
| 815 | if (option_data == DATA_2FD && sp.size >= 2) |
| 816 | sp.xp[sp.size-1] = 2; |
| 817 | |
| 818 | run_one (fp, &sp, prev_size); |
| 819 | prev_size = sp.size; |
| 820 | |
| 821 | if (option_data == DATA_2FD && sp.size >= 2) |
| 822 | sp.xp[sp.size-1] = MP_LIMB_T_MAX; |
| 823 | |
| 824 | if (option_factor != 0.0) |
| 825 | { |
| 826 | step = (mp_size_t) (sp.size * option_factor - sp.size); |
| 827 | if (step < 1) |
| 828 | step = 1; |
| 829 | } |
| 830 | else |
| 831 | step = 1; |
| 832 | if (step < option_step) |
| 833 | step = option_step; |
| 834 | |
| 835 | sp.size += step; |
| 836 | if (sp.size > size_array[i].end) |
| 837 | break; |
| 838 | } |
| 839 | } |
| 840 | |
| 841 | TMP_FREE; |
| 842 | } |
| 843 | |
| 844 | |
/* Open FILENAME for writing and return the stream.  If it cannot be
   created, print a message to stderr and exit with status 1, so the
   return value is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (! stream)
    {
      fprintf (stderr, "Cannot create %s\n", filename);
      exit(1);
    }

  return stream;
}
| 856 | |
/* Close FP, a stream that was being written to FILENAME.  If the stream
   had recorded an error, or the close itself fails, print a message to
   stderr and exit with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* The error indicator has to be sampled before the stream is closed.  */
  int  write_failed = (ferror (fp) != 0);
  int  close_failed = (fclose (fp) != 0);

  if (write_failed || close_failed)
    {
      fprintf (stderr, "Error writing %s\n", filename);
      exit(1);
    }
}
| 871 | |
| 872 | |
| 873 | void |
| 874 | run_gnuplot (int argc, char *argv[]) |
| 875 | { |
| 876 | char *plot_filename; |
| 877 | char *data_filename; |
| 878 | FILE *fp; |
| 879 | int i; |
| 880 | |
| 881 | plot_filename = (char *) (*__gmp_allocate_func) |
| 882 | (strlen (option_gnuplot_basename) + 20); |
| 883 | data_filename = (char *) (*__gmp_allocate_func) |
| 884 | (strlen (option_gnuplot_basename) + 20); |
| 885 | |
| 886 | sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); |
| 887 | sprintf (data_filename, "%s.data", option_gnuplot_basename); |
| 888 | |
| 889 | fp = fopen_for_write (plot_filename); |
| 890 | |
| 891 | fprintf (fp, "# Generated with:\n"); |
| 892 | fprintf (fp, "#"); |
| 893 | for (i = 0; i < argc; i++) |
| 894 | fprintf (fp, " %s", argv[i]); |
| 895 | fprintf (fp, "\n"); |
| 896 | fprintf (fp, "\n"); |
| 897 | |
| 898 | fprintf (fp, "reset\n"); |
| 899 | |
| 900 | /* Putting the key at the top left is usually good, and you can change it |
| 901 | interactively if it's not. */ |
| 902 | fprintf (fp, "set key left\n"); |
| 903 | |
| 904 | /* write underscores, not subscripts */ |
| 905 | fprintf (fp, "set termoption noenhanced\n"); |
| 906 | |
| 907 | /* designed to make it possible to see crossovers easily */ |
| 908 | fprintf (fp, "set style data lines\n"); |
| 909 | |
| 910 | fprintf (fp, "plot "); |
| 911 | for (i = 0; i < num_choices; i++) |
| 912 | { |
| 913 | fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); |
| 914 | fprintf (fp, " title \"%s\"", choice[i].name); |
| 915 | |
| 916 | if (i != num_choices-1) |
| 917 | fprintf (fp, ", \\"); |
| 918 | fprintf (fp, "\n"); |
| 919 | } |
| 920 | |
| 921 | fprintf (fp, "load \"-\"\n"); |
| 922 | fclose_written (fp, plot_filename); |
| 923 | |
| 924 | fp = fopen_for_write (data_filename); |
| 925 | |
| 926 | /* Unbuffered so you can see where the program was up to if it crashes or |
| 927 | you kill it. */ |
| 928 | setbuf (fp, NULL); |
| 929 | |
| 930 | run_all (fp); |
| 931 | fclose_written (fp, data_filename); |
| 932 | } |
| 933 | |
| 934 | |
/* LIMB_ONES(n) is a limb whose n least significant bits are ones and whose
   other bits are zero.  n == 0 and n == GMP_LIMB_BITS are picked off
   first, so the shift is only used with a count strictly between 0 and the
   limb width.  Note that n is evaluated more than once.  */

#define LIMB_ONES(n)                                    \
  ((n) == 0 ? CNST_LIMB(0)                              \
   : (n) != GMP_LIMB_BITS ? (CNST_LIMB(1) << (n)) - 1   \
   : MP_LIMB_T_MAX)
| 941 | |
| 942 | mp_limb_t |
| 943 | r_string (const char *s) |
| 944 | { |
| 945 | const char *s_orig = s; |
| 946 | long n; |
| 947 | |
| 948 | if (strcmp (s, "aas") == 0) |
| 949 | return GMP_NUMB_0xAA; |
| 950 | |
| 951 | { |
| 952 | mpz_t z; |
| 953 | mp_limb_t l; |
| 954 | int set, siz; |
| 955 | |
| 956 | mpz_init (z); |
| 957 | set = mpz_set_str (z, s, 0); |
| 958 | siz = SIZ(z); |
| 959 | l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); |
| 960 | mpz_clear (z); |
| 961 | if (set == 0) |
| 962 | { |
| 963 | if (siz > 1 || siz < -1) |
| 964 | printf ("Warning, r parameter %s truncated to %d bits\n", |
| 965 | s_orig, GMP_LIMB_BITS); |
| 966 | return l; |
| 967 | } |
| 968 | } |
| 969 | |
| 970 | if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) |
| 971 | n = strtoul (s+2, (char **) &s, 16); |
| 972 | else |
| 973 | n = strtol (s, (char **) &s, 10); |
| 974 | |
| 975 | if (strcmp (s, "bits") == 0) |
| 976 | { |
| 977 | mp_limb_t l; |
| 978 | if (n > GMP_LIMB_BITS) |
| 979 | { |
| 980 | fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", |
| 981 | n, GMP_LIMB_BITS); |
| 982 | exit (1); |
| 983 | } |
| 984 | mpn_random (&l, 1); |
| 985 | return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); |
| 986 | } |
| 987 | else if (strcmp (s, "ones") == 0) |
| 988 | { |
| 989 | if (n > GMP_LIMB_BITS) |
| 990 | { |
| 991 | fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", |
| 992 | n, GMP_LIMB_BITS); |
| 993 | exit (1); |
| 994 | } |
| 995 | return LIMB_ONES (n); |
| 996 | } |
| 997 | else if (*s != '\0') |
| 998 | { |
| 999 | fprintf (stderr, "invalid r parameter: %s\n", s_orig); |
| 1000 | exit (1); |
| 1001 | } |
| 1002 | |
| 1003 | return n; |
| 1004 | } |
| 1005 | |
| 1006 | |
| 1007 | void |
| 1008 | routine_find (struct choice_t *c, const char *s_orig) |
| 1009 | { |
| 1010 | const char *s; |
| 1011 | int i; |
| 1012 | size_t nlen; |
| 1013 | |
| 1014 | c->name = s_orig; |
| 1015 | s = strchr (s_orig, '*'); |
| 1016 | if (s != NULL) |
| 1017 | { |
| 1018 | c->scale = atof(s_orig); |
| 1019 | s++; |
| 1020 | } |
| 1021 | else |
| 1022 | { |
| 1023 | c->scale = 1.0; |
| 1024 | s = s_orig; |
| 1025 | } |
| 1026 | |
| 1027 | for (i = 0; i < numberof (routine); i++) |
| 1028 | { |
| 1029 | nlen = strlen (routine[i].name); |
| 1030 | if (memcmp (s, routine[i].name, nlen) != 0) |
| 1031 | continue; |
| 1032 | |
| 1033 | if (s[nlen] == '.') |
| 1034 | { |
| 1035 | /* match, with a .r parameter */ |
| 1036 | |
| 1037 | if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) |
| 1038 | { |
| 1039 | fprintf (stderr, |
| 1040 | "Choice %s bad: doesn't take a \".<r>\" parameter\n", |
| 1041 | s_orig); |
| 1042 | exit (1); |
| 1043 | } |
| 1044 | |
| 1045 | c->p = &routine[i]; |
| 1046 | c->r = r_string (s + nlen + 1); |
| 1047 | return; |
| 1048 | } |
| 1049 | |
| 1050 | if (s[nlen] == '\0') |
| 1051 | { |
| 1052 | /* match, with no parameter */ |
| 1053 | |
| 1054 | if (routine[i].flag & FLAG_R) |
| 1055 | { |
| 1056 | fprintf (stderr, |
| 1057 | "Choice %s bad: needs a \".<r>\" parameter\n", |
| 1058 | s_orig); |
| 1059 | exit (1); |
| 1060 | } |
| 1061 | |
| 1062 | c->p = &routine[i]; |
| 1063 | c->r = 0; |
| 1064 | return; |
| 1065 | } |
| 1066 | } |
| 1067 | |
| 1068 | fprintf (stderr, "Choice %s unrecognised\n", s_orig); |
| 1069 | exit (1); |
| 1070 | } |
| 1071 | |
| 1072 | |
| 1073 | void |
| 1074 | usage (void) |
| 1075 | { |
| 1076 | int i; |
| 1077 | |
| 1078 | speed_time_init (); |
| 1079 | |
| 1080 | printf ("Usage: speed [-options] -s size <routine>...\n"); |
| 1081 | printf ("Measure the speed of some routines.\n"); |
| 1082 | printf ("Times are in seconds, accuracy is shown.\n"); |
| 1083 | printf ("\n"); |
| 1084 | printf (" -p num set precision as number of time units each routine must run\n"); |
| 1085 | printf (" -s size[-end][,size[-end]]... sizes to measure\n"); |
| 1086 | printf (" single sizes or ranges, sep with comma or use multiple -s\n"); |
| 1087 | printf (" -t step step through sizes by given amount\n"); |
| 1088 | printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); |
| 1089 | printf (" -r show times as ratios of the first routine\n"); |
| 1090 | printf (" -d show times as difference from the first routine\n"); |
| 1091 | printf (" -D show times as difference from previous size shown\n"); |
| 1092 | printf (" -c show times in CPU cycles\n"); |
| 1093 | printf (" -C show times in cycles per limb\n"); |
| 1094 | printf (" -u print resource usage (memory) at end\n"); |
| 1095 | printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); |
| 1096 | printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); |
| 1097 | printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); |
| 1098 | printf (" -o addrs print addresses of data blocks\n"); |
| 1099 | printf ("\n"); |
| 1100 | printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); |
| 1101 | printf ("is greater.\n"); |
| 1102 | printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); |
| 1103 | printf ("size and the previous size.\n"); |
| 1104 | printf ("\n"); |
| 1105 | printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); |
| 1106 | printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); |
| 1107 | printf ("a log/log plot).\n"); |
| 1108 | printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); |
| 1109 | printf ("when viewing more than one routine, it means same axis scales for all data).\n"); |
| 1110 | printf ("\n"); |
| 1111 | printf ("The available routines are as follows.\n"); |
| 1112 | printf ("\n"); |
| 1113 | |
| 1114 | for (i = 0; i < numberof (routine); i++) |
| 1115 | { |
| 1116 | if (routine[i].flag & FLAG_R) |
| 1117 | printf ("\t%s.r\n", routine[i].name); |
| 1118 | else if (routine[i].flag & FLAG_R_OPTIONAL) |
| 1119 | printf ("\t%s (optional .r)\n", routine[i].name); |
| 1120 | else |
| 1121 | printf ("\t%s\n", routine[i].name); |
| 1122 | } |
| 1123 | printf ("\n"); |
| 1124 | printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); |
| 1125 | printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); |
| 1126 | printf ("\n"); |
| 1127 | printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); |
| 1128 | printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); |
| 1129 | printf ("\n"); |
| 1130 | printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); |
| 1131 | printf ("The fastest routine at each size is marked with a # (free form output only).\n"); |
| 1132 | printf ("\n"); |
| 1133 | printf ("%s", speed_time_string); |
| 1134 | printf ("\n"); |
| 1135 | printf ("Gnuplot home page http://www.gnuplot.info/\n"); |
| 1136 | printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); |
| 1137 | } |
| 1138 | |
| 1139 | void |
| 1140 | check_align_option (const char *name, mp_size_t align) |
| 1141 | { |
| 1142 | if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) |
| 1143 | { |
| 1144 | fprintf (stderr, "Alignment request out of range: %s %ld\n", |
| 1145 | name, (long) align); |
| 1146 | fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", |
| 1147 | SPEED_TMP_ALLOC_ADJUST_MASK); |
| 1148 | exit (1); |
| 1149 | } |
| 1150 | } |
| 1151 | |
| 1152 | int |
| 1153 | main (int argc, char *argv[]) |
| 1154 | { |
| 1155 | int i; |
| 1156 | int opt; |
| 1157 | |
| 1158 | /* Unbuffered so output goes straight out when directed to a pipe or file |
| 1159 | and isn't lost on killing the program half way. */ |
| 1160 | setbuf (stdout, NULL); |
| 1161 | |
| 1162 | for (;;) |
| 1163 | { |
| 1164 | opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); |
| 1165 | if (opt == EOF) |
| 1166 | break; |
| 1167 | |
| 1168 | switch (opt) { |
| 1169 | case 'a': |
| 1170 | if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; |
| 1171 | else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; |
| 1172 | else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; |
| 1173 | else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; |
| 1174 | else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; |
| 1175 | else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; |
| 1176 | else |
| 1177 | { |
| 1178 | fprintf (stderr, "unrecognised data option: %s\n", optarg); |
| 1179 | exit (1); |
| 1180 | } |
| 1181 | break; |
| 1182 | case 'C': |
| 1183 | if (option_unit != UNIT_SECONDS) goto bad_unit; |
| 1184 | option_unit = UNIT_CYCLESPERLIMB; |
| 1185 | break; |
| 1186 | case 'c': |
| 1187 | if (option_unit != UNIT_SECONDS) |
| 1188 | { |
| 1189 | bad_unit: |
| 1190 | fprintf (stderr, "cannot use more than one of -c, -C\n"); |
| 1191 | exit (1); |
| 1192 | } |
| 1193 | option_unit = UNIT_CYCLES; |
| 1194 | break; |
| 1195 | case 'D': |
| 1196 | if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; |
| 1197 | option_cmp = CMP_DIFFPREV; |
| 1198 | break; |
| 1199 | case 'd': |
| 1200 | if (option_cmp != CMP_ABSOLUTE) |
| 1201 | { |
| 1202 | bad_cmp: |
| 1203 | fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); |
| 1204 | exit (1); |
| 1205 | } |
| 1206 | option_cmp = CMP_DIFFERENCE; |
| 1207 | break; |
| 1208 | case 'E': |
| 1209 | option_square = 1; |
| 1210 | break; |
| 1211 | case 'F': |
| 1212 | option_square = 2; |
| 1213 | break; |
| 1214 | case 'f': |
| 1215 | option_factor = atof (optarg); |
| 1216 | if (option_factor <= 1.0) |
| 1217 | { |
| 1218 | fprintf (stderr, "-f factor must be > 1.0\n"); |
| 1219 | exit (1); |
| 1220 | } |
| 1221 | break; |
| 1222 | case 'o': |
| 1223 | speed_option_set (optarg); |
| 1224 | break; |
| 1225 | case 'P': |
| 1226 | option_gnuplot = 1; |
| 1227 | option_gnuplot_basename = optarg; |
| 1228 | break; |
| 1229 | case 'p': |
| 1230 | speed_precision = atoi (optarg); |
| 1231 | break; |
| 1232 | case 'R': |
| 1233 | option_seed = time (NULL); |
| 1234 | break; |
| 1235 | case 'r': |
| 1236 | if (option_cmp != CMP_ABSOLUTE) |
| 1237 | goto bad_cmp; |
| 1238 | option_cmp = CMP_RATIO; |
| 1239 | break; |
| 1240 | case 's': |
| 1241 | { |
| 1242 | char *s; |
| 1243 | for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) |
| 1244 | { |
| 1245 | if (size_num == size_allocnum) |
| 1246 | { |
| 1247 | size_array = (struct size_array_t *) |
| 1248 | __gmp_allocate_or_reallocate |
| 1249 | (size_array, |
| 1250 | size_allocnum * sizeof(size_array[0]), |
| 1251 | (size_allocnum+10) * sizeof(size_array[0])); |
| 1252 | size_allocnum += 10; |
| 1253 | } |
| 1254 | if (sscanf (s, "%ld-%ld", |
| 1255 | &size_array[size_num].start, |
| 1256 | &size_array[size_num].end) != 2) |
| 1257 | { |
| 1258 | size_array[size_num].start = size_array[size_num].end |
| 1259 | = atol (s); |
| 1260 | } |
| 1261 | |
| 1262 | if (size_array[size_num].start < 0 |
| 1263 | || size_array[size_num].end < 0 |
| 1264 | || size_array[size_num].start > size_array[size_num].end) |
| 1265 | { |
| 1266 | fprintf (stderr, "invalid size parameter: %s\n", s); |
| 1267 | exit (1); |
| 1268 | } |
| 1269 | |
| 1270 | size_num++; |
| 1271 | } |
| 1272 | } |
| 1273 | break; |
| 1274 | case 't': |
| 1275 | option_step = atol (optarg); |
| 1276 | if (option_step < 1) |
| 1277 | { |
| 1278 | fprintf (stderr, "-t step must be >= 1\n"); |
| 1279 | exit (1); |
| 1280 | } |
| 1281 | break; |
| 1282 | case 'u': |
| 1283 | option_resource_usage = 1; |
| 1284 | break; |
| 1285 | case 'z': |
| 1286 | sp.cache = 1; |
| 1287 | break; |
| 1288 | case 'x': |
| 1289 | sp.align_xp = atol (optarg); |
| 1290 | check_align_option ("-x", sp.align_xp); |
| 1291 | break; |
| 1292 | case 'y': |
| 1293 | sp.align_yp = atol (optarg); |
| 1294 | check_align_option ("-y", sp.align_yp); |
| 1295 | break; |
| 1296 | case 'w': |
| 1297 | sp.align_wp = atol (optarg); |
| 1298 | check_align_option ("-w", sp.align_wp); |
| 1299 | break; |
| 1300 | case 'W': |
| 1301 | sp.align_wp2 = atol (optarg); |
| 1302 | check_align_option ("-W", sp.align_wp2); |
| 1303 | break; |
| 1304 | case '?': |
| 1305 | exit(1); |
| 1306 | } |
| 1307 | } |
| 1308 | |
| 1309 | if (optind >= argc) |
| 1310 | { |
| 1311 | usage (); |
| 1312 | exit (1); |
| 1313 | } |
| 1314 | |
| 1315 | if (size_num == 0) |
| 1316 | { |
| 1317 | fprintf (stderr, "-s <size> must be specified\n"); |
| 1318 | exit (1); |
| 1319 | } |
| 1320 | |
| 1321 | gmp_randinit_default (__gmp_rands); |
| 1322 | __gmp_rands_initialized = 1; |
| 1323 | gmp_randseed_ui (__gmp_rands, option_seed); |
| 1324 | |
| 1325 | choice = (struct choice_t *) (*__gmp_allocate_func) |
| 1326 | ((argc - optind) * sizeof(choice[0])); |
| 1327 | for ( ; optind < argc; optind++) |
| 1328 | { |
| 1329 | struct choice_t c; |
| 1330 | routine_find (&c, argv[optind]); |
| 1331 | choice[num_choices] = c; |
| 1332 | num_choices++; |
| 1333 | } |
| 1334 | |
| 1335 | if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && |
| 1336 | num_choices < 2) |
| 1337 | { |
| 1338 | fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); |
| 1339 | } |
| 1340 | |
| 1341 | speed_time_init (); |
| 1342 | if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) |
| 1343 | speed_cycletime_need_cycles (); |
| 1344 | else |
| 1345 | speed_cycletime_need_seconds (); |
| 1346 | |
| 1347 | if (option_gnuplot) |
| 1348 | { |
| 1349 | run_gnuplot (argc, argv); |
| 1350 | } |
| 1351 | else |
| 1352 | { |
| 1353 | if (option_unit == UNIT_SECONDS) |
| 1354 | printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); |
| 1355 | else |
| 1356 | printf ("overhead %.2f cycles", |
| 1357 | speed_measure (speed_noop, NULL) / speed_cycletime); |
| 1358 | printf (", precision %d units of %.2e secs", |
| 1359 | speed_precision, speed_unittime); |
| 1360 | |
| 1361 | if (speed_cycletime == 1.0 || speed_cycletime == 0.0) |
| 1362 | printf (", CPU freq unknown\n"); |
| 1363 | else |
| 1364 | printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); |
| 1365 | |
| 1366 | printf (" "); |
| 1367 | for (i = 0; i < num_choices; i++) |
| 1368 | printf (" %*s", COLUMN_WIDTH, choice[i].name); |
| 1369 | printf ("\n"); |
| 1370 | |
| 1371 | run_all (stdout); |
| 1372 | } |
| 1373 | |
| 1374 | if (option_resource_usage) |
| 1375 | { |
| 1376 | #if HAVE_GETRUSAGE |
| 1377 | { |
| 1378 | /* This doesn't give data sizes on linux 2.0.x, only utime. */ |
| 1379 | struct rusage r; |
| 1380 | if (getrusage (RUSAGE_SELF, &r) != 0) |
| 1381 | perror ("getrusage"); |
| 1382 | else |
| 1383 | printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", |
| 1384 | r.ru_utime.tv_sec, r.ru_utime.tv_usec, |
| 1385 | r.ru_idrss, r.ru_isrss, r.ru_ixrss); |
| 1386 | } |
| 1387 | #else |
| 1388 | printf ("getrusage() not available\n"); |
| 1389 | #endif |
| 1390 | |
| 1391 | /* Linux kernel. */ |
| 1392 | { |
| 1393 | char buf[128]; |
| 1394 | sprintf (buf, "/proc/%d/status", getpid()); |
| 1395 | if (access (buf, R_OK) == 0) |
| 1396 | { |
| 1397 | sprintf (buf, "cat /proc/%d/status", getpid()); |
| 1398 | system (buf); |
| 1399 | } |
| 1400 | |
| 1401 | } |
| 1402 | } |
| 1403 | |
| 1404 | return 0; |
| 1405 | } |