blob: 9060c3d0509db0c39df65207106564cdaf67e4d1 [file] [log] [blame]
Austin Schuhdace2a62020-08-18 10:56:48 -07001/* Time routines for speed measurements.
2
3Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library. If not,
29see https://www.gnu.org/licenses/. */
30
31
32/* Usage:
33
34 The code in this file implements the lowest level of time measuring,
35 simple one-time measuring of time between two points.
36
37 void speed_starttime (void)
38 double speed_endtime (void)
39 Call speed_starttime to start measuring, and then call speed_endtime
40 when done.
41
42 speed_endtime returns the time taken, in seconds. Or if the timebase
43 is in CPU cycles and the CPU frequency is unknown then speed_endtime
44 returns cycles. Applications can identify the cycles return by
45 checking for speed_cycletime (described below) equal to 1.0.
46
47 If some sort of temporary glitch occurs then speed_endtime returns
48 0.0. Currently this is for various cases where a negative time has
49 occurred. This unfortunately occurs with getrusage on some systems,
50 and with the hppa cycle counter on hpux.
51
52 double speed_cycletime
53 The time in seconds for each CPU cycle. For example on a 100 MHz CPU
54 this would be 1.0e-8.
55
56 If the CPU frequency is unknown, then speed_cycletime is either 0.0
57 or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's
58 1.0 when speed_endtime is returning cycles.
59
60 It may be noted that "speed_endtime() / speed_cycletime" gives a
61 measured time in cycles, irrespective of whether speed_endtime is
62 returning cycles or seconds. (Assuming cycles can be had, ie. it's
63 either cycles already or the cpu frequency is known. See also
64 speed_cycletime_need_cycles below.)
65
66 double speed_unittime
67 The unit of time measurement accuracy for the timing method in use.
68 This is in seconds or cycles, as per speed_endtime.
69
70 char speed_time_string[]
71 A null-terminated string describing the time method in use.
72
73 void speed_time_init (void)
74 Initialize time measuring. speed_starttime() does this
75 automatically, so it's only needed if an application wants to inspect
76 the above global variables before making a measurement.
77
78 int speed_precision
79 The intended accuracy of time measurements. speed_measure() in
80 common.c for instance runs target routines with enough repetitions so
81 it takes at least "speed_unittime * speed_precision" (this expression
82 works for both cycles or seconds from speed_endtime).
83
84 A program can provide an option so the user can set speed_precision.
85 If speed_precision is zero when speed_time_init or speed_starttime
86 first run then it gets a default based on the measuring method
87 chosen. (More precision for higher accuracy methods.)
88
89 void speed_cycletime_need_seconds (void)
90 Call this to demand that speed_endtime will return seconds, and not
91 cycles. If only cycles are available then an error is printed and
92 the program exits.
93
94 void speed_cycletime_need_cycles (void)
95 Call this to demand that speed_cycletime is non-zero, so that
96 "speed_endtime() / speed_cycletime" will give times in cycles.
97
98
99
100 Notes:
101
102 Various combinations of cycle counter, read_real_time(), getrusage(),
103 gettimeofday() and times() can arise, according to which are available
104 and their precision.
105
106
107 Allowing speed_endtime() to return either seconds or cycles is only a
108 slight complication and makes it possible for the speed program to do
109 some sensible things without demanding the CPU frequency. If seconds are
110 being measured then it can always print seconds, and if cycles are being
111 measured then it can always print them without needing to know how long
112 they are. Also the tune program doesn't care at all what the units are.
113
114 GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
115 fail. This will be needed if times in seconds are wanted but a cycle
116 counter is being used, or if times in cycles are wanted but getrusage or
117 another seconds based timer is in use.
118
119 If the measuring method uses a cycle counter but supplements it with
120 getrusage or the like, then knowing the CPU frequency is mandatory since
121 the code compares values from the two.
122
123
124 Not done:
125
126 Solaris gethrtime() seems no more than a slow way to access the Sparc V9
127 cycle counter. gethrvtime() seems to be relevant only to light weight
128 processes, it doesn't for instance give nanosecond virtual time. So
129 neither of these are used.
130
131
132 Bugs:
133
134 getrusage_microseconds_p is fundamentally flawed, getrusage and
135 gettimeofday can have resolutions other than clock ticks or microseconds,
136 for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
137
138
139 Enhancements:
140
141 The SGI hardware counter has 64 bits on some machines, which could be
142 used when available. But perhaps 32 bits is enough range, and then rely
143 on the getrusage supplement.
144
145 Maybe getrusage (or times) should be used as a supplement for any
146 wall-clock measuring method. Currently a wall clock with a good range
147 (eg. a 64-bit cycle counter) is used without a supplement.
148
149 On PowerPC the timebase registers could be used, but would have to do
150 something to find out the speed. On 6xx chips it's normally 1/4 bus
151 speed, on 4xx chips it's either that or an external clock. Measuring
152 against gettimeofday might be ok. */
153
154
155#include "config.h"
156
157#include <errno.h>
158#include <setjmp.h>
159#include <signal.h>
160#include <stddef.h>
161#include <stdio.h>
162#include <string.h>
163#include <stdlib.h> /* for getenv() */
164
165#if HAVE_FCNTL_H
166#include <fcntl.h> /* for open() */
167#endif
168
169#if HAVE_STDINT_H
170#include <stdint.h> /* for uint64_t */
171#endif
172
173#if HAVE_UNISTD_H
174#include <unistd.h> /* for sysconf() */
175#endif
176
177#include <sys/types.h>
178
179#if TIME_WITH_SYS_TIME
180# include <sys/time.h> /* for struct timeval */
181# include <time.h>
182#else
183# if HAVE_SYS_TIME_H
184# include <sys/time.h>
185# else
186# include <time.h>
187# endif
188#endif
189
190#if HAVE_SYS_MMAN_H
191#include <sys/mman.h> /* for mmap() */
192#endif
193
194#if HAVE_SYS_RESOURCE_H
195#include <sys/resource.h> /* for struct rusage */
196#endif
197
198#if HAVE_SYS_SYSSGI_H
199#include <sys/syssgi.h> /* for syssgi() */
200#endif
201
202#if HAVE_SYS_SYSTEMCFG_H
203#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
204#endif
205
206#if HAVE_SYS_TIMES_H
207#include <sys/times.h> /* for times() and struct tms */
208#endif
209
210#include "gmp-impl.h"
211
212#include "speed.h"
213
214
/* Fallback for systems without strerror().  It's only used for diagnostics
   on newish systems, so a fixed placeholder string is good enough and no
   proper replacement is attempted.  */
#if ! HAVE_STRERROR
#define strerror(errnum)  "<strerror not available>"
#endif
220
221
/* Public timing state, as described in the usage notes at the top of this
   file.  */

/* Seconds per CPU cycle; left at 0.0 until known, or set to 1.0 when
   speed_endtime is returning cycles.  */
double speed_cycletime = 0.0;

/* Unit of measurement accuracy for the method in use, in seconds or cycles
   as per speed_endtime.  */
double speed_unittime;

/* Intended accuracy of measurements; zero means "choose a default when the
   measuring method is selected".  */
int speed_precision = 0;

/* Null-terminated description of the time method in use.  */
char speed_time_string[256];
226
227
/* (INT_MAX + 1) * 2 as a double, ie. UINT_MAX + 1 when unsigned has one more
   value bit than int.  Built from INT_MAX because "unsigned" to "double"
   conversion can't be relied on, it's broken in SunOS 4 native cc.  */
#define M_2POWU   (2.0 * ((double) INT_MAX + 1.0))

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
234
235
236/* Conditionals for the time functions available are done with normal C
237 code, which is a lot easier than wildly nested preprocessor directives.
238
239 The choice of what to use is partly made at run-time, according to
240 whether the cycle counter works and the measured accuracy of getrusage
241 and gettimeofday.
242
243 A routine that's not available won't be getting called, but is an abort()
244 to be sure it isn't called mistakenly.
245
246 It can be assumed that if a function exists then its data type will, but
247 if the function doesn't then the data type might or might not exist, so
248 the type can't be used unconditionally. The "struct_rusage" etc macros
249 provide dummies when the respective function doesn't exist. */
250
251
/* have_cycles is zero when there's no speed_cyclecounter(), otherwise it
   takes the value of HAVE_SPEED_CYCLECOUNTER (speed_time_init treats 1 as a
   counter with 2^32 range and anything else as 2^64 range).  Without a
   cycle counter, speed_cyclecounter() becomes an assertion failure so a
   mistaken call is caught.  */
#if ! HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#else
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#endif
258
/* The "stck" instruction gives ticks since 1 Jan 1900 00:00 GMT, each tick
   being 2^-12 microseconds.  The conditionals are the same as those in
   longlong.h.  When the instruction can't be used, STCK() is an assertion
   failure and stck_t is just a placeholder type.  */
#if ! (defined (__GNUC__) && ! defined (NO_ASM)                         \
       && (defined (__i370__) || defined (__s390__) || defined (__mvs__)))
static const int have_stck = 0;
static const int use_stck = 0;
typedef unsigned long stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#else
static const int have_stck = 1;
static const int use_stck = 1;  /* always use when available */
typedef uint64_t stck_t;  /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                         \
  do {                                          \
    asm ("stck %0" : "=Q" (timestamp));         \
  } while (0)
#endif

/* length of one stck tick in seconds, ie. 2^-12 microseconds */
#define STCK_PERIOD  (1.0 / 4096e6)
277
/* PowerPC time base, read with mftb/mftbu.  MFTB(a) stores the low word in
   a[0] and the high word in a[1]; the upper half is read before and after
   the lower half and the read repeated until both upper values agree, so a
   carry between the halves can't give an inconsistent pair.

   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   On other CPUs MFTB() zeros its destination and is an assertion
   failure.  */
#if ! HAVE_HOST_CPU_FAMILY_powerpc
static const int have_mftb = 0;
#define MFTB(a)                                 \
  do {                                          \
    a[0] = 0;                                   \
    a[1] = 0;                                   \
    ASSERT_FAIL (mftb not available);           \
  } while (0)
#else /* powerpc */
static const int have_mftb = 1;
# if ! defined (__GNUC__) || defined (NO_ASM)
#  define MFTB(a)  mftb_function (a)
# else
#  define MFTB(a)                                               \
  do {                                                          \
    unsigned  mftb_upper1_, mftb_lower_, mftb_upper2_;          \
    do {                                                        \
      asm volatile ("mftbu %0\n"                                \
                    "mftb %1\n"                                 \
                    "mftbu %2"                                  \
                    : "=r" (mftb_upper1_),                      \
                      "=r" (mftb_lower_),                       \
                      "=r" (mftb_upper2_));                     \
    } while (mftb_upper1_ != mftb_upper2_);                     \
    a[0] = mftb_lower_;                                         \
    a[1] = mftb_upper1_;                                        \
  } while (0)
# endif
#endif
310
/* The SGI counter method needs both syssgi() and mmap().  Unicos 10.X has
   syssgi(), but not mmap().  */
#if ! (HAVE_SYSSGI && HAVE_MMAP)
static const int have_sgi = 0;
#else
static const int have_sgi = 1;
#endif
317
/* read_real_time() availability.  When it's missing, the functions become
   assertion failures and a dummy timebasestruct_t plus RTC_* constants are
   supplied so code using them still compiles.  */
#if ! HAVE_READ_REAL_TIME
static const int have_rrt = 0;
struct timebasestruct_dummy {
  int           flag;
  unsigned int  tb_high;
  unsigned int  tb_low;
};
#define timebasestruct_t        struct timebasestruct_dummy
#define RTC_POWER               1
#define RTC_POWER_PC            2
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#else
static const int have_rrt = 1;
#endif
333
/* clock_gettime() availability.  Without it, struct_timespec maps to a
   dummy type and the calls become assertion failures yielding -1.  */
#if ! HAVE_CLOCK_GETTIME
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#else
static const int have_cgt = 1;
#define struct_timespec       struct timespec
#endif
343
/* getrusage() availability.  Without it, struct_rusage maps to a dummy type
   and getrusage() becomes an assertion failure.  */
#if ! HAVE_GETRUSAGE
static const int have_grus = 0;
#define struct_rusage    struct rusage_dummy
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#else
static const int have_grus = 1;
#define struct_rusage    struct rusage
#endif
352
/* gettimeofday() availability.  Without it, struct_timeval maps to a dummy
   type and gettimeofday() becomes an assertion failure.  */
#if ! HAVE_GETTIMEOFDAY
static const int have_gtod = 0;
#define struct_timeval       struct timeval_dummy
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#else
static const int have_gtod = 1;
#define struct_timeval       struct timeval
#endif
361
/* times() availability.  Without it, struct_tms maps to a dummy type and
   times() becomes an assertion failure.  */
#if ! HAVE_TIMES
static const int have_times = 0;
#define struct_tms  struct tms_dummy
#define times(tms)  ASSERT_FAIL (times not available)
#else
static const int have_times = 1;
#define struct_tms  struct tms
#endif
370
371struct tms_dummy {
372 long tms_utime;
373};
374struct timeval_dummy {
375 long tv_sec;
376 long tv_usec;
377};
378struct rusage_dummy {
379 struct_timeval ru_utime;
380};
381struct timespec_dummy {
382 long tv_sec;
383 long tv_nsec;
384};
385
386static int use_cycles;
387static int use_mftb;
388static int use_sgi;
389static int use_rrt;
390static int use_cgt;
391static int use_gtod;
392static int use_grus;
393static int use_times;
394static int use_tick_boundary;
395
396static unsigned start_cycles[2];
397static stck_t start_stck;
398static unsigned start_mftb[2];
399static unsigned start_sgi;
400static timebasestruct_t start_rrt;
401static struct_timespec start_cgt;
402static struct_rusage start_grus;
403static struct_timeval start_gtod;
404static struct_tms start_times;
405
406static double cycles_limit = 1e100;
407static double mftb_unittime;
408static double sgi_unittime;
409static double cgt_unittime;
410static double grus_unittime;
411static double gtod_unittime;
412static double times_unittime;
413
/* Seconds as a double from a timebasestruct_t in RTC_POWER format, ie.
   tb_high holding seconds and tb_low holding nanoseconds.  */
#define TIMEBASESTRUCT_SECS(t)  (1e-9 * (t)->tb_low + (t)->tb_high)
416
417
/* Return a string representing a time in seconds, nicely formatted with a
   ns, us, ms or s suffix and about 4 significant figures.  Eg. "10.25ms".

   The result is held in a static buffer which the next call overwrites, so
   it must be used (or copied) before calling again.  The write is bounded
   by snprintf, so an extremely large T gives a truncated string rather than
   overrunning the buffer.  */
char *
unittime_string (double t)
{
  static char buf[128];

  const char *unit;
  int prec;

  /* choose units and scale */
  if (t < 1e-6)
    {
      t *= 1e9;
      unit = "ns";
    }
  else if (t < 1e-3)
    {
      t *= 1e6;
      unit = "us";
    }
  else if (t < 1.0)
    {
      t *= 1e3;
      unit = "ms";
    }
  else
    unit = "s";

  /* want 4 significant figures, so fewer decimals as the scaled value gets
     more integer digits */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
  return buf;
}
451
452
453static jmp_buf cycles_works_buf;
454
455static RETSIGTYPE
456cycles_works_handler (int sig)
457{
458 longjmp (cycles_works_buf, 1);
459}
460
461int
462cycles_works_p (void)
463{
464 static int result = -1;
465
466 if (result != -1)
467 goto done;
468
469 /* FIXME: On linux, the cycle counter is not saved and restored over
470 * context switches, making it almost useless for precise cputime
471 * measurements. When available, it's better to use clock_gettime,
472 * which seems to have reasonable accuracy (tested on x86_32,
473 * linux-2.6.26, glibc-2.7). However, there are also some linux
474 * systems where clock_gettime is broken in one way or the other,
475 * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
476 * kind-of implemented but broken (needs code to detect that), and
477 * on those systems a wall-clock cycle counter is the least bad
478 * fallback.
479 *
480 * So we need some code to disable the cycle counter on some but not
481 * all linux systems. */
482#ifdef SIGILL
483 {
484 RETSIGTYPE (*old_handler) (int);
485 unsigned cycles[2];
486
487 old_handler = signal (SIGILL, cycles_works_handler);
488 if (old_handler == SIG_ERR)
489 {
490 if (speed_option_verbose)
491 printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
492 goto yes;
493 }
494 if (setjmp (cycles_works_buf))
495 {
496 if (speed_option_verbose)
497 printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
498 result = 0;
499 goto done;
500 }
501 speed_cyclecounter (cycles);
502 signal (SIGILL, old_handler);
503 if (speed_option_verbose)
504 printf ("cycles_works_p(): speed_cyclecounter() works\n");
505 }
506#else
507
508 if (speed_option_verbose)
509 printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
510 goto yes;
511#endif
512
513 yes:
514 result = 1;
515
516 done:
517 return result;
518}
519
520
/* Return the number of clock ticks per second.  sysconf(_SC_CLK_TCK) is
   preferred where available, falling back on the CLK_TCK constant; if
   neither gives an answer the program aborts.  The value is determined on
   the first call and remembered.  */
long
clk_tck (void)
{
  static long ticks_per_sec = -1L;

  if (ticks_per_sec == -1L)
    {
#if HAVE_SYSCONF
      ticks_per_sec = sysconf (_SC_CLK_TCK);
      if (ticks_per_sec == -1L)
        fprintf (stderr,
                 "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
      else
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n",
                    ticks_per_sec);
          return ticks_per_sec;
        }
#endif

#ifdef CLK_TCK
      ticks_per_sec = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks_per_sec);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks_per_sec;
}
553
554
555/* If two times can be observed less than half a clock tick apart, then
556 assume "get" is microsecond accurate.
557
558 Two times only 1 microsecond apart are not believed, since some kernels
559 take it upon themselves to ensure gettimeofday doesn't return the same
560 value twice, for the benefit of applications using it for a timestamp.
561 This is obviously very stupid given the speed of CPUs these days.
562
563 Making "reps" many calls to noop_1() is designed to waste some CPU, with
564 a view to getting measurements 2 microseconds (or more) apart. "reps" is
565 increased progressively until such a period is seen.
566
567 The outer loop "attempts" are just to allow for any random nonsense or
568 system load upsetting the measurements (ie. making two successive calls
569 to "get" come out as a longer interval than normal).
570
571 Bugs:
572
573 The assumption that any interval less than a half tick implies
574 microsecond resolution is obviously fairly rash, the true resolution
575 could be anything between a microsecond and that half tick. Perhaps
576 something special would have to be done on a system where this is the
577 case, since there's no obvious reliable way to detect it
578 automatically. */
579
/* Function body deciding whether the time source "get" is microsecond
   accurate.  NAME is used in messages, TYPE is the timestamp type, GET(x)
   reads a timestamp into x, and SEC(x)/USEC(x) extract its seconds and
   microseconds.  The body returns 1 if some interval of at least 2
   microseconds but under half a clock tick was observed in 5 attempts,
   otherwise 0; the answer is cached in a static.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  cached = -1;                                            \
    type        t_begin, t_end;                                         \
    long        elapsed_us, half_tick_us;                               \
    unsigned    attempt, reps, outer, inner;                            \
                                                                        \
    if (cached != -1)                                                   \
      return cached;                                                    \
                                                                        \
    cached = 0;                                                         \
    half_tick_us = (1000000L / clk_tck ()) / 2;                         \
                                                                        \
    for (attempt = 0; attempt < 5 && ! cached; attempt++)               \
      {                                                                 \
        /* waste progressively more CPU until 2us or more is seen */    \
        reps = 0;                                                       \
        do                                                              \
          {                                                             \
            get (t_begin);                                              \
            for (outer = 0; outer < reps; outer++)                      \
              for (inner = 0; inner < 100; inner++)                     \
                noop_1 (CNST_LIMB(0));                                  \
            get (t_end);                                                \
                                                                        \
            elapsed_us = (sec(t_end)-sec(t_begin))*1000000L             \
              + usec(t_end)-usec(t_begin);                              \
                                                                        \
            if (speed_option_verbose >= 2)                              \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
                      name, attempt, reps, elapsed_us);                 \
                                                                        \
            if (elapsed_us >= 2)                                        \
              break;                                                    \
                                                                        \
            reps = (reps == 0) ? 1 : reps * 2;                          \
          }                                                             \
        while (reps != 0);  /* zero is uint overflow, not normal */     \
                                                                        \
        if (elapsed_us < half_tick_us)                                  \
          cached = 1;                                                   \
      }                                                                 \
                                                                        \
    if (speed_option_verbose)                                           \
      {                                                                 \
        if (cached)                                                     \
          printf ("%s is microsecond accurate\n", name);                \
        else                                                            \
          printf ("%s is only %s clock tick accurate\n",                \
                  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return cached;                                                      \
  }
635
636
637int
638gettimeofday_microseconds_p (void)
639{
640#define call_gettimeofday(t) gettimeofday (&(t), NULL)
641#define timeval_tv_sec(t) ((t).tv_sec)
642#define timeval_tv_usec(t) ((t).tv_usec)
643 MICROSECONDS_P ("gettimeofday", struct_timeval,
644 call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
645}
646
647int
648getrusage_microseconds_p (void)
649{
650#define call_getrusage(t) getrusage (0, &(t))
651#define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
652#define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
653 MICROSECONDS_P ("getrusage", struct_rusage,
654 call_getrusage, rusage_tv_sec, rusage_tv_usec);
655}
656
657/* Test whether getrusage goes backwards, return non-zero if it does
658 (suggesting it's flawed).
659
660 On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
661 microsecond accurate, but has been seen remaining unchanged after many
662 microseconds have elapsed. It also regularly goes backwards by 1000 to
663 5000 usecs, this has been seen after between 500 and 4000 attempts taking
664 perhaps 0.03 seconds. We consider this too broken for good measuring.
665 We used to have configure pretend getrusage didn't exist on this system,
666 but a runtime test should be more reliable, since we imagine the problem
667 is not confined to just this exact system tuple. */
668
669int
670getrusage_backwards_p (void)
671{
672 static int result = -1;
673 struct rusage start, prev, next;
674 long d;
675 int i;
676
677 if (result != -1)
678 return result;
679
680 getrusage (0, &start);
681 memcpy (&next, &start, sizeof (next));
682
683 result = 0;
684 i = 0;
685 for (;;)
686 {
687 memcpy (&prev, &next, sizeof (prev));
688 getrusage (0, &next);
689
690 if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
691 || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
692 && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
693 {
694 if (speed_option_verbose)
695 printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
696 i,
697 (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
698 (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
699 result = 1;
700 break;
701 }
702
703 /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
704 attempts, whichever comes first */
705 d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
706 + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
707 i++;
708 if (i > 50000 || (i > 1000 && d > 100000))
709 break;
710 }
711
712 return result;
713}
714
715/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
716 of glibc (some time post 2.2).
717
718 CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
719 defined, but returning -1 for an error). */
720
/* Clock ID for clock_gettime: CLOCK_PROCESS_CPUTIME_ID for preference,
   otherwise CLOCK_VIRTUAL.  have_cgt_id says whether either was found; if
   not, CGT_ID is an assertion failure yielding -1.  */
#if defined (CLOCK_PROCESS_CPUTIME_ID)
# define CGT_ID  CLOCK_PROCESS_CPUTIME_ID
#elif defined (CLOCK_VIRTUAL)
# define CGT_ID  CLOCK_VIRTUAL
#endif

#ifndef CGT_ID
const int have_cgt_id = 0;
# define CGT_ID  (ASSERT_FAIL (CGT_ID not determined), -1)
#else
const int have_cgt_id = 1;
#endif

/* iterations of the delay loop cgt_works_p uses as a sanity check */
#define CGT_DELAY_COUNT  1000
736
737int
738cgt_works_p (void)
739{
740 static int result = -1;
741 struct_timespec unit;
742
743 if (! have_cgt)
744 return 0;
745
746 if (! have_cgt_id)
747 {
748 if (speed_option_verbose)
749 printf ("clock_gettime don't know what ID to use\n");
750 result = 0;
751 return result;
752 }
753
754 if (result != -1)
755 return result;
756
757 /* trial run to see if it works */
758 if (clock_gettime (CGT_ID, &unit) != 0)
759 {
760 if (speed_option_verbose)
761 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
762 result = 0;
763 return result;
764 }
765
766 /* get the resolution */
767 if (clock_getres (CGT_ID, &unit) != 0)
768 {
769 if (speed_option_verbose)
770 printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
771 result = 0;
772 return result;
773 }
774
775 cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
776 if (speed_option_verbose)
777 printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
778
779 if (cgt_unittime < 10e-9)
780 {
781 /* Do we believe this? */
782 struct timespec start, end;
783 static volatile int counter;
784 double duration;
785 if (clock_gettime (CGT_ID, &start))
786 {
787 if (speed_option_verbose)
788 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
789 result = 0;
790 return result;
791 }
792 /* Loop of at least 1000 memory accesses, ought to take at
793 least 100 ns*/
794 for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
795 ;
796 if (clock_gettime (CGT_ID, &end))
797 {
798 if (speed_option_verbose)
799 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
800 result = 0;
801 return result;
802 }
803 duration = (end.tv_sec + end.tv_nsec * 1e-9
804 - start.tv_sec - start.tv_nsec * 1e-9);
805 if (speed_option_verbose)
806 printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
807 CGT_DELAY_COUNT, unittime_string (duration));
808 if (duration < 100e-9)
809 {
810 if (speed_option_verbose)
811 printf ("clock_gettime id=%d not believable\n", CGT_ID);
812 result = 0;
813 return result;
814 }
815 }
816 result = 1;
817 return result;
818}
819
820
821static double
822freq_measure_mftb_one (void)
823{
824#define call_gettimeofday(t) gettimeofday (&(t), NULL)
825#define timeval_tv_sec(t) ((t).tv_sec)
826#define timeval_tv_usec(t) ((t).tv_usec)
827 FREQ_MEASURE_ONE ("mftb", struct_timeval,
828 call_gettimeofday, MFTB,
829 timeval_tv_sec, timeval_tv_usec);
830}
831
832
833static jmp_buf mftb_works_buf;
834
835static RETSIGTYPE
836mftb_works_handler (int sig)
837{
838 longjmp (mftb_works_buf, 1);
839}
840
841int
842mftb_works_p (void)
843{
844 unsigned a[2];
845 RETSIGTYPE (*old_handler) (int);
846 double cycletime;
847
848 /* suppress a warning about a[] unused */
849 a[0] = 0;
850
851 if (! have_mftb)
852 return 0;
853
854#ifdef SIGILL
855 old_handler = signal (SIGILL, mftb_works_handler);
856 if (old_handler == SIG_ERR)
857 {
858 if (speed_option_verbose)
859 printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
860 return 1;
861 }
862 if (setjmp (mftb_works_buf))
863 {
864 if (speed_option_verbose)
865 printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
866 return 0;
867 }
868 MFTB (a);
869 signal (SIGILL, old_handler);
870 if (speed_option_verbose)
871 printf ("mftb_works_p(): mftb works\n");
872#else
873
874 if (speed_option_verbose)
875 printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
876#endif
877
878#if ! HAVE_GETTIMEOFDAY
879 if (speed_option_verbose)
880 printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
881 return 0;
882#endif
883
884 /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
885 other chips it can be driven from an external clock. */
886 cycletime = freq_measure ("mftb", freq_measure_mftb_one);
887 if (cycletime == -1.0)
888 {
889 if (speed_option_verbose)
890 printf ("mftb_works_p(): cannot measure mftb period\n");
891 return 0;
892 }
893
894 mftb_unittime = cycletime;
895 return 1;
896}
897
898
/* Address of the low 32 bits of the SGI hardware cycle counter, once
   sgi_works_p has mapped it.  */
volatile unsigned *sgi_addr;

/* Return 1 if the SGI hardware cycle counter has been mapped and can be
   read through sgi_addr, 0 if not (always 0 without syssgi and mmap).  On
   success sgi_unittime holds the counter period in seconds.  The result is
   cached.

   The /dev/mmem descriptor is only needed to establish the mapping, so it's
   closed again whether or not mmap succeeds, rather than being left
   open.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int result = -1;

  size_t pagesize, offset;
  __psunsigned_t phys, physpage;
  void *virtpage;
  unsigned period_picoseconds;
  int size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE. */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          /* size is in bits here, so the fallback is 32 */
          printf (" will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* save errno, close() might change it */
      int mmap_errno = errno;
      close (fd);
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (mmap_errno));
      result = 0;
      return result;
    }

  /* the mapping remains valid after the descriptor is closed */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
987
988
/* Give VAR the value N, but only if VAR is currently zero (ie. hasn't
   already been set to something).  */
#define DEFAULT(var,n)                  \
  do {                                  \
    if ((var) == 0)                     \
      (var) = (n);                      \
  } while (0)
994
995void
996speed_time_init (void)
997{
998 double supplement_unittime = 0.0;
999
1000 static int speed_time_initialized = 0;
1001 if (speed_time_initialized)
1002 return;
1003 speed_time_initialized = 1;
1004
1005 speed_cycletime_init ();
1006
1007 if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
1008 {
1009 use_cycles = 1;
1010 DEFAULT (speed_cycletime, 1.0);
1011 speed_unittime = speed_cycletime;
1012 DEFAULT (speed_precision, 10000);
1013 strcpy (speed_time_string, "CPU cycle counter");
1014
1015 /* only used if a supplementary method is chosen below */
1016 cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
1017 * speed_cycletime;
1018
1019 if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1020 {
1021 /* this is a good combination */
1022 use_grus = 1;
1023 supplement_unittime = grus_unittime = 1.0e-6;
1024 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
1025 }
1026 else if (have_cycles == 1)
1027 {
1028 /* When speed_cyclecounter has a limited range, look for something
1029 to supplement it. */
1030 if (have_gtod && gettimeofday_microseconds_p())
1031 {
1032 use_gtod = 1;
1033 supplement_unittime = gtod_unittime = 1.0e-6;
1034 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
1035 }
1036 else if (have_grus)
1037 {
1038 use_grus = 1;
1039 supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1040 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
1041 }
1042 else if (have_times)
1043 {
1044 use_times = 1;
1045 supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
1046 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
1047 }
1048 else if (have_gtod)
1049 {
1050 use_gtod = 1;
1051 supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1052 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
1053 }
1054 else
1055 {
1056 fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
1057 fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n");
1058 }
1059 }
1060
1061 if (use_grus || use_times || use_gtod)
1062 {
1063 /* must know cycle period to compare cycles to other measuring
1064 (via cycles_limit) */
1065 speed_cycletime_need_seconds ();
1066
1067 if (speed_precision * supplement_unittime > cycles_limit)
1068 {
1069 fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1070 fprintf (stderr, " cycle counter and limited precision supplemental method\n");
1071 fprintf (stderr, " (%s)\n", speed_time_string);
1072 }
1073 }
1074 }
1075 else if (have_stck)
1076 {
1077 strcpy (speed_time_string, "STCK timestamp");
1078 /* stck is in units of 2^-12 microseconds, which is very likely higher
1079 resolution than a cpu cycle */
1080 if (speed_cycletime == 0.0)
1081 speed_cycletime_fail
1082 ("Need to know CPU frequency for effective stck unit");
1083 speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1084 DEFAULT (speed_precision, 10000);
1085 }
1086 else if (have_mftb && mftb_works_p ())
1087 {
1088 use_mftb = 1;
1089 DEFAULT (speed_precision, 10000);
1090 speed_unittime = mftb_unittime;
1091 sprintf (speed_time_string, "mftb counter (%s)",
1092 unittime_string (speed_unittime));
1093 }
1094 else if (have_sgi && sgi_works_p ())
1095 {
1096 use_sgi = 1;
1097 DEFAULT (speed_precision, 10000);
1098 speed_unittime = sgi_unittime;
1099 sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1100 unittime_string (speed_unittime));
1101 /* supplemented with getrusage, which we assume to have 1ms resolution */
1102 use_grus = 1;
1103 supplement_unittime = 1e-3;
1104 }
1105 else if (have_rrt)
1106 {
1107 timebasestruct_t t;
1108 use_rrt = 1;
1109 DEFAULT (speed_precision, 10000);
1110 read_real_time (&t, sizeof(t));
1111 switch (t.flag) {
1112 case RTC_POWER:
1113 /* FIXME: What's the actual RTC resolution? */
1114 speed_unittime = 1e-7;
1115 strcpy (speed_time_string, "read_real_time() power nanoseconds");
1116 break;
1117 case RTC_POWER_PC:
1118 t.tb_high = 1;
1119 t.tb_low = 0;
1120 time_base_to_time (&t, sizeof(t));
1121 speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1122 sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1123 unittime_string (speed_unittime));
1124 break;
1125 default:
1126 fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1127 t.flag);
1128 abort ();
1129 }
1130 }
1131 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1132 {
1133 /* use clock_gettime if microsecond or better resolution */
1134 choose_cgt:
1135 use_cgt = 1;
1136 speed_unittime = cgt_unittime;
1137 DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1138 strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1139 }
1140 else if (have_times && clk_tck() > 1000000)
1141 {
1142 /* Cray vector systems have times() which is clock cycle resolution
1143 (eg. 450 MHz). */
1144 DEFAULT (speed_precision, 10000);
1145 goto choose_times;
1146 }
1147 else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1148 {
1149 use_grus = 1;
1150 speed_unittime = grus_unittime = 1.0e-6;
1151 DEFAULT (speed_precision, 1000);
1152 strcpy (speed_time_string, "microsecond accurate getrusage()");
1153 }
1154 else if (have_gtod && gettimeofday_microseconds_p())
1155 {
1156 use_gtod = 1;
1157 speed_unittime = gtod_unittime = 1.0e-6;
1158 DEFAULT (speed_precision, 1000);
1159 strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1160 }
1161 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1162 {
1163 /* use clock_gettime if 1 tick or better resolution */
1164 goto choose_cgt;
1165 }
1166 else if (have_times)
1167 {
1168 use_tick_boundary = 1;
1169 DEFAULT (speed_precision, 200);
1170 choose_times:
1171 use_times = 1;
1172 speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1173 sprintf (speed_time_string, "%s clock tick times()",
1174 unittime_string (speed_unittime));
1175 }
1176 else if (have_grus)
1177 {
1178 use_grus = 1;
1179 use_tick_boundary = 1;
1180 speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1181 DEFAULT (speed_precision, 200);
1182 sprintf (speed_time_string, "%s clock tick getrusage()\n",
1183 unittime_string (speed_unittime));
1184 }
1185 else if (have_gtod)
1186 {
1187 use_gtod = 1;
1188 use_tick_boundary = 1;
1189 speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1190 DEFAULT (speed_precision, 200);
1191 sprintf (speed_time_string, "%s clock tick gettimeofday()",
1192 unittime_string (speed_unittime));
1193 }
1194 else
1195 {
1196 fprintf (stderr, "No time measuring method available\n");
1197 fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1198 abort ();
1199 }
1200
1201 if (speed_option_verbose)
1202 {
1203 printf ("speed_time_init: %s\n", speed_time_string);
1204 printf (" speed_precision %d\n", speed_precision);
1205 printf (" speed_unittime %.2g\n", speed_unittime);
1206 if (supplement_unittime)
1207 printf (" supplement_unittime %.2g\n", supplement_unittime);
1208 printf (" use_tick_boundary %d\n", use_tick_boundary);
1209 if (have_cycles)
1210 printf (" cycles_limit %.2g seconds\n", cycles_limit);
1211 }
1212}
1213
1214
1215
1216/* Burn up CPU until a clock tick boundary, for greater accuracy. Set the
1217 corresponding "start_foo" appropriately too. */
1218
1219void
1220grus_tick_boundary (void)
1221{
1222 struct_rusage prev;
1223 getrusage (0, &prev);
1224 do {
1225 getrusage (0, &start_grus);
1226 } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1227}
1228
1229void
1230gtod_tick_boundary (void)
1231{
1232 struct_timeval prev;
1233 gettimeofday (&prev, NULL);
1234 do {
1235 gettimeofday (&start_gtod, NULL);
1236 } while (start_gtod.tv_usec == prev.tv_usec);
1237}
1238
1239void
1240times_tick_boundary (void)
1241{
1242 struct_tms prev;
1243 times (&prev);
1244 do
1245 times (&start_times);
1246 while (start_times.tms_utime == prev.tms_utime);
1247}
1248
1249
1250/* "have_" values are tested to let unused code go dead. */
1251
1252void
1253speed_starttime (void)
1254{
1255 speed_time_init ();
1256
1257 if (have_grus && use_grus)
1258 {
1259 if (use_tick_boundary)
1260 grus_tick_boundary ();
1261 else
1262 getrusage (0, &start_grus);
1263 }
1264
1265 if (have_gtod && use_gtod)
1266 {
1267 if (use_tick_boundary)
1268 gtod_tick_boundary ();
1269 else
1270 gettimeofday (&start_gtod, NULL);
1271 }
1272
1273 if (have_times && use_times)
1274 {
1275 if (use_tick_boundary)
1276 times_tick_boundary ();
1277 else
1278 times (&start_times);
1279 }
1280
1281 if (have_cgt && use_cgt)
1282 clock_gettime (CGT_ID, &start_cgt);
1283
1284 if (have_rrt && use_rrt)
1285 read_real_time (&start_rrt, sizeof(start_rrt));
1286
1287 if (have_sgi && use_sgi)
1288 start_sgi = *sgi_addr;
1289
1290 if (have_mftb && use_mftb)
1291 MFTB (start_mftb);
1292
1293 if (have_stck && use_stck)
1294 STCK (start_stck);
1295
1296 /* Cycles sampled last for maximum accuracy. */
1297 if (have_cycles && use_cycles)
1298 speed_cyclecounter (start_cycles);
1299}
1300
1301
1302/* Calculate the difference between two cycle counter samples, as a "double"
1303 counter of cycles.
1304
1305 The start and end values are allowed to cancel in integers in case the
1306 counter values are bigger than the 53 bits that normally fit in a double.
1307
1308 This works even if speed_cyclecounter() puts a value bigger than 32-bits
1309 in the low word (the high word always gets a 2**32 multiplier though). */
1310
1311double
1312speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1313{
1314 unsigned d;
1315 double t;
1316
1317 if (have_cycles == 1)
1318 {
1319 t = (end[0] - start[0]);
1320 }
1321 else
1322 {
1323 d = end[0] - start[0];
1324 t = d - (d > end[0] ? M_2POWU : 0.0);
1325 t += (end[1] - start[1]) * M_2POW32;
1326 }
1327 return t;
1328}
1329
1330
1331double
1332speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1333{
1334 unsigned d;
1335 double t;
1336
1337 d = end[0] - start[0];
1338 t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1339 t += (end[1] - start[1]) * M_2POW32;
1340 return t;
1341}
1342
1343
1344/* Calculate the difference between "start" and "end" using fields "sec" and
1345 "psec", where each "psec" is a "punit" of a second.
1346
1347 The seconds parts are allowed to cancel before being combined with the
1348 psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
1349 double.
1350
1351 Total time is only calculated in a "double" since an integer count of
1352 psecs might overflow. 2^32 microseconds is only a bit over an hour, or
1353 2^32 nanoseconds only about 4 seconds.
1354
1355 The casts to "long" are for the benefit of timebasestruct_t, where the
1356 fields are only "unsigned int", but we want a signed difference. */
1357
/* DIFF_SECS_ROUTINE expands to a complete braced block which returns, from
   the enclosing function, the time from *start to *end in seconds as a
   double.  It expects pointer variables called "end" and "start" to be in
   scope.  "sec" and "psec" are the member designators of the seconds and
   part-seconds fields, and "punit" is the size in seconds of one psec unit.
   "punit" is parenthesized so that an expression argument is scaled as a
   whole.  */
#define DIFF_SECS_ROUTINE(sec, psec, punit) \
  { \
    long sec_diff, psec_diff; \
    sec_diff = (long) end->sec - (long) start->sec; \
    psec_diff = (long) end->psec - (long) start->psec; \
    return (double) sec_diff + (punit) * (double) psec_diff; \
  }
1365
1366double
1367timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1368{
1369 DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1370}
1371
1372double
1373rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1374{
1375 DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1376}
1377
1378double
1379timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1380{
1381 DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1382}
1383
1384/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1385double
1386timebasestruct_diff_secs (const timebasestruct_t *end,
1387 const timebasestruct_t *start)
1388{
1389 DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1390}
1391
1392
1393double
1394speed_endtime (void)
1395{
1396#define END_USE(name,value) \
1397 do { \
1398 if (speed_option_verbose >= 3) \
1399 printf ("speed_endtime(): used %s\n", name); \
1400 result = value; \
1401 goto done; \
1402 } while (0)
1403
1404#define END_ENOUGH(name,value) \
1405 do { \
1406 if (speed_option_verbose >= 3) \
1407 printf ("speed_endtime(): %s gives enough precision\n", name); \
1408 result = value; \
1409 goto done; \
1410 } while (0)
1411
1412#define END_EXCEED(name,value) \
1413 do { \
1414 if (speed_option_verbose >= 3) \
1415 printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1416 name); \
1417 result = value; \
1418 goto done; \
1419 } while (0)
1420
1421 unsigned end_cycles[2];
1422 stck_t end_stck;
1423 unsigned end_mftb[2];
1424 unsigned end_sgi;
1425 timebasestruct_t end_rrt;
1426 struct_timespec end_cgt;
1427 struct_timeval end_gtod;
1428 struct_rusage end_grus;
1429 struct_tms end_times;
1430 double t_gtod, t_grus, t_times, t_cgt;
1431 double t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1432 double result;
1433
1434 /* Cycles sampled first for maximum accuracy.
1435 "have_" values tested to let unused code go dead. */
1436
1437 if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
1438 if (have_stck && use_stck) STCK (end_stck);
1439 if (have_mftb && use_mftb) MFTB (end_mftb);
1440 if (have_sgi && use_sgi) end_sgi = *sgi_addr;
1441 if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
1442 if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
1443 if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
1444 if (have_grus && use_grus) getrusage (0, &end_grus);
1445 if (have_times && use_times) times (&end_times);
1446
1447 result = -1.0;
1448
1449 if (speed_option_verbose >= 4)
1450 {
1451 printf ("speed_endtime():\n");
1452 if (use_cycles)
1453 printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
1454 start_cycles[1], start_cycles[0],
1455 end_cycles[1], end_cycles[0]);
1456
1457 if (use_stck)
1458 printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
1459
1460 if (use_mftb)
1461 printf (" mftb 0x%X,%08X -> 0x%X,%08X\n",
1462 start_mftb[1], start_mftb[0],
1463 end_mftb[1], end_mftb[0]);
1464
1465 if (use_sgi)
1466 printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
1467
1468 if (use_rrt)
1469 printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
1470 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1471 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1472
1473 if (use_cgt)
1474 printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
1475 start_cgt.tv_sec, start_cgt.tv_nsec,
1476 end_cgt.tv_sec, end_cgt.tv_nsec);
1477
1478 if (use_gtod)
1479 printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
1480 start_gtod.tv_sec, start_gtod.tv_usec,
1481 end_gtod.tv_sec, end_gtod.tv_usec);
1482
1483 if (use_grus)
1484 printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
1485 start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
1486 end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
1487
1488 if (use_times)
1489 printf (" times %ld -> %ld\n",
1490 start_times.tms_utime, end_times.tms_utime);
1491 }
1492
1493 if (use_rrt)
1494 {
1495 time_base_to_time (&start_rrt, sizeof(start_rrt));
1496 time_base_to_time (&end_rrt, sizeof(end_rrt));
1497 t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1498 END_USE ("read_real_time()", t_rrt);
1499 }
1500
1501 if (use_cgt)
1502 {
1503 t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1504 END_USE ("clock_gettime()", t_cgt);
1505 }
1506
1507 if (use_grus)
1508 {
1509 t_grus = rusage_diff_secs (&end_grus, &start_grus);
1510
1511 /* Use getrusage() if the cycle counter limit would be exceeded, or if
1512 it provides enough accuracy already. */
1513 if (use_cycles)
1514 {
1515 if (t_grus >= speed_precision*grus_unittime)
1516 END_ENOUGH ("getrusage()", t_grus);
1517 if (t_grus >= cycles_limit)
1518 END_EXCEED ("getrusage()", t_grus);
1519 }
1520 }
1521
1522 if (use_times)
1523 {
1524 t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1525
1526 /* Use times() if the cycle counter limit would be exceeded, or if
1527 it provides enough accuracy already. */
1528 if (use_cycles)
1529 {
1530 if (t_times >= speed_precision*times_unittime)
1531 END_ENOUGH ("times()", t_times);
1532 if (t_times >= cycles_limit)
1533 END_EXCEED ("times()", t_times);
1534 }
1535 }
1536
1537 if (use_gtod)
1538 {
1539 t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1540
1541 /* Use gettimeofday() if it measured a value bigger than the cycle
1542 counter can handle. */
1543 if (use_cycles)
1544 {
1545 if (t_gtod >= cycles_limit)
1546 END_EXCEED ("gettimeofday()", t_gtod);
1547 }
1548 }
1549
1550 if (use_mftb)
1551 {
1552 t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1553 END_USE ("mftb", t_mftb);
1554 }
1555
1556 if (use_stck)
1557 {
1558 t_stck = (end_stck - start_stck) * STCK_PERIOD;
1559 END_USE ("stck", t_stck);
1560 }
1561
1562 if (use_sgi)
1563 {
1564 t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1565 END_USE ("SGI hardware counter", t_sgi);
1566 }
1567
1568 if (use_cycles)
1569 {
1570 t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1571 * speed_cycletime;
1572 END_USE ("cycle counter", t_cycles);
1573 }
1574
1575 if (use_grus && getrusage_microseconds_p())
1576 END_USE ("getrusage()", t_grus);
1577
1578 if (use_gtod && gettimeofday_microseconds_p())
1579 END_USE ("gettimeofday()", t_gtod);
1580
1581 if (use_times) END_USE ("times()", t_times);
1582 if (use_grus) END_USE ("getrusage()", t_grus);
1583 if (use_gtod) END_USE ("gettimeofday()", t_gtod);
1584
1585 fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1586 abort ();
1587
1588 done:
1589 if (result < 0.0)
1590 {
1591 if (speed_option_verbose >= 2)
1592 fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1593 result = 0.0;
1594 }
1595 return result;
1596}