Blame - third_party/gmp/tune/speed-ext.c - RealtimeRoboticsGroup/test

blob: e7fb8b9f60d98713a235736fcc03bb5c83c39dcc [file] [log] [blame]

Austin Schuh	bb1338c	2024-06-15 19:31:16 -0700	[diff] [blame]	1	/* An example of extending the speed program to measure routines not in GMP.
				2
				3	Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
				4
				5	This file is part of the GNU MP Library.
				6
				7	The GNU MP Library is free software; you can redistribute it and/or modify
				8	it under the terms of either:
				9
				10	* the GNU Lesser General Public License as published by the Free
				11	Software Foundation; either version 3 of the License, or (at your
				12	option) any later version.
				13
				14	or
				15
				16	* the GNU General Public License as published by the Free Software
				17	Foundation; either version 2 of the License, or (at your option) any
				18	later version.
				19
				20	or both in parallel, as here.
				21
				22	The GNU MP Library is distributed in the hope that it will be useful, but
				23	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				24	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				25	for more details.
				26
				27	You should have received copies of the GNU General Public License and the
				28	GNU Lesser General Public License along with the GNU MP Library. If not,
				29	see https://www.gnu.org/licenses/. */
				30
				31
				32	/* The extension here is three versions of an mpn arithmetic mean. These
				33	aren't meant to be particularly useful, just examples.
				34
				35	You can run something like the following to compare their speeds.
				36
				37	./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
				38
				39	On RISC chips, mean_open() might be fastest if the compiler is doing a
				40	good job. On the register starved x86s, mean_calls will be fastest.
				41
				42
				43	Notes:
				44
				45	SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
				46	by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
				47	files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
				48
				49	The advantage of this #include "speed.c" scheme is that there's no
				50	editing of a copy of that file, and new features in new versions of it
				51	will be immediately available.
				52
				53	In a real program the routines mean_calls() etc would probably be in
				54	separate C or assembler source files, and just the measuring
				55	speed_mean_calls() etc would be here. Linking against other libraries
				56	for things to measure is perfectly possible too.
				57
				58	When attempting to compare two versions of the same named routine, say
				59	like the generic and assembler versions of mpn_add_n(), creative use of
				60	cc -D or #define is suggested, so one or both can be renamed and linked
				61	into the same program. It'll be much easier to compare them side by side
				62	than with separate programs for each.
				63
				64	common.c has notes on writing speed measuring routines.
				65
				66	Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
				67	not using libtool) to get common.o and other objects needed by speed.c. */
				68
				69
				70	#define SPEED_EXTRA_PROTOS \
				71	double speed_mean_calls (struct speed_params *s); \
				72	double speed_mean_open (struct speed_params *s); \
				73	double speed_mean_open2 (struct speed_params *s);
				74
				75	#define SPEED_EXTRA_ROUTINES \
				76	{ "mean_calls", speed_mean_calls }, \
				77	{ "mean_open", speed_mean_open }, \
				78	{ "mean_open2", speed_mean_open2 },
				79
				80	#include "speed.c"
				81
				82
				83	/* A straightforward implementation calling mpn subroutines.
				84
				85	wp,size is set to (xp,size + yp,size) / 2. The return value is the
				86	remainder from the division. The other versions are the same. */
				87
				88	mp_limb_t
				89	mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
				90	{
				91	mp_limb_t c, ret;
				92
				93	ASSERT (size >= 1);
				94
				95	c = mpn_add_n (wp, xp, yp, size);
				96	ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
				97	wp[size-1] \|= (c << (GMP_LIMB_BITS-1));
				98	return ret;
				99	}
				100
				101
				102	/* An open-coded version, making one pass over the data. The right shift is
				103	done as the added limbs are produced. The addition code follows
				104	mpn/generic/add_n.c. */
				105
				106	mp_limb_t
				107	mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
				108	{
				109	mp_limb_t w, wprev, x, y, c, ret;
				110	mp_size_t i;
				111
				112	ASSERT (size >= 1);
				113
				114	x = xp[0];
				115	y = yp[0];
				116
				117	wprev = x + y;
				118	c = (wprev < x);
				119	ret = (wprev & 1);
				120
				121	#define RSHIFT(hi,lo) (((lo) >> 1) \| ((hi) << (GMP_LIMB_BITS-1)))
				122
				123	for (i = 1; i < size; i++)
				124	{
				125	x = xp[i];
				126	y = yp[i];
				127
				128	w = x + c;
				129	c = (w < x);
				130	w += y;
				131	c += (w < y);
				132
				133	wp[i-1] = RSHIFT (w, wprev);
				134	wprev = w;
				135	}
				136
				137	wp[i-1] = RSHIFT (c, wprev);
				138
				139	return ret;
				140	}
				141
				142
				143	/* Another one-pass version, but right shifting the source limbs rather than
				144	the result limbs. There's not much chance of this being better than the
				145	above, but it's an alternative at least. */
				146
				147	mp_limb_t
				148	mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
				149	{
				150	mp_limb_t w, x, y, xnext, ynext, c, ret;
				151	mp_size_t i;
				152
				153	ASSERT (size >= 1);
				154
				155	x = xp[0];
				156	y = yp[0];
				157
				158	/* ret is the low bit of x+y, c is the carry out of that low bit add */
				159	ret = (x ^ y) & 1;
				160	c = (x & y) & 1;
				161
				162	for (i = 0; i < size-1; i++)
				163	{
				164	xnext = xp[i+1];
				165	ynext = yp[i+1];
				166	x = RSHIFT (xnext, x);
				167	y = RSHIFT (ynext, y);
				168
				169	w = x + c;
				170	c = (w < x);
				171	w += y;
				172	c += (w < y);
				173	wp[i] = w;
				174
				175	x = xnext;
				176	y = ynext;
				177	}
				178
				179	wp[i] = (x >> 1) + (y >> 1) + c;
				180
				181	return ret;
				182	}
				183
				184
				185	/* The speed measuring routines are the same apart from which function they
				186	run, so a macro is used. Actually this macro is the same as
				187	SPEED_ROUTINE_MPN_BINARY_N. */
				188
				189	#define SPEED_ROUTINE_MEAN(mean_fun) \
				190	{ \
				191	unsigned i; \
				192	mp_ptr wp; \
				193	double t; \
				194	TMP_DECL; \
				195	\
				196	SPEED_RESTRICT_COND (s->size >= 1); \
				197	\
				198	TMP_MARK; \
				199	SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
				200	\
				201	speed_operand_src (s, s->xp, s->size); \
				202	speed_operand_src (s, s->yp, s->size); \
				203	speed_operand_dst (s, wp, s->size); \
				204	speed_cache_fill (s); \
				205	\
				206	speed_starttime (); \
				207	i = s->reps; \
				208	do \
				209	mean_fun (wp, s->xp, s->yp, s->size); \
				210	while (--i != 0); \
				211	t = speed_endtime (); \
				212	\
				213	TMP_FREE; \
				214	return t; \
				215	}
				216
				217	double
				218	speed_mean_calls (struct speed_params *s)
				219	{
				220	SPEED_ROUTINE_MEAN (mean_calls);
				221	}
				222
				223	double
				224	speed_mean_open (struct speed_params *s)
				225	{
				226	SPEED_ROUTINE_MEAN (mean_open);
				227	}
				228
				229	double
				230	speed_mean_open2 (struct speed_params *s)
				231	{
				232	SPEED_ROUTINE_MEAN (mean_open2);
				233	}