Blame - third_party/gmp/gen-psqr.c - RealtimeRoboticsGroup/test

blob: a5054c627235c95dd57517e3831b1376919ea01b [file] [log] [blame]

Austin Schuh	dace2a6	2020-08-18 10:56:48 -0700	[diff] [blame]	1	/* Generate perfect square testing data.
				2
				3	Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
				4
				5	This file is part of the GNU MP Library.
				6
				7	The GNU MP Library is free software; you can redistribute it and/or modify
				8	it under the terms of either:
				9
				10	* the GNU Lesser General Public License as published by the Free
				11	Software Foundation; either version 3 of the License, or (at your
				12	option) any later version.
				13
				14	or
				15
				16	* the GNU General Public License as published by the Free Software
				17	Foundation; either version 2 of the License, or (at your option) any
				18	later version.
				19
				20	or both in parallel, as here.
				21
				22	The GNU MP Library is distributed in the hope that it will be useful, but
				23	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				24	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				25	for more details.
				26
				27	You should have received copies of the GNU General Public License and the
				28	GNU Lesser General Public License along with the GNU MP Library. If not,
				29	see https://www.gnu.org/licenses/. */
				30
				31	#include <stdio.h>
				32	#include <stdlib.h>
				33
				34	#include "bootstrap.c"
				35
				36
				37	/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
				38	(plus a PERFSQR_PP modulus), and generate tables indicating quadratic
				39	residues and non-residues modulo small factors of that modulus.
				40
				41	For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
				42	function exists specifically because 2^24-1 and 2^48-1 have nice sets of
				43	prime factors. For other limb sizes it's considered, but if it doesn't
				44	have good factors then mpn_mod_1 will be used instead.
				45
				46	When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
				47	selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
   with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
				49	GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
				50	calculation within a single limb.
				51
				52	In either case primes can be combined to make divisors. The table data
				53	then effectively indicates remainders which are quadratic residues mod
				54	all the primes. This sort of combining reduces the number of steps
				55	needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
				56	Nothing is gained or lost in terms of detections, the same total fraction
				57	of non-residues will be identified.
				58
				59	Nothing particularly sophisticated is attempted for combining factors to
				60	make divisors. This is probably a kind of knapsack problem so it'd be
				61	too hard to attempt anything completely general. For the usual 32 and 64
				62	bit limbs we get a good enough result just pairing the biggest and
				63	smallest which fit together, repeatedly.
				64
				65	Another aim is to get powerful combinations, ie. divisors which identify
				66	biggest fraction of non-residues, and have those run first. Again for
				67	the usual 32 and 64 bits it seems good enough just to pair for big
				68	divisors then sort according to the resulting fraction of non-residues
				69	identified.
				70
				71	Also in this program, a table sq_res_0x100 of residues modulo 256 is
				72	generated. This simply fills bits into limbs of the appropriate
				73	build-time GMP_LIMB_BITS each.
				74
				75	*/
				76
				77
				78	/* Normally we aren't using const in gen*.c programs, so as not to have to
				79	bother figuring out if it works, but using it with f_cmp_divisor and
				80	f_cmp_fraction avoids warnings from the qsort calls. */
				81
				82	/* Same tests as gmp.h. */
				83	#if defined (__STDC__) \
				84	\|\| defined (__cplusplus) \
				85	\|\| defined (_AIX) \
				86	\|\| defined (__DECC) \
				87	\|\| (defined (__mips) && defined (_SYSTYPE_SVR4)) \
				88	\|\| defined (_MSC_VER) \
				89	\|\| defined (_WIN32)
				90	#define HAVE_CONST 1
				91	#endif
				92
				93	#if ! HAVE_CONST
				94	#define const
				95	#endif
				96
				97
				98	mpz_t sq_res_0x100; / table of limbs */
				99	int nsq_res_0x100; /* elements in sq_res_0x100 array */
				100	int sq_res_0x100_num; /* squares in sq_res_0x100 */
				101	double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
				102
				103	int mod34_bits; /* 3GMP_NUMB_BITS/4 /
				104	int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
				105	int max_divisor; /* all divisors <= max_divisor */
				106	int max_divisor_bits; /* ceil(log2(max_divisor)) */
				107	double total_fraction; /* of squares */
				108	mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
				109	mpz_t pp_norm; /* pp shifted so NUMB high bit set */
				110	mpz_t pp_inverted; /* invert_limb style inverse */
				111	mpz_t mod_mask; /* 2^mod_bits-1 */
				112	char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
				113
				114	/* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
				115	struct rawfactor_t {
				116	int divisor;
				117	int multiplicity;
				118	};
				119	struct rawfactor_t *rawfactor;
				120	int nrawfactor;
				121
				122	/* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
				123	struct factor_t {
				124	int divisor;
				125	mpz_t inverse; /* 1/divisor mod 2^mod_bits */
				126	mpz_t mask; /* indicating squares mod divisor */
				127	double fraction; /* squares/total */
				128	};
				129	struct factor_t *factor;
				130	int nfactor; /* entries in use in factor array */
				131	int factor_alloc; /* entries allocated to factor array */
				132
				133
				134	int
				135	f_cmp_divisor (const void parg, const void qarg)
				136	{
				137	const struct factor_t p, q;
				138	p = (const struct factor_t *) parg;
				139	q = (const struct factor_t *) qarg;
				140	if (p->divisor > q->divisor)
				141	return 1;
				142	else if (p->divisor < q->divisor)
				143	return -1;
				144	else
				145	return 0;
				146	}
				147
				148	int
				149	f_cmp_fraction (const void parg, const void qarg)
				150	{
				151	const struct factor_t p, q;
				152	p = (const struct factor_t *) parg;
				153	q = (const struct factor_t *) qarg;
				154	if (p->fraction > q->fraction)
				155	return 1;
				156	else if (p->fraction < q->fraction)
				157	return -1;
				158	else
				159	return 0;
				160	}
				161
				162	/* Remove array[idx] by copying the remainder down, and adjust narray
				163	accordingly. */
				164	#define COLLAPSE_ELEMENT(array, idx, narray) \
				165	do { \
				166	memmove (&(array)[idx], \
				167	&(array)[idx+1], \
				168	((narray)-((idx)+1)) * sizeof (array[0])); \
				169	(narray)--; \
				170	} while (0)
				171
				172
				173	/* return n2^p mod m /
				174	int
				175	mul_2exp_mod (int n, int p, int m)
				176	{
				177	while (--p >= 0)
				178	n = (2 * n) % m;
				179	return n;
				180	}
				181
				182	/* return -n mod m */
				183	int
				184	neg_mod (int n, int m)
				185	{
				186	assert (n >= 0 && n < m);
				187	return (n == 0 ? 0 : m-n);
				188	}
				189
				190	/* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
				191	"-(idx<<mod_bits)" can be a square modulo m. */
				192	void
				193	square_mask (mpz_t mask, int m)
				194	{
				195	int p, i, r, idx;
				196
				197	p = mul_2exp_mod (1, mod_bits, m);
				198	p = neg_mod (p, m);
				199
				200	mpz_set_ui (mask, 0L);
				201	for (i = 0; i < m; i++)
				202	{
				203	r = (i * i) % m;
				204	idx = (r * p) % m;
				205	mpz_setbit (mask, (unsigned long) idx);
				206	}
				207	}
				208
				209	void
				210	generate_sq_res_0x100 (int limb_bits)
				211	{
				212	int i, res;
				213
				214	nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
				215	sq_res_0x100 = (mpz_t ) xmalloc (nsq_res_0x100 sizeof (*sq_res_0x100));
				216
				217	for (i = 0; i < nsq_res_0x100; i++)
				218	mpz_init_set_ui (sq_res_0x100[i], 0L);
				219
				220	for (i = 0; i < 0x100; i++)
				221	{
				222	res = (i * i) % 0x100;
				223	mpz_setbit (sq_res_0x100[res / limb_bits],
				224	(unsigned long) (res % limb_bits));
				225	}
				226
				227	sq_res_0x100_num = 0;
				228	for (i = 0; i < nsq_res_0x100; i++)
				229	sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
				230	sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
				231	}
				232
				233	void
				234	generate_mod (int limb_bits, int nail_bits)
				235	{
				236	int numb_bits = limb_bits - nail_bits;
				237	int i, divisor;
				238
				239	mpz_init_set_ui (pp, 0L);
				240	mpz_init_set_ui (pp_norm, 0L);
				241	mpz_init_set_ui (pp_inverted, 0L);
				242
				243	/* no more than limb_bits many factors in a one limb modulus (and of
				244	course in reality nothing like that many) */
				245	factor_alloc = limb_bits;
				246	factor = (struct factor_t ) xmalloc (factor_alloc sizeof (*factor));
				247	rawfactor = (struct rawfactor_t ) xmalloc (factor_alloc sizeof (*rawfactor));
				248
				249	if (numb_bits % 4 != 0)
				250	{
				251	strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
				252	goto use_pp;
				253	}
				254
				255	max_divisor = 2*limb_bits;
				256	max_divisor_bits = log2_ceil (max_divisor);
				257
				258	if (numb_bits / 4 < max_divisor_bits)
				259	{
				260	/* Wind back to one limb worth of max_divisor, if that will let us use
				261	mpn_mod_34lsub1. */
				262	max_divisor = limb_bits;
				263	max_divisor_bits = log2_ceil (max_divisor);
				264
				265	if (numb_bits / 4 < max_divisor_bits)
				266	{
				267	strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
				268	goto use_pp;
				269	}
				270	}
				271
				272	{
				273	/* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
				274	mpz_t m, q, r;
				275	int multiplicity;
				276
				277	mod34_bits = (numb_bits / 4) * 3;
				278
				279	/* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
				280	the mod34_bits mark, adding the two halves for a remainder of at most
				281	mod34_bits+1 many bits */
				282	mod_bits = mod34_bits + 1;
				283
				284	mpz_init_set_ui (m, 1L);
				285	mpz_mul_2exp (m, m, mod34_bits);
				286	mpz_sub_ui (m, m, 1L);
				287
				288	mpz_init (q);
				289	mpz_init (r);
				290
				291	for (i = 3; i <= max_divisor; i+=2)
				292	{
				293	if (! isprime (i))
				294	continue;
				295
				296	mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
				297	if (mpz_sgn (r) != 0)
				298	continue;
				299
				300	/* if a repeated prime is found it's used as an i^n in one factor */
				301	divisor = 1;
				302	multiplicity = 0;
				303	do
				304	{
				305	if (divisor > max_divisor / i)
				306	break;
				307	multiplicity++;
				308	mpz_set (m, q);
				309	mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
				310	}
				311	while (mpz_sgn (r) == 0);
				312
				313	assert (nrawfactor < factor_alloc);
				314	rawfactor[nrawfactor].divisor = i;
				315	rawfactor[nrawfactor].multiplicity = multiplicity;
				316	nrawfactor++;
				317	}
				318
				319	mpz_clear (m);
				320	mpz_clear (q);
				321	mpz_clear (r);
				322	}
				323
				324	if (nrawfactor <= 2)
				325	{
				326	mpz_t new_pp;
				327
				328	sprintf (mod34_excuse, "only %d small factor%s",
				329	nrawfactor, nrawfactor == 1 ? "" : "s");
				330
				331	use_pp:
				332	/* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
				333	tried with just one */
				334	max_divisor = 2*limb_bits;
				335	max_divisor_bits = log2_ceil (max_divisor);
				336
				337	mpz_init (new_pp);
				338	nrawfactor = 0;
				339	mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
				340
				341	/* one copy of each small prime */
				342	mpz_set_ui (pp, 1L);
				343	for (i = 3; i <= max_divisor; i+=2)
				344	{
				345	if (! isprime (i))
				346	continue;
				347
				348	mpz_mul_ui (new_pp, pp, (unsigned long) i);
				349	if (mpz_sizeinbase (new_pp, 2) > mod_bits)
				350	break;
				351	mpz_set (pp, new_pp);
				352
				353	assert (nrawfactor < factor_alloc);
				354	rawfactor[nrawfactor].divisor = i;
				355	rawfactor[nrawfactor].multiplicity = 1;
				356	nrawfactor++;
				357	}
				358
				359	/* Plus an extra copy of one or more of the primes selected, if that
				360	still fits in max_divisor and the total in mod_bits. Usually only
				361	3 or 5 will be candidates */
				362	for (i = nrawfactor-1; i >= 0; i--)
				363	{
				364	if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
				365	continue;
				366	mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
				367	if (mpz_sizeinbase (new_pp, 2) > mod_bits)
				368	continue;
				369	mpz_set (pp, new_pp);
				370
				371	rawfactor[i].multiplicity++;
				372	}
				373
				374	mod_bits = mpz_sizeinbase (pp, 2);
				375
				376	mpz_set (pp_norm, pp);
				377	while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
				378	mpz_add (pp_norm, pp_norm, pp_norm);
				379
				380	mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
				381
				382	mpz_clear (new_pp);
				383	}
				384
				385	/* start the factor array */
				386	for (i = 0; i < nrawfactor; i++)
				387	{
				388	int j;
				389	assert (nfactor < factor_alloc);
				390	factor[nfactor].divisor = 1;
				391	for (j = 0; j < rawfactor[i].multiplicity; j++)
				392	factor[nfactor].divisor *= rawfactor[i].divisor;
				393	nfactor++;
				394	}
				395
				396	combine:
				397	/* Combine entries in the factor array. Combine the smallest entry with
				398	the biggest one that will fit with it (ie. under max_divisor), then
				399	repeat that with the new smallest entry. */
				400	qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
				401	for (i = nfactor-1; i >= 1; i--)
				402	{
				403	if (factor[i].divisor <= max_divisor / factor[0].divisor)
				404	{
				405	factor[0].divisor *= factor[i].divisor;
				406	COLLAPSE_ELEMENT (factor, i, nfactor);
				407	goto combine;
				408	}
				409	}
				410
				411	total_fraction = 1.0;
				412	for (i = 0; i < nfactor; i++)
				413	{
				414	mpz_init (factor[i].inverse);
				415	mpz_invert_ui_2exp (factor[i].inverse,
				416	(unsigned long) factor[i].divisor,
				417	(unsigned long) mod_bits);
				418
				419	mpz_init (factor[i].mask);
				420	square_mask (factor[i].mask, factor[i].divisor);
				421
				422	/* fraction of possible squares */
				423	factor[i].fraction = (double) mpz_popcount (factor[i].mask)
				424	/ factor[i].divisor;
				425
				426	/* total fraction of possible squares */
				427	total_fraction *= factor[i].fraction;
				428	}
				429
				430	/* best tests first (ie. smallest fraction) */
				431	qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
				432	}
				433
				434	void
				435	print (int limb_bits, int nail_bits)
				436	{
				437	int i;
				438	mpz_t mhi, mlo;
				439
				440	printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
				441	printf ("\n");
				442
				443	printf ("#if GMP_LIMB_BITS != %d \|\| GMP_NAIL_BITS != %d\n",
				444	limb_bits, nail_bits);
				445	printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
				446	limb_bits, nail_bits);
				447	printf ("#endif\n");
				448	printf ("\n");
				449
				450	printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
				451	printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
				452	(1.0 - sq_res_0x100_fraction) * 100.0,
				453	0x100 - sq_res_0x100_num);
				454	printf ("static const mp_limb_t\n");
				455	printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
				456	for (i = 0; i < nsq_res_0x100; i++)
				457	{
				458	printf (" CNST_LIMB(0x");
				459	mpz_out_str (stdout, 16, sq_res_0x100[i]);
				460	printf ("),\n");
				461	}
				462	printf ("};\n");
				463	printf ("\n");
				464
				465	if (mpz_sgn (pp) != 0)
				466	{
				467	printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
				468	printf ("/* PERFSQR_PP = ");
				469	}
				470	else
				471	printf ("/* 2^%d-1 = ", mod34_bits);
				472	for (i = 0; i < nrawfactor; i++)
				473	{
				474	if (i != 0)
				475	printf (" * ");
				476	printf ("%d", rawfactor[i].divisor);
				477	if (rawfactor[i].multiplicity != 1)
				478	printf ("^%d", rawfactor[i].multiplicity);
				479	}
				480	printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
				481
				482	printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
				483	if (mpz_sgn (pp) != 0)
				484	{
				485	printf ("#define PERFSQR_PP CNST_LIMB(0x");
				486	mpz_out_str (stdout, 16, pp);
				487	printf (")\n");
				488	printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
				489	mpz_out_str (stdout, 16, pp_norm);
				490	printf (")\n");
				491	printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
				492	mpz_out_str (stdout, 16, pp_inverted);
				493	printf (")\n");
				494	}
				495	printf ("\n");
				496
				497	mpz_init (mhi);
				498	mpz_init (mlo);
				499
				500	printf ("/* This test identifies %.2f%% as non-squares. */\n",
				501	(1.0 - total_fraction) * 100.0);
				502	printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
				503	printf (" do { \\\n");
				504	printf (" mp_limb_t r; \\\n");
				505	if (mpz_sgn (pp) != 0)
				506	printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
				507	else
				508	printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
				509
				510	for (i = 0; i < nfactor; i++)
				511	{
				512	printf (" \\\n");
				513	printf (" /* %5.2f%% */ \\\n",
				514	(1.0 - factor[i].fraction) * 100.0);
				515
				516	printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
				517	factor[i].divisor <= limb_bits ? 1 : 2,
				518	factor[i].divisor);
				519	mpz_out_str (stdout, 16, factor[i].inverse);
				520	printf ("), \\\n");
				521	printf (" CNST_LIMB(0x");
				522
				523	if ( factor[i].divisor <= limb_bits)
				524	{
				525	mpz_out_str (stdout, 16, factor[i].mask);
				526	}
				527	else
				528	{
				529	mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
				530	mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
				531	mpz_out_str (stdout, 16, mhi);
				532	printf ("), CNST_LIMB(0x");
				533	mpz_out_str (stdout, 16, mlo);
				534	}
				535	printf (")); \\\n");
				536	}
				537
				538	printf (" } while (0)\n");
				539	printf ("\n");
				540
				541	printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
				542	(1.0 - (total_fraction * 44.0/256.0)) * 100.0);
				543	printf ("\n");
				544
				545	printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
				546	printf ("#define PERFSQR_DIVISORS { 256,");
				547	for (i = 0; i < nfactor; i++)
				548	printf (" %d,", factor[i].divisor);
				549	printf (" }\n");
				550
				551
				552	mpz_clear (mhi);
				553	mpz_clear (mlo);
				554	}
				555
				556	int
				557	main (int argc, char *argv[])
				558	{
				559	int limb_bits, nail_bits;
				560
				561	if (argc != 3)
				562	{
				563	fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
				564	exit (1);
				565	}
				566
				567	limb_bits = atoi (argv[1]);
				568	nail_bits = atoi (argv[2]);
				569
				570	if (limb_bits <= 0
				571	\|\| nail_bits < 0
				572	\|\| nail_bits >= limb_bits)
				573	{
				574	fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
				575	limb_bits, nail_bits);
				576	exit (1);
				577	}
				578
				579	generate_sq_res_0x100 (limb_bits);
				580	generate_mod (limb_bits, nail_bits);
				581
				582	print (limb_bits, nail_bits);
				583
				584	return 0;
				585	}