blob: f8f4a1c0e8ede1e320fd7ab09b2c9bbcb3a47b8a [file] [log] [blame]
Austin Schuhbb1338c2024-06-15 19:31:16 -07001/* Run some tests on various mpn routines.
2
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6Copyright 2000-2006, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
7
8This file is part of the GNU MP Library test suite.
9
10The GNU MP Library test suite is free software; you can redistribute it
11and/or modify it under the terms of the GNU General Public License as
12published by the Free Software Foundation; either version 3 of the License,
13or (at your option) any later version.
14
15The GNU MP Library test suite is distributed in the hope that it will be
16useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
18Public License for more details.
19
20You should have received a copy of the GNU General Public License along with
21the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
22
23
24/* Usage: try [options] <function>...
25
26 For example, "./try mpn_add_n" to run tests of that function.
27
28 Combinations of alignments and overlaps are tested, with redzones above
29 or below the destinations, and with the sources write-protected.
30
31 The number of tests performed becomes ridiculously large with all the
32 combinations, and for that reason this can't be a part of a "make check",
33 it's meant only for development. The code isn't very pretty either.
34
35 During development it can help to disable the redzones, since seeing the
36 rest of the destination written can show where the wrong part is, or if
37 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
38 fill (see below) will show locations never written.
39
40 The -s option can be used to test only certain size operands, which is
41 useful if some new code doesn't yet support say sizes less than the
42 unrolling, or whatever.
43
44 When a problem occurs it'll of course be necessary to run the program
45 under gdb to find out quite where, how and why it's going wrong. Disable
46 the spinner with the -W option when doing this, or single stepping won't
47 work. Using the "-1" option to run with simple data can be useful.
48
49 New functions to test can be added in try_array[]. If a new TYPE is
50 required then add it to the existing constants, set up its parameters in
51 param_init(), and add it to the call() function. Extra parameter fields
52 can be added if necessary, or further interpretations given to existing
53 fields.
54
55
56 Portability:
57
58 This program is not designed for use on Cray vector systems under Unicos,
59 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems
60 don't really have pages or mprotect. We could arrange to run the tests
61 without the redzones, but we haven't bothered currently.
62
63
64 Enhancements:
65
66 umul_ppmm support is not very good, lots of source data is generated
67 whereas only two limbs are needed.
68
69 Make a little scheme for interpreting the "SIZE" selections uniformly.
70
71 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
72 source limbs. Possibly increase the default repetitions in that case.
73
74 Automatically detect gdb and disable the spinner (use -W for now).
75
76 Make a way to re-run a failing case in the debugger. Have an option to
77 snapshot each test case before it's run so the data is available if a
78 segv occurs. (This should be more reliable than the current print_all()
79 in the signal handler.)
80
81 When alignment means a dst isn't hard against the redzone, check the
82 space in between remains unchanged.
83
84 When a source overlaps a destination, don't run both s[i].high 0 and 1,
85 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
86
87 When partial overlaps aren't done, don't loop over source alignments
88 during overlaps.
89
90 Try to make the looping code a bit less horrible. Right now it's pretty
91 hard to see what iterations are actually done.
92
93 Perhaps specific setups and loops for each style of function under test
94 would be clearer than a parameterized general loop. There's lots of
95 stuff common to all functions, but the exceptions get messy.
96
97 When there's no overlap, run with both src>dst and src<dst. A subtle
98 calling-conventions violation occurred in a P6 copy which depended on the
99 relative location of src and dst.
100
101 multiplier_N is more or less a third source region for the addmul_N
102 routines, and could be done with the redzoned region scheme.
103
104*/
105
106
107/* always do assertion checking */
108#define WANT_ASSERT 1
109
110#include "config.h"
111
112#include <errno.h>
113#include <limits.h>
114#include <signal.h>
115#include <stdio.h>
116#include <stdlib.h>
117#include <string.h>
118#include <time.h>
119
120#if HAVE_UNISTD_H
121#include <unistd.h>
122#endif
123
124#if HAVE_SYS_MMAN_H
125#include <sys/mman.h>
126#endif
127
128#include "gmp-impl.h"
129#include "longlong.h"
130#include "tests.h"
131
132
133#if !HAVE_DECL_OPTARG
134extern char *optarg;
135extern int optind, opterr;
136#endif
137
138#if ! HAVE_DECL_SYS_NERR
139extern int sys_nerr;
140#endif
141
142#if ! HAVE_DECL_SYS_ERRLIST
143extern char *sys_errlist[];
144#endif
145
146#if ! HAVE_STRERROR
147char *
148strerror (int n)
149{
150 if (n < 0 || n >= sys_nerr)
151 return "errno out of range";
152 else
153 return sys_errlist[n];
154}
155#endif
156
157/* Rumour has it some systems lack a define of PROT_NONE. */
158#ifndef PROT_NONE
159#define PROT_NONE 0
160#endif
161
162/* Dummy defines for when mprotect doesn't exist. */
163#ifndef PROT_READ
164#define PROT_READ 0
165#endif
166#ifndef PROT_WRITE
167#define PROT_WRITE 0
168#endif
169
170/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
171 _SC_PAGE_SIZE instead. */
172#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
173#define _SC_PAGESIZE _SC_PAGE_SIZE
174#endif
175
176
177#ifdef EXTRA_PROTOS
178EXTRA_PROTOS
179#endif
180#ifdef EXTRA_PROTOS2
181EXTRA_PROTOS2
182#endif
183
184
/* Run-time options and their default values.
   NOTE(review): the command line parsing that sets these is not in this
   part of the file; the usage comment at the top mentions -s (operand
   sizes), -W (disable the spinner) and -1 (simple data) -- confirm which
   variable each option maps to.  */
#define DEFAULT_REPETITIONS 10

int option_repetitions = DEFAULT_REPETITIONS;
int option_spinner = 1;
int option_redzones = 1;
int option_firstsize = 0;
int option_lastsize = 500;
int option_firstsize2 = 0;

/* Iteration counts.  NOTE(review): presumably the number of alignments,
   overlaps, random carries etc. tried per test case -- confirm against
   the looping code.  */
#define ALIGNMENTS 4
#define OVERLAPS 4
#define CARRY_RANDOMS 5
#define MULTIPLIER_RANDOMS 5
#define DIVISOR_RANDOMS 5
#define FRACTION_COUNT 4

int option_print = 0;

/* Values for option_data; DATA_TRAND is the default.
   NOTE(review): presumably these select how source operands are filled
   (random, zeros, a sequence, all-ones, ...) -- confirm where the data is
   generated.  */
#define DATA_TRAND 0
#define DATA_ZEROS 1
#define DATA_SEQ 2
#define DATA_FFS 3
#define DATA_2FD 4
int option_data = DATA_TRAND;
209
210
/* Page size in bytes (PAGESIZE_LIMBS converts it to limbs).  It is only
   declared here; NOTE(review): presumably set at startup from
   sysconf(_SC_PAGESIZE) -- confirm.  */
mp_size_t pagesize;
#define PAGESIZE_LIMBS (pagesize / GMP_LIMB_BYTES)

/* must be a multiple of the page size */
#define REDZONE_BYTES (pagesize * 16)
#define REDZONE_LIMBS (REDZONE_BYTES / GMP_LIMB_BYTES)


/* Maximum of three values, built from the two-argument MAX.  As with any
   such macro the arguments may be evaluated more than once.  */
#define MAX3(x,y,z) (MAX (x, MAX (y, z)))

/* The "magic" fill value referred to in the comment at the top of the
   file: destinations start out filled with it, so locations never written
   are recognisable.  One constant per limb width.  */
#if GMP_LIMB_BITS == 32
#define DEADVAL CNST_LIMB(0xDEADBEEF)
#else
#define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
#endif
226
227
/* A block of limbs: start pointer plus a size.  Embedded in source_t and
   dest_each_t below.
   NOTE(review): size is presumably counted in limbs -- confirm at the
   allocation site.  */
struct region_t {
  mp_ptr ptr;
  mp_size_t size;
};
232
233
/* Values for trap_location, which starts out as TRAP_NOWHERE.
   NOTE(review): presumably records which phase (reference routine,
   function under test, or setups) is running so that a signal handler can
   report where a fault happened -- confirm against the handler.  */
#define TRAP_NOWHERE 0
#define TRAP_REF 1
#define TRAP_FUN 2
#define TRAP_SETUPS 3
int trap_location = TRAP_NOWHERE;
239
240
/* Maximum number of source and destination operands a tested function
   can have; these size the s[] and d[] arrays here and the src[]/dst[]
   flag arrays in struct try_t.  */
#define NUM_SOURCES 5
#define NUM_DESTS 2

/* Per-source state shared by the reference and the function under test.
   The validate_xxx routines read the operand data through s[i].p.
   NOTE(review): "high" and "align" presumably select placement at the
   high or low end of "region" and an alignment offset in limbs --
   confirm against the setup code.  */
struct source_t {
  struct region_t region;
  int high;
  mp_size_t align;
  mp_ptr p;
};

struct source_t s[NUM_SOURCES];

/* Per-destination placement parameters.  The destination memory itself
   is held per implementation, in struct dest_each_t.  */
struct dest_t {
  int high;
  mp_size_t align;
  mp_size_t size;
};

struct dest_t d[NUM_DESTS];
260
/* Per-implementation ("Ref" or "Fun") pointer to a source operand.  */
struct source_each_t {
  mp_ptr p;
};

/* Per-implementation destination: a region plus the operand pointer p,
   which is what the validate_xxx routines read results through.  */
struct dest_each_t {
  struct region_t region;
  mp_ptr p;
};
269
/* Parameters of the test case currently being run.  The validate_xxx
   routines read size, carry and divisor directly from here.  size2 is the
   size used for source 1 when tr->size2 is set (see SRC_SIZE).
   multiplier_N has room for 8 limbs, the largest msize assigned in
   param_init() (TYPE_ADDMUL_8).  */
mp_size_t size;
mp_size_t size2;
unsigned long shift;
mp_limb_t carry;
mp_limb_t divisor;
mp_limb_t multiplier;
mp_limb_t multiplier_N[8];
277
/* Everything kept separately for each of the two implementations run on
   a test case: a display name, the destinations, the source pointers and
   the value the routine returned.  */
struct each_t {
  const char *name;
  struct dest_each_t d[NUM_DESTS];
  struct source_each_t s[NUM_SOURCES];
  mp_limb_t retval;
};

/* "ref" is named "Ref" and "fun" is named "Fun"; the validate_xxx
   routines check fun.retval and fun.d[].p.  All members other than the
   name start out zero.  */
struct each_t ref = { "Ref" };
struct each_t fun = { "Fun" };
287
/* Size to use for source number n: size2 for source 1 when the current
   type has a non-zero tr->size2, otherwise size.  */
#define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
289
290void validate_fail (void);
291
292
293#if HAVE_TRY_NEW_C
294#include "try-new.c"
295#endif
296
297
/* Generic pointer type used to store any reference routine; REFERENCE()
   in param_init() casts each refmpn_xxx function to it.  */
typedef mp_limb_t (*tryfun_t) (ANYARGS);

/* Description of how one TYPE_xxx of function is to be exercised.
   param_init() fills in one of these per type in param[]; fields it does
   not assign stay zero.  The #defines interleaved below give the values
   accepted by the field that follows them.  */
struct try_t {
  /* NOTE(review): presumably non-zero when the function's return value is
     to be checked -- param_init() sets it for routines returning a carry
     or remainder; confirm in the comparison code.  */
  char retval;

  /* Non-zero entries mark which source / destination operands the
     function has (assigned as 0 or 1 in param_init()).  */
  char src[NUM_SOURCES];
  char dst[NUM_DESTS];

#define SIZE_YES 1
#define SIZE_ALLOW_ZERO 2
#define SIZE_1 3 /* 1 limb */
#define SIZE_2 4 /* 2 limbs */
#define SIZE_3 5 /* 3 limbs */
#define SIZE_4 6 /* 4 limbs */
#define SIZE_6 7 /* 6 limbs */
#define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */
#define SIZE_SIZE2 9
#define SIZE_PLUS_1 10
#define SIZE_SUM 11
#define SIZE_DIFF 12
#define SIZE_DIFF_PLUS_1 13
#define SIZE_DIFF_PLUS_3 14
#define SIZE_RETVAL 15
#define SIZE_CEIL_HALF 16
#define SIZE_GET_STR 17
#define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */
#define SIZE_ODD 19
  /* SIZE_xxx selections for the main size, the second size, and each
     destination.  A non-zero size2 also makes SRC_SIZE(1) use the global
     size2.  */
  char size;
  char size2;
  char dst_size[NUM_DESTS];

  /* multiplier_N size in limbs */
  mp_size_t msize;

  char dst_bytes[NUM_DESTS];

  /* NOTE(review): set for the in-place style routines (addmul_N,
     addlsh_n_ip1, ...); presumably means destination 0 is initialised
     from source 1 before the call -- confirm.  */
  char dst0_from_src1;

#define CARRY_BIT 1 /* single bit 0 or 1 */
#define CARRY_3 2 /* 0, 1, 2 */
#define CARRY_4 3 /* 0 to 3 */
#define CARRY_LIMB 4 /* any limb value */
#define CARRY_DIVISOR 5 /* carry<divisor */
  /* Range of carry-in values to use; 0 when the function has none.  */
  char carry;

  /* a fudge to tell the output when to print negatives */
  char carry_sign;

  /* Non-zero when the function takes a single-limb multiplier or a shift
     count respectively (assigned as 0 or 1 in param_init()).  */
  char multiplier;
  char shift;

#define DIVISOR_LIMB 1
#define DIVISOR_NORM 2
#define DIVISOR_ODD 3
  /* Kind of divisor to generate; 0 when the function has none.  */
  char divisor;

#define DATA_NON_ZERO 1
#define DATA_GCD 2
#define DATA_SRC0_ODD 3
#define DATA_SRC0_HIGHBIT 4
#define DATA_SRC1_ODD 5
#define DATA_SRC1_ODD_PRIME 6
#define DATA_SRC1_HIGHBIT 7
#define DATA_MULTIPLE_DIVISOR 8
#define DATA_UDIV_QRNND 9
#define DATA_DIV_QR_1 10
  /* Special requirement on the source data, or 0 for none.  */
  char data;

/* Default is allow full overlap. */
#define OVERLAP_NONE 1
#define OVERLAP_LOW_TO_HIGH 2
#define OVERLAP_HIGH_TO_LOW 3
#define OVERLAP_NOT_SRCS 4
#define OVERLAP_NOT_SRC2 8
#define OVERLAP_NOT_DST2 16
  char overlap;

  /* Reference implementation to compare against, with its name as text
     (both set by the REFERENCE macro in param_init()).  */
  tryfun_t reference;
  const char *reference_name;

  /* Custom result checker, with its name as text (both set by the
     VALIDATE macro in param_init()).  */
  void (*validate) (void);
  const char *validate_name;
};

/* Parameters of the type currently under test; read by SRC_SIZE().  */
struct try_t *tr;
383
384
385void
386validate_mod_34lsub1 (void)
387{
388#define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
389
390 mp_srcptr ptr = s[0].p;
391 int error = 0;
392 mp_limb_t got, got_mod, want, want_mod;
393
394 ASSERT (size >= 1);
395
396 got = fun.retval;
397 got_mod = got % CNST_34LSUB1;
398
399 want = refmpn_mod_34lsub1 (ptr, size);
400 want_mod = want % CNST_34LSUB1;
401
402 if (got_mod != want_mod)
403 {
404 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
405 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
406 error = 1;
407 }
408
409 if (error)
410 validate_fail ();
411}
412
413void
414validate_divexact_1 (void)
415{
416 mp_srcptr src = s[0].p;
417 mp_srcptr dst = fun.d[0].p;
418 int error = 0;
419
420 ASSERT (size >= 1);
421
422 {
423 mp_ptr tp = refmpn_malloc_limbs (size);
424 mp_limb_t rem;
425
426 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
427 if (rem != 0)
428 {
429 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
430 error = 1;
431 }
432 if (! refmpn_equal_anynail (tp, dst, size))
433 {
434 printf ("Quotient a/d wrong\n");
435 mpn_trace ("fun ", dst, size);
436 mpn_trace ("want", tp, size);
437 error = 1;
438 }
439 free (tp);
440 }
441
442 if (error)
443 validate_fail ();
444}
445
446void
447validate_bdiv_q_1
448 (void)
449{
450 mp_srcptr src = s[0].p;
451 mp_srcptr dst = fun.d[0].p;
452 int error = 0;
453
454 ASSERT (size >= 1);
455
456 {
457 mp_ptr tp = refmpn_malloc_limbs (size + 1);
458
459 refmpn_mul_1 (tp, dst, size, divisor);
460 /* Set ignored low bits */
461 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor));
462 if (! refmpn_equal_anynail (tp, src, size))
463 {
464 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
465 mpn_trace ("res ", dst, size);
466 mpn_trace ("src ", src, size);
467 error = 1;
468 }
469 free (tp);
470 }
471
472 if (error)
473 validate_fail ();
474}
475
476
477void
478validate_modexact_1c_odd (void)
479{
480 mp_srcptr ptr = s[0].p;
481 mp_limb_t r = fun.retval;
482 int error = 0;
483
484 ASSERT (size >= 1);
485 ASSERT (divisor & 1);
486
487 if ((r & GMP_NAIL_MASK) != 0)
488 printf ("r has non-zero nail\n");
489
490 if (carry < divisor)
491 {
492 if (! (r < divisor))
493 {
494 printf ("Don't have r < divisor\n");
495 error = 1;
496 }
497 }
498 else /* carry >= divisor */
499 {
500 if (! (r <= divisor))
501 {
502 printf ("Don't have r <= divisor\n");
503 error = 1;
504 }
505 }
506
507 {
508 mp_limb_t c = carry % divisor;
509 mp_ptr tp = refmpn_malloc_limbs (size+1);
510 mp_size_t k;
511
512 for (k = size-1; k <= size; k++)
513 {
514 /* set {tp,size+1} to r*b^k + a - c */
515 refmpn_copyi (tp, ptr, size);
516 tp[size] = 0;
517 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
518 if (refmpn_sub_1 (tp, tp, size+1, c))
519 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
520
521 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
522 goto good_remainder;
523 }
524 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
525 error = 1;
526
527 good_remainder:
528 free (tp);
529 }
530
531 if (error)
532 validate_fail ();
533}
534
535void
536validate_modexact_1_odd (void)
537{
538 carry = 0;
539 validate_modexact_1c_odd ();
540}
541
542void
543validate_div_qr_1_pi1 (void)
544{
545 mp_srcptr up = ref.s[0].p;
546 mp_size_t un = size;
547 mp_size_t uh = ref.s[1].p[0];
548 mp_srcptr qp = fun.d[0].p;
549 mp_limb_t r = fun.retval;
550 mp_limb_t cy;
551 int cmp;
552 mp_ptr tp;
553 if (r >= divisor)
554 {
555 gmp_printf ("Bad remainder %Md, d = %Md\n", r, divisor);
556 validate_fail ();
557 }
558 tp = refmpn_malloc_limbs (un);
559 cy = refmpn_mul_1 (tp, qp, un, divisor);
560 cy += refmpn_add_1 (tp, tp, un, r);
561 if (cy != uh || refmpn_cmp (tp, up, un) != 0)
562 {
563 gmp_printf ("Incorrect result, size %ld.\n"
564 "d = %Mx, u = %Mx, %Nx\n"
565 "got: r = %Mx, q = %Nx\n"
566 "q d + r = %Mx, %Nx",
567 (long) un,
568 divisor, uh, up, un,
569 r, qp, un,
570 cy, tp, un);
571 validate_fail ();
572 }
573 free (tp);
574}
575
576
577void
578validate_sqrtrem (void)
579{
580 mp_srcptr orig_ptr = s[0].p;
581 mp_size_t orig_size = size;
582 mp_size_t root_size = (size+1)/2;
583 mp_srcptr root_ptr = fun.d[0].p;
584 mp_size_t rem_size = fun.retval;
585 mp_srcptr rem_ptr = fun.d[1].p;
586 mp_size_t prod_size = 2*root_size;
587 mp_ptr p;
588 int error = 0;
589
590 if (rem_size < 0 || rem_size > size)
591 {
592 printf ("Bad remainder size retval %ld\n", (long) rem_size);
593 validate_fail ();
594 }
595
596 p = refmpn_malloc_limbs (prod_size);
597
598 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
599 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
600 {
601 printf ("Remainder bigger than 2*root\n");
602 error = 1;
603 }
604
605 refmpn_sqr (p, root_ptr, root_size);
606 if (rem_size != 0)
607 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
608 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
609 {
610 printf ("root^2+rem != original\n");
611 mpn_trace ("prod", p, prod_size);
612 error = 1;
613 }
614 free (p);
615
616 if (error)
617 validate_fail ();
618}
619
620void
621validate_sqrt (void)
622{
623 mp_srcptr orig_ptr = s[0].p;
624 mp_size_t orig_size = size;
625 mp_size_t root_size = (size+1)/2;
626 mp_srcptr root_ptr = fun.d[0].p;
627 int perf_pow = (fun.retval == 0);
628 mp_size_t prod_size = 2*root_size;
629 mp_ptr p;
630 int error = 0;
631
632 p = refmpn_malloc_limbs (prod_size);
633
634 refmpn_sqr (p, root_ptr, root_size);
635 MPN_NORMALIZE (p, prod_size);
636 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != - !perf_pow)
637 {
638 printf ("root^2 bigger than original, or wrong return value.\n");
639 mpn_trace ("prod...", p, prod_size);
640 error = 1;
641 }
642
643 refmpn_sub (p, orig_ptr,orig_size, p,prod_size);
644 MPN_NORMALIZE (p, prod_size);
645 if (prod_size >= root_size &&
646 refmpn_sub (p, p,prod_size, root_ptr, root_size) == 0 &&
647 refmpn_cmp_twosizes (p, prod_size, root_ptr, root_size) > 0)
648 {
649 printf ("(root+1)^2 smaller than original.\n");
650 mpn_trace ("prod", p, prod_size);
651 error = 1;
652 }
653 free (p);
654
655 if (error)
656 validate_fail ();
657}
658
659
660/* These types are indexes into the param[] array and are arbitrary so long
661 as they're all distinct and within the size of param[]. Renumber
662 whenever necessary or desired. */
663
enum {
  TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,

  TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
  TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,

  TYPE_MUL_1, TYPE_MUL_1C,

  TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,

  TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,

  TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6,
  TYPE_ADDMUL_7, TYPE_ADDMUL_8,

  TYPE_ADDSUB_N, TYPE_ADDSUB_NC,

  TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,

  TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,

  TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
  TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
  TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
  TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
  TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
  TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
  TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,

  TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
  TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
  TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,

  TYPE_ADDCND_N, TYPE_SUBCND_N,

  TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1,
  TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1,
  TYPE_DIV_QR_1N_PI1,
  TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,

  TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
  TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,

  TYPE_INVERT, TYPE_BINVERT,

  TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
  TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER,
  TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE,

  TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
  TYPE_XOR_N, TYPE_XNOR_N,

  TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
  TYPE_MULLO_N, TYPE_SQRLO, TYPE_MULMID_MN, TYPE_MULMID_N,

  TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,

  TYPE_SQRTREM, TYPE_SQRT, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST,

  TYPE_EXTRA
};

/* One parameter set per type, indexed by the TYPE_xxx values above and
   filled in by param_init().  TYPE_EXTRA is the last enumerator and gives
   the array its size, so new types must be added before it.  The
   numbering starts at 1, leaving param[0] unused by any named type.  */
struct try_t param[TYPE_EXTRA];
727
728
729void
730param_init (void)
731{
732 struct try_t *p;
733
734#define COPY(index) memcpy (p, &param[index], sizeof (*p))
735
736#define REFERENCE(fun) \
737 p->reference = (tryfun_t) fun; \
738 p->reference_name = #fun
739#define VALIDATE(fun) \
740 p->validate = fun; \
741 p->validate_name = #fun
742
743
744 p = &param[TYPE_ADD_N];
745 p->retval = 1;
746 p->dst[0] = 1;
747 p->src[0] = 1;
748 p->src[1] = 1;
749 REFERENCE (refmpn_add_n);
750
751 p = &param[TYPE_ADD_NC];
752 COPY (TYPE_ADD_N);
753 p->carry = CARRY_BIT;
754 REFERENCE (refmpn_add_nc);
755
756 p = &param[TYPE_SUB_N];
757 COPY (TYPE_ADD_N);
758 REFERENCE (refmpn_sub_n);
759
760 p = &param[TYPE_SUB_NC];
761 COPY (TYPE_ADD_NC);
762 REFERENCE (refmpn_sub_nc);
763
764 p = &param[TYPE_ADD];
765 COPY (TYPE_ADD_N);
766 p->size = SIZE_ALLOW_ZERO;
767 p->size2 = 1;
768 REFERENCE (refmpn_add);
769
770 p = &param[TYPE_SUB];
771 COPY (TYPE_ADD);
772 REFERENCE (refmpn_sub);
773
774
775 p = &param[TYPE_ADD_ERR1_N];
776 p->retval = 1;
777 p->dst[0] = 1;
778 p->dst[1] = 1;
779 p->src[0] = 1;
780 p->src[1] = 1;
781 p->src[2] = 1;
782 p->dst_size[1] = SIZE_2;
783 p->carry = CARRY_BIT;
784 p->overlap = OVERLAP_NOT_DST2;
785 REFERENCE (refmpn_add_err1_n);
786
787 p = &param[TYPE_SUB_ERR1_N];
788 COPY (TYPE_ADD_ERR1_N);
789 REFERENCE (refmpn_sub_err1_n);
790
791 p = &param[TYPE_ADD_ERR2_N];
792 COPY (TYPE_ADD_ERR1_N);
793 p->src[3] = 1;
794 p->dst_size[1] = SIZE_4;
795 REFERENCE (refmpn_add_err2_n);
796
797 p = &param[TYPE_SUB_ERR2_N];
798 COPY (TYPE_ADD_ERR2_N);
799 REFERENCE (refmpn_sub_err2_n);
800
801 p = &param[TYPE_ADD_ERR3_N];
802 COPY (TYPE_ADD_ERR2_N);
803 p->src[4] = 1;
804 p->dst_size[1] = SIZE_6;
805 REFERENCE (refmpn_add_err3_n);
806
807 p = &param[TYPE_SUB_ERR3_N];
808 COPY (TYPE_ADD_ERR3_N);
809 REFERENCE (refmpn_sub_err3_n);
810
811 p = &param[TYPE_ADDCND_N];
812 COPY (TYPE_ADD_N);
813 p->carry = CARRY_BIT;
814 REFERENCE (refmpn_cnd_add_n);
815
816 p = &param[TYPE_SUBCND_N];
817 COPY (TYPE_ADD_N);
818 p->carry = CARRY_BIT;
819 REFERENCE (refmpn_cnd_sub_n);
820
821
822 p = &param[TYPE_MUL_1];
823 p->retval = 1;
824 p->dst[0] = 1;
825 p->src[0] = 1;
826 p->multiplier = 1;
827 p->overlap = OVERLAP_LOW_TO_HIGH;
828 REFERENCE (refmpn_mul_1);
829
830 p = &param[TYPE_MUL_1C];
831 COPY (TYPE_MUL_1);
832 p->carry = CARRY_LIMB;
833 REFERENCE (refmpn_mul_1c);
834
835
836 p = &param[TYPE_MUL_2];
837 p->retval = 1;
838 p->dst[0] = 1;
839 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
840 p->src[0] = 1;
841 p->src[1] = 1;
842 p->msize = 2;
843 p->overlap = OVERLAP_NOT_SRC2;
844 REFERENCE (refmpn_mul_2);
845
846 p = &param[TYPE_MUL_3];
847 COPY (TYPE_MUL_2);
848 p->msize = 3;
849 REFERENCE (refmpn_mul_3);
850
851 p = &param[TYPE_MUL_4];
852 COPY (TYPE_MUL_2);
853 p->msize = 4;
854 REFERENCE (refmpn_mul_4);
855
856 p = &param[TYPE_MUL_5];
857 COPY (TYPE_MUL_2);
858 p->msize = 5;
859 REFERENCE (refmpn_mul_5);
860
861 p = &param[TYPE_MUL_6];
862 COPY (TYPE_MUL_2);
863 p->msize = 6;
864 REFERENCE (refmpn_mul_6);
865
866
867 p = &param[TYPE_ADDMUL_1];
868 p->retval = 1;
869 p->dst[0] = 1;
870 p->src[0] = 1;
871 p->multiplier = 1;
872 p->dst0_from_src1 = 1;
873 REFERENCE (refmpn_addmul_1);
874
875 p = &param[TYPE_ADDMUL_1C];
876 COPY (TYPE_ADDMUL_1);
877 p->carry = CARRY_LIMB;
878 REFERENCE (refmpn_addmul_1c);
879
880 p = &param[TYPE_SUBMUL_1];
881 COPY (TYPE_ADDMUL_1);
882 REFERENCE (refmpn_submul_1);
883
884 p = &param[TYPE_SUBMUL_1C];
885 COPY (TYPE_ADDMUL_1C);
886 REFERENCE (refmpn_submul_1c);
887
888
889 p = &param[TYPE_ADDMUL_2];
890 p->retval = 1;
891 p->dst[0] = 1;
892 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
893 p->src[0] = 1;
894 p->src[1] = 1;
895 p->msize = 2;
896 p->dst0_from_src1 = 1;
897 p->overlap = OVERLAP_NONE;
898 REFERENCE (refmpn_addmul_2);
899
900 p = &param[TYPE_ADDMUL_3];
901 COPY (TYPE_ADDMUL_2);
902 p->msize = 3;
903 REFERENCE (refmpn_addmul_3);
904
905 p = &param[TYPE_ADDMUL_4];
906 COPY (TYPE_ADDMUL_2);
907 p->msize = 4;
908 REFERENCE (refmpn_addmul_4);
909
910 p = &param[TYPE_ADDMUL_5];
911 COPY (TYPE_ADDMUL_2);
912 p->msize = 5;
913 REFERENCE (refmpn_addmul_5);
914
915 p = &param[TYPE_ADDMUL_6];
916 COPY (TYPE_ADDMUL_2);
917 p->msize = 6;
918 REFERENCE (refmpn_addmul_6);
919
920 p = &param[TYPE_ADDMUL_7];
921 COPY (TYPE_ADDMUL_2);
922 p->msize = 7;
923 REFERENCE (refmpn_addmul_7);
924
925 p = &param[TYPE_ADDMUL_8];
926 COPY (TYPE_ADDMUL_2);
927 p->msize = 8;
928 REFERENCE (refmpn_addmul_8);
929
930
931 p = &param[TYPE_AND_N];
932 p->dst[0] = 1;
933 p->src[0] = 1;
934 p->src[1] = 1;
935 REFERENCE (refmpn_and_n);
936
937 p = &param[TYPE_ANDN_N];
938 COPY (TYPE_AND_N);
939 REFERENCE (refmpn_andn_n);
940
941 p = &param[TYPE_NAND_N];
942 COPY (TYPE_AND_N);
943 REFERENCE (refmpn_nand_n);
944
945 p = &param[TYPE_IOR_N];
946 COPY (TYPE_AND_N);
947 REFERENCE (refmpn_ior_n);
948
949 p = &param[TYPE_IORN_N];
950 COPY (TYPE_AND_N);
951 REFERENCE (refmpn_iorn_n);
952
953 p = &param[TYPE_NIOR_N];
954 COPY (TYPE_AND_N);
955 REFERENCE (refmpn_nior_n);
956
957 p = &param[TYPE_XOR_N];
958 COPY (TYPE_AND_N);
959 REFERENCE (refmpn_xor_n);
960
961 p = &param[TYPE_XNOR_N];
962 COPY (TYPE_AND_N);
963 REFERENCE (refmpn_xnor_n);
964
965
966 p = &param[TYPE_ADDSUB_N];
967 p->retval = 1;
968 p->dst[0] = 1;
969 p->dst[1] = 1;
970 p->src[0] = 1;
971 p->src[1] = 1;
972 REFERENCE (refmpn_add_n_sub_n);
973
974 p = &param[TYPE_ADDSUB_NC];
975 COPY (TYPE_ADDSUB_N);
976 p->carry = CARRY_4;
977 REFERENCE (refmpn_add_n_sub_nc);
978
979
980 p = &param[TYPE_COPY];
981 p->dst[0] = 1;
982 p->src[0] = 1;
983 p->overlap = OVERLAP_NONE;
984 p->size = SIZE_ALLOW_ZERO;
985 REFERENCE (refmpn_copy);
986
987 p = &param[TYPE_COPYI];
988 p->dst[0] = 1;
989 p->src[0] = 1;
990 p->overlap = OVERLAP_LOW_TO_HIGH;
991 p->size = SIZE_ALLOW_ZERO;
992 REFERENCE (refmpn_copyi);
993
994 p = &param[TYPE_COPYD];
995 p->dst[0] = 1;
996 p->src[0] = 1;
997 p->overlap = OVERLAP_HIGH_TO_LOW;
998 p->size = SIZE_ALLOW_ZERO;
999 REFERENCE (refmpn_copyd);
1000
1001 p = &param[TYPE_COM];
1002 p->dst[0] = 1;
1003 p->src[0] = 1;
1004 REFERENCE (refmpn_com);
1005
1006
1007 p = &param[TYPE_ADDLSH1_N];
1008 COPY (TYPE_ADD_N);
1009 REFERENCE (refmpn_addlsh1_n);
1010
1011 p = &param[TYPE_ADDLSH2_N];
1012 COPY (TYPE_ADD_N);
1013 REFERENCE (refmpn_addlsh2_n);
1014
1015 p = &param[TYPE_ADDLSH_N];
1016 COPY (TYPE_ADD_N);
1017 p->shift = 1;
1018 REFERENCE (refmpn_addlsh_n);
1019
1020 p = &param[TYPE_ADDLSH1_N_IP1];
1021 p->retval = 1;
1022 p->dst[0] = 1;
1023 p->src[0] = 1;
1024 p->dst0_from_src1 = 1;
1025 REFERENCE (refmpn_addlsh1_n_ip1);
1026
1027 p = &param[TYPE_ADDLSH2_N_IP1];
1028 COPY (TYPE_ADDLSH1_N_IP1);
1029 REFERENCE (refmpn_addlsh2_n_ip1);
1030
1031 p = &param[TYPE_ADDLSH_N_IP1];
1032 COPY (TYPE_ADDLSH1_N_IP1);
1033 p->shift = 1;
1034 REFERENCE (refmpn_addlsh_n_ip1);
1035
1036 p = &param[TYPE_ADDLSH1_N_IP2];
1037 COPY (TYPE_ADDLSH1_N_IP1);
1038 REFERENCE (refmpn_addlsh1_n_ip2);
1039
1040 p = &param[TYPE_ADDLSH2_N_IP2];
1041 COPY (TYPE_ADDLSH1_N_IP1);
1042 REFERENCE (refmpn_addlsh2_n_ip2);
1043
1044 p = &param[TYPE_ADDLSH_N_IP2];
1045 COPY (TYPE_ADDLSH_N_IP1);
1046 REFERENCE (refmpn_addlsh_n_ip2);
1047
1048 p = &param[TYPE_SUBLSH1_N];
1049 COPY (TYPE_ADD_N);
1050 REFERENCE (refmpn_sublsh1_n);
1051
1052 p = &param[TYPE_SUBLSH2_N];
1053 COPY (TYPE_ADD_N);
1054 REFERENCE (refmpn_sublsh2_n);
1055
1056 p = &param[TYPE_SUBLSH_N];
1057 COPY (TYPE_ADDLSH_N);
1058 REFERENCE (refmpn_sublsh_n);
1059
1060 p = &param[TYPE_SUBLSH1_N_IP1];
1061 COPY (TYPE_ADDLSH1_N_IP1);
1062 REFERENCE (refmpn_sublsh1_n_ip1);
1063
1064 p = &param[TYPE_SUBLSH2_N_IP1];
1065 COPY (TYPE_ADDLSH1_N_IP1);
1066 REFERENCE (refmpn_sublsh2_n_ip1);
1067
1068 p = &param[TYPE_SUBLSH_N_IP1];
1069 COPY (TYPE_ADDLSH_N_IP1);
1070 REFERENCE (refmpn_sublsh_n_ip1);
1071
1072 p = &param[TYPE_RSBLSH1_N];
1073 COPY (TYPE_ADD_N);
1074 REFERENCE (refmpn_rsblsh1_n);
1075
1076 p = &param[TYPE_RSBLSH2_N];
1077 COPY (TYPE_ADD_N);
1078 REFERENCE (refmpn_rsblsh2_n);
1079
1080 p = &param[TYPE_RSBLSH_N];
1081 COPY (TYPE_ADDLSH_N);
1082 REFERENCE (refmpn_rsblsh_n);
1083
1084 p = &param[TYPE_RSH1ADD_N];
1085 COPY (TYPE_ADD_N);
1086 REFERENCE (refmpn_rsh1add_n);
1087
1088 p = &param[TYPE_RSH1SUB_N];
1089 COPY (TYPE_ADD_N);
1090 REFERENCE (refmpn_rsh1sub_n);
1091
1092
1093 p = &param[TYPE_ADDLSH1_NC];
1094 COPY (TYPE_ADDLSH1_N);
1095 p->carry = CARRY_3;
1096 REFERENCE (refmpn_addlsh1_nc);
1097
1098 p = &param[TYPE_ADDLSH2_NC];
1099 COPY (TYPE_ADDLSH2_N);
1100 p->carry = CARRY_4; /* FIXME */
1101 REFERENCE (refmpn_addlsh2_nc);
1102
1103 p = &param[TYPE_ADDLSH_NC];
1104 COPY (TYPE_ADDLSH_N);
1105 p->carry = CARRY_BIT; /* FIXME */
1106 REFERENCE (refmpn_addlsh_nc);
1107
1108 p = &param[TYPE_SUBLSH1_NC];
1109 COPY (TYPE_ADDLSH1_NC);
1110 REFERENCE (refmpn_sublsh1_nc);
1111
1112 p = &param[TYPE_SUBLSH2_NC];
1113 COPY (TYPE_ADDLSH2_NC);
1114 REFERENCE (refmpn_sublsh2_nc);
1115
1116 p = &param[TYPE_SUBLSH_NC];
1117 COPY (TYPE_ADDLSH_NC);
1118 REFERENCE (refmpn_sublsh_nc);
1119
1120 p = &param[TYPE_RSBLSH1_NC];
1121 COPY (TYPE_RSBLSH1_N);
1122 p->carry = CARRY_BIT; /* FIXME */
1123 REFERENCE (refmpn_rsblsh1_nc);
1124
1125 p = &param[TYPE_RSBLSH2_NC];
1126 COPY (TYPE_RSBLSH2_N);
1127 p->carry = CARRY_4; /* FIXME */
1128 REFERENCE (refmpn_rsblsh2_nc);
1129
1130 p = &param[TYPE_RSBLSH_NC];
1131 COPY (TYPE_RSBLSH_N);
1132 p->carry = CARRY_BIT; /* FIXME */
1133 REFERENCE (refmpn_rsblsh_nc);
1134
1135
1136 p = &param[TYPE_MOD_1];
1137 p->retval = 1;
1138 p->src[0] = 1;
1139 p->size = SIZE_ALLOW_ZERO;
1140 p->divisor = DIVISOR_LIMB;
1141 REFERENCE (refmpn_mod_1);
1142
1143 p = &param[TYPE_MOD_1C];
1144 COPY (TYPE_MOD_1);
1145 p->carry = CARRY_DIVISOR;
1146 REFERENCE (refmpn_mod_1c);
1147
1148 p = &param[TYPE_DIVMOD_1];
1149 COPY (TYPE_MOD_1);
1150 p->dst[0] = 1;
1151 REFERENCE (refmpn_divmod_1);
1152
1153 p = &param[TYPE_DIVMOD_1C];
1154 COPY (TYPE_DIVMOD_1);
1155 p->carry = CARRY_DIVISOR;
1156 REFERENCE (refmpn_divmod_1c);
1157
1158 p = &param[TYPE_DIVREM_1];
1159 COPY (TYPE_DIVMOD_1);
1160 p->size2 = SIZE_FRACTION;
1161 p->dst_size[0] = SIZE_SUM;
1162 REFERENCE (refmpn_divrem_1);
1163
1164 p = &param[TYPE_DIVREM_1C];
1165 COPY (TYPE_DIVREM_1);
1166 p->carry = CARRY_DIVISOR;
1167 REFERENCE (refmpn_divrem_1c);
1168
1169 p = &param[TYPE_PREINV_DIVREM_1];
1170 COPY (TYPE_DIVREM_1);
1171 p->size = SIZE_YES; /* ie. no size==0 */
1172 REFERENCE (refmpn_preinv_divrem_1);
1173
1174 p = &param[TYPE_DIV_QR_1N_PI1];
1175 p->retval = 1;
1176 p->src[0] = 1;
1177 p->src[1] = 1;
1178 /* SIZE_1 not supported. Always uses low limb only. */
1179 p->size2 = 1;
1180 p->dst[0] = 1;
1181 p->divisor = DIVISOR_NORM;
1182 p->data = DATA_DIV_QR_1;
1183 VALIDATE (validate_div_qr_1_pi1);
1184
1185 p = &param[TYPE_PREINV_MOD_1];
1186 p->retval = 1;
1187 p->src[0] = 1;
1188 p->divisor = DIVISOR_NORM;
1189 REFERENCE (refmpn_preinv_mod_1);
1190
1191 p = &param[TYPE_MOD_34LSUB1];
1192 p->retval = 1;
1193 p->src[0] = 1;
1194 VALIDATE (validate_mod_34lsub1);
1195
1196 p = &param[TYPE_UDIV_QRNND];
1197 p->retval = 1;
1198 p->src[0] = 1;
1199 p->dst[0] = 1;
1200 p->dst_size[0] = SIZE_1;
1201 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
1202 p->data = DATA_UDIV_QRNND;
1203 p->overlap = OVERLAP_NONE;
1204 REFERENCE (refmpn_udiv_qrnnd);
1205
1206 p = &param[TYPE_UDIV_QRNND_R];
1207 COPY (TYPE_UDIV_QRNND);
1208 REFERENCE (refmpn_udiv_qrnnd_r);
1209
1210
1211 p = &param[TYPE_DIVEXACT_1];
1212 p->dst[0] = 1;
1213 p->src[0] = 1;
1214 p->divisor = DIVISOR_LIMB;
1215 p->data = DATA_MULTIPLE_DIVISOR;
1216 VALIDATE (validate_divexact_1);
1217 REFERENCE (refmpn_divmod_1);
1218
1219 p = &param[TYPE_BDIV_Q_1];
1220 p->dst[0] = 1;
1221 p->src[0] = 1;
1222 p->divisor = DIVISOR_LIMB;
1223 VALIDATE (validate_bdiv_q_1);
1224
1225 p = &param[TYPE_DIVEXACT_BY3];
1226 p->retval = 1;
1227 p->dst[0] = 1;
1228 p->src[0] = 1;
1229 REFERENCE (refmpn_divexact_by3);
1230
1231 p = &param[TYPE_DIVEXACT_BY3C];
1232 COPY (TYPE_DIVEXACT_BY3);
1233 p->carry = CARRY_3;
1234 REFERENCE (refmpn_divexact_by3c);
1235
1236
1237 p = &param[TYPE_MODEXACT_1_ODD];
1238 p->retval = 1;
1239 p->src[0] = 1;
1240 p->divisor = DIVISOR_ODD;
1241 VALIDATE (validate_modexact_1_odd);
1242
1243 p = &param[TYPE_MODEXACT_1C_ODD];
1244 COPY (TYPE_MODEXACT_1_ODD);
1245 p->carry = CARRY_LIMB;
1246 VALIDATE (validate_modexact_1c_odd);
1247
1248
1249 p = &param[TYPE_GCD_1];
1250 p->retval = 1;
1251 p->src[0] = 1;
1252 p->data = DATA_NON_ZERO;
1253 p->divisor = DIVISOR_LIMB;
1254 REFERENCE (refmpn_gcd_1);
1255
1256 p = &param[TYPE_GCD];
1257 p->retval = 1;
1258 p->dst[0] = 1;
1259 p->src[0] = 1;
1260 p->src[1] = 1;
1261 p->size2 = 1;
1262 p->dst_size[0] = SIZE_RETVAL;
1263 p->overlap = OVERLAP_NOT_SRCS;
1264 p->data = DATA_GCD;
1265 REFERENCE (refmpn_gcd);
1266
1267
1268 p = &param[TYPE_MPZ_LEGENDRE];
1269 p->retval = 1;
1270 p->src[0] = 1;
1271 p->size = SIZE_ALLOW_ZERO;
1272 p->src[1] = 1;
1273 p->data = DATA_SRC1_ODD_PRIME;
1274 p->size2 = 1;
1275 p->carry = CARRY_BIT;
1276 p->carry_sign = 1;
1277 REFERENCE (refmpz_legendre);
1278
1279 p = &param[TYPE_MPZ_JACOBI];
1280 p->retval = 1;
1281 p->src[0] = 1;
1282 p->size = SIZE_ALLOW_ZERO;
1283 p->src[1] = 1;
1284 p->data = DATA_SRC1_ODD;
1285 p->size2 = 1;
1286 p->carry = CARRY_BIT;
1287 p->carry_sign = 1;
1288 REFERENCE (refmpz_jacobi);
1289
1290 p = &param[TYPE_MPZ_KRONECKER];
1291 p->retval = 1;
1292 p->src[0] = 1;
1293 p->size = SIZE_ALLOW_ZERO;
1294 p->src[1] = 1;
1295 p->data = 0;
1296 p->size2 = 1;
1297 p->carry = CARRY_4;
1298 p->carry_sign = 1;
1299 REFERENCE (refmpz_kronecker);
1300
1301
1302 p = &param[TYPE_MPZ_KRONECKER_UI];
1303 p->retval = 1;
1304 p->src[0] = 1;
1305 p->size = SIZE_ALLOW_ZERO;
1306 p->multiplier = 1;
1307 p->carry = CARRY_BIT;
1308 REFERENCE (refmpz_kronecker_ui);
1309
1310 p = &param[TYPE_MPZ_KRONECKER_SI];
1311 COPY (TYPE_MPZ_KRONECKER_UI);
1312 REFERENCE (refmpz_kronecker_si);
1313
1314 p = &param[TYPE_MPZ_UI_KRONECKER];
1315 COPY (TYPE_MPZ_KRONECKER_UI);
1316 REFERENCE (refmpz_ui_kronecker);
1317
1318 p = &param[TYPE_MPZ_SI_KRONECKER];
1319 COPY (TYPE_MPZ_KRONECKER_UI);
1320 REFERENCE (refmpz_si_kronecker);
1321
1322
1323 p = &param[TYPE_SQR];
1324 p->dst[0] = 1;
1325 p->src[0] = 1;
1326 p->dst_size[0] = SIZE_SUM;
1327 p->overlap = OVERLAP_NONE;
1328 REFERENCE (refmpn_sqr);
1329
1330 p = &param[TYPE_MUL_N];
1331 COPY (TYPE_SQR);
1332 p->src[1] = 1;
1333 REFERENCE (refmpn_mul_n);
1334
1335 p = &param[TYPE_MULLO_N];
1336 COPY (TYPE_MUL_N);
1337 p->dst_size[0] = 0;
1338 REFERENCE (refmpn_mullo_n);
1339
1340 p = &param[TYPE_SQRLO];
1341 COPY (TYPE_SQR);
1342 p->dst_size[0] = 0;
1343 REFERENCE (refmpn_sqrlo);
1344
1345 p = &param[TYPE_MUL_MN];
1346 COPY (TYPE_MUL_N);
1347 p->size2 = 1;
1348 REFERENCE (refmpn_mul_basecase);
1349
1350 p = &param[TYPE_MULMID_MN];
1351 COPY (TYPE_MUL_MN);
1352 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1353 REFERENCE (refmpn_mulmid_basecase);
1354
1355 p = &param[TYPE_MULMID_N];
1356 COPY (TYPE_MUL_N);
1357 p->size = SIZE_ODD;
1358 p->size2 = SIZE_CEIL_HALF;
1359 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1360 REFERENCE (refmpn_mulmid_n);
1361
1362 p = &param[TYPE_UMUL_PPMM];
1363 p->retval = 1;
1364 p->src[0] = 1;
1365 p->dst[0] = 1;
1366 p->dst_size[0] = SIZE_1;
1367 p->overlap = OVERLAP_NONE;
1368 REFERENCE (refmpn_umul_ppmm);
1369
1370 p = &param[TYPE_UMUL_PPMM_R];
1371 COPY (TYPE_UMUL_PPMM);
1372 REFERENCE (refmpn_umul_ppmm_r);
1373
1374
1375 p = &param[TYPE_RSHIFT];
1376 p->retval = 1;
1377 p->dst[0] = 1;
1378 p->src[0] = 1;
1379 p->shift = 1;
1380 p->overlap = OVERLAP_LOW_TO_HIGH;
1381 REFERENCE (refmpn_rshift);
1382
1383 p = &param[TYPE_LSHIFT];
1384 COPY (TYPE_RSHIFT);
1385 p->overlap = OVERLAP_HIGH_TO_LOW;
1386 REFERENCE (refmpn_lshift);
1387
1388 p = &param[TYPE_LSHIFTC];
1389 COPY (TYPE_RSHIFT);
1390 p->overlap = OVERLAP_HIGH_TO_LOW;
1391 REFERENCE (refmpn_lshiftc);
1392
1393
1394 p = &param[TYPE_POPCOUNT];
1395 p->retval = 1;
1396 p->src[0] = 1;
1397 REFERENCE (refmpn_popcount);
1398
1399 p = &param[TYPE_HAMDIST];
1400 COPY (TYPE_POPCOUNT);
1401 p->src[1] = 1;
1402 REFERENCE (refmpn_hamdist);
1403
1404
1405 p = &param[TYPE_SBPI1_DIV_QR];
1406 p->retval = 1;
1407 p->dst[0] = 1;
1408 p->dst[1] = 1;
1409 p->src[0] = 1;
1410 p->src[1] = 1;
1411 p->data = DATA_SRC1_HIGHBIT;
1412 p->size2 = 1;
1413 p->dst_size[0] = SIZE_DIFF;
1414 p->overlap = OVERLAP_NONE;
1415 REFERENCE (refmpn_sb_div_qr);
1416
1417 p = &param[TYPE_TDIV_QR];
1418 p->dst[0] = 1;
1419 p->dst[1] = 1;
1420 p->src[0] = 1;
1421 p->src[1] = 1;
1422 p->size2 = 1;
1423 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1424 p->dst_size[1] = SIZE_SIZE2;
1425 p->overlap = OVERLAP_NONE;
1426 REFERENCE (refmpn_tdiv_qr);
1427
1428 p = &param[TYPE_SQRTREM];
1429 p->retval = 1;
1430 p->dst[0] = 1;
1431 p->dst[1] = 1;
1432 p->src[0] = 1;
1433 p->dst_size[0] = SIZE_CEIL_HALF;
1434 p->dst_size[1] = SIZE_RETVAL;
1435 p->overlap = OVERLAP_NONE;
1436 VALIDATE (validate_sqrtrem);
1437 REFERENCE (refmpn_sqrtrem);
1438
1439 p = &param[TYPE_SQRT];
1440 p->retval = 1;
1441 p->dst[0] = 1;
1442 p->dst[1] = 0;
1443 p->src[0] = 1;
1444 p->dst_size[0] = SIZE_CEIL_HALF;
1445 p->overlap = OVERLAP_NONE;
1446 VALIDATE (validate_sqrt);
1447
1448 p = &param[TYPE_ZERO];
1449 p->dst[0] = 1;
1450 p->size = SIZE_ALLOW_ZERO;
1451 REFERENCE (refmpn_zero);
1452
1453 p = &param[TYPE_GET_STR];
1454 p->retval = 1;
1455 p->src[0] = 1;
1456 p->size = SIZE_ALLOW_ZERO;
1457 p->dst[0] = 1;
1458 p->dst[1] = 1;
1459 p->dst_size[0] = SIZE_GET_STR;
1460 p->dst_bytes[0] = 1;
1461 p->overlap = OVERLAP_NONE;
1462 REFERENCE (refmpn_get_str);
1463
1464 p = &param[TYPE_BINVERT];
1465 p->dst[0] = 1;
1466 p->src[0] = 1;
1467 p->data = DATA_SRC0_ODD;
1468 p->overlap = OVERLAP_NONE;
1469 REFERENCE (refmpn_binvert);
1470
1471 p = &param[TYPE_INVERT];
1472 p->dst[0] = 1;
1473 p->src[0] = 1;
1474 p->data = DATA_SRC0_HIGHBIT;
1475 p->overlap = OVERLAP_NONE;
1476 REFERENCE (refmpn_invert);
1477
1478#ifdef EXTRA_PARAM_INIT
1479 EXTRA_PARAM_INIT
1480#endif
1481}
1482
1483
/* The following are macros if there are no native versions, so wrap them in
   functions that can be in choice_array[].  */
1486
1487void
1488MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1489{ MPN_COPY (rp, sp, size); }
1490
1491void
1492MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1493{ MPN_COPY_INCR (rp, sp, size); }
1494
1495void
1496MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1497{ MPN_COPY_DECR (rp, sp, size); }
1498
1499void
1500__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1501{ __GMPN_COPY (rp, sp, size); }
1502
#ifdef __GMPN_COPY_INCR
/* Plain-function wrapper around __GMPN_COPY_INCR; only built when that
   macro is defined.  */
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  __GMPN_COPY_INCR (rp, sp, size);
}
#endif
1508
1509void
1510mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1511{ mpn_com (rp, sp, size); }
1512
1513void
1514mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1515{ mpn_and_n (rp, s1, s2, size); }
1516
1517void
1518mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1519{ mpn_andn_n (rp, s1, s2, size); }
1520
1521void
1522mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1523{ mpn_nand_n (rp, s1, s2, size); }
1524
1525void
1526mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1527{ mpn_ior_n (rp, s1, s2, size); }
1528
1529void
1530mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1531{ mpn_iorn_n (rp, s1, s2, size); }
1532
1533void
1534mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1535{ mpn_nior_n (rp, s1, s2, size); }
1536
1537void
1538mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1539{ mpn_xor_n (rp, s1, s2, size); }
1540
1541void
1542mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1543{ mpn_xnor_n (rp, s1, s2, size); }
1544
1545mp_limb_t
1546udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1547{
1548 mp_limb_t q;
1549 udiv_qrnnd (q, *remptr, n1, n0, d);
1550 return q;
1551}
1552
1553mp_limb_t
1554mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1555{
1556 return mpn_divexact_by3 (rp, sp, size);
1557}
1558
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
/* Wrapper for mpn_addlsh1_n_ip1, built only when a native version exists.  */
mp_limb_t
mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_addlsh1_n_ip1 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
/* Wrapper for mpn_addlsh2_n_ip1, built only when a native version exists.  */
mp_limb_t
mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_addlsh2_n_ip1 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
/* Wrapper for mpn_addlsh_n_ip1 (shift count SH), built only when a native
   version exists.  */
mp_limb_t
mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret;

  ret = mpn_addlsh_n_ip1 (rp, sp, size, sh);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
/* Wrapper for mpn_addlsh1_n_ip2, built only when a native version exists.  */
mp_limb_t
mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_addlsh1_n_ip2 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
/* Wrapper for mpn_addlsh2_n_ip2, built only when a native version exists.  */
mp_limb_t
mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_addlsh2_n_ip2 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
/* Wrapper for mpn_addlsh_n_ip2 (shift count SH), built only when a native
   version exists.  */
mp_limb_t
mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret;

  ret = mpn_addlsh_n_ip2 (rp, sp, size, sh);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
/* Wrapper for mpn_sublsh1_n_ip1, built only when a native version exists.  */
mp_limb_t
mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_sublsh1_n_ip1 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
/* Wrapper for mpn_sublsh2_n_ip1, built only when a native version exists.  */
mp_limb_t
mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_limb_t  ret;

  ret = mpn_sublsh2_n_ip1 (rp, sp, size);
  return ret;
}
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
/* Wrapper for mpn_sublsh_n_ip1 (shift count SH), built only when a native
   version exists.  */
mp_limb_t
mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  mp_limb_t  ret;

  ret = mpn_sublsh_n_ip1 (rp, sp, size, sh);
  return ret;
}
#endif
1622
1623mp_limb_t
1624mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1625{
1626 return mpn_modexact_1_odd (ptr, size, divisor);
1627}
1628
1629void
1630mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1631{
1632 mp_ptr tspace;
1633 TMP_DECL;
1634 TMP_MARK;
1635 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1636 mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1637 TMP_FREE;
1638}
1639void
1640mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1641{
1642 mp_ptr tspace;
1643 TMP_DECL;
1644 TMP_MARK;
1645 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1646 mpn_toom2_sqr (dst, src, size, tspace);
1647 TMP_FREE;
1648}
1649void
1650mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1651{
1652 mp_ptr tspace;
1653 TMP_DECL;
1654 TMP_MARK;
1655 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1656 mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1657 TMP_FREE;
1658}
1659void
1660mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1661{
1662 mp_ptr tspace;
1663 TMP_DECL;
1664 TMP_MARK;
1665 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1666 mpn_toom3_sqr (dst, src, size, tspace);
1667 TMP_FREE;
1668}
1669void
1670mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1671{
1672 mp_ptr tspace;
1673 TMP_DECL;
1674 TMP_MARK;
1675 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1676 mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1677 TMP_FREE;
1678}
1679void
1680mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1681{
1682 mp_ptr tspace;
1683 TMP_DECL;
1684 TMP_MARK;
1685 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1686 mpn_toom4_sqr (dst, src, size, tspace);
1687 TMP_FREE;
1688}
1689
1690void
1691mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
1692 mp_size_t size)
1693{
1694 mp_ptr tspace;
1695 mp_size_t n;
1696 TMP_DECL;
1697 TMP_MARK;
1698 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
1699 mpn_toom42_mulmid (dst, src1, src2, size, tspace);
1700 TMP_FREE;
1701}
1702
1703mp_limb_t
1704umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1705{
1706 mp_limb_t high;
1707 umul_ppmm (high, *lowptr, m1, m2);
1708 return high;
1709}
1710
1711void
1712MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1713{ MPN_ZERO (ptr, size); }
1714
1715mp_size_t
1716mpn_sqrt_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1717{ return mpn_sqrtrem (dst, NULL, src, size); }
1718
1719struct choice_t {
1720 const char *name;
1721 tryfun_t function;
1722 int type;
1723 mp_size_t minsize;
1724};
1725
/* Expand to the first two initializers of a struct choice_t: the routine's
   name as a string, then its address cast to tryfun_t.  TRY_FUNFUN uses the
   "fun_fun" wrapper function but keeps the plain name for the string.  */
#define TRY(fun)         #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun)  #fun, (tryfun_t) fun##_fun
1728
1729const struct choice_t choice_array[] = {
1730 { TRY(mpn_add), TYPE_ADD },
1731 { TRY(mpn_sub), TYPE_SUB },
1732
1733 { TRY(mpn_add_n), TYPE_ADD_N },
1734 { TRY(mpn_sub_n), TYPE_SUB_N },
1735
1736#if HAVE_NATIVE_mpn_add_nc
1737 { TRY(mpn_add_nc), TYPE_ADD_NC },
1738#endif
1739#if HAVE_NATIVE_mpn_sub_nc
1740 { TRY(mpn_sub_nc), TYPE_SUB_NC },
1741#endif
1742
1743#if HAVE_NATIVE_mpn_add_n_sub_n
1744 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N },
1745#endif
1746#if HAVE_NATIVE_mpn_add_n_sub_nc
1747 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
1748#endif
1749
1750 { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N },
1751 { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N },
1752 { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N },
1753 { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N },
1754 { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N },
1755 { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N },
1756
1757 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1758 { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1759#if HAVE_NATIVE_mpn_addmul_1c
1760 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1761#endif
1762#if HAVE_NATIVE_mpn_submul_1c
1763 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1764#endif
1765
1766#if HAVE_NATIVE_mpn_addmul_2
1767 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1768#endif
1769#if HAVE_NATIVE_mpn_addmul_3
1770 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1771#endif
1772#if HAVE_NATIVE_mpn_addmul_4
1773 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1774#endif
1775#if HAVE_NATIVE_mpn_addmul_5
1776 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1777#endif
1778#if HAVE_NATIVE_mpn_addmul_6
1779 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1780#endif
1781#if HAVE_NATIVE_mpn_addmul_7
1782 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1783#endif
1784#if HAVE_NATIVE_mpn_addmul_8
1785 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1786#endif
1787
1788 { TRY_FUNFUN(mpn_com), TYPE_COM },
1789
1790 { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1791 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1792 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1793
1794 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1795#ifdef __GMPN_COPY_INCR
1796 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1797#endif
1798
1799#if HAVE_NATIVE_mpn_copyi
1800 { TRY(mpn_copyi), TYPE_COPYI },
1801#endif
1802#if HAVE_NATIVE_mpn_copyd
1803 { TRY(mpn_copyd), TYPE_COPYD },
1804#endif
1805
1806 { TRY(mpn_cnd_add_n), TYPE_ADDCND_N },
1807 { TRY(mpn_cnd_sub_n), TYPE_SUBCND_N },
1808#if HAVE_NATIVE_mpn_addlsh1_n == 1
1809 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1810#endif
1811#if HAVE_NATIVE_mpn_addlsh2_n == 1
1812 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
1813#endif
1814#if HAVE_NATIVE_mpn_addlsh_n
1815 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1816#endif
1817#if HAVE_NATIVE_mpn_addlsh1_n_ip1
1818 { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
1819#endif
1820#if HAVE_NATIVE_mpn_addlsh2_n_ip1
1821 { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
1822#endif
1823#if HAVE_NATIVE_mpn_addlsh_n_ip1
1824 { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
1825#endif
1826#if HAVE_NATIVE_mpn_addlsh1_n_ip2
1827 { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
1828#endif
1829#if HAVE_NATIVE_mpn_addlsh2_n_ip2
1830 { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
1831#endif
1832#if HAVE_NATIVE_mpn_addlsh_n_ip2
1833 { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
1834#endif
1835#if HAVE_NATIVE_mpn_sublsh1_n == 1
1836 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1837#endif
1838#if HAVE_NATIVE_mpn_sublsh2_n == 1
1839 { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
1840#endif
1841#if HAVE_NATIVE_mpn_sublsh_n
1842 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1843#endif
1844#if HAVE_NATIVE_mpn_sublsh1_n_ip1
1845 { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
1846#endif
1847#if HAVE_NATIVE_mpn_sublsh2_n_ip1
1848 { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
1849#endif
1850#if HAVE_NATIVE_mpn_sublsh_n_ip1
1851 { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
1852#endif
1853#if HAVE_NATIVE_mpn_rsblsh1_n == 1
1854 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
1855#endif
1856#if HAVE_NATIVE_mpn_rsblsh2_n == 1
1857 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
1858#endif
1859#if HAVE_NATIVE_mpn_rsblsh_n
1860 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
1861#endif
1862#if HAVE_NATIVE_mpn_rsh1add_n
1863 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1864#endif
1865#if HAVE_NATIVE_mpn_rsh1sub_n
1866 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1867#endif
1868
1869#if HAVE_NATIVE_mpn_addlsh1_nc == 1
1870 { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
1871#endif
1872#if HAVE_NATIVE_mpn_addlsh2_nc == 1
1873 { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
1874#endif
1875#if HAVE_NATIVE_mpn_addlsh_nc
1876 { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
1877#endif
1878#if HAVE_NATIVE_mpn_sublsh1_nc == 1
1879 { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
1880#endif
1881#if HAVE_NATIVE_mpn_sublsh2_nc == 1
1882 { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
1883#endif
1884#if HAVE_NATIVE_mpn_sublsh_nc
1885 { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
1886#endif
1887#if HAVE_NATIVE_mpn_rsblsh1_nc
1888 { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
1889#endif
1890#if HAVE_NATIVE_mpn_rsblsh2_nc
1891 { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
1892#endif
1893#if HAVE_NATIVE_mpn_rsblsh_nc
1894 { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
1895#endif
1896
1897 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1898 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1899 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1900 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1901 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1902 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1903 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1904 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1905
1906 { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1907#if USE_PREINV_DIVREM_1
1908 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1909#endif
1910 { TRY(mpn_mod_1), TYPE_MOD_1 },
1911#if USE_PREINV_MOD_1
1912 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1913#endif
1914#if HAVE_NATIVE_mpn_divrem_1c
1915 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1916#endif
1917#if HAVE_NATIVE_mpn_mod_1c
1918 { TRY(mpn_mod_1c), TYPE_MOD_1C },
1919#endif
1920 { TRY(mpn_div_qr_1n_pi1), TYPE_DIV_QR_1N_PI1 },
1921#if GMP_NUMB_BITS % 4 == 0
1922 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1923#endif
1924
1925 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1926#if HAVE_NATIVE_mpn_udiv_qrnnd
1927 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1928#endif
1929#if HAVE_NATIVE_mpn_udiv_qrnnd_r
1930 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1931#endif
1932
1933 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1934 { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 },
1935 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1936 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1937
1938 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1939 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1940
1941
1942 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
1943 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1944
1945 { TRY(mpn_mul_1), TYPE_MUL_1 },
1946#if HAVE_NATIVE_mpn_mul_1c
1947 { TRY(mpn_mul_1c), TYPE_MUL_1C },
1948#endif
1949#if HAVE_NATIVE_mpn_mul_2
1950 { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1951#endif
1952#if HAVE_NATIVE_mpn_mul_3
1953 { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
1954#endif
1955#if HAVE_NATIVE_mpn_mul_4
1956 { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
1957#endif
1958#if HAVE_NATIVE_mpn_mul_5
1959 { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
1960#endif
1961#if HAVE_NATIVE_mpn_mul_6
1962 { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
1963#endif
1964
1965 { TRY(mpn_rshift), TYPE_RSHIFT },
1966 { TRY(mpn_lshift), TYPE_LSHIFT },
1967 { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1968
1969
1970 { TRY(mpn_mul_basecase), TYPE_MUL_MN },
1971 { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
1972 { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
1973 { TRY(mpn_sqrlo_basecase), TYPE_SQRLO },
1974 { TRY(mpn_sqrlo), TYPE_SQRLO },
1975#if SQR_TOOM2_THRESHOLD > 0
1976 { TRY(mpn_sqr_basecase), TYPE_SQR },
1977#endif
1978
1979 { TRY(mpn_mul), TYPE_MUL_MN },
1980 { TRY(mpn_mul_n), TYPE_MUL_N },
1981 { TRY(mpn_sqr), TYPE_SQR },
1982
1983 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1984#if HAVE_NATIVE_mpn_umul_ppmm
1985 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1986#endif
1987#if HAVE_NATIVE_mpn_umul_ppmm_r
1988 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1989#endif
1990
1991 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
1992 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
1993 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE },
1994 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE },
1995 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
1996 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
1997
1998 { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 },
1999 { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 },
2000 { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N,
2001 (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },
2002
2003 { TRY(mpn_gcd_1), TYPE_GCD_1 },
2004 { TRY(mpn_gcd), TYPE_GCD },
2005 { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE },
2006 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
2007 { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER },
2008 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
2009 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
2010 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
2011 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
2012
2013 { TRY(mpn_popcount), TYPE_POPCOUNT },
2014 { TRY(mpn_hamdist), TYPE_HAMDIST },
2015
2016 { TRY(mpn_sqrtrem), TYPE_SQRTREM },
2017 { TRY_FUNFUN(mpn_sqrt), TYPE_SQRT },
2018
2019 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
2020
2021 { TRY(mpn_get_str), TYPE_GET_STR },
2022
2023 { TRY(mpn_binvert), TYPE_BINVERT },
2024 { TRY(mpn_invert), TYPE_INVERT },
2025
2026#ifdef EXTRA_ROUTINES
2027 EXTRA_ROUTINES
2028#endif
2029};
2030
2031const struct choice_t *choice = NULL;
2032
2033
2034void
2035mprotect_maybe (void *addr, size_t len, int prot)
2036{
2037 if (!option_redzones)
2038 return;
2039
2040#if HAVE_MPROTECT
2041 if (mprotect (addr, len, prot) != 0)
2042 {
2043 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
2044 addr, (unsigned) len, prot, strerror (errno));
2045 exit (1);
2046 }
2047#else
2048 {
2049 static int warned = 0;
2050 if (!warned)
2051 {
2052 fprintf (stderr,
2053 "mprotect not available, bounds testing not performed\n");
2054 warned = 1;
2055 }
2056 }
2057#endif
2058}
2059
/* Round "a" up to a multiple of "m".  "a" is returned unchanged when it is
   already a multiple.  "m" must be non-zero.  The remainder is kept in a
   size_t so nothing is lost where unsigned long is narrower than size_t.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
2072
2073
2074/* On some systems it seems that only an mmap'ed region can be mprotect'ed,
2075 for instance HP-UX 10.
2076
2077 mmap will almost certainly return a pointer already aligned to a page
2078 boundary, but it's easy enough to share the alignment handling with the
2079 malloc case. */
2080
2081void
2082malloc_region (struct region_t *r, mp_size_t n)
2083{
2084 mp_ptr p;
2085 size_t nbytes;
2086
2087 ASSERT ((pagesize % GMP_LIMB_BYTES) == 0);
2088
2089 n = round_up_multiple (n, PAGESIZE_LIMBS);
2090 r->size = n;
2091
2092 nbytes = n*GMP_LIMB_BYTES + 2*REDZONE_BYTES + pagesize;
2093
2094#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
2095#define MAP_ANON MAP_ANONYMOUS
2096#endif
2097
2098#if HAVE_MMAP && defined (MAP_ANON)
2099 /* note must pass fd=-1 for MAP_ANON on BSD */
2100 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
2101 if (p == (void *) -1)
2102 {
2103 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
2104 (unsigned) nbytes, strerror (errno));
2105 exit (1);
2106 }
2107#else
2108 p = (mp_ptr) malloc (nbytes);
2109 ASSERT_ALWAYS (p != NULL);
2110#endif
2111
2112 p = (mp_ptr) align_pointer (p, pagesize);
2113
2114 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
2115 p += REDZONE_LIMBS;
2116 r->ptr = p;
2117
2118 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
2119}
2120
2121void
2122mprotect_region (const struct region_t *r, int prot)
2123{
2124 mprotect_maybe (r->ptr, r->size, prot);
2125}
2126
2127
2128/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
2129 and CARRY_4 */
2130mp_limb_t carry_array[] = {
2131 0, 1, 2, 3,
2132 4,
2133 CNST_LIMB(1) << 8,
2134 CNST_LIMB(1) << 16,
2135 GMP_NUMB_MAX
2136};
2137int carry_index;
2138
/* Number of carry values to try for the current test type: a prefix of
   carry_array for the CARRY_BIT/CARRY_3/CARRY_4 cases, the whole array plus
   CARRY_RANDOMS random values for CARRY_LIMB and CARRY_DIVISOR, otherwise
   just one pass.  */
#define CARRY_COUNT                                                     \
  ((tr->carry == CARRY_BIT) ? 2                                         \
   : (tr->carry == CARRY_3) ? 3                                         \
   : (tr->carry == CARRY_4) ? 4                                         \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)            \
     ? numberof(carry_array) + CARRY_RANDOMS                            \
   : 1)

/* Fill dst,size with random data, using refmpn_random for odd values of
   index and refmpn_random2 for even ones.  */
#define MPN_RANDOM_ALT(index,dst,size)                                  \
  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))

/* Loop header stepping "carry" through carry_array and then through random
   values.  The value is set up before the loop test, and is reduced modulo
   "divisor" for CARRY_DIVISOR.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
        ? (carry = carry_array[carry_index])                            \
        : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
         (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
         carry_index < CARRY_COUNT;                                     \
       carry_index++)
2160
2161
2162mp_limb_t multiplier_array[] = {
2163 0, 1, 2, 3,
2164 CNST_LIMB(1) << 8,
2165 CNST_LIMB(1) << 16,
2166 GMP_NUMB_MAX - 2,
2167 GMP_NUMB_MAX - 1,
2168 GMP_NUMB_MAX
2169};
2170int multiplier_index;
2171
2172mp_limb_t divisor_array[] = {
2173 1, 2, 3,
2174 CNST_LIMB(1) << 8,
2175 CNST_LIMB(1) << 16,
2176 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
2177 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
2178 GMP_NUMB_HIGHBIT,
2179 GMP_NUMB_HIGHBIT + 1,
2180 GMP_NUMB_MAX - 2,
2181 GMP_NUMB_MAX - 1,
2182 GMP_NUMB_MAX
2183};
2184
2185int divisor_index;
2186
/* Loop header stepping "var" first through the entries of "array" and then
   through random single-limb values, for "limit" iterations in total, with
   "index" as the counter.  The "randoms" and "cond" parameters are accepted
   but not used in the expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
        ? (var = array[index])                                          \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
         index < limit;                                                 \
       index++)

/* Multipliers: the fixed table plus MULTIPLIER_RANDOMS random values when
   the test type uses a multiplier, otherwise a single pass.  */
#define MULTIPLIER_COUNT                                                \
  (tr->multiplier                                                       \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS                   \
   : 1)

#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
                  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)

/* Divisors: the fixed table plus DIVISOR_RANDOMS random values when the
   test type uses a divisor, otherwise a single pass.  */
#define DIVISOR_COUNT                                                   \
  (tr->divisor                                                          \
   ? numberof (divisor_array) + DIVISOR_RANDOMS                         \
   : 1)

#define DIVISOR_ITERATION                                               \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
                  DIVISOR_RANDOMS, TRY_DIVISOR)
2214
2215
2216/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
2217 d[0] or d[1] respectively, -1 means a separate (write-protected)
2218 location. */
2219
2220struct overlap_t {
2221 int s[NUM_SOURCES];
2222} overlap_array[] = {
2223 { { -1, -1, -1, -1, -1 } },
2224 { { 0, -1, -1, -1, -1 } },
2225 { { -1, 0, -1, -1, -1 } },
2226 { { 0, 0, -1, -1, -1 } },
2227 { { 1, -1, -1, -1, -1 } },
2228 { { -1, 1, -1, -1, -1 } },
2229 { { 1, 1, -1, -1, -1 } },
2230 { { 0, 1, -1, -1, -1 } },
2231 { { 1, 0, -1, -1, -1 } },
2232};
2233
2234struct overlap_t *overlap, *overlap_limit;
2235
/* How many leading rows of overlap_array apply to the current test type.
   The OVERLAP_xxx restrictions are checked first; otherwise the count
   follows from which destinations and sources the type uses.  */
#define OVERLAP_COUNT                           \
  ((tr->overlap & OVERLAP_NONE)       ? 1       \
   : (tr->overlap & OVERLAP_NOT_SRCS) ? 3       \
   : (tr->overlap & OVERLAP_NOT_SRC2) ? 2       \
   : (tr->overlap & OVERLAP_NOT_DST2) ? 4       \
   : tr->dst[1]                       ? 9       \
   : tr->src[1]                       ? 4       \
   : tr->dst[0]                       ? 2       \
   : 1)

/* Loop header pointing "overlap" at each applicable row in turn.  */
#define OVERLAP_ITERATION                               \
  for (overlap = overlap_array,                         \
         overlap_limit = overlap_array + OVERLAP_COUNT; \
       overlap < overlap_limit;                         \
       overlap++)
2251
2252
/* Number base; NOTE(review): presumably the base used for the mpn_get_str
   tests -- confirm against the callers.  */
int  base = 10;

/* t_rand selects the random generator used by t_random for DATA_TRAND, and
   takes the values 0 .. T_RAND_COUNT-1.  */
#define T_RAND_COUNT  2
int  t_rand;
2257
2258void
2259t_random (mp_ptr ptr, mp_size_t n)
2260{
2261 if (n == 0)
2262 return;
2263
2264 switch (option_data) {
2265 case DATA_TRAND:
2266 switch (t_rand) {
2267 case 0: refmpn_random (ptr, n); break;
2268 case 1: refmpn_random2 (ptr, n); break;
2269 default: abort();
2270 }
2271 break;
2272 case DATA_SEQ:
2273 {
2274 static mp_limb_t counter = 0;
2275 mp_size_t i;
2276 for (i = 0; i < n; i++)
2277 ptr[i] = ++counter;
2278 }
2279 break;
2280 case DATA_ZEROS:
2281 refmpn_zero (ptr, n);
2282 break;
2283 case DATA_FFS:
2284 refmpn_fill (ptr, n, GMP_NUMB_MAX);
2285 break;
2286 case DATA_2FD:
2287 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
2288 inducing the q1_ff special case in the mul-by-inverse part of some
2289 versions of divrem_1 and mod_1. */
2290 refmpn_fill (ptr, n, (mp_limb_t) -1);
2291 ptr[n-1] = 2;
2292 ptr[0] -= 2;
2293 break;
2294
2295 default:
2296 abort();
2297 }
2298}
2299#define T_RAND_ITERATION \
2300 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
2301
2302
2303void
2304print_each (const struct each_t *e)
2305{
2306 int i;
2307
2308 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
2309 if (tr->retval)
2310 mpn_trace (" retval", &e->retval, 1);
2311
2312 for (i = 0; i < NUM_DESTS; i++)
2313 {
2314 if (tr->dst[i])
2315 {
2316 if (tr->dst_bytes[i])
2317 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2318 else
2319 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2320 printf (" located %p\n", (void *) (e->d[i].p));
2321 }
2322 }
2323
2324 for (i = 0; i < NUM_SOURCES; i++)
2325 if (tr->src[i])
2326 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
2327}
2328
2329
2330void
2331print_all (void)
2332{
2333 int i;
2334
2335 printf ("\n");
2336 printf ("size %ld\n", (long) size);
2337 if (tr->size2)
2338 printf ("size2 %ld\n", (long) size2);
2339
2340 for (i = 0; i < NUM_DESTS; i++)
2341 if (d[i].size != size)
2342 printf ("d[%d].size %ld\n", i, (long) d[i].size);
2343
2344 if (tr->multiplier)
2345 mpn_trace (" multiplier", &multiplier, 1);
2346 if (tr->divisor)
2347 mpn_trace (" divisor", &divisor, 1);
2348 if (tr->shift)
2349 printf (" shift %lu\n", shift);
2350 if (tr->carry)
2351 mpn_trace (" carry", &carry, 1);
2352 if (tr->msize)
2353 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
2354
2355 for (i = 0; i < NUM_DESTS; i++)
2356 if (tr->dst[i])
2357 printf (" d[%d] %s, align %ld, size %ld\n",
2358 i, d[i].high ? "high" : "low",
2359 (long) d[i].align, (long) d[i].size);
2360
2361 for (i = 0; i < NUM_SOURCES; i++)
2362 {
2363 if (tr->src[i])
2364 {
2365 printf (" s[%d] %s, align %ld, ",
2366 i, s[i].high ? "high" : "low", (long) s[i].align);
2367 switch (overlap->s[i]) {
2368 case -1:
2369 printf ("no overlap\n");
2370 break;
2371 default:
2372 printf ("==d[%d]%s\n",
2373 overlap->s[i],
2374 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
2375 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
2376 : "");
2377 break;
2378 }
2379 printf (" s[%d]=", i);
2380 if (tr->carry_sign && (carry & (1 << i)))
2381 printf ("-");
2382 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
2383 }
2384 }
2385
2386 if (tr->dst0_from_src1)
2387 mpn_trace (" d[0]", s[1].region.ptr, size);
2388
2389 if (tr->reference)
2390 print_each (&ref);
2391 print_each (&fun);
2392}
2393
2394void
2395compare (void)
2396{
2397 int error = 0;
2398 int i;
2399
2400 if (tr->retval && ref.retval != fun.retval)
2401 {
2402 gmp_printf ("Different return values (%Mu, %Mu)\n",
2403 ref.retval, fun.retval);
2404 error = 1;
2405 }
2406
2407 for (i = 0; i < NUM_DESTS; i++)
2408 {
2409 switch (tr->dst_size[i]) {
2410 case SIZE_RETVAL:
2411 case SIZE_GET_STR:
2412 d[i].size = ref.retval;
2413 break;
2414 }
2415 }
2416
2417 for (i = 0; i < NUM_DESTS; i++)
2418 {
2419 if (! tr->dst[i])
2420 continue;
2421
2422 if (tr->dst_bytes[i])
2423 {
2424 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2425 {
2426 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2427 i,
2428 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2429 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2430 error = 1;
2431 }
2432 }
2433 else
2434 {
2435 if (d[i].size != 0
2436 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2437 {
2438 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2439 i,
2440 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2441 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2442 error = 1;
2443 }
2444 }
2445 }
2446
2447 if (error)
2448 {
2449 print_all();
2450 abort();
2451 }
2452}
2453
2454
2455/* The functions are cast if the return value should be a long rather than
2456 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2457 might not be enough if some actual calling conventions checking is
2458 implemented on a long long limb system. */
2459
/* Call FUNCTION with the argument list that goes with choice->type.

   Destinations and sources come from E (e->d[].p and e->s[].p); the other
   operands (size, size2, shift, carry, multiplier, multiplier_N, divisor,
   base) are the file-scope values for the current test case.  Where a case
   assigns it, the routine's result is stored in e->retval; the cases that
   call without assigning leave e->retval as the caller set it.  A type with
   no case here prints a message and aborts.  */
2460void
2461call (struct each_t *e, tryfun_t function)
2462{
2463 switch (choice->type) {
2464 case TYPE_ADD:
2465 case TYPE_SUB:
2466 e->retval = CALLING_CONVENTIONS (function)
2467 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2468 break;
2469
2470 case TYPE_ADD_N:
2471 case TYPE_SUB_N:
2472 case TYPE_ADDLSH1_N:
2473 case TYPE_ADDLSH2_N:
2474 case TYPE_SUBLSH1_N:
2475 case TYPE_SUBLSH2_N:
2476 case TYPE_RSBLSH1_N:
2477 case TYPE_RSBLSH2_N:
2478 case TYPE_RSH1ADD_N:
2479 case TYPE_RSH1SUB_N:
2480 e->retval = CALLING_CONVENTIONS (function)
2481 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2482 break;
2483 case TYPE_ADDLSH_N:
2484 case TYPE_SUBLSH_N:
2485 case TYPE_RSBLSH_N:
2486 e->retval = CALLING_CONVENTIONS (function)
2487 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2488 break;
2489 case TYPE_ADDLSH_NC:
2490 case TYPE_SUBLSH_NC:
2491 case TYPE_RSBLSH_NC:
2492 e->retval = CALLING_CONVENTIONS (function)
2493 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
2494 break;
2495 case TYPE_ADDLSH1_NC:
2496 case TYPE_ADDLSH2_NC:
2497 case TYPE_SUBLSH1_NC:
2498 case TYPE_SUBLSH2_NC:
2499 case TYPE_RSBLSH1_NC:
2500 case TYPE_RSBLSH2_NC:
2501 case TYPE_ADD_NC:
2502 case TYPE_SUB_NC:
2503 e->retval = CALLING_CONVENTIONS (function)
2504 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2505 break;
2506 case TYPE_ADDCND_N:
2507 case TYPE_SUBCND_N:
2508 e->retval = CALLING_CONVENTIONS (function)
2509 (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
2510 break;
2511 case TYPE_ADD_ERR1_N:
2512 case TYPE_SUB_ERR1_N:
2513 e->retval = CALLING_CONVENTIONS (function)
2514 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
2515 break;
2516 case TYPE_ADD_ERR2_N:
2517 case TYPE_SUB_ERR2_N:
2518 e->retval = CALLING_CONVENTIONS (function)
2519 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
2520 break;
2521 case TYPE_ADD_ERR3_N:
2522 case TYPE_SUB_ERR3_N:
2523 e->retval = CALLING_CONVENTIONS (function)
2524 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
2525 break;
2526
2527 case TYPE_MUL_1:
2528 case TYPE_ADDMUL_1:
2529 case TYPE_SUBMUL_1:
2530 e->retval = CALLING_CONVENTIONS (function)
2531 (e->d[0].p, e->s[0].p, size, multiplier);
2532 break;
2533 case TYPE_MUL_1C:
2534 case TYPE_ADDMUL_1C:
2535 case TYPE_SUBMUL_1C:
2536 e->retval = CALLING_CONVENTIONS (function)
2537 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2538 break;
2539
/* The multi-limb multiplier cases pass multiplier_N and abort instead of
   calling when size is 1.  */
2540 case TYPE_MUL_2:
2541 case TYPE_MUL_3:
2542 case TYPE_MUL_4:
2543 case TYPE_MUL_5:
2544 case TYPE_MUL_6:
2545 if (size == 1)
2546 abort ();
2547 e->retval = CALLING_CONVENTIONS (function)
2548 (e->d[0].p, e->s[0].p, size, multiplier_N);
2549 break;
2550
2551 case TYPE_ADDMUL_2:
2552 case TYPE_ADDMUL_3:
2553 case TYPE_ADDMUL_4:
2554 case TYPE_ADDMUL_5:
2555 case TYPE_ADDMUL_6:
2556 case TYPE_ADDMUL_7:
2557 case TYPE_ADDMUL_8:
2558 if (size == 1)
2559 abort ();
2560 e->retval = CALLING_CONVENTIONS (function)
2561 (e->d[0].p, e->s[0].p, size, multiplier_N);
2562 break;
2563
2564 case TYPE_AND_N:
2565 case TYPE_ANDN_N:
2566 case TYPE_NAND_N:
2567 case TYPE_IOR_N:
2568 case TYPE_IORN_N:
2569 case TYPE_NIOR_N:
2570 case TYPE_XOR_N:
2571 case TYPE_XNOR_N:
2572 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2573 break;
2574
2575 case TYPE_ADDSUB_N:
2576 e->retval = CALLING_CONVENTIONS (function)
2577 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2578 break;
2579 case TYPE_ADDSUB_NC:
2580 e->retval = CALLING_CONVENTIONS (function)
2581 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2582 break;
2583
2584 case TYPE_COPY:
2585 case TYPE_COPYI:
2586 case TYPE_COPYD:
2587 case TYPE_COM:
2588 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2589 break;
2590
2591 case TYPE_ADDLSH1_N_IP1:
2592 case TYPE_ADDLSH2_N_IP1:
2593 case TYPE_ADDLSH1_N_IP2:
2594 case TYPE_ADDLSH2_N_IP2:
2595 case TYPE_SUBLSH1_N_IP1:
2596 case TYPE_SUBLSH2_N_IP1:
2597 case TYPE_DIVEXACT_BY3:
2598 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2599 break;
2600 case TYPE_DIVEXACT_BY3C:
2601 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2602 carry);
2603 break;
2604
2605
2606 case TYPE_DIVMOD_1:
2607 case TYPE_DIVEXACT_1:
2608 case TYPE_BDIV_Q_1:
2609 e->retval = CALLING_CONVENTIONS (function)
2610 (e->d[0].p, e->s[0].p, size, divisor);
2611 break;
2612 case TYPE_DIVMOD_1C:
2613 e->retval = CALLING_CONVENTIONS (function)
2614 (e->d[0].p, e->s[0].p, size, divisor, carry);
2615 break;
2616 case TYPE_DIVREM_1:
2617 e->retval = CALLING_CONVENTIONS (function)
2618 (e->d[0].p, size2, e->s[0].p, size, divisor);
2619 break;
2620 case TYPE_DIVREM_1C:
2621 e->retval = CALLING_CONVENTIONS (function)
2622 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2623 break;
/* The shift declared in this case hides the outer one: it is the leading
   zero count of divisor, and dinv is the inverse of divisor shifted up by
   that count.  */
2624 case TYPE_PREINV_DIVREM_1:
2625 {
2626 mp_limb_t dinv;
2627 unsigned shift;
2628 shift = refmpn_count_leading_zeros (divisor);
2629 dinv = refmpn_invert_limb (divisor << shift);
2630 e->retval = CALLING_CONVENTIONS (function)
2631 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2632 }
2633 break;
2634 case TYPE_MOD_1:
2635 case TYPE_MODEXACT_1_ODD:
2636 e->retval = CALLING_CONVENTIONS (function)
2637 (e->s[0].p, size, divisor);
2638 break;
2639 case TYPE_MOD_1C:
2640 case TYPE_MODEXACT_1C_ODD:
2641 e->retval = CALLING_CONVENTIONS (function)
2642 (e->s[0].p, size, divisor, carry);
2643 break;
2644 case TYPE_PREINV_MOD_1:
2645 e->retval = CALLING_CONVENTIONS (function)
2646 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2647 break;
2648 case TYPE_DIV_QR_1N_PI1:
2649 {
2650 mp_limb_t dinv = refmpn_invert_limb (divisor);
2651 e->retval = CALLING_CONVENTIONS (function)
2652 (e->d[0].p, e->s[0].p, size, e->s[1].p[0], divisor, dinv);
2653 break;
2654 }
2655
2656 case TYPE_MOD_34LSUB1:
2657 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2658 break;
2659
2660 case TYPE_UDIV_QRNND:
2661 e->retval = CALLING_CONVENTIONS (function)
2662 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2663 break;
2664 case TYPE_UDIV_QRNND_R:
2665 e->retval = CALLING_CONVENTIONS (function)
2666 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2667 break;
2668
2669 case TYPE_SBPI1_DIV_QR:
2670 {
2671 gmp_pi1_t dinv;
2672 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2673 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2674 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2675 e->retval = CALLING_CONVENTIONS (function)
2676 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2677 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2678 }
2679 break;
2680
2681 case TYPE_TDIV_QR:
2682 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2683 e->s[0].p, size, e->s[1].p, size2);
2684 break;
2685
2686 case TYPE_GCD_1:
2687 /* Must have a non-zero src, but this probably isn't the best way to do
2688 it. */
2689 if (refmpn_zero_p (e->s[0].p, size))
2690 e->retval = 0;
2691 else
2692 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2693 break;
2694
2695 case TYPE_GCD:
2696 /* Sources are destroyed, so they're saved and replaced, but a general
2697 approach to this might be better. Note that it's still e->s[0].p and
2698 e->s[1].p that are passed, to get the desired alignments. */
2699 {
2700 mp_ptr s0 = refmpn_malloc_limbs (size);
2701 mp_ptr s1 = refmpn_malloc_limbs (size2);
2702 refmpn_copyi (s0, e->s[0].p, size);
2703 refmpn_copyi (s1, e->s[1].p, size2);
2704
2705 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2706 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2707 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2708 e->s[0].p, size,
2709 e->s[1].p, size2);
2710 refmpn_copyi (e->s[0].p, s0, size);
2711 refmpn_copyi (e->s[1].p, s1, size2);
2712 free (s0);
2713 free (s1);
2714 }
2715 break;
2716
/* Both c[0] and c[1] are loaded from e->s[0].p[0], each forced non-zero.
   NOTE(review): c[1] may have been meant to come from e->s[0].p[1] --
   confirm before changing, since ref and fun currently see the same data.  */
2717 case TYPE_GCD_FINDA:
2718 {
2719 /* FIXME: do this with a flag */
2720 mp_limb_t c[2];
2721 c[0] = e->s[0].p[0];
2722 c[0] += (c[0] == 0);
2723 c[1] = e->s[0].p[0];
2724 c[1] += (c[1] == 0);
2725 e->retval = CALLING_CONVENTIONS (function) (c);
2726 }
2727 break;
2728
/* The mpz cases build their operands directly on the source limbs by
   setting PTR and SIZ; carry selects the sign given to SIZ.  */
2729 case TYPE_MPZ_LEGENDRE:
2730 case TYPE_MPZ_JACOBI:
2731 {
2732 mpz_t a, b;
2733 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2734 PTR(b) = e->s[1].p; SIZ(b) = size2;
2735 e->retval = CALLING_CONVENTIONS (function) (a, b);
2736 }
2737 break;
2738 case TYPE_MPZ_KRONECKER:
2739 {
2740 mpz_t a, b;
2741 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2742 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2743 e->retval = CALLING_CONVENTIONS (function) (a, b);
2744 }
2745 break;
2746 case TYPE_MPZ_KRONECKER_UI:
2747 {
2748 mpz_t a;
2749 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2750 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2751 }
2752 break;
2753 case TYPE_MPZ_KRONECKER_SI:
2754 {
2755 mpz_t a;
2756 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2757 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2758 }
2759 break;
2760 case TYPE_MPZ_UI_KRONECKER:
2761 {
2762 mpz_t b;
2763 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2764 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2765 }
2766 break;
2767 case TYPE_MPZ_SI_KRONECKER:
2768 {
2769 mpz_t b;
2770 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2771 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2772 }
2773 break;
2774
2775 case TYPE_MUL_MN:
2776 case TYPE_MULMID_MN:
2777 CALLING_CONVENTIONS (function)
2778 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2779 break;
2780 case TYPE_MUL_N:
2781 case TYPE_MULLO_N:
2782 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2783 break;
2784 case TYPE_MULMID_N:
2785 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
2786 (size + 1) / 2);
2787 break;
2788 case TYPE_SQR:
2789 case TYPE_SQRLO:
2790 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2791 break;
2792
2793 case TYPE_UMUL_PPMM:
2794 e->retval = CALLING_CONVENTIONS (function)
2795 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2796 break;
2797 case TYPE_UMUL_PPMM_R:
2798 e->retval = CALLING_CONVENTIONS (function)
2799 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2800 break;
2801
2802 case TYPE_ADDLSH_N_IP1:
2803 case TYPE_ADDLSH_N_IP2:
2804 case TYPE_SUBLSH_N_IP1:
2805 case TYPE_LSHIFT:
2806 case TYPE_LSHIFTC:
2807 case TYPE_RSHIFT:
2808 e->retval = CALLING_CONVENTIONS (function)
2809 (e->d[0].p, e->s[0].p, size, shift);
2810 break;
2811
/* In the next four cases the function pointer is cast so the call is made
   through a type returning unsigned long (popcount, hamdist) or long
   (sqrtrem, sqrt) before the result is stored in e->retval.  */
2812 case TYPE_POPCOUNT:
2813 e->retval = (* (unsigned long (*)(ANYARGS))
2814 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2815 break;
2816 case TYPE_HAMDIST:
2817 e->retval = (* (unsigned long (*)(ANYARGS))
2818 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2819 break;
2820
2821 case TYPE_SQRTREM:
2822 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2823 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2824 break;
2825
2826 case TYPE_SQRT:
2827 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2828 (e->d[0].p, e->s[0].p, size);
2829 break;
2830
2831 case TYPE_ZERO:
2832 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2833 break;
2834
/* d[0] is treated as a byte buffer of d[0].size bytes.  The sizeinbase
   bytes handed to the function sit at the high end when d[0].high is set,
   else at the low end, and the remaining bytes are filled with 0xBA.  For a
   base that is not a power of 2 the function is given a copy of the source
   made in d[1] rather than the source itself; d[1] is zeroed afterwards.  */
2835 case TYPE_GET_STR:
2836 {
2837 size_t sizeinbase, fill;
2838 char *dst;
2839 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2840 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2841 fill = d[0].size - sizeinbase;
2842 if (d[0].high)
2843 {
2844 memset (e->d[0].p, 0xBA, fill);
2845 dst = (char *) e->d[0].p + fill;
2846 }
2847 else
2848 {
2849 dst = (char *) e->d[0].p;
2850 memset (dst + sizeinbase, 0xBA, fill);
2851 }
2852 if (POW2_P (base))
2853 {
2854 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2855 e->s[0].p, size);
2856 }
2857 else
2858 {
2859 refmpn_copy (e->d[1].p, e->s[0].p, size);
2860 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2861 e->d[1].p, size);
2862 }
2863 refmpn_zero (e->d[1].p, size); /* clobbered or unused */
2864 }
2865 break;
2866
/* Scratch space of the size given by the matching _itch function is
   allocated with TMP_ALLOC_LIMBS for the call and released with TMP_FREE.  */
2867 case TYPE_INVERT:
2868 {
2869 mp_ptr scratch;
2870 TMP_DECL;
2871 TMP_MARK;
2872 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2873 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2874 TMP_FREE;
2875 }
2876 break;
2877 case TYPE_BINVERT:
2878 {
2879 mp_ptr scratch;
2880 TMP_DECL;
2881 TMP_MARK;
2882 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2883 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2884 TMP_FREE;
2885 }
2886 break;
2887
2888#ifdef EXTRA_CALL
2889 EXTRA_CALL
2890#endif
2891
2892 default:
2893 printf ("Unknown routine type %d\n", choice->type);
2894 abort ();
2895 break;
2896 }
2897}
2898
2899
2900void
2901pointer_setup (struct each_t *e)
2902{
2903 int i, j;
2904
2905 for (i = 0; i < NUM_DESTS; i++)
2906 {
2907 switch (tr->dst_size[i]) {
2908 case 0:
2909 case SIZE_RETVAL: /* will be adjusted later */
2910 d[i].size = size;
2911 break;
2912
2913 case SIZE_1:
2914 d[i].size = 1;
2915 break;
2916 case SIZE_2:
2917 d[i].size = 2;
2918 break;
2919 case SIZE_3:
2920 d[i].size = 3;
2921 break;
2922 case SIZE_4:
2923 d[i].size = 4;
2924 break;
2925 case SIZE_6:
2926 d[i].size = 6;
2927 break;
2928
2929 case SIZE_PLUS_1:
2930 d[i].size = size+1;
2931 break;
2932 case SIZE_PLUS_MSIZE_SUB_1:
2933 d[i].size = size + tr->msize - 1;
2934 break;
2935
2936 case SIZE_SUM:
2937 if (tr->size2)
2938 d[i].size = size + size2;
2939 else
2940 d[i].size = 2*size;
2941 break;
2942
2943 case SIZE_SIZE2:
2944 d[i].size = size2;
2945 break;
2946
2947 case SIZE_DIFF:
2948 d[i].size = size - size2;
2949 break;
2950
2951 case SIZE_DIFF_PLUS_1:
2952 d[i].size = size - size2 + 1;
2953 break;
2954
2955 case SIZE_DIFF_PLUS_3:
2956 d[i].size = size - size2 + 3;
2957 break;
2958
2959 case SIZE_CEIL_HALF:
2960 d[i].size = (size+1)/2;
2961 break;
2962
2963 case SIZE_GET_STR:
2964 {
2965 mp_limb_t ff = GMP_NUMB_MAX;
2966 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2967 }
2968 break;
2969
2970 default:
2971 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2972 abort ();
2973 }
2974 }
2975
2976 /* establish e->d[].p destinations */
2977 for (i = 0; i < NUM_DESTS; i++)
2978 {
2979 mp_size_t offset = 0;
2980
2981 /* possible room for overlapping sources */
2982 for (j = 0; j < numberof (overlap->s); j++)
2983 if (overlap->s[j] == i)
2984 offset = MAX (offset, s[j].align);
2985
2986 if (d[i].high)
2987 {
2988 if (tr->dst_bytes[i])
2989 {
2990 e->d[i].p = (mp_ptr)
2991 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
2992 - d[i].size - d[i].align);
2993 }
2994 else
2995 {
2996 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2997 - d[i].size - d[i].align;
2998 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2999 e->d[i].p -= offset;
3000 }
3001 }
3002 else
3003 {
3004 if (tr->dst_bytes[i])
3005 {
3006 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
3007 }
3008 else
3009 {
3010 e->d[i].p = e->d[i].region.ptr + d[i].align;
3011 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3012 e->d[i].p += offset;
3013 }
3014 }
3015 }
3016
3017 /* establish e->s[].p sources */
3018 for (i = 0; i < NUM_SOURCES; i++)
3019 {
3020 int o = overlap->s[i];
3021 switch (o) {
3022 case -1:
3023 /* no overlap */
3024 e->s[i].p = s[i].p;
3025 break;
3026 case 0:
3027 case 1:
3028 /* overlap with d[o] */
3029 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3030 e->s[i].p = e->d[o].p - s[i].align;
3031 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
3032 e->s[i].p = e->d[o].p + s[i].align;
3033 else if (tr->size2 == SIZE_FRACTION)
3034 e->s[i].p = e->d[o].p + size2;
3035 else
3036 e->s[i].p = e->d[o].p;
3037 break;
3038 default:
3039 abort();
3040 break;
3041 }
3042 }
3043}
3044
3045
3046void
3047validate_fail (void)
3048{
3049 if (tr->reference)
3050 {
3051 trap_location = TRAP_REF;
3052 call (&ref, tr->reference);
3053 trap_location = TRAP_NOWHERE;
3054 }
3055
3056 print_all();
3057 abort();
3058}
3059
3060
3061void
3062try_one (void)
3063{
3064 int i;
3065
3066 if (option_spinner)
3067 spinner();
3068 spinner_count++;
3069
3070 trap_location = TRAP_SETUPS;
3071
3072 if (tr->divisor == DIVISOR_NORM)
3073 divisor |= GMP_NUMB_HIGHBIT;
3074 if (tr->divisor == DIVISOR_ODD)
3075 divisor |= 1;
3076
3077 for (i = 0; i < NUM_SOURCES; i++)
3078 {
3079 if (s[i].high)
3080 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
3081 else
3082 s[i].p = s[i].region.ptr + s[i].align;
3083 }
3084
3085 pointer_setup (&ref);
3086 pointer_setup (&fun);
3087
3088 ref.retval = 0x04152637;
3089 fun.retval = 0x8C9DAEBF;
3090
3091 t_random (multiplier_N, tr->msize);
3092
3093 for (i = 0; i < NUM_SOURCES; i++)
3094 {
3095 if (! tr->src[i])
3096 continue;
3097
3098 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
3099 t_random (s[i].p, SRC_SIZE(i));
3100
3101 switch (tr->data) {
3102 case DATA_NON_ZERO:
3103 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
3104 s[i].p[0] = 1;
3105 break;
3106
3107 case DATA_MULTIPLE_DIVISOR:
3108 /* same number of low zero bits as divisor */
3109 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
3110 refmpn_sub_1 (s[i].p, s[i].p, size,
3111 refmpn_mod_1 (s[i].p, size, divisor));
3112 break;
3113
3114 case DATA_GCD:
3115 /* s[1] no more bits than s[0] */
3116 if (i == 1 && size2 == size)
3117 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
3118
3119 /* high limb non-zero */
3120 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
3121
3122 /* odd */
3123 s[i].p[0] |= 1;
3124 break;
3125
3126 case DATA_SRC0_ODD:
3127 if (i == 0)
3128 s[i].p[0] |= 1;
3129 break;
3130
3131 case DATA_SRC1_ODD:
3132 if (i == 1)
3133 s[i].p[0] |= 1;
3134 break;
3135
3136 case DATA_SRC1_ODD_PRIME:
3137 if (i == 1)
3138 {
3139 if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
3140 && s[i].p[0] <=3)
3141 s[i].p[0] = 3;
3142 else
3143 {
3144 mpz_t p;
3145 mpz_init (p);
3146 for (;;)
3147 {
3148 _mpz_realloc (p, SRC_SIZE(i));
3149 MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
3150 SIZ(p) = SRC_SIZE(i);
3151 MPN_NORMALIZE (PTR(p), SIZ(p));
3152 mpz_nextprime (p, p);
3153 if (mpz_size (p) <= SRC_SIZE(i))
3154 break;
3155
3156 t_random (s[i].p, SRC_SIZE(i));
3157 }
3158 MPN_COPY (s[i].p, PTR(p), SIZ(p));
3159 if (SIZ(p) < SRC_SIZE(i))
3160 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
3161 mpz_clear (p);
3162 }
3163 }
3164 break;
3165
3166 case DATA_SRC1_HIGHBIT:
3167 if (i == 1)
3168 {
3169 if (tr->size2)
3170 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
3171 else
3172 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3173 }
3174 break;
3175
3176 case DATA_SRC0_HIGHBIT:
3177 if (i == 0)
3178 {
3179 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3180 }
3181 break;
3182
3183 case DATA_UDIV_QRNND:
3184 s[i].p[1] %= divisor;
3185 break;
3186 case DATA_DIV_QR_1:
3187 if (i == 1)
3188 s[i].p[0] %= divisor;
3189 break;
3190 }
3191
3192 mprotect_region (&s[i].region, PROT_READ);
3193 }
3194
3195 for (i = 0; i < NUM_DESTS; i++)
3196 {
3197 if (! tr->dst[i])
3198 continue;
3199
3200 if (tr->dst0_from_src1 && i==0)
3201 {
3202 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
3203 mp_size_t fill = MAX (0, d[0].size - copy);
3204 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
3205 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
3206 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
3207 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
3208 }
3209 else if (tr->dst_bytes[i])
3210 {
3211 memset (ref.d[i].p, 0xBA, d[i].size);
3212 memset (fun.d[i].p, 0xBA, d[i].size);
3213 }
3214 else
3215 {
3216 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
3217 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
3218 }
3219 }
3220
3221 for (i = 0; i < NUM_SOURCES; i++)
3222 {
3223 if (! tr->src[i])
3224 continue;
3225
3226 if (ref.s[i].p != s[i].p)
3227 {
3228 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
3229 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
3230 }
3231 }
3232
3233 if (option_print)
3234 print_all();
3235
3236 if (tr->validate != NULL)
3237 {
3238 trap_location = TRAP_FUN;
3239 call (&fun, choice->function);
3240 trap_location = TRAP_NOWHERE;
3241
3242 if (! CALLING_CONVENTIONS_CHECK ())
3243 {
3244 print_all();
3245 abort();
3246 }
3247
3248 (*tr->validate) ();
3249 }
3250 else
3251 {
3252 trap_location = TRAP_REF;
3253 call (&ref, tr->reference);
3254 trap_location = TRAP_FUN;
3255 call (&fun, choice->function);
3256 trap_location = TRAP_NOWHERE;
3257
3258 if (! CALLING_CONVENTIONS_CHECK ())
3259 {
3260 print_all();
3261 abort();
3262 }
3263
3264 compare ();
3265 }
3266}
3267
3268
/* Loop header stepping size up to option_lastsize.  The start is the
   largest of option_firstsize, choice->minsize and 1 (0 when tr->size is
   SIZE_ALLOW_ZERO).  For SIZE_ODD an even start is bumped by one and the
   step is 2, otherwise the step is 1.  */
3269#define SIZE_ITERATION \
3270 for (size = MAX3 (option_firstsize, \
3271 choice->minsize, \
3272 (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1), \
3273 size += (tr->size == SIZE_ODD) && !(size & 1); \
3274 size <= option_lastsize; \
3275 size += (tr->size == SIZE_ODD) ? 2 : 1)
3276
/* First size2 value for the current size, chosen by tr->size2: 2 for
   SIZE_2, option_firstsize2 for SIZE_FRACTION, (size+1)/2 for
   SIZE_CEIL_HALF, for any other non-zero setting the larger of
   choice->minsize and option_firstsize2 (1 if that option is 0), and 0 when
   tr->size2 is zero.  */
3277#define SIZE2_FIRST \
3278 (tr->size2 == SIZE_2 ? 2 \
3279 : tr->size2 == SIZE_FRACTION ? option_firstsize2 \
3280 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \
3281 : tr->size2 ? \
3282 MAX (choice->minsize, (option_firstsize2 != 0 \
3283 ? option_firstsize2 : 1)) \
3284 : 0)
3285
/* Last size2 value for the current size: 2 for SIZE_2, FRACTION_COUNT-1 for
   SIZE_FRACTION, (size+1)/2 for SIZE_CEIL_HALF, size for any other non-zero
   setting, and 0 when tr->size2 is zero.  */
3286#define SIZE2_LAST \
3287 (tr->size2 == SIZE_2 ? 2 \
3288 : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \
3289 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \
3290 : tr->size2 ? size \
3291 : 0)
3292
/* Loop header running size2 from SIZE2_FIRST to SIZE2_LAST inclusive.  */
3293#define SIZE2_ITERATION \
3294 for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)
3295
/* ALIGN_COUNT is ALIGNMENTS when cond is true and 1 otherwise;
   ALIGN_ITERATION runs w[n].align from 0 up to one below that count.  */
3296#define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1)
3297#define ALIGN_ITERATION(w,n,cond) \
3298 for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)
3299
/* HIGH_LIMIT is 1 when cond is non-zero and 0 otherwise, HIGH_COUNT is one
   more than that, and HIGH_ITERATION runs w[n].high from 0 to HIGH_LIMIT
   inclusive.  */
3300#define HIGH_LIMIT(cond) ((cond) != 0)
3301#define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1)
3302#define HIGH_ITERATION(w,n,cond) \
3303 for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)
3304
/* Largest shift to try: GMP_NUMB_BITS-1 when tr->shift is set, otherwise 1
   so that SHIFT_ITERATION runs its body once.  */
3305#define SHIFT_LIMIT \
3306 ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))
3307
/* Loop header running shift from 1 to SHIFT_LIMIT inclusive.  */
3308#define SHIFT_ITERATION \
3309 for (shift = 1; shift <= SHIFT_LIMIT; shift++)
3310
3311
3312void
3313try_many (void)
3314{
3315 int i;
3316
3317 {
3318 unsigned long total = 1;
3319
3320 total *= option_repetitions;
3321 total *= option_lastsize;
3322 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
3323 else if (tr->size2) total *= (option_lastsize+1)/2;
3324
3325 total *= SHIFT_LIMIT;
3326 total *= MULTIPLIER_COUNT;
3327 total *= DIVISOR_COUNT;
3328 total *= CARRY_COUNT;
3329 total *= T_RAND_COUNT;
3330
3331 total *= HIGH_COUNT (tr->dst[0]);
3332 total *= HIGH_COUNT (tr->dst[1]);
3333 total *= HIGH_COUNT (tr->src[0]);
3334 total *= HIGH_COUNT (tr->src[1]);
3335
3336 total *= ALIGN_COUNT (tr->dst[0]);
3337 total *= ALIGN_COUNT (tr->dst[1]);
3338 total *= ALIGN_COUNT (tr->src[0]);
3339 total *= ALIGN_COUNT (tr->src[1]);
3340
3341 total *= OVERLAP_COUNT;
3342
3343 printf ("%s %lu\n", choice->name, total);
3344 }
3345
3346 spinner_count = 0;
3347
3348 for (i = 0; i < option_repetitions; i++)
3349 SIZE_ITERATION
3350 SIZE2_ITERATION
3351
3352 SHIFT_ITERATION
3353 MULTIPLIER_ITERATION
3354 DIVISOR_ITERATION
3355 CARRY_ITERATION /* must be after divisor */
3356 T_RAND_ITERATION
3357
3358 HIGH_ITERATION(d,0, tr->dst[0])
3359 HIGH_ITERATION(d,1, tr->dst[1])
3360 HIGH_ITERATION(s,0, tr->src[0])
3361 HIGH_ITERATION(s,1, tr->src[1])
3362
3363 ALIGN_ITERATION(d,0, tr->dst[0])
3364 ALIGN_ITERATION(d,1, tr->dst[1])
3365 ALIGN_ITERATION(s,0, tr->src[0])
3366 ALIGN_ITERATION(s,1, tr->src[1])
3367
3368 OVERLAP_ITERATION
3369 try_one();
3370
3371 printf("\n");
3372}
3373
3374
3375/* Usually print_all() doesn't show much, but it might give a hint as to
3376 where the function was up to when it died. */
3377void
3378trap (int sig)
3379{
3380 const char *name = "noname";
3381
3382 switch (sig) {
3383 case SIGILL: name = "SIGILL"; break;
3384#ifdef SIGBUS
3385 case SIGBUS: name = "SIGBUS"; break;
3386#endif
3387 case SIGSEGV: name = "SIGSEGV"; break;
3388 case SIGFPE: name = "SIGFPE"; break;
3389 }
3390
3391 printf ("\n\nSIGNAL TRAP: %s\n", name);
3392
3393 switch (trap_location) {
3394 case TRAP_REF:
3395 printf (" in reference function: %s\n", tr->reference_name);
3396 break;
3397 case TRAP_FUN:
3398 printf (" in test function: %s\n", choice->name);
3399 print_all ();
3400 break;
3401 case TRAP_SETUPS:
3402 printf (" in parameter setups\n");
3403 print_all ();
3404 break;
3405 default:
3406 printf (" somewhere unknown\n");
3407 break;
3408 }
3409 exit (1);
3410}
3411
3412
3413void
3414try_init (void)
3415{
3416#if HAVE_GETPAGESIZE
3417 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
3418 know _SC_PAGESIZE. */
3419 pagesize = getpagesize ();
3420#else
3421#if HAVE_SYSCONF
3422 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
3423 {
3424 /* According to the linux man page, sysconf doesn't set errno */
3425 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
3426 exit (1);
3427 }
3428#else
3429Error, error, cannot get page size
3430#endif
3431#endif
3432
3433 printf ("pagesize is 0x%lX bytes\n", pagesize);
3434
3435 signal (SIGILL, trap);
3436#ifdef SIGBUS
3437 signal (SIGBUS, trap);
3438#endif
3439 signal (SIGSEGV, trap);
3440 signal (SIGFPE, trap);
3441
3442 {
3443 int i;
3444
3445 for (i = 0; i < NUM_SOURCES; i++)
3446 {
3447 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
3448 printf ("s[%d] %p to %p (0x%lX bytes)\n",
3449 i, (void *) (s[i].region.ptr),
3450 (void *) (s[i].region.ptr + s[i].region.size),
3451 (long) s[i].region.size * GMP_LIMB_BYTES);
3452 }
3453
3454#define INIT_EACH(e,es) \
3455 for (i = 0; i < NUM_DESTS; i++) \
3456 { \
3457 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
3458 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
3459 es, i, (void *) (e.d[i].region.ptr), \
3460 (void *) (e.d[i].region.ptr + e.d[i].region.size), \
3461 (long) e.d[i].region.size * GMP_LIMB_BYTES); \
3462 }
3463
3464 INIT_EACH(ref, "ref");
3465 INIT_EACH(fun, "fun");
3466 }
3467}
3468
/* Return non-zero if STR matches PATTERN, zero if not.

   PATTERN may have a single '*' wildcard, either as its first character
   (the rest must match the end of STR) or as its last character (the part
   before it must match the start of STR).  With no wildcard the two strings
   must be equal.  A leading '*' is checked first, so a pattern with a '*'
   at both ends has its trailing '*' compared literally.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end; strncmp stops at the end of str, so a str shorter than
     the prefix is a plain mismatch and nothing past its terminator is
     looked at */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
3492
3493void
3494try_name (const char *name)
3495{
3496 int found = 0;
3497 int i;
3498
3499 for (i = 0; i < numberof (choice_array); i++)
3500 {
3501 if (strmatch_wild (name, choice_array[i].name))
3502 {
3503 choice = &choice_array[i];
3504 tr = &param[choice->type];
3505 try_many ();
3506 found = 1;
3507 }
3508 }
3509
3510 if (!found)
3511 {
3512 printf ("%s unknown\n", name);
3513 /* exit (1); */
3514 }
3515}
3516
3517
3518void
3519usage (const char *prog)
3520{
3521 int col = 0;
3522 int i;
3523
3524 printf ("Usage: %s [options] function...\n", prog);
3525 printf (" -1 use limb data 1,2,3,etc\n");
3526 printf (" -9 use limb data all 0xFF..FFs\n");
3527 printf (" -a zeros use limb data all zeros\n");
3528 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
3529 printf (" -a 2fd use data 0x2FFF...FFFD\n");
3530 printf (" -p print each case tried (try this if seg faulting)\n");
3531 printf (" -R seed random numbers from time()\n");
3532 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
3533 printf (" -s size starting size to test\n");
3534 printf (" -S size2 starting size2 to test\n");
3535 printf (" -s s1-s2 range of sizes to test\n");
3536 printf (" -W don't show the spinner (use this in gdb)\n");
3537 printf (" -z disable mprotect() redzones\n");
3538 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3539 printf ("\n");
3540 printf ("Functions that can be tested:\n");
3541
3542 for (i = 0; i < numberof (choice_array); i++)
3543 {
3544 if (col + 1 + strlen (choice_array[i].name) > 79)
3545 {
3546 printf ("\n");
3547 col = 0;
3548 }
3549 printf (" %s", choice_array[i].name);
3550 col += 1 + strlen (choice_array[i].name);
3551 }
3552 printf ("\n");
3553
3554 exit(1);
3555}
3556
3557
3558int
3559main (int argc, char *argv[])
3560{
3561 int i;
3562
3563 /* unbuffered output */
3564 setbuf (stdout, NULL);
3565 setbuf (stderr, NULL);
3566
3567 /* default trace in hex, and in upper-case so can paste into bc */
3568 mp_trace_base = -16;
3569
3570 param_init ();
3571
3572 {
3573 unsigned long seed = 123;
3574 int opt;
3575
3576 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3577 {
3578 switch (opt) {
3579 case '1':
3580 /* use limb data values 1, 2, 3, ... etc */
3581 option_data = DATA_SEQ;
3582 break;
3583 case '9':
3584 /* use limb data values 0xFFF...FFF always */
3585 option_data = DATA_FFS;
3586 break;
3587 case 'a':
3588 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3589 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3590 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3591 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3592 else
3593 {
3594 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3595 exit (1);
3596 }
3597 break;
3598 case 'b':
3599 mp_trace_base = atoi (optarg);
3600 break;
3601 case 'E':
3602 /* re-seed */
3603 sscanf (optarg, "%lu", &seed);
3604 printf ("Re-seeding with %lu\n", seed);
3605 break;
3606 case 'p':
3607 option_print = 1;
3608 break;
3609 case 'R':
3610 /* randomize */
3611 seed = time (NULL);
3612 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3613 break;
3614 case 'r':
3615 option_repetitions = atoi (optarg);
3616 break;
3617 case 's':
3618 {
3619 char *p;
3620 option_firstsize = strtol (optarg, 0, 0);
3621 if ((p = strchr (optarg, '-')) != NULL)
3622 option_lastsize = strtol (p+1, 0, 0);
3623 }
3624 break;
3625 case 'S':
3626 /* -S <size> sets the starting size for the second of a two size
3627 routine (like mpn_mul_basecase) */
3628 option_firstsize2 = strtol (optarg, 0, 0);
3629 break;
3630 case 'W':
3631 /* use this when running in the debugger */
3632 option_spinner = 0;
3633 break;
3634 case 'z':
3635 /* disable redzones */
3636 option_redzones = 0;
3637 break;
3638 case '?':
3639 usage (argv[0]);
3640 break;
3641 }
3642 }
3643
3644 gmp_randinit_default (__gmp_rands);
3645 __gmp_rands_initialized = 1;
3646 gmp_randseed_ui (__gmp_rands, seed);
3647 }
3648
3649 try_init();
3650
3651 if (argc <= optind)
3652 usage (argv[0]);
3653
3654 for (i = optind; i < argc; i++)
3655 try_name (argv[i]);
3656
3657 return 0;
3658}