/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991-2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */

/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx. Failing to do this will result in an incorrect address
   being used for the accesses. On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it. */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h> /* for _popcnt */
#endif

/* For INT_MAX, etc. We used to avoid it because of a bug (on solaris,
   gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values)), but it should be safe now.

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail. For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort. */

#include <limits.h>

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmpn_cpuvec,
   they're not called directly. */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_cnd_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_cnd_sub_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_11(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)

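/* Illustrative sketch only: fat binary support applies these DECL_* macros
   both to give prototypes to CPU-specific entry points and to lay out the
   function-pointer fields of __gmpn_cpuvec, roughly along these lines (the
   particular names here are hypothetical, not the real fat.h contents):

       DECL_add_n (__MPN(add_n_someimpl));            a per-CPU entry point
       struct cpuvec_t { DECL_add_n ((*add_n)); ... }; a cpuvec pointer slot
*/
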
#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring> /* for strlen */
#include <string>  /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca. The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives. Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef GMP_LIMB_BYTES
#define GMP_LIMB_BYTES SIZEOF_MP_LIMB_T
#endif
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned gmp_uint_least32_t;
#else
typedef unsigned long gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t gmp_intptr_t;
#else /* fallback */
typedef size_t gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;


/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects. This
   is more restrictive than "pure". See info node "(gcc)Function
   Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops. */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything. */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c) index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n) \
  do { \
    ASSERT ((n) >= 0); \
    char *__memset__p = (p); \
    int __i; \
    for (__i = 0; __i < (n); __i++) \
      __memset__p[__i] = (c); \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode. Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array. */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src) __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz). */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
    || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC. Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables. TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed. TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs. */
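
/* A minimal usage sketch (a hypothetical helper, not a real GMP function):

       static void
       foo (mp_srcptr ap, mp_size_t n)
       {
         mp_ptr tp;
         TMP_DECL;
         TMP_MARK;
         tp = TMP_ALLOC_LIMBS (n);      size known only at run time
         ...
         TMP_FREE;
       }

   TMP_DECL comes last among the declarations and TMP_MARK precedes the
   first TMP_ALLOC, as required above. */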

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it. */
union tmp_align_t {
  mp_limb_t l;
  double d;
  char *p;
};
#define __TMP_ALIGN sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
#define ROUND_UP_MULTIPLE(a,m) \
  (POW2_P(m) ? (a) + (-(a))%(m) \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
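
/* Worked example (with "a" unsigned, as required): ROUND_UP_MULTIPLE (13, 8)
   takes the POW2_P branch and gives 13 + ((-13) % 8) = 13 + 3 = 16, while
   ROUND_UP_MULTIPLE (13, 12) takes the general branch and gives 24; a value
   that is already a multiple of m is returned unchanged. */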

#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t *next;
  size_t size; /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK __tmp_marker = 0
#define TMP_SALLOC(n) alloca(n)
#define TMP_BALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
/* The peculiar stack allocation limit here is chosen for efficient asm. */
#define TMP_ALLOC(n) \
  (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE \
  do { \
    if (UNLIKELY (__tmp_marker != 0)) \
      __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL TMP_DECL
#define TMP_DECL struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK TMP_MARK
#define TMP_MARK __tmp_marker = 0
#define TMP_SALLOC(n) TMP_ALLOC(n)
#define TMP_BALLOC(n) TMP_ALLOC(n)
#define TMP_ALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE TMP_FREE
#define TMP_FREE __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL TMP_DECL
#define TMP_DECL struct tmp_marker __tmp_marker
#define TMP_SMARK TMP_MARK
#define TMP_MARK __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n) TMP_ALLOC(n)
#define TMP_BALLOC(n) TMP_ALLOC(n)
#define TMP_ALLOC(n) \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE TMP_FREE
#define TMP_FREE __gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t *list;
  const char *file;
  int line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t *next;
  void *block;
  size_t size;
};
__GMP_DECLSPEC void __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
                                          struct tmp_debug_t *,
                                          const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
                                            struct tmp_debug_t **, const char *,
                                            size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_debug_free (const char *, int, int,
                                          struct tmp_debug_t **,
                                          const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
   these things up too. */
#define TMP_DECL_NAME(marker, marker_name) \
  int marker; \
  int __tmp_marker_inscope; \
  const char *__tmp_marker_name = marker_name; \
  struct tmp_debug_t __tmp_marker_struct; \
  /* don't demand NULL, just cast a zero */ \
  struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name) \
  do { \
    marker = 1; \
    __tmp_marker_inscope = 1; \
    __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
                          &__tmp_marker, &__tmp_marker_struct, \
                          __tmp_marker_name, marker_name); \
  } while (0)
#define TMP_SALLOC(n) TMP_ALLOC(n)
#define TMP_BALLOC(n) TMP_ALLOC(n)
#define TMP_ALLOC(size) \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
                         __tmp_marker_inscope, \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name) \
  do { \
    __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
                          marker, &__tmp_marker, \
                          __tmp_marker_name, marker_name); \
  } while (0)
#endif /* WANT_TMP_DEBUG */


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n) TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n) TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n) TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n) TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than many. This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw). In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually. */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
  do { \
    if (WANT_TMP_DEBUG) \
      { \
        (xp) = TMP_ALLOC_LIMBS (xsize); \
        (yp) = TMP_ALLOC_LIMBS (ysize); \
      } \
    else \
      { \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
        (yp) = (xp) + (xsize); \
      } \
  } while (0)
#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize) \
  do { \
    if (WANT_TMP_DEBUG) \
      { \
        (xp) = TMP_ALLOC_LIMBS (xsize); \
        (yp) = TMP_ALLOC_LIMBS (ysize); \
        (zp) = TMP_ALLOC_LIMBS (zsize); \
      } \
    else \
      { \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize)); \
        (yp) = (xp) + (xsize); \
        (zp) = (yp) + (ysize); \
      } \
  } while (0)

/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str) __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size) __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond) __GMP_LIKELY(cond)
#define UNLIKELY(cond) __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
#define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
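/* For instance ABS_CAST (unsigned long, x) is well defined even for
   x == LONG_MIN: NEG_CAST adds 1 before converting to the unsigned type and
   subtracts it again afterwards, so -x is never evaluated in the signed
   type and the result is (unsigned long) LONG_MAX + 1. */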
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x) (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x) ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x) ((x)->_mp_d)
#define EXP(x) ((x)->_mp_exp)
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
#define NUM(x) mpq_numref(x)
#define DEN(x) mpq_denref(x)

/* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set. n==0 will
   return true though, so avoid that. */
#define POW2_P(n) (((n) & ((n) - 1)) == 0)
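/* Eg. POW2_P(8) is 1 since (8 & 7) == 0, and POW2_P(12) is 0 since
   (12 & 11) == 8; POW2_P(0) is also 1, which is why 0 must be avoided. */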

/* This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result. */
#define LOG2C(n) \
  (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
   ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
   ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
   ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
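/* Eg. LOG2C(1)=1, LOG2C(7)=3, LOG2C(8)=4; in general LOG2C(n) is the number
   of significant bits in n, correct for n up to 0xFFFF, which covers the
   constant thresholds it is applied to. */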

#define MP_LIMB_T_MAX (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler. In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed. */
#define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
#define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX INT_MAX
#define MP_SIZE_T_MIN INT_MIN
#else
#define MP_SIZE_T_MAX LONG_MAX
#define MP_SIZE_T_MIN LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX MP_SIZE_T_MAX
#define MP_EXP_T_MIN MP_SIZE_T_MIN

#define LONG_HIGHBIT LONG_MIN
#define INT_HIGHBIT INT_MIN
#define SHRT_HIGHBIT SHRT_MIN


#define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
   code that has not yet been qualified. */

#undef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD 50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define USE_PREINV_DIVREM_1 0 /* no preinv */
#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef MUL_FFT_THRESHOLD
#undef SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y) \
  do { \
    mp_limb_t __mp_limb_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_limb_t_swap__tmp; \
  } while (0)
#define MP_SIZE_T_SWAP(x, y) \
  do { \
    mp_size_t __mp_size_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_size_t_swap__tmp; \
  } while (0)

#define MP_PTR_SWAP(x, y) \
  do { \
    mp_ptr __mp_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_ptr_swap__tmp; \
  } while (0)
#define MP_SRCPTR_SWAP(x, y) \
  do { \
    mp_srcptr __mp_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_srcptr_swap__tmp; \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_PTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_SRCPTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while(0)

#define MPZ_PTR_SWAP(x, y) \
  do { \
    mpz_ptr __mpz_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_ptr_swap__tmp; \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y) \
  do { \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_srcptr_swap__tmp; \
  } while (0)

#define MPQ_PTR_SWAP(x, y) \
  do { \
    mpq_ptr __mpq_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpq_ptr_swap__tmp; \
  } while (0)
#define MPQ_SRCPTR_SWAP(x, y) \
  do { \
    mpq_srcptr __mpq_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpq_srcptr_swap__tmp; \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that. */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void (*__gmp_free_func) (void *, size_t);

__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void __gmp_default_free (void *, size_t);

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func) \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (type *) (*__gmp_reallocate_func) \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x) 0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible. __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters. Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers. Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this. regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway). */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x) x,a,b
#define REGPARM_3_1(a,b,c,x) x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x) a,b,x
#define REGPARM_3_1(a,b,c,x) a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn. This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro. Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block. */

#define ASM_L(name) LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N) \
  __asm__ ("cld\n\trep\n\tmovsl" : : \
           "D" (DST), "S" (SRC), "c" (N) : \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N) \
  __asm__ ("std\n\trep\n\tmovsl" : : \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
           "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);


#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_addmul_2 /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase. */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n,
   etc when !HAVE_NATIVE the former but HAVE_NATIVE_ the latter. Similarly,
   override foo_ip1 functions with foo. We then lie and say these macros
   represent native functions, but leave a trace by using the value 2 rather
   than 1. */

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
#define mpn_addlsh1_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_addlsh1_n 2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
#define mpn_addlsh1_nc(a,b,c,d,x) mpn_addlsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_addlsh1_nc 2
#endif

#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1(a,b,n) mpn_addlsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh1_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1(a,b,n,c) mpn_addlsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 2
#endif

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
#define mpn_addlsh2_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_addlsh2_n 2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
#define mpn_addlsh2_nc(a,b,c,d,x) mpn_addlsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_addlsh2_nc 2
#endif

#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1(a,b,n) mpn_addlsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh2_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1(a,b,n,c) mpn_addlsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
#define mpn_sublsh1_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_sublsh1_n 2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
#define mpn_sublsh1_nc(a,b,c,d,x) mpn_sublsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_sublsh1_nc 2
#endif

#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1(a,b,n) mpn_sublsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh1_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1(a,b,n,c) mpn_sublsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
#define mpn_sublsh2_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_sublsh2_n 2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
#define mpn_sublsh2_nc(a,b,c,d,x) mpn_sublsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_sublsh2_nc 2
#endif

#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1(a,b,n) mpn_sublsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh2_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1(a,b,n,c) mpn_sublsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
#define mpn_rsblsh1_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_rsblsh1_n 2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
#define mpn_rsblsh1_nc(a,b,c,d,x) mpn_rsblsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_rsblsh1_nc 2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
#define mpn_rsblsh1_n_ip1(a,b,n) mpn_rsblsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh1_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
#define mpn_rsblsh1_nc_ip1(a,b,n,c) mpn_rsblsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh1_nc_ip1 2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
#define mpn_rsblsh2_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_rsblsh2_n 2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
#define mpn_rsblsh2_nc(a,b,c,d,x) mpn_rsblsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_rsblsh2_nc 2
#endif

#if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
#define mpn_rsblsh2_n_ip1(a,b,n) mpn_rsblsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh2_n_ip1 2
#endif

#if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
#define mpn_rsblsh2_nc_ip1(a,b,n,c) mpn_rsblsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh2_nc_ip1 2
#endif


#ifndef mpn_addlsh1_n
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh2_n
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh_n
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_addlsh_n_ip1
#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc_ip1
#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#ifndef mpn_sublsh1_n
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh2_n
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh_n
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc
#define mpn_sublsh_nc __MPN(sublsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_sublsh_n_ip1
#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
  __GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc_ip1
#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_rsblsh_nc __MPN(rsblsh_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);

#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#ifndef mpn_lshiftc /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif

#define mpn_add_err1_n __MPN(add_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err2_n __MPN(add_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err3_n __MPN(add_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err1_n __MPN(sub_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err2_n __MPN(sub_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err3_n __MPN(sub_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);

#define mpn_fib2m __MPN(fib2m)
__GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_strongfibo __MPN(strongfibo)
__GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);

/* Remap names of internal mpn functions. */
#define __clz_tab __MPN(clz_tab)
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;

#define mpn_jacobi_2 __MPN(jacobi_2)
__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);

#define mpn_jacobi_n __MPN(jacobi_n)
__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_5 __MPN(mul_5)
__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_6 __MPN(mul_6)
__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#ifndef mpn_mullo_basecase /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif

#define mpn_sqrlo __MPN(sqrlo)
__GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
__GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_mulmid_basecase __MPN(mulmid_basecase)
__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_mulmid_n __MPN(mulmid_n)
__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_mulmid __MPN(mulmid)
__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_redc_1 /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_redc_2 /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);


#ifndef mpn_mod_1_1p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
#endif
#ifndef mpn_mod_1_1p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_2p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
#endif
#ifndef mpn_mod_1s_2p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_3p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
#endif
#ifndef mpn_mod_1s_3p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_4p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
#endif
#ifndef mpn_mod_1s_4p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
#endif

#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure. */
typedef struct {
  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
  void (*randclear_fn) (gmp_randstate_t);
  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as a lvalue the rvalue needs to be cast to mp_ptr. */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp. */
#define _gmp_rand(rp, state, bits) \
  do { \
    gmp_randstate_ptr __rstate = (state); \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
      (__rstate, rp, bits); \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time. This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable. Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere. In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important. */

__GMP_DECLSPEC extern char __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;

#define RANDS \
  ((__gmp_rands_initialized ? 0 \
    : (__gmp_rands_initialized = 1, \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR() \
  do { \
    if (__gmp_rands_initialized) \
      { \
        __gmp_rands_initialized = 0; \
        gmp_randclear (__gmp_rands); \
      } \
  } while (0)


/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used. Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B. The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm. */

#if ! defined (__GNUC__) || __GNUC__ < 2
#define ABOVE_THRESHOLD(size,thresh) \
  ((thresh) == 0 \
   || ((thresh) != MP_SIZE_T_MAX \
       && (size) >= (thresh)))
#else
#define ABOVE_THRESHOLD(size,thresh) \
  ((__builtin_constant_p (thresh) && (thresh) == 0) \
   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX) \
       && (size) >= (thresh)))
#endif
#define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
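
/* Sketch of typical use (the threshold name is just an example):

       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         mpn_mul_basecase (prodp, ap, n, bp, n);
       else
         ...the next algorithm up...

   When the threshold is a compile-time 0 or MP_SIZE_T_MAX the comparison
   folds to a constant and the dead branch can be eliminated. */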

/* The minimal supported value for Toom22 depends also on Toom32 and
   Toom42 implementations. */
#define MPN_TOOM22_MUL_MINSIZE 6
#define MPN_TOOM2_SQR_MINSIZE 4

#define MPN_TOOM33_MUL_MINSIZE 17
#define MPN_TOOM3_SQR_MINSIZE 17

#define MPN_TOOM44_MUL_MINSIZE 30
#define MPN_TOOM4_SQR_MINSIZE 30

#define MPN_TOOM6H_MUL_MINSIZE 46
#define MPN_TOOM6_SQR_MINSIZE 46

#define MPN_TOOM8H_MUL_MINSIZE 86
#define MPN_TOOM8_SQR_MINSIZE 86

#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
#define MPN_TOOM43_MUL_MINSIZE 25
#define MPN_TOOM53_MUL_MINSIZE 17
#define MPN_TOOM54_MUL_MINSIZE 31
#define MPN_TOOM63_MUL_MINSIZE 49

#define MPN_TOOM42_MULMID_MINSIZE 4

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom54_mul __MPN(toom54_mul)
__GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mulmid __MPN(toom42_mulmid)
__GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
  __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);

#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1520
1521#define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
1522 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
1523
1524#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
1525__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1526
1527#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
1528__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1529
1530#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
1531__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1532
1533#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
1534__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1535#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
1536__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
1537
1538#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
1539__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1540
1541#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
1542__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1543
1544#define mpn_mu_div_qr __MPN(mu_div_qr)
1545__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1546#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1547__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1548
1549#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1550__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1551#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
1552__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1553
1554#define mpn_mu_divappr_q __MPN(mu_divappr_q)
1555__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1556#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1557__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1558
1559#define mpn_mu_div_q __MPN(mu_div_q)
1560__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1561#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
1562__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1563
1564#define mpn_div_q __MPN(div_q)
1565__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1566
1567#define mpn_invert __MPN(invert)
1568__GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1569#define mpn_invert_itch(n) mpn_invertappr_itch(n)
1570
1571#define mpn_ni_invertappr __MPN(ni_invertappr)
1572__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1573#define mpn_invertappr __MPN(invertappr)
1574__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1575#define mpn_invertappr_itch(n) (2 * (n))
1576
1577#define mpn_binvert __MPN(binvert)
1578__GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1579#define mpn_binvert_itch __MPN(binvert_itch)
1580__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST;
1581
1582#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
1583__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1584
1585#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
1586__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
1587
1588#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
1589__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1590
1591#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
1592__GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1593
1594#define mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r)
1595__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1596
1597#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
1598__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1599#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
1600__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST;
1601
1602#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
1603__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1604#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
1605__GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1606
1607#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1608__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1609#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1610__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1611
1612#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1613__GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1614#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1615__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1616
1617#define mpn_bdiv_qr __MPN(bdiv_qr)
1618__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1619#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
1620__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1621
1622#define mpn_bdiv_q __MPN(bdiv_q)
1623__GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1624#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
1625__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1626
1627#define mpn_divexact __MPN(divexact)
1628__GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1629#define mpn_divexact_itch __MPN(divexact_itch)
1630__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1631
1632#ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */
1633#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1634__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1635#endif
1636
1637#define mpn_bdiv_dbm1(dst, src, size, divisor) \
1638 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1639
1640#define mpn_powm __MPN(powm)
1641__GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1642#define mpn_powlo __MPN(powlo)
1643__GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1644
1645#define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr)
1646__GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1647#define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r)
1648__GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1649
1650
1651#ifndef DIVEXACT_BY3_METHOD
1652#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1653#define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */
1654#else
1655#define DIVEXACT_BY3_METHOD 1
1656#endif
1657#endif
1658
1659#if DIVEXACT_BY3_METHOD == 0
1660#undef mpn_divexact_by3
1661#define mpn_divexact_by3(dst,src,size) \
1662 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1663/* override mpn_divexact_by3c defined in gmp.h */
1664/*
1665#undef mpn_divexact_by3c
1666#define mpn_divexact_by3c(dst,src,size,cy) \
1667 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
1668*/
1669#endif
1670
1671#if GMP_NUMB_BITS % 4 == 0
1672#define mpn_divexact_by5(dst,src,size) \
1673 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1674#endif
1675
1676#if GMP_NUMB_BITS % 3 == 0
1677#define mpn_divexact_by7(dst,src,size) \
1678 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1679#endif
1680
1681#if GMP_NUMB_BITS % 6 == 0
1682#define mpn_divexact_by9(dst,src,size) \
1683 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1684#endif
1685
1686#if GMP_NUMB_BITS % 10 == 0
1687#define mpn_divexact_by11(dst,src,size) \
1688 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1689#endif
1690
1691#if GMP_NUMB_BITS % 12 == 0
1692#define mpn_divexact_by13(dst,src,size) \
1693 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1694#endif
1695
1696#if GMP_NUMB_BITS % 4 == 0
1697#define mpn_divexact_by15(dst,src,size) \
1698 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1699#endif
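
/* A note on the GMP_NUMB_BITS % m conditions above: each small divisor d
   divides 2^m - 1 for the stated m (3 | 2^2-1, 5 | 2^4-1, 7 | 2^3-1,
   9 | 2^6-1, 11 | 2^10-1, 13 | 2^12-1, 15 | 2^4-1).  When GMP_NUMB_BITS is
   a multiple of m, 2^m - 1 divides 2^GMP_NUMB_BITS - 1 = GMP_NUMB_MASK, so
   GMP_NUMB_MASK / d is an exact integer and the mpn_bdiv_dbm1 trick above
   applies.  Illustrative use only, with {sp,n} assumed to be a multiple of 5:

       mpn_divexact_by5 (qp, sp, n);
*/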
1700
1701#define mpz_divexact_gcd __gmpz_divexact_gcd
1702__GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
1703
1704#define mpz_prodlimbs __gmpz_prodlimbs
1705__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
1706
1707#define mpz_oddfac_1 __gmpz_oddfac_1
1708__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
1709
1710#define mpz_stronglucas __gmpz_stronglucas
1711__GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr);
1712
1713#define mpz_lucas_mod __gmpz_lucas_mod
1714__GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr);
1715
1716#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1717#ifdef _GMP_H_HAVE_FILE
1718__GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
1719#endif
1720
1721#define mpn_divisible_p __MPN(divisible_p)
1722__GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
1723
1724#define mpn_rootrem __MPN(rootrem)
1725__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1726
1727#define mpn_broot __MPN(broot)
1728__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1729
1730#define mpn_broot_invm1 __MPN(broot_invm1)
1731__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1732
1733#define mpn_brootinv __MPN(brootinv)
1734__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1735
1736#define mpn_bsqrt __MPN(bsqrt)
1737__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1738
1739#define mpn_bsqrtinv __MPN(bsqrtinv)
1740__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1741
1742#if defined (_CRAY)
1743#define MPN_COPY_INCR(dst, src, n) \
1744 do { \
1745 int __i; /* Faster on some Crays with plain int */ \
1746 _Pragma ("_CRI ivdep"); \
1747 for (__i = 0; __i < (n); __i++) \
1748 (dst)[__i] = (src)[__i]; \
1749 } while (0)
1750#endif
1751
1752/* used by test programs, hence __GMP_DECLSPEC */
1753#ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
1754#define mpn_copyi __MPN(copyi)
1755__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
1756#endif
1757
1758#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1759#define MPN_COPY_INCR(dst, src, size) \
1760 do { \
1761 ASSERT ((size) >= 0); \
1762 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
1763 mpn_copyi (dst, src, size); \
1764 } while (0)
1765#endif
1766
1767/* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
1768#if ! defined (MPN_COPY_INCR)
1769#define MPN_COPY_INCR(dst, src, n) \
1770 do { \
1771 ASSERT ((n) >= 0); \
1772 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
1773 if ((n) != 0) \
1774 { \
1775 mp_size_t __n = (n) - 1; \
1776 mp_ptr __dst = (dst); \
1777 mp_srcptr __src = (src); \
1778 mp_limb_t __x; \
1779 __x = *__src++; \
1780 if (__n != 0) \
1781 { \
1782 do \
1783 { \
1784 *__dst++ = __x; \
1785 __x = *__src++; \
1786 } \
1787 while (--__n); \
1788 } \
1789 *__dst++ = __x; \
1790 } \
1791 } while (0)
1792#endif
1793
1794
1795#if defined (_CRAY)
1796#define MPN_COPY_DECR(dst, src, n) \
1797 do { \
1798 int __i; /* Faster on some Crays with plain int */ \
1799 _Pragma ("_CRI ivdep"); \
1800 for (__i = (n) - 1; __i >= 0; __i--) \
1801 (dst)[__i] = (src)[__i]; \
1802 } while (0)
1803#endif
1804
1805/* used by test programs, hence __GMP_DECLSPEC */
1806#ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
1807#define mpn_copyd __MPN(copyd)
1808__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
1809#endif
1810
1811#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1812#define MPN_COPY_DECR(dst, src, size) \
1813 do { \
1814 ASSERT ((size) >= 0); \
1815 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
1816 mpn_copyd (dst, src, size); \
1817 } while (0)
1818#endif
1819
1820/* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1821#if ! defined (MPN_COPY_DECR)
1822#define MPN_COPY_DECR(dst, src, n) \
1823 do { \
1824 ASSERT ((n) >= 0); \
1825 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1826 if ((n) != 0) \
1827 { \
1828 mp_size_t __n = (n) - 1; \
1829 mp_ptr __dst = (dst) + __n; \
1830 mp_srcptr __src = (src) + __n; \
1831 mp_limb_t __x; \
1832 __x = *__src--; \
1833 if (__n != 0) \
1834 { \
1835 do \
1836 { \
1837 *__dst-- = __x; \
1838 __x = *__src--; \
1839 } \
1840 while (--__n); \
1841 } \
1842 *__dst-- = __x; \
1843 } \
1844 } while (0)
1845#endif
1846
1847
1848#ifndef MPN_COPY
1849#define MPN_COPY(d,s,n) \
1850 do { \
1851 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1852 MPN_COPY_INCR (d, s, n); \
1853 } while (0)
1854#endif
1855
1856
1857/* Set {dst,size} to the limbs of {src,size} in reverse order. */
1858#define MPN_REVERSE(dst, src, size) \
1859 do { \
1860 mp_ptr __dst = (dst); \
1861 mp_size_t __size = (size); \
1862 mp_srcptr __src = (src) + __size - 1; \
1863 mp_size_t __i; \
1864 ASSERT ((size) >= 0); \
1865 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1866 CRAY_Pragma ("_CRI ivdep"); \
1867 for (__i = 0; __i < __size; __i++) \
1868 { \
1869 *__dst = *__src; \
1870 __dst++; \
1871 __src--; \
1872 } \
1873 } while (0)
1874
1875
1876/* Zero n limbs at dst.
1877
1878 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1879 ppc630 for instance this is optimal since it can sustain only 1 store per
1880 cycle.
1881
1882   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise that
1883   the "for" loop in the generic code below can become stu/bdnz.  The do/while
1884 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1885 applies here as to __GMPN_COPY_INCR in gmp.h.
1886
1887 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1888 this loop too.
1889
1890 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1891 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1892 trouble than it's worth to do the same, though perhaps a call to memset
1893 would be good when on a GNU system. */
1894
1895#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1896#define MPN_FILL(dst, n, f) \
1897 do { \
1898 mp_ptr __dst = (dst) - 1; \
1899 mp_size_t __n = (n); \
1900 ASSERT (__n > 0); \
1901 do \
1902 *++__dst = (f); \
1903 while (--__n); \
1904 } while (0)
1905#endif
1906
1907#ifndef MPN_FILL
1908#define MPN_FILL(dst, n, f) \
1909 do { \
1910 mp_ptr __dst = (dst); \
1911 mp_size_t __n = (n); \
1912 ASSERT (__n > 0); \
1913 do \
1914 *__dst++ = (f); \
1915 while (--__n); \
1916 } while (0)
1917#endif
1918
1919#define MPN_ZERO(dst, n) \
1920 do { \
1921 ASSERT ((n) >= 0); \
1922 if ((n) != 0) \
1923 MPN_FILL (dst, n, CNST_LIMB (0)); \
1924 } while (0)
1925
1926/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1927 start up and would need to strip a lot of zeros before it'd be faster
1928 than a simple cmpl loop. Here are some times in cycles for
1929 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1930 low zeros).
1931
1932 std cld
1933 P5 18 16
1934 P6 46 38
1935 K6 36 13
1936 K7 21 20
1937*/
1938#ifndef MPN_NORMALIZE
1939#define MPN_NORMALIZE(DST, NLIMBS) \
1940 do { \
1941 while ((NLIMBS) > 0) \
1942 { \
1943 if ((DST)[(NLIMBS) - 1] != 0) \
1944 break; \
1945 (NLIMBS)--; \
1946 } \
1947 } while (0)
1948#endif
1949#ifndef MPN_NORMALIZE_NOT_ZERO
1950#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1951 do { \
1952 while (1) \
1953 { \
1954 ASSERT ((NLIMBS) >= 1); \
1955 if ((DST)[(NLIMBS) - 1] != 0) \
1956 break; \
1957 (NLIMBS)--; \
1958 } \
1959 } while (0)
1960#endif
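
/* Illustrative use: after a subtraction the most significant limbs of the
   result may have become zero, so a caller canonicalizes the size before
   storing it:

       mpn_sub_n (rp, up, vp, n);
       rn = n;
       MPN_NORMALIZE (rp, rn);

   rn then holds the canonical limb count.  MPN_NORMALIZE accepts an
   all-zero {rp,rn}; MPN_NORMALIZE_NOT_ZERO requires a non-zero limb
   somewhere.  */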
1961
1962/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1963 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1964 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1965 somewhere a non-zero limb. */
1966#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1967 do { \
1968 ASSERT ((size) >= 1); \
1969 ASSERT ((low) == (ptr)[0]); \
1970 \
1971 while ((low) == 0) \
1972 { \
1973 (size)--; \
1974 ASSERT ((size) >= 1); \
1975 (ptr)++; \
1976 (low) = *(ptr); \
1977 } \
1978 } while (0)
1979
1980/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1981 temporary variable; it will be automatically cleared out at function
1982 return. We use __x here to make it possible to accept both mpz_ptr and
1983 mpz_t arguments. */
1984#define MPZ_TMP_INIT(X, NLIMBS) \
1985 do { \
1986 mpz_ptr __x = (X); \
1987 ASSERT ((NLIMBS) >= 1); \
1988 __x->_mp_alloc = (NLIMBS); \
1989 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1990 } while (0)
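
/* A minimal usage sketch, assuming the usual TMP_DECL / TMP_MARK / TMP_FREE
   temporary-allocation pattern is in scope (TMP_ALLOC_LIMBS needs it):

       mpz_t t;
       TMP_DECL;
       TMP_MARK;
       MPZ_TMP_INIT (t, limbs_needed);
       ...use t as scratch; do not mpz_clear it...
       TMP_FREE;

   limbs_needed is whatever bound the caller has computed.  */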
1991
1992#if WANT_ASSERT
1993static inline void *
1994_mpz_newalloc (mpz_ptr z, mp_size_t n)
1995{
1996 void * res = _mpz_realloc(z,n);
1997 /* If we are checking the code, force a random change to limbs. */
1998 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
1999 return res;
2000}
2001#else
2002#define _mpz_newalloc _mpz_realloc
2003#endif
2004/* Realloc for an mpz_t Z if it has fewer than N limbs. */
2005#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
2006 ? (mp_ptr) _mpz_realloc(z,n) \
2007 : PTR(z))
2008#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
2009 ? (mp_ptr) _mpz_newalloc(z,n) \
2010 : PTR(z))
2011
2012#define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
2013
2014
2015/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
2016 f1p.
2017
2018 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
2019 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
2020 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
2021
2022 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
2023 without good floating point. There's +2 for rounding up, and a further
2024 +2 since at the last step x limbs are doubled into a 2x+1 limb region
2025 whereas the actual F[2k] value might be only 2x-1 limbs.
2026
2027 Note that a division is done first, since on a 32-bit system it's at
2028 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
2029 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
2030 whatever a multiply of two 190Mbyte numbers takes.)
2031
2032 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
2033 worked into the multiplier. */
2034
2035#define MPN_FIB2_SIZE(n) \
2036 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
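
/* Worked example (illustrative): with 64-bit limbs and n = 1000, F[1000]
   needs about 1000*0.6942 = 694 bits, i.e. 11 limbs, while the macro gives
   1000/32 = 31, 31*23 = 713, 713/64 = 11, plus 4 = 15 limbs, comfortably
   enough for both fp and f1p.  */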
2037
2038
2039/* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
2040 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
2041
2042 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
2043 F[n] + 2*F[n-1] fits in a limb. */
2044
2045__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
2046#define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
2047
2048extern const mp_limb_t __gmp_oddfac_table[];
2049extern const mp_limb_t __gmp_odd2fac_table[];
2050extern const unsigned char __gmp_fac2cnt_table[];
2051extern const mp_limb_t __gmp_limbroots_table[];
2052
2053/* n^log <= GMP_NUMB_MAX, i.e. a limb can hold the product of log factors, each less than n */
2054static inline unsigned
2055log_n_max (mp_limb_t n)
2056{
2057 unsigned log;
2058 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
2059 return log;
2060}
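
/* For instance, with 64-bit limbs log_n_max (1000) should give 6, since
   1000^6 = 10^18 fits in a limb while 1000^7 = 10^21 does not (this assumes
   __gmp_limbroots_table[k-1] holds floor (GMP_NUMB_MAX^(1/k))).  */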
2061
2062#define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
2063typedef struct
2064{
2065 unsigned long d; /* current index in s[] */
2066 unsigned long s0; /* number corresponding to s[0] */
2067 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
2068 unsigned char s[SIEVESIZE + 1]; /* sieve table */
2069} gmp_primesieve_t;
2070
2071#define gmp_init_primesieve __gmp_init_primesieve
2072__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
2073
2074#define gmp_nextprime __gmp_nextprime
2075__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
2076
2077#define gmp_primesieve __gmp_primesieve
2078__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
2079
2080
2081#ifndef MUL_TOOM22_THRESHOLD
2082#define MUL_TOOM22_THRESHOLD 30
2083#endif
2084
2085#ifndef MUL_TOOM33_THRESHOLD
2086#define MUL_TOOM33_THRESHOLD 100
2087#endif
2088
2089#ifndef MUL_TOOM44_THRESHOLD
2090#define MUL_TOOM44_THRESHOLD 300
2091#endif
2092
2093#ifndef MUL_TOOM6H_THRESHOLD
2094#define MUL_TOOM6H_THRESHOLD 350
2095#endif
2096
2097#ifndef SQR_TOOM6_THRESHOLD
2098#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
2099#endif
2100
2101#ifndef MUL_TOOM8H_THRESHOLD
2102#define MUL_TOOM8H_THRESHOLD 450
2103#endif
2104
2105#ifndef SQR_TOOM8_THRESHOLD
2106#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
2107#endif
2108
2109#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
2110#define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
2111#endif
2112
2113#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
2114#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
2115#endif
2116
2117#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
2118#define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
2119#endif
2120
2121#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
2122#define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
2123#endif
2124
2125#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
2126#define MUL_TOOM43_TO_TOOM54_THRESHOLD 150
2127#endif
2128
2129/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
2130 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
2131 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
2132 separate hard limit will have been defined. Similarly for TOOM3. */
2133#ifndef MUL_TOOM22_THRESHOLD_LIMIT
2134#define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
2135#endif
2136#ifndef MUL_TOOM33_THRESHOLD_LIMIT
2137#define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
2138#endif
2139#ifndef MULLO_BASECASE_THRESHOLD_LIMIT
2140#define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
2141#endif
2142#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
2143#define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD
2144#endif
2145#ifndef SQRLO_DC_THRESHOLD_LIMIT
2146#define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD
2147#endif
2148
2149/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
2150 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
2151 certainly always want it if there's a native assembler mpn_sqr_basecase.)
2152
2153 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
2154 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
2155 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
2156 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
2157 should be used, and that may be never. */
2158
2159#ifndef SQR_BASECASE_THRESHOLD
2160#define SQR_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
2161#endif
2162
2163#ifndef SQR_TOOM2_THRESHOLD
2164#define SQR_TOOM2_THRESHOLD 50
2165#endif
2166
2167#ifndef SQR_TOOM3_THRESHOLD
2168#define SQR_TOOM3_THRESHOLD 120
2169#endif
2170
2171#ifndef SQR_TOOM4_THRESHOLD
2172#define SQR_TOOM4_THRESHOLD 400
2173#endif
2174
2175/* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
2176#ifndef SQR_TOOM3_THRESHOLD_LIMIT
2177#define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
2178#endif
2179
2180#ifndef MULMID_TOOM42_THRESHOLD
2181#define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD
2182#endif
2183
2184#ifndef MULLO_BASECASE_THRESHOLD
2185#define MULLO_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
2186#endif
2187
2188#ifndef MULLO_DC_THRESHOLD
2189#define MULLO_DC_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2190#endif
2191
2192#ifndef MULLO_MUL_N_THRESHOLD
2193#define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD)
2194#endif
2195
2196#ifndef SQRLO_BASECASE_THRESHOLD
2197#define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */
2198#endif
2199
2200#ifndef SQRLO_DC_THRESHOLD
2201#define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD)
2202#endif
2203
2204#ifndef SQRLO_SQR_THRESHOLD
2205#define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD)
2206#endif
2207
2208#ifndef DC_DIV_QR_THRESHOLD
2209#define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2210#endif
2211
2212#ifndef DC_DIVAPPR_Q_THRESHOLD
2213#define DC_DIVAPPR_Q_THRESHOLD 200
2214#endif
2215
2216#ifndef DC_BDIV_QR_THRESHOLD
2217#define DC_BDIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2218#endif
2219
2220#ifndef DC_BDIV_Q_THRESHOLD
2221#define DC_BDIV_Q_THRESHOLD 180
2222#endif
2223
2224#ifndef DIVEXACT_JEB_THRESHOLD
2225#define DIVEXACT_JEB_THRESHOLD 25
2226#endif
2227
2228#ifndef INV_MULMOD_BNM1_THRESHOLD
2229#define INV_MULMOD_BNM1_THRESHOLD (4*MULMOD_BNM1_THRESHOLD)
2230#endif
2231
2232#ifndef INV_APPR_THRESHOLD
2233#define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
2234#endif
2235
2236#ifndef INV_NEWTON_THRESHOLD
2237#define INV_NEWTON_THRESHOLD 200
2238#endif
2239
2240#ifndef BINV_NEWTON_THRESHOLD
2241#define BINV_NEWTON_THRESHOLD 300
2242#endif
2243
2244#ifndef MU_DIVAPPR_Q_THRESHOLD
2245#define MU_DIVAPPR_Q_THRESHOLD 2000
2246#endif
2247
2248#ifndef MU_DIV_QR_THRESHOLD
2249#define MU_DIV_QR_THRESHOLD 2000
2250#endif
2251
2252#ifndef MUPI_DIV_QR_THRESHOLD
2253#define MUPI_DIV_QR_THRESHOLD 200
2254#endif
2255
2256#ifndef MU_BDIV_Q_THRESHOLD
2257#define MU_BDIV_Q_THRESHOLD 2000
2258#endif
2259
2260#ifndef MU_BDIV_QR_THRESHOLD
2261#define MU_BDIV_QR_THRESHOLD 2000
2262#endif
2263
2264#ifndef MULMOD_BNM1_THRESHOLD
2265#define MULMOD_BNM1_THRESHOLD 16
2266#endif
2267
2268#ifndef SQRMOD_BNM1_THRESHOLD
2269#define SQRMOD_BNM1_THRESHOLD 16
2270#endif
2271
2272#ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
2273#define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
2274#endif
2275
2276#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
2277
2278#ifndef REDC_1_TO_REDC_2_THRESHOLD
2279#define REDC_1_TO_REDC_2_THRESHOLD 15
2280#endif
2281#ifndef REDC_2_TO_REDC_N_THRESHOLD
2282#define REDC_2_TO_REDC_N_THRESHOLD 100
2283#endif
2284
2285#else
2286
2287#ifndef REDC_1_TO_REDC_N_THRESHOLD
2288#define REDC_1_TO_REDC_N_THRESHOLD 100
2289#endif
2290
2291#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
2292
2293
2294/* First k to use for an FFT modF multiply. A modF FFT is an order
2295 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
2296   whereas k=4 is 1.33 which is faster than toom3 at about 1.465. */
2297#define FFT_FIRST_K 4
2298
2299/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
2300#ifndef MUL_FFT_MODF_THRESHOLD
2301#define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
2302#endif
2303#ifndef SQR_FFT_MODF_THRESHOLD
2304#define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
2305#endif
2306
2307/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
2308 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
2309 NxN->2N multiply and not recursing into itself is an order
2310 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
2311 is the first better than toom3. */
2312#ifndef MUL_FFT_THRESHOLD
2313#define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
2314#endif
2315#ifndef SQR_FFT_THRESHOLD
2316#define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
2317#endif
2318
2319/* Table of thresholds for successive modF FFT "k"s. The first entry is
2320 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
2321 etc. See mpn_fft_best_k(). */
2322#ifndef MUL_FFT_TABLE
2323#define MUL_FFT_TABLE \
2324 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
2325 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
2326 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
2327 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
2328 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
2329 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
2330 0 }
2331#endif
2332#ifndef SQR_FFT_TABLE
2333#define SQR_FFT_TABLE \
2334 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
2335 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
2336 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
2337 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
2338 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
2339 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
2340 0 }
2341#endif
2342
2343struct fft_table_nk
2344{
2345 gmp_uint_least32_t n:27;
2346 gmp_uint_least32_t k:5;
2347};
2348
2349#ifndef FFT_TABLE_ATTRS
2350#define FFT_TABLE_ATTRS static const
2351#endif
2352
2353#define MPN_FFT_TABLE_SIZE 16
2354
2355
2356#ifndef DC_DIV_QR_THRESHOLD
2357#define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
2358#endif
2359
2360#ifndef GET_STR_DC_THRESHOLD
2361#define GET_STR_DC_THRESHOLD 18
2362#endif
2363
2364#ifndef GET_STR_PRECOMPUTE_THRESHOLD
2365#define GET_STR_PRECOMPUTE_THRESHOLD 35
2366#endif
2367
2368#ifndef SET_STR_DC_THRESHOLD
2369#define SET_STR_DC_THRESHOLD 750
2370#endif
2371
2372#ifndef SET_STR_PRECOMPUTE_THRESHOLD
2373#define SET_STR_PRECOMPUTE_THRESHOLD 2000
2374#endif
2375
2376#ifndef FAC_ODD_THRESHOLD
2377#define FAC_ODD_THRESHOLD 35
2378#endif
2379
2380#ifndef FAC_DSC_THRESHOLD
2381#define FAC_DSC_THRESHOLD 400
2382#endif
2383
2384/* Return non-zero if xp,xsize and yp,ysize overlap.
2385 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
2386 overlap. If both these are false, there's an overlap. */
2387#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
2388 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
2389#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
2390 ( (char *) (xp) + (xsize) > (char *) (yp) \
2391 && (char *) (yp) + (ysize) > (char *) (xp))
2392
2393/* Return non-zero if xp,xsize and yp,ysize are either identical or not
2394 overlapping. Return zero if they're partially overlapping. */
2395#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
2396 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
2397#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
2398 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
2399
2400/* Return non-zero if dst,dsize and src,ssize are either identical or
2401 overlapping in a way suitable for an incrementing/decrementing algorithm.
2402 Return zero if they're partially overlapping in an unsuitable fashion. */
2403#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
2404 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2405#define MPN_SAME_OR_INCR_P(dst, src, size) \
2406 MPN_SAME_OR_INCR2_P(dst, size, src, size)
2407#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
2408 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2409#define MPN_SAME_OR_DECR_P(dst, src, size) \
2410 MPN_SAME_OR_DECR2_P(dst, size, src, size)
2411
2412
2413/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
2414 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
2415 does it always. Generally assertions are meant for development, but
2416 might help when looking for a problem later too. */
2417
2418#ifdef __LINE__
2419#define ASSERT_LINE __LINE__
2420#else
2421#define ASSERT_LINE -1
2422#endif
2423
2424#ifdef __FILE__
2425#define ASSERT_FILE __FILE__
2426#else
2427#define ASSERT_FILE ""
2428#endif
2429
2430__GMP_DECLSPEC void __gmp_assert_header (const char *, int);
2431__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
2432
2433#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
2434
2435#define ASSERT_ALWAYS(expr) \
2436 do { \
2437 if (UNLIKELY (!(expr))) \
2438 ASSERT_FAIL (expr); \
2439 } while (0)
2440
2441#if WANT_ASSERT
2442#define ASSERT(expr) ASSERT_ALWAYS (expr)
2443#else
2444#define ASSERT(expr) do {} while (0)
2445#endif
2446
2447
2448/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2449 that it's zero. In both cases if assertion checking is disabled the
2450 expression is still evaluated. These macros are meant for use with
2451 routines like mpn_add_n() where the return value represents a carry or
2452 whatever that should or shouldn't occur in some context. For example,
2453 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2454#if WANT_ASSERT
2455#define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2456#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2457#else
2458#define ASSERT_CARRY(expr) (expr)
2459#define ASSERT_NOCARRY(expr) (expr)
2460#endif
2461
2462
2463/* ASSERT_CODE includes code when assertion checking is wanted. This is the
2464 same as writing "#if WANT_ASSERT", but more compact. */
2465#if WANT_ASSERT
2466#define ASSERT_CODE(expr) expr
2467#else
2468#define ASSERT_CODE(expr)
2469#endif
2470
2471
2472/* Test that an mpq_t is in fully canonical form. This can be used as
2473 protection on routines like mpq_equal which give wrong results on
2474 non-canonical inputs. */
2475#if WANT_ASSERT
2476#define ASSERT_MPQ_CANONICAL(q) \
2477 do { \
2478 ASSERT (q->_mp_den._mp_size > 0); \
2479 if (q->_mp_num._mp_size == 0) \
2480 { \
2481 /* zero should be 0/1 */ \
2482 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2483 } \
2484 else \
2485 { \
2486 /* no common factors */ \
2487 mpz_t __g; \
2488 mpz_init (__g); \
2489 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2490 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2491 mpz_clear (__g); \
2492 } \
2493 } while (0)
2494#else
2495#define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2496#endif
2497
2498/* Check that the nail parts are zero. */
2499#define ASSERT_ALWAYS_LIMB(limb) \
2500 do { \
2501 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2502 ASSERT_ALWAYS (__nail == 0); \
2503 } while (0)
2504#define ASSERT_ALWAYS_MPN(ptr, size) \
2505 do { \
2506 /* let whole loop go dead when no nails */ \
2507 if (GMP_NAIL_BITS != 0) \
2508 { \
2509 mp_size_t __i; \
2510 for (__i = 0; __i < (size); __i++) \
2511 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2512 } \
2513 } while (0)
2514#if WANT_ASSERT
2515#define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2516#define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2517#else
2518#define ASSERT_LIMB(limb) do {} while (0)
2519#define ASSERT_MPN(ptr, size) do {} while (0)
2520#endif
2521
2522
2523/* Assert that an mpn region {ptr,size} is zero, or non-zero.
2524   size==0 is allowed, and in that case {ptr,size} is considered to be zero. */
2525#if WANT_ASSERT
2526#define ASSERT_MPN_ZERO_P(ptr,size) \
2527 do { \
2528 mp_size_t __i; \
2529 ASSERT ((size) >= 0); \
2530 for (__i = 0; __i < (size); __i++) \
2531 ASSERT ((ptr)[__i] == 0); \
2532 } while (0)
2533#define ASSERT_MPN_NONZERO_P(ptr,size) \
2534 do { \
2535 mp_size_t __i; \
2536 int __nonzero = 0; \
2537 ASSERT ((size) >= 0); \
2538 for (__i = 0; __i < (size); __i++) \
2539 if ((ptr)[__i] != 0) \
2540 { \
2541 __nonzero = 1; \
2542 break; \
2543 } \
2544 ASSERT (__nonzero); \
2545 } while (0)
2546#else
2547#define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2548#define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2549#endif
2550
2551
2552#if ! HAVE_NATIVE_mpn_com
2553#undef mpn_com
2554#define mpn_com(d,s,n) \
2555 do { \
2556 mp_ptr __d = (d); \
2557 mp_srcptr __s = (s); \
2558 mp_size_t __n = (n); \
2559 ASSERT (__n >= 1); \
2560 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2561 do \
2562 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2563 while (--__n); \
2564 } while (0)
2565#endif
2566
2567#define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2568 do { \
2569 mp_srcptr __up = (up); \
2570 mp_srcptr __vp = (vp); \
2571 mp_ptr __rp = (rp); \
2572 mp_size_t __n = (n); \
2573 mp_limb_t __a, __b; \
2574 ASSERT (__n > 0); \
2575 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2576 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2577 __up += __n; \
2578 __vp += __n; \
2579 __rp += __n; \
2580 __n = -__n; \
2581 do { \
2582 __a = __up[__n]; \
2583 __b = __vp[__n]; \
2584 __rp[__n] = operation; \
2585 } while (++__n); \
2586 } while (0)
2587
2588
2589#if ! HAVE_NATIVE_mpn_and_n
2590#undef mpn_and_n
2591#define mpn_and_n(rp, up, vp, n) \
2592 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2593#endif
2594
2595#if ! HAVE_NATIVE_mpn_andn_n
2596#undef mpn_andn_n
2597#define mpn_andn_n(rp, up, vp, n) \
2598 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2599#endif
2600
2601#if ! HAVE_NATIVE_mpn_nand_n
2602#undef mpn_nand_n
2603#define mpn_nand_n(rp, up, vp, n) \
2604 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2605#endif
2606
2607#if ! HAVE_NATIVE_mpn_ior_n
2608#undef mpn_ior_n
2609#define mpn_ior_n(rp, up, vp, n) \
2610 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2611#endif
2612
2613#if ! HAVE_NATIVE_mpn_iorn_n
2614#undef mpn_iorn_n
2615#define mpn_iorn_n(rp, up, vp, n) \
2616 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2617#endif
2618
2619#if ! HAVE_NATIVE_mpn_nior_n
2620#undef mpn_nior_n
2621#define mpn_nior_n(rp, up, vp, n) \
2622 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2623#endif
2624
2625#if ! HAVE_NATIVE_mpn_xor_n
2626#undef mpn_xor_n
2627#define mpn_xor_n(rp, up, vp, n) \
2628 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2629#endif
2630
2631#if ! HAVE_NATIVE_mpn_xnor_n
2632#undef mpn_xnor_n
2633#define mpn_xnor_n(rp, up, vp, n) \
2634 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2635#endif
2636
2637#define mpn_trialdiv __MPN(trialdiv)
2638__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
2639
2640#define mpn_remove __MPN(remove)
2641__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
2642
2643
2644/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2645#if GMP_NAIL_BITS == 0
2646#define ADDC_LIMB(cout, w, x, y) \
2647 do { \
2648 mp_limb_t __x = (x); \
2649 mp_limb_t __y = (y); \
2650 mp_limb_t __w = __x + __y; \
2651 (w) = __w; \
2652 (cout) = __w < __x; \
2653 } while (0)
2654#else
2655#define ADDC_LIMB(cout, w, x, y) \
2656 do { \
2657 mp_limb_t __w; \
2658 ASSERT_LIMB (x); \
2659 ASSERT_LIMB (y); \
2660 __w = (x) + (y); \
2661 (w) = __w & GMP_NUMB_MASK; \
2662 (cout) = __w >> GMP_NUMB_BITS; \
2663 } while (0)
2664#endif
2665
2666/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2667 subtract. */
2668#if GMP_NAIL_BITS == 0
2669#define SUBC_LIMB(cout, w, x, y) \
2670 do { \
2671 mp_limb_t __x = (x); \
2672 mp_limb_t __y = (y); \
2673 mp_limb_t __w = __x - __y; \
2674 (w) = __w; \
2675 (cout) = __w > __x; \
2676 } while (0)
2677#else
2678#define SUBC_LIMB(cout, w, x, y) \
2679 do { \
2680 mp_limb_t __w = (x) - (y); \
2681 (w) = __w & GMP_NUMB_MASK; \
2682 (cout) = __w >> (GMP_LIMB_BITS-1); \
2683 } while (0)
2684#endif
2685
2686
2687/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2688 expecting no carry (or borrow) from that.
2689
2690 The size parameter is only for the benefit of assertion checking. In a
2691 normal build it's unused and the carry/borrow is just propagated as far
2692 as it needs to go.
2693
2694 On random data, usually only one or two limbs of {ptr,size} get updated,
2695 so there's no need for any sophisticated looping, just something compact
2696 and sensible.
2697
2698 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2699 declaring their operand sizes, then remove the former. This is purely
2700 for the benefit of assertion checking. */
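
/* Illustrative use: a caller that knows a small addition cannot overflow
   its destination writes

       MPN_INCR_U (rp, rn, CNST_LIMB (2));

   and in an assertion-checked build the rn argument lets mpn_add_1 verify
   that no carry escapes the region.  */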
2701
2702#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \
2703 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
2704 && ! WANT_ASSERT
2705/* Better flags handling than the generic C gives on i386, saving a few
2706 bytes of code and maybe a cycle or two. */
2707
2708#define MPN_IORD_U(ptr, incr, aors) \
2709 do { \
2710 mp_ptr __ptr_dummy; \
2711 if (__builtin_constant_p (incr) && (incr) == 0) \
2712 { \
2713 } \
2714 else if (__builtin_constant_p (incr) && (incr) == 1) \
2715 { \
2716 __asm__ __volatile__ \
2717 ("\n" ASM_L(top) ":\n" \
2718 "\t" aors "\t$1, (%0)\n" \
2719 "\tlea\t%c2(%0), %0\n" \
2720 "\tjc\t" ASM_L(top) \
2721 : "=r" (__ptr_dummy) \
2722 : "0" (ptr), "n" (sizeof(mp_limb_t)) \
2723 : "memory"); \
2724 } \
2725 else \
2726 { \
2727 __asm__ __volatile__ \
2728 ( aors "\t%2, (%0)\n" \
2729 "\tjnc\t" ASM_L(done) "\n" \
2730 ASM_L(top) ":\n" \
2731 "\t" aors "\t$1, %c3(%0)\n" \
2732 "\tlea\t%c3(%0), %0\n" \
2733 "\tjc\t" ASM_L(top) "\n" \
2734 ASM_L(done) ":\n" \
2735 : "=r" (__ptr_dummy) \
2736 : "0" (ptr), \
2737 "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \
2738 : "memory"); \
2739 } \
2740 } while (0)
2741
2742#if GMP_LIMB_BITS == 32
2743#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2744#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2745#endif
2746#if GMP_LIMB_BITS == 64
2747#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq")
2748#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq")
2749#endif
2750#define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2751#define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2752#endif
2753
2754#if GMP_NAIL_BITS == 0
2755#ifndef mpn_incr_u
2756#define mpn_incr_u(p,incr) \
2757 do { \
2758 mp_limb_t __x; \
2759 mp_ptr __p = (p); \
2760 if (__builtin_constant_p (incr) && (incr) == 1) \
2761 { \
2762 while (++(*(__p++)) == 0) \
2763 ; \
2764 } \
2765 else \
2766 { \
2767 __x = *__p + (incr); \
2768 *__p = __x; \
2769 if (__x < (incr)) \
2770 while (++(*(++__p)) == 0) \
2771 ; \
2772 } \
2773 } while (0)
2774#endif
2775#ifndef mpn_decr_u
2776#define mpn_decr_u(p,incr) \
2777 do { \
2778 mp_limb_t __x; \
2779 mp_ptr __p = (p); \
2780 if (__builtin_constant_p (incr) && (incr) == 1) \
2781 { \
2782 while ((*(__p++))-- == 0) \
2783 ; \
2784 } \
2785 else \
2786 { \
2787 __x = *__p; \
2788 *__p = __x - (incr); \
2789 if (__x < (incr)) \
2790 while ((*(++__p))-- == 0) \
2791 ; \
2792 } \
2793 } while (0)
2794#endif
2795#endif
2796
2797#if GMP_NAIL_BITS >= 1
2798#ifndef mpn_incr_u
2799#define mpn_incr_u(p,incr) \
2800 do { \
2801 mp_limb_t __x; \
2802 mp_ptr __p = (p); \
2803 if (__builtin_constant_p (incr) && (incr) == 1) \
2804 { \
2805 do \
2806 { \
2807 __x = (*__p + 1) & GMP_NUMB_MASK; \
2808 *__p++ = __x; \
2809 } \
2810 while (__x == 0); \
2811 } \
2812 else \
2813 { \
2814 __x = (*__p + (incr)); \
2815 *__p++ = __x & GMP_NUMB_MASK; \
2816 if (__x >> GMP_NUMB_BITS != 0) \
2817 { \
2818 do \
2819 { \
2820 __x = (*__p + 1) & GMP_NUMB_MASK; \
2821 *__p++ = __x; \
2822 } \
2823 while (__x == 0); \
2824 } \
2825 } \
2826 } while (0)
2827#endif
2828#ifndef mpn_decr_u
2829#define mpn_decr_u(p,incr) \
2830 do { \
2831 mp_limb_t __x; \
2832 mp_ptr __p = (p); \
2833 if (__builtin_constant_p (incr) && (incr) == 1) \
2834 { \
2835 do \
2836 { \
2837 __x = *__p; \
2838 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2839 } \
2840 while (__x == 0); \
2841 } \
2842 else \
2843 { \
2844 __x = *__p - (incr); \
2845 *__p++ = __x & GMP_NUMB_MASK; \
2846 if (__x >> GMP_NUMB_BITS != 0) \
2847 { \
2848 do \
2849 { \
2850 __x = *__p; \
2851 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2852 } \
2853 while (__x == 0); \
2854 } \
2855 } \
2856 } while (0)
2857#endif
2858#endif
2859
2860#ifndef MPN_INCR_U
2861#if WANT_ASSERT
2862#define MPN_INCR_U(ptr, size, n) \
2863 do { \
2864 ASSERT ((size) >= 1); \
2865 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2866 } while (0)
2867#else
2868#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2869#endif
2870#endif
2871
2872#ifndef MPN_DECR_U
2873#if WANT_ASSERT
2874#define MPN_DECR_U(ptr, size, n) \
2875 do { \
2876 ASSERT ((size) >= 1); \
2877 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2878 } while (0)
2879#else
2880#define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2881#endif
2882#endif
2883
2884
2885/* Structure for conversion between internal binary format and strings. */
2886struct bases
2887{
2888 /* Number of digits in the conversion base that always fits in an mp_limb_t.
2889 For example, for base 10 on a machine where an mp_limb_t has 32 bits this
2890 is 9, since 10**9 is the largest number that fits into an mp_limb_t. */
2891 int chars_per_limb;
2892
2893 /* log(2)/log(conversion_base) */
2894 mp_limb_t logb2;
2895
2896 /* log(conversion_base)/log(2) */
2897 mp_limb_t log2b;
2898
2899 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2900 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2901 i.e. the number of bits used to represent each digit in the base. */
2902 mp_limb_t big_base;
2903
2904 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2905 fixed-point number. Instead of dividing by big_base an application can
2906 choose to multiply by big_base_inverted. */
2907 mp_limb_t big_base_inverted;
2908};
2909
2910#define mp_bases __MPN(bases)
2911__GMP_DECLSPEC extern const struct bases mp_bases[257];
2912
2913
2914/* Compute the number of digits in base for nbits bits, making sure the result
2915 is never too small. The two variants of the macro implement the same
2916 function; the GT2 variant below works just for bases > 2. */
2917#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \
2918 do { \
2919 mp_limb_t _ph, _dummy; \
2920 size_t _nbits = (nbits); \
2921 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \
2922 _ph += (_dummy + _nbits < _dummy); \
2923 res = _ph + 1; \
2924 } while (0)
2925#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \
2926 do { \
2927 mp_limb_t _ph, _dummy; \
2928 size_t _nbits = (nbits); \
2929 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \
2930 res = _ph + 1; \
2931 } while (0)
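
/* Rough worked example (assuming logb2 is the fraction log(2)/log(b)
   scaled by 2^GMP_LIMB_BITS): for b = 10 and nbits = 64 the product's high
   limb is floor (64 * 0.30103) = 19, so res = 20, matching the 20 decimal
   digits of 2^64 - 1.  */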
2932
2933/* For power of 2 bases this is exact. For other bases the result is either
2934 exact or one too big.
2935
2936 To be exact always it'd be necessary to examine all the limbs of the
2937 operand, since numbers like 100..000 and 99...999 generally differ only
2938 in the lowest limb. It'd be possible to examine just a couple of high
2939 limbs to increase the probability of being exact, but that doesn't seem
2940 worth bothering with. */
2941
2942#define MPN_SIZEINBASE(result, ptr, size, base) \
2943 do { \
2944 int __lb_base, __cnt; \
2945 size_t __totbits; \
2946 \
2947 ASSERT ((size) >= 0); \
2948 ASSERT ((base) >= 2); \
2949 ASSERT ((base) < numberof (mp_bases)); \
2950 \
2951 /* Special case for X == 0. */ \
2952 if ((size) == 0) \
2953 (result) = 1; \
2954 else \
2955 { \
2956 /* Calculate the total number of significant bits of X. */ \
2957 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2958 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2959 \
2960 if (POW2_P (base)) \
2961 { \
2962 __lb_base = mp_bases[base].big_base; \
2963 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2964 } \
2965 else \
2966 { \
2967 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \
2968 } \
2969 } \
2970 } while (0)
2971
2972#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
2973 do { \
2974 int __cnt; \
2975 mp_bitcnt_t __totbits; \
2976 ASSERT ((size) > 0); \
2977 ASSERT ((ptr)[(size)-1] != 0); \
2978 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2979 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
2980 (result) = (__totbits + (base2exp)-1) / (base2exp); \
2981 } while (0)
2982
2983
2984/* bit count to limb count, rounding up */
2985#define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2986
2987/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2988 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2989 in both cases (LIMBS_PER_ULONG is also defined here.) */
2990#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2991
2992#define LIMBS_PER_ULONG 1
2993#define MPN_SET_UI(zp, zn, u) \
2994 (zp)[0] = (u); \
2995 (zn) = ((zp)[0] != 0);
2996#define MPZ_FAKE_UI(z, zp, u) \
2997 (zp)[0] = (u); \
2998 PTR (z) = (zp); \
2999 SIZ (z) = ((zp)[0] != 0); \
3000 ASSERT_CODE (ALLOC (z) = 1);
3001
3002#else /* need two limbs per ulong */
3003
3004#define LIMBS_PER_ULONG 2
3005#define MPN_SET_UI(zp, zn, u) \
3006 (zp)[0] = (u) & GMP_NUMB_MASK; \
3007 (zp)[1] = (u) >> GMP_NUMB_BITS; \
3008 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
3009#define MPZ_FAKE_UI(z, zp, u) \
3010 (zp)[0] = (u) & GMP_NUMB_MASK; \
3011 (zp)[1] = (u) >> GMP_NUMB_BITS; \
3012 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
3013 PTR (z) = (zp); \
3014 ASSERT_CODE (ALLOC (z) = 2);
3015
3016#endif
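
/* Sketch of the intended use (foo is an illustrative callee): pass an
   unsigned long to an mpz-taking routine without any heap allocation:

       mp_limb_t ulimbs[LIMBS_PER_ULONG];
       mpz_t uz;
       MPZ_FAKE_UI (uz, ulimbs, u);
       foo (result, x, uz);

   The fake mpz_t borrows ulimbs as its limb array, so the callee must
   treat it as read-only and must not reallocate it.  */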
3017
3018
3019#if HAVE_HOST_CPU_FAMILY_x86
3020#define TARGET_REGISTER_STARVED 1
3021#else
3022#define TARGET_REGISTER_STARVED 0
3023#endif
3024
3025
3026/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
3027 or 0 there into a limb 0xFF..FF or 0 respectively.
3028
3029 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
3030 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
3031 a little compile-time test and a fallback to a "? :" form. The latter is
3032 necessary for instance on Cray vector systems.
3033
3034 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
3035 to an arithmetic right shift anyway, but it's good to get the desired
3036 shift on past versions too (in particular since an important use of
3037 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
3038
3039#define LIMB_HIGHBIT_TO_MASK(n) \
3040 (((mp_limb_signed_t) -1 >> 1) < 0 \
3041 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
3042 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
3043
3044
3045/* Use a library function for invert_limb, if available. */
3046#define mpn_invert_limb __MPN(invert_limb)
3047__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
3048#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
3049#define invert_limb(invxl,xl) \
3050 do { \
3051 (invxl) = mpn_invert_limb (xl); \
3052 } while (0)
3053#endif
3054
3055#ifndef invert_limb
3056#define invert_limb(invxl,xl) \
3057 do { \
3058 mp_limb_t _dummy; \
3059 ASSERT ((xl) != 0); \
3060 udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \
3061 } while (0)
3062#endif
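
/* The fallback above computes invxl = floor ((B^2 - 1) / xl) - B for a
   normalized xl (high bit set), since ~xl * B + ~0 = B^2 - 1 - xl * B;
   this matches the usual definition of the limb inverse returned by a
   native mpn_invert_limb.  */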
3063
3064#define invert_pi1(dinv, d1, d0) \
3065 do { \
3066 mp_limb_t _v, _p, _t1, _t0, _mask; \
3067 invert_limb (_v, d1); \
3068 _p = (d1) * _v; \
3069 _p += (d0); \
3070 if (_p < (d0)) \
3071 { \
3072 _v--; \
3073 _mask = -(mp_limb_t) (_p >= (d1)); \
3074 _p -= (d1); \
3075 _v += _mask; \
3076 _p -= _mask & (d1); \
3077 } \
3078 umul_ppmm (_t1, _t0, d0, _v); \
3079 _p += _t1; \
3080 if (_p < _t1) \
3081 { \
3082 _v--; \
3083 if (UNLIKELY (_p >= (d1))) \
3084 { \
3085 if (_p > (d1) || _t0 >= (d0)) \
3086 _v--; \
3087 } \
3088 } \
3089 (dinv).inv32 = _v; \
3090 } while (0)
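
/* invert_pi1 thus leaves in dinv.inv32 the 3/2 inverse
   floor ((B^3 - 1) / (d1 * B + d0)) - B used by udiv_qr_3by2 below,
   assuming d1 is normalized (high bit set).  */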
3091
3092
3093/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
3094 We write things strangely below, to help gcc. A more straightforward
3095 version:
3096 _r = (nl) - _qh * (d);
3097 _t = _r + (d);
3098 if (_r >= _ql)
3099 {
3100 _qh--;
3101 _r = _t;
3102 }
3103 For one operation shorter critical path, one may want to use this form:
3104 _p = _qh * (d)
3105 _s = (nl) + (d);
3106 _r = (nl) - _p;
3107 _t = _s - _p;
3108 if (_r >= _ql)
3109 {
3110 _qh--;
3111 _r = _t;
3112 }
3113*/
3114#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
3115 do { \
3116 mp_limb_t _qh, _ql, _r, _mask; \
3117 umul_ppmm (_qh, _ql, (nh), (di)); \
3118 if (__builtin_constant_p (nl) && (nl) == 0) \
3119 { \
3120 _qh += (nh) + 1; \
3121 _r = - _qh * (d); \
3122 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3123 _qh += _mask; \
3124 _r += _mask & (d); \
3125 } \
3126 else \
3127 { \
3128 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3129 _r = (nl) - _qh * (d); \
3130 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3131 _qh += _mask; \
3132 _r += _mask & (d); \
3133 if (UNLIKELY (_r >= (d))) \
3134 { \
3135 _r -= (d); \
3136 _qh++; \
3137 } \
3138 } \
3139 (r) = _r; \
3140 (q) = _qh; \
3141 } while (0)
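
/* Contract, as commonly stated for this primitive: d normalized with its
   high bit set, nh < d, and di the reciprocal produced by invert_limb
   above; on exit nh*B + nl = q*d + r with 0 <= r < d.  */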
3142
3143/* Dividing (NH, NL) by D, returning the remainder only. Unlike
3144 udiv_qrnnd_preinv, works also for the case NH == D, where the
3145 quotient doesn't quite fit in a single limb. */
3146#define udiv_rnnd_preinv(r, nh, nl, d, di) \
3147 do { \
3148 mp_limb_t _qh, _ql, _r, _mask; \
3149 umul_ppmm (_qh, _ql, (nh), (di)); \
3150 if (__builtin_constant_p (nl) && (nl) == 0) \
3151 { \
3152 _r = ~(_qh + (nh)) * (d); \
3153 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3154 _r += _mask & (d); \
3155 } \
3156 else \
3157 { \
3158 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3159 _r = (nl) - _qh * (d); \
3160 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3161 _r += _mask & (d); \
3162 if (UNLIKELY (_r >= (d))) \
3163 _r -= (d); \
3164 } \
3165 (r) = _r; \
3166 } while (0)
3167
3168/* Compute the quotient and remainder for n / d. Requires d
3169 >= B^2 / 2 and n < d B. di is the inverse
3170
3171 floor ((B^3 - 1) / (d0 + d1 B)) - B.
3172
3173 NOTE: Output variables are updated multiple times. Only some inputs
3174 and outputs may overlap.
3175*/
3176#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
3177 do { \
3178 mp_limb_t _q0, _t1, _t0, _mask; \
3179 umul_ppmm ((q), _q0, (n2), (dinv)); \
3180 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
3181 \
3182 /* Compute the two most significant limbs of n - q'd */ \
3183 (r1) = (n1) - (d1) * (q); \
3184 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
3185 umul_ppmm (_t1, _t0, (d0), (q)); \
3186 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
3187 (q)++; \
3188 \
3189 /* Conditionally adjust q and the remainders */ \
3190 _mask = - (mp_limb_t) ((r1) >= _q0); \
3191 (q) += _mask; \
3192 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
3193 if (UNLIKELY ((r1) >= (d1))) \
3194 { \
3195 if ((r1) > (d1) || (r0) >= (d0)) \
3196 { \
3197 (q)++; \
3198 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
3199 } \
3200 } \
3201 } while (0)
3202
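/* Usage sketch (illustrative): the dinv argument is the 3/2 reciprocal
   from invert_pi1 above, i.e. the inv32 field of a struct filled in for
   the normalized divisor {d0,d1}:

     invert_pi1 (dinv, d1, d0);
     udiv_qr_3by2 (q, r1, r0, n2, n1, n0, d1, d0, dinv.inv32);

   afterwards q is the single-limb quotient and {r1,r0} the remainder of
   {n2,n1,n0} divided by {d1,d0}. */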
3203#ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
3204#define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
3205__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
3206#endif
3207
3208
3209/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
3210 plain mpn_divrem_1. The default is yes, since the few CISC chips where
3211 preinv is not good have defines saying so. */
3212#ifndef USE_PREINV_DIVREM_1
3213#define USE_PREINV_DIVREM_1 1
3214#endif
3215
3216#if USE_PREINV_DIVREM_1
3217#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3218 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
3219#else
3220#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3221 mpn_divrem_1 (qp, xsize, ap, size, d)
3222#endif
3223
3224#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
3225#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
3226#endif
3227
3228/* This selection may seem backwards. The reason mpn_mod_1 typically takes
3229 over for larger sizes is that it uses the mod_1_1 function. */
3230#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
3231 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
3232 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
3233 : mpn_mod_1 (src, size, divisor))
3234
3235
3236#ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
3237#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
3238__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
3239#endif
3240
3241
3242/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
3243 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
3244 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
3245 modexact are faster at all sizes, so the defaults are 0. Those CPUs
3246 where this is not right have a tuned threshold. */
3247#ifndef DIVEXACT_1_THRESHOLD
3248#define DIVEXACT_1_THRESHOLD 0
3249#endif
3250#ifndef BMOD_1_TO_MOD_1_THRESHOLD
3251#define BMOD_1_TO_MOD_1_THRESHOLD 10
3252#endif
3253
3254#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \
3255 do { \
3256 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \
3257 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \
3258 else \
3259 { \
3260 ASSERT (mpn_mod_1 (up, n, d) == 0); \
3261 mpn_divexact_1 (rp, up, n, d); \
3262 } \
3263 } while (0)
3264
3265#ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
3266#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
3267__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3268#endif
3269
3270#if HAVE_NATIVE_mpn_modexact_1_odd
3271#define mpn_modexact_1_odd __MPN(modexact_1_odd)
3272__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3273#else
3274#define mpn_modexact_1_odd(src,size,divisor) \
3275 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
3276#endif
3277
3278#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
3279 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
3280 ? mpn_modexact_1_odd (src, size, divisor) \
3281 : mpn_mod_1 (src, size, divisor))
3282
3283/* binvert_limb() sets inv to the multiplicative inverse of n modulo
3284 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
3285 n must be odd (otherwise such an inverse doesn't exist).
3286
3287 This is not to be confused with invert_limb(), which is completely
3288 different.
3289
3290 The table lookup gives an inverse with the low 8 bits valid, and each
3291 multiply step doubles the number of bits. See Jebelean "An algorithm for
3292 exact division" end of section 4 (reference in gmp.texi).
3293
3294 Possible enhancement: Could use UHWtype until the last step, if half-size
3295 multiplies are faster (might help under _LONG_LONG_LIMB).
3296
3297 Alternative: As noted in Granlund and Montgomery "Division by Invariant
3298 Integers using Multiplication" (reference in gmp.texi), n itself gives a
3299 3-bit inverse immediately, and could be used instead of a table lookup.
3300 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
3301 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
3302
3303#define binvert_limb_table __gmp_binvert_limb_table
3304__GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
3305
3306#define binvert_limb(inv,n) \
3307 do { \
3308 mp_limb_t __n = (n); \
3309 mp_limb_t __inv; \
3310 ASSERT ((__n & 1) == 1); \
3311 \
3312 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
3313 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
3314 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
3315 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
3316 \
3317 if (GMP_NUMB_BITS > 64) \
3318 { \
3319 int __invbits = 64; \
3320 do { \
3321 __inv = 2 * __inv - __inv * __inv * __n; \
3322 __invbits *= 2; \
3323 } while (__invbits < GMP_NUMB_BITS); \
3324 } \
3325 \
3326 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
3327 (inv) = __inv & GMP_NUMB_MASK; \
3328 } while (0)
3329#define modlimb_invert binvert_limb /* backward compatibility */
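/* Illustrative sketch (not part of GMP) of the same iteration on a plain
   unsigned type, starting from the 3-bit inverse given by n itself as
   noted above; each step inv = inv*(2 - n*inv) doubles the number of
   correct low bits (3 -> 6 -> 12 -> 24 -> 48):

     uint32_t n = 0x12345679;       an odd value
     uint32_t inv = n;              correct to 3 bits
     inv *= 2 - n * inv;            6 bits
     inv *= 2 - n * inv;            12 bits
     inv *= 2 - n * inv;            24 bits
     inv *= 2 - n * inv;            48 bits, so all 32 are correct
     assert ((uint32_t) (inv * n) == 1);
*/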
3330
3331/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
3332 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
3333 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
3334 we need to start from GMP_NUMB_MAX>>1. */
3335#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
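/* Worked check, assuming GMP_NUMB_BITS == 32 with no nail bits:
   MODLIMB_INVERSE_3 is then 0xAAAAAAAB, and 3 * 0xAAAAAAAB = 0x200000001
   which is 1 mod 2^32, as required. */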
3336
3337/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
3338 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
3339#define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
3340#define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
3341
3342
3343/* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
3344
3345 It's not clear whether this is the best way to do this calculation.
3346 Anything congruent to -a would be fine for the one limb congruence
3347 tests. */
3348
3349#define NEG_MOD(r, a, d) \
3350 do { \
3351 ASSERT ((d) != 0); \
3352 ASSERT_LIMB (a); \
3353 ASSERT_LIMB (d); \
3354 \
3355 if ((a) <= (d)) \
3356 { \
3357 /* small a is reasonably likely */ \
3358 (r) = (d) - (a); \
3359 } \
3360 else \
3361 { \
3362 unsigned __twos; \
3363 mp_limb_t __dnorm; \
3364 count_leading_zeros (__twos, d); \
3365 __twos -= GMP_NAIL_BITS; \
3366 __dnorm = (d) << __twos; \
3367 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
3368 } \
3369 \
3370 ASSERT_LIMB (r); \
3371 } while (0)
3372
3373/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
3374#define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
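/* For example n = 0x58 (binary 1011000) gives n & -n = 0x8 and hence a
   mask of 0x7, covering exactly the three least significant zero bits. */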
3375
3376
3377/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
3378 to 0 if there's an even number. "n" should be an unsigned long and "p"
3379 an int. */
3380
3381#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3382#define ULONG_PARITY(p, n) \
3383 do { \
3384 int __p; \
3385 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
3386 (p) = __p & 1; \
3387 } while (0)
3388#endif
3389
3390/* Cray intrinsic _popcnt. */
3391#ifdef _CRAY
3392#define ULONG_PARITY(p, n) \
3393 do { \
3394 (p) = _popcnt (n) & 1; \
3395 } while (0)
3396#endif
3397
3398#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3399 && ! defined (NO_ASM) && defined (__ia64)
3400/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3401 to a 64 bit unsigned long long for popcnt */
3402#define ULONG_PARITY(p, n) \
3403 do { \
3404 unsigned long long __n = (unsigned long) (n); \
3405 int __p; \
3406 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3407 (p) = __p & 1; \
3408 } while (0)
3409#endif
3410
3411#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3412 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3413#if __GMP_GNUC_PREREQ (3,1)
3414#define __GMP_qm "=Qm"
3415#define __GMP_q "=Q"
3416#else
3417#define __GMP_qm "=qm"
3418#define __GMP_q "=q"
3419#endif
3420#define ULONG_PARITY(p, n) \
3421 do { \
3422 char __p; \
3423 unsigned long __n = (n); \
3424 __n ^= (__n >> 16); \
3425 __asm__ ("xorb %h1, %b1\n\t" \
3426 "setpo %0" \
3427 : __GMP_qm (__p), __GMP_q (__n) \
3428 : "1" (__n)); \
3429 (p) = __p; \
3430 } while (0)
3431#endif
3432
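/* The generic fallback below uses 0x96696996 as a 32-entry one-bit lookup
   table: bit i of that constant is the parity of i, for 0 <= i < 32.
   Each iteration looks up the parity of the low 5 bits of __n and xors it
   into __p; since the parity of n is the xor of the parities of its 5-bit
   chunks, __p & 1 is the answer once __n has been consumed. */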
3433#if ! defined (ULONG_PARITY)
3434#define ULONG_PARITY(p, n) \
3435 do { \
3436 unsigned long __n = (n); \
3437 int __p = 0; \
3438 do \
3439 { \
3440 __p ^= 0x96696996L >> (__n & 0x1F); \
3441 __n >>= 5; \
3442 } \
3443 while (__n != 0); \
3444 \
3445 (p) = __p & 1; \
3446 } while (0)
3447#endif
3448
3449
3450/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3451 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3452 anything other than bit-fields, so use "asm". */
3453#if defined (__GNUC__) && ! defined (NO_ASM) \
3454 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3455#define BSWAP_LIMB(dst, src) \
3456 do { \
3457 mp_limb_t __bswapl_src = (src); \
3458 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3459 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3460 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3461 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3462 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 2nd high */ \
3463 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3464 (dst) = __tmp1 | __tmp2; /* whole */ \
3465 } while (0)
3466#endif
3467
3468/* bswap is available on i486 and up and is fast. A combination rorw $8 /
3469 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3470 kernel with xchgb instead of rorw), but this is not done here, because
3471 i386 means generic x86 and mixing word and dword operations will cause
3472 partial register stalls on P6 chips. */
3473#if defined (__GNUC__) && ! defined (NO_ASM) \
3474 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3475 && GMP_LIMB_BITS == 32
3476#define BSWAP_LIMB(dst, src) \
3477 do { \
3478 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3479 } while (0)
3480#endif
3481
3482#if defined (__GNUC__) && ! defined (NO_ASM) \
3483 && defined (__amd64__) && GMP_LIMB_BITS == 64
3484#define BSWAP_LIMB(dst, src) \
3485 do { \
3486 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3487 } while (0)
3488#endif
3489
3490#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3491 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3492#define BSWAP_LIMB(dst, src) \
3493 do { \
3494 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3495 } while (0)
3496#endif
3497
3498/* As per glibc. */
3499#if defined (__GNUC__) && ! defined (NO_ASM) \
3500 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3501#define BSWAP_LIMB(dst, src) \
3502 do { \
3503 mp_limb_t __bswapl_src = (src); \
3504 __asm__ ("ror%.w %#8, %0\n\t" \
3505 "swap %0\n\t" \
3506 "ror%.w %#8, %0" \
3507 : "=d" (dst) \
3508 : "0" (__bswapl_src)); \
3509 } while (0)
3510#endif
3511
3512#if ! defined (BSWAP_LIMB)
3513#if GMP_LIMB_BITS == 8
3514#define BSWAP_LIMB(dst, src) \
3515 do { (dst) = (src); } while (0)
3516#endif
3517#if GMP_LIMB_BITS == 16
3518#define BSWAP_LIMB(dst, src) \
3519 do { \
3520 (dst) = ((src) << 8) + ((src) >> 8); \
3521 } while (0)
3522#endif
3523#if GMP_LIMB_BITS == 32
3524#define BSWAP_LIMB(dst, src) \
3525 do { \
3526 (dst) = \
3527 ((src) << 24) \
3528 + (((src) & 0xFF00) << 8) \
3529 + (((src) >> 8) & 0xFF00) \
3530 + ((src) >> 24); \
3531 } while (0)
3532#endif
3533#if GMP_LIMB_BITS == 64
3534#define BSWAP_LIMB(dst, src) \
3535 do { \
3536 (dst) = \
3537 ((src) << 56) \
3538 + (((src) & 0xFF00) << 40) \
3539 + (((src) & 0xFF0000) << 24) \
3540 + (((src) & 0xFF000000) << 8) \
3541 + (((src) >> 8) & 0xFF000000) \
3542 + (((src) >> 24) & 0xFF0000) \
3543 + (((src) >> 40) & 0xFF00) \
3544 + ((src) >> 56); \
3545 } while (0)
3546#endif
3547#endif
3548
3549#if ! defined (BSWAP_LIMB)
3550#define BSWAP_LIMB(dst, src) \
3551 do { \
3552 mp_limb_t __bswapl_src = (src); \
3553 mp_limb_t __dstl = 0; \
3554 int __i; \
3555 for (__i = 0; __i < GMP_LIMB_BYTES; __i++) \
3556 { \
3557 __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \
3558 __bswapl_src >>= 8; \
3559 } \
3560 (dst) = __dstl; \
3561 } while (0)
3562#endif
3563
3564
3565/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3566 those we know are fast. */
3567#if defined (__GNUC__) && ! defined (NO_ASM) \
3568 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3569 && (HAVE_HOST_CPU_powerpc604 \
3570 || HAVE_HOST_CPU_powerpc604e \
3571 || HAVE_HOST_CPU_powerpc750 \
3572 || HAVE_HOST_CPU_powerpc7400)
3573#define BSWAP_LIMB_FETCH(limb, src) \
3574 do { \
3575 mp_srcptr __blf_src = (src); \
3576 mp_limb_t __limb; \
3577 __asm__ ("lwbrx %0, 0, %1" \
3578 : "=r" (__limb) \
3579 : "r" (__blf_src), \
3580 "m" (*__blf_src)); \
3581 (limb) = __limb; \
3582 } while (0)
3583#endif
3584
3585#if ! defined (BSWAP_LIMB_FETCH)
3586#define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3587#endif
3588
3589
3590/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3591 know are fast. FIXME: Is this necessary? */
3592#if defined (__GNUC__) && ! defined (NO_ASM) \
3593 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3594 && (HAVE_HOST_CPU_powerpc604 \
3595 || HAVE_HOST_CPU_powerpc604e \
3596 || HAVE_HOST_CPU_powerpc750 \
3597 || HAVE_HOST_CPU_powerpc7400)
3598#define BSWAP_LIMB_STORE(dst, limb) \
3599 do { \
3600 mp_ptr __dst = (dst); \
3601 mp_limb_t __limb = (limb); \
3602 __asm__ ("stwbrx %1, 0, %2" \
3603 : "=m" (*__dst) \
3604 : "r" (__limb), \
3605 "r" (__dst)); \
3606 } while (0)
3607#endif
3608
3609#if ! defined (BSWAP_LIMB_STORE)
3610#define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3611#endif
3612
3613
3614/* Byte swap limbs from {src,size} and store at {dst,size}. */
3615#define MPN_BSWAP(dst, src, size) \
3616 do { \
3617 mp_ptr __dst = (dst); \
3618 mp_srcptr __src = (src); \
3619 mp_size_t __size = (size); \
3620 mp_size_t __i; \
3621 ASSERT ((size) >= 0); \
3622 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3623 CRAY_Pragma ("_CRI ivdep"); \
3624 for (__i = 0; __i < __size; __i++) \
3625 { \
3626 BSWAP_LIMB_FETCH (*__dst, __src); \
3627 __dst++; \
3628 __src++; \
3629 } \
3630 } while (0)
3631
3632/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3633#define MPN_BSWAP_REVERSE(dst, src, size) \
3634 do { \
3635 mp_ptr __dst = (dst); \
3636 mp_size_t __size = (size); \
3637 mp_srcptr __src = (src) + __size - 1; \
3638 mp_size_t __i; \
3639 ASSERT ((size) >= 0); \
3640 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3641 CRAY_Pragma ("_CRI ivdep"); \
3642 for (__i = 0; __i < __size; __i++) \
3643 { \
3644 BSWAP_LIMB_FETCH (*__dst, __src); \
3645 __dst++; \
3646 __src--; \
3647 } \
3648 } while (0)
3649
3650
3651/* No processor claiming to be SPARC v9 compliant seems to
3652 implement the POPC instruction. Disable pattern for now. */
3653#if 0
3654#if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3655#define popc_limb(result, input) \
3656 do { \
3657 DItype __res; \
3658 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3659 } while (0)
3660#endif
3661#endif
3662
3663#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3664#define popc_limb(result, input) \
3665 do { \
3666 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3667 } while (0)
3668#endif
3669
3670/* Cray intrinsic. */
3671#ifdef _CRAY
3672#define popc_limb(result, input) \
3673 do { \
3674 (result) = _popcnt (input); \
3675 } while (0)
3676#endif
3677
3678#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3679 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3680#define popc_limb(result, input) \
3681 do { \
3682 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3683 } while (0)
3684#endif
3685
3686/* Cool population count of an mp_limb_t.
3687 You have to figure out how this works; we won't tell you!
3688
3689 The constants could also be expressed as:
3690 0x55... = [2^N / 3] = [(2^N-1)/3]
3691 0x33... = [2^N / 5] = [(2^N-1)/5]
3692 0x0f... = [2^N / 17] = [(2^N-1)/17]
3693 (N is GMP_LIMB_BITS, [] denotes truncation.) */
3694
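/* For the record, the first step x -= (x >> 1) & 0x55.. replaces each
   2-bit field of x by the number of 1 bits it contained, the next line
   sums adjacent 2-bit counts into 4-bit fields, and the remaining shifts
   fold the partial counts together until the total ends up in the low
   byte. */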
3695#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3696#define popc_limb(result, input) \
3697 do { \
3698 mp_limb_t __x = (input); \
3699 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3700 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3701 __x = ((__x >> 4) + __x); \
3702 (result) = __x & 0x0f; \
3703 } while (0)
3704#endif
3705
3706#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3707#define popc_limb(result, input) \
3708 do { \
3709 mp_limb_t __x = (input); \
3710 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3711 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3712 __x += (__x >> 4); \
3713 __x = ((__x >> 8) & MP_LIMB_T_MAX/4369)+(__x & MP_LIMB_T_MAX/4369); \
3714 (result) = __x; \
3715 } while (0)
3716#endif
3717
3718#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3719#define popc_limb(result, input) \
3720 do { \
3721 mp_limb_t __x = (input); \
3722 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3723 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3724 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3725 __x = ((__x >> 8) + __x); \
3726 __x = ((__x >> 16) + __x); \
3727 (result) = __x & 0xff; \
3728 } while (0)
3729#endif
3730
3731#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3732#define popc_limb(result, input) \
3733 do { \
3734 mp_limb_t __x = (input); \
3735 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3736 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3737 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3738 __x = ((__x >> 8) + __x); \
3739 __x = ((__x >> 16) + __x); \
3740 __x = ((__x >> 32) + __x); \
3741 (result) = __x & 0xff; \
3742 } while (0)
3743#endif
3744
3745
3746/* Define stuff for longlong.h. */
3747#if HAVE_ATTRIBUTE_MODE
3748typedef unsigned int UQItype __attribute__ ((mode (QI)));
3749typedef int SItype __attribute__ ((mode (SI)));
3750typedef unsigned int USItype __attribute__ ((mode (SI)));
3751typedef int DItype __attribute__ ((mode (DI)));
3752typedef unsigned int UDItype __attribute__ ((mode (DI)));
3753#else
3754typedef unsigned char UQItype;
3755typedef long SItype;
3756typedef unsigned long USItype;
3757#if HAVE_LONG_LONG
3758typedef long long int DItype;
3759typedef unsigned long long int UDItype;
3760#else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3761typedef long int DItype;
3762typedef unsigned long int UDItype;
3763#endif
3764#endif
3765
3766typedef mp_limb_t UWtype;
3767typedef unsigned int UHWtype;
3768#define W_TYPE_SIZE GMP_LIMB_BITS
3769
3770/* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3771
3772 Bit field packing is "implementation defined" according to C99, which
3773 leaves us at the compiler's mercy here. For some systems packing is
3774 defined in the ABI (eg. x86). In any case so far it seems universal that
3775 little endian systems pack from low to high, and big endian from high to
3776 low within the given type.
3777
3778 Within the fields we rely on the integer endianness being the same as the
3779 float endianness; this is true everywhere we know of, and it'd be a fairly
3780 strange system that did anything else. */
3781
3782#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3783#define _GMP_IEEE_FLOATS 1
3784union ieee_double_extract
3785{
3786 struct
3787 {
3788 gmp_uint_least32_t manh:20;
3789 gmp_uint_least32_t exp:11;
3790 gmp_uint_least32_t sig:1;
3791 gmp_uint_least32_t manl:32;
3792 } s;
3793 double d;
3794};
3795#endif
3796
3797#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3798#define _GMP_IEEE_FLOATS 1
3799union ieee_double_extract
3800{
3801 struct
3802 {
3803 gmp_uint_least32_t manl:32;
3804 gmp_uint_least32_t manh:20;
3805 gmp_uint_least32_t exp:11;
3806 gmp_uint_least32_t sig:1;
3807 } s;
3808 double d;
3809};
3810#endif
3811
3812#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3813#define _GMP_IEEE_FLOATS 1
3814union ieee_double_extract
3815{
3816 struct
3817 {
3818 gmp_uint_least32_t sig:1;
3819 gmp_uint_least32_t exp:11;
3820 gmp_uint_least32_t manh:20;
3821 gmp_uint_least32_t manl:32;
3822 } s;
3823 double d;
3824};
3825#endif
3826
3827#if HAVE_DOUBLE_VAX_D
3828union double_extract
3829{
3830 struct
3831 {
3832 gmp_uint_least32_t man3:7; /* highest 7 bits */
3833 gmp_uint_least32_t exp:8; /* excess-128 exponent */
3834 gmp_uint_least32_t sig:1;
3835 gmp_uint_least32_t man2:16;
3836 gmp_uint_least32_t man1:16;
3837 gmp_uint_least32_t man0:16; /* lowest 16 bits */
3838 } s;
3839 double d;
3840};
3841#endif
3842
3843/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3844 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3845#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3846/* Maximum number of limbs it will take to store any `double'.
3847 We assume doubles have 53 mantissa bits. */
3848#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3849
3850__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
3851
3852#define mpn_get_d __gmpn_get_d
3853__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
3854
3855
3856/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3857 a_inf if x is an infinity. Both are considered unlikely values, for
3858 branch prediction. */
3859
3860#if _GMP_IEEE_FLOATS
3861#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3862 do { \
3863 union ieee_double_extract u; \
3864 u.d = (x); \
3865 if (UNLIKELY (u.s.exp == 0x7FF)) \
3866 { \
3867 if (u.s.manl == 0 && u.s.manh == 0) \
3868 { a_inf; } \
3869 else \
3870 { a_nan; } \
3871 } \
3872 } while (0)
3873#endif
3874
3875#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3876/* no nans or infs in these formats */
3877#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3878 do { } while (0)
3879#endif
3880
3881#ifndef DOUBLE_NAN_INF_ACTION
3882/* Unknown format, try something generic.
3883 NaN should be "unordered", so x!=x.
3884 Inf should be bigger than DBL_MAX. */
3885#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3886 do { \
3887 { \
3888 if (UNLIKELY ((x) != (x))) \
3889 { a_nan; } \
3890 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3891 { a_inf; } \
3892 } \
3893 } while (0)
3894#endif
3895
3896/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3897 in the coprocessor, which means a bigger exponent range than normal, and
3898 depending on the rounding mode, a bigger mantissa than normal. (See
3899 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3900 "d" through memory to force any rounding and overflows to occur.
3901
3902 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3903 registers, where there's no such extra precision and no need for the
3904 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3905 FORCE_DOUBLE are only in test programs and default generic C code.
3906
3907 Not quite sure that an "automatic volatile" will use memory, but it does
3908 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3909 apparently matching operands like "0" are only allowed on a register
3910 output. gcc 3.4 warns about this, though in fact it and past versions
3911 seem to put the operand through memory as hoped. */
3912
3913#if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3914 || defined (__amd64__))
3915#define FORCE_DOUBLE(d) \
3916 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3917#else
3918#define FORCE_DOUBLE(d) do { } while (0)
3919#endif
3920
3921
3922__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
3923
3924__GMP_DECLSPEC extern int __gmp_junk;
3925__GMP_DECLSPEC extern const int __gmp_0;
3926__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
3927__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
3928__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
3929__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
3930#define GMP_ERROR(code) __gmp_exception (code)
3931#define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3932#define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
3933
3934#if defined _LONG_LONG_LIMB
3935#define CNST_LIMB(C) ((mp_limb_t) C##LL)
3936#else /* not _LONG_LONG_LIMB */
3937#define CNST_LIMB(C) ((mp_limb_t) C##L)
3938#endif /* _LONG_LONG_LIMB */
3939
3940/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3941#if GMP_NUMB_BITS == 2
3942#define PP 0x3 /* 3 */
3943#define PP_FIRST_OMITTED 5
3944#endif
3945#if GMP_NUMB_BITS == 4
3946#define PP 0xF /* 3 x 5 */
3947#define PP_FIRST_OMITTED 7
3948#endif
3949#if GMP_NUMB_BITS == 8
3950#define PP 0x69 /* 3 x 5 x 7 */
3951#define PP_FIRST_OMITTED 11
3952#endif
3953#if GMP_NUMB_BITS == 16
3954#define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3955#define PP_FIRST_OMITTED 17
3956#endif
3957#if GMP_NUMB_BITS == 32
3958#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3959#define PP_INVERTED 0x53E5645CL
3960#define PP_FIRST_OMITTED 31
3961#endif
3962#if GMP_NUMB_BITS == 64
3963#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3964#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3965#define PP_FIRST_OMITTED 59
3966#endif
3967#ifndef PP_FIRST_OMITTED
3968#define PP_FIRST_OMITTED 3
3969#endif
3970
3971typedef struct
3972{
3973 mp_limb_t d0, d1;
3974} mp_double_limb_t;
3975
3976#define mpn_gcd_22 __MPN (gcd_22)
3977__GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
3978
3979/* BIT1 means a result value in bit 1 (second least significant bit), with a
3980 zero bit representing +1 and a one bit representing -1. Bits other than
3981 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3982 used to ensure the expressions are "int"s even if a and/or b might be
3983 other types.
3984
3985 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3986 and their speed is important. Expressions are used rather than
3987 conditionals to accumulate sign changes, which effectively means XORs
3988 instead of conditional JUMPs. */
3989
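/* For example JACOBI_BIT1_TO_PN(0) is +1 and JACOBI_BIT1_TO_PN(2) is -1.
   Sign flips accumulate by xoring into bit 1, roughly (illustrative only)

     int result_bit1 = 0;                       start at +1
     result_bit1 ^= JACOBI_TWO_U_BIT1 (b);      multiply by (2/b)
     ...
     return JACOBI_BIT1_TO_PN (result_bit1);    convert to +1 or -1
*/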
3990/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3991#define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3992
3993/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3994#define JACOBI_U0(a) ((a) == 1)
3995
3996/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
3997 come up with a better name. */
3998
3999/* (a/0), with a given by low and size;
4000 is 1 if a=+/-1, 0 otherwise */
4001#define JACOBI_LS0(alow,asize) \
4002 (((asize) == 1 || (asize) == -1) && (alow) == 1)
4003
4004/* (a/0), with a an mpz_t;
4005 fetch of low limb always valid, even if size is zero */
4006#define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
4007
4008/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
4009#define JACOBI_0U(b) ((b) == 1)
4010
4011/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
4012#define JACOBI_0S(b) ((b) == 1 || (b) == -1)
4013
4014/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
4015#define JACOBI_0LS(blow,bsize) \
4016 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
4017
4018/* Convert a bit1 to +1 or -1. */
4019#define JACOBI_BIT1_TO_PN(result_bit1) \
4020 (1 - ((int) (result_bit1) & 2))
4021
4022/* (2/b), with b unsigned and odd;
4023 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
4024 hence obtained from (b>>1)^b */
4025#define JACOBI_TWO_U_BIT1(b) \
4026 ((int) (((b) >> 1) ^ (b)))
4027
4028/* (2/b)^twos, with b unsigned and odd */
4029#define JACOBI_TWOS_U_BIT1(twos, b) \
4030 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
4031
4032/* (2/b)^twos, with b unsigned and odd */
4033#define JACOBI_TWOS_U(twos, b) \
4034 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
4035
4036/* (-1/b), with b odd (signed or unsigned);
4037 is (-1)^((b-1)/2) */
4038#define JACOBI_N1B_BIT1(b) \
4039 ((int) (b))
4040
4041/* (a/b) effect due to sign of a: signed/unsigned, b odd;
4042 is (-1/b) if a<0, or +1 if a>=0 */
4043#define JACOBI_ASGN_SU_BIT1(a, b) \
4044 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
4045
4046/* (a/b) effect due to sign of b: signed/signed;
4047 is -1 if a and b both negative, +1 otherwise */
4048#define JACOBI_BSGN_SS_BIT1(a, b) \
4049 ((((a)<0) & ((b)<0)) << 1)
4050
4051/* (a/b) effect due to sign of b: signed/mpz;
4052 is -1 if a and b both negative, +1 otherwise */
4053#define JACOBI_BSGN_SZ_BIT1(a, b) \
4054 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
4055
4056/* (a/b) effect due to sign of b: mpz/signed;
4057 is -1 if a and b both negative, +1 otherwise */
4058#define JACOBI_BSGN_ZS_BIT1(a, b) \
4059 JACOBI_BSGN_SZ_BIT1 (b, a)
4060
4061/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
4062 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
4063 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
4064 because this is used in a couple of places with only bit 1 of a or b
4065 valid. */
4066#define JACOBI_RECIP_UU_BIT1(a, b) \
4067 ((int) ((a) & (b)))
4068
4069/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
4070 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
4071 updated for the new b_ptr. result_bit1 is updated according to the
4072 factors of 2 stripped, as per (a/2). */
4073#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
4074 do { \
4075 ASSERT ((b_size) >= 1); \
4076 ASSERT ((b_low) == (b_ptr)[0]); \
4077 \
4078 while (UNLIKELY ((b_low) == 0)) \
4079 { \
4080 (b_size)--; \
4081 ASSERT ((b_size) >= 1); \
4082 (b_ptr)++; \
4083 (b_low) = *(b_ptr); \
4084 \
4085 ASSERT (((a) & 1) != 0); \
4086 if ((GMP_NUMB_BITS % 2) == 1) \
4087 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
4088 } \
4089 } while (0)
4090
4091/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
4092 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
4093 unsigned. modexact_1_odd effectively calculates -a mod b, and
4094 result_bit1 is adjusted for the factor of -1.
4095
4096 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
4097 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
4098 factor to introduce into result_bit1, so for that case use mpn_mod_1
4099 unconditionally.
4100
4101 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
4102 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
4103 or not skip a divide step, or something. */
4104
4105#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
4106 do { \
4107 mp_srcptr __a_ptr = (a_ptr); \
4108 mp_size_t __a_size = (a_size); \
4109 mp_limb_t __b = (b); \
4110 \
4111 ASSERT (__a_size >= 1); \
4112 ASSERT (__b & 1); \
4113 \
4114 if ((GMP_NUMB_BITS % 2) != 0 \
4115 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
4116 { \
4117 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
4118 } \
4119 else \
4120 { \
4121 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
4122 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
4123 } \
4124 } while (0)
4125
4126/* State for the Jacobi computation using Lehmer. */
4127#define jacobi_table __gmp_jacobi_table
4128__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
4129
4130/* Bit layout for the initial state. b must be odd.
4131
4132 3 2 1 0
4133 +--+--+--+--+
4134 |a1|a0|b1| s|
4135 +--+--+--+--+
4136
4137 */
4138static inline unsigned
4139mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
4140{
4141 ASSERT (b & 1);
4142 ASSERT (s <= 1);
4143 return ((a & 3) << 2) + (b & 2) + s;
4144}
4145
4146static inline int
4147mpn_jacobi_finish (unsigned bits)
4148{
4149 /* (a, b) = (1,0) or (0,1) */
4150 ASSERT ( (bits & 14) == 0);
4151
4152 return 1-2*(bits & 1);
4153}
4154
4155static inline unsigned
4156mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
4157{
4158 /* FIXME: Could halve table size by not including the e bit in the
4159 * index, and instead xor when updating. Then the lookup would be
4160 * like
4161 *
4162 * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
4163 */
4164
4165 ASSERT (bits < 26);
4166 ASSERT (denominator < 2);
4167 ASSERT (q < 4);
4168
4169 /* For almost all calls, denominator is constant and quite often q
4170 is constant too. So use addition rather than or, so the compiler
4171 can put the constant part into the offset of an indexed
4172 addressing instruction.
4173
4174 With constant denominator, the below table lookup is compiled to
4175
4176 C Constant q = 1, constant denominator = 1
4177 movzbl table+5(%eax,8), %eax
4178
4179 or
4180
4181 C q in %edx, constant denominator = 1
4182 movzbl table+4(%edx,%eax,8), %eax
4183
4184 One could maintain the state preshifted 3 bits, to save a shift
4185 here, but at least on x86, that's no real saving.
4186 */
4187 return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
4188}
4189
4190/* Matrix multiplication */
4191#define mpn_matrix22_mul __MPN(matrix22_mul)
4192__GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
4193#define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
4194__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4195
4196#ifndef MATRIX22_STRASSEN_THRESHOLD
4197#define MATRIX22_STRASSEN_THRESHOLD 30
4198#endif
4199
4200/* HGCD definitions */
4201
4202/* Extract one numb, shifting count bits left
4203 ________ ________
4204 |___xh___||___xl___|
4205 |____r____|
4206 >count <
4207
4208 The count includes any nail bits, so it should work fine if count
4209 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
4210 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
4211
4212 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
4213 those calls where the count high bits of xh may be non-zero.
4214*/
4215
4216#define MPN_EXTRACT_NUMB(count, xh, xl) \
4217 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
4218 ((xl) >> (GMP_LIMB_BITS - (count))))
4219
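/* Worked example, assuming GMP_NAIL_BITS == 0 and GMP_LIMB_BITS == 64:
   with count == 4 the macro gives

     ((xh << 4) & GMP_NUMB_MASK) | (xl >> 60)

   i.e. the 64-bit window of xh:xl starting 4 bits below the top of xh,
   which is what's wanted when count came from count_leading_zeros on
   xh. */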
4220
4221/* The non-negative matrix M = (u, u'; v,v') keeps track of the
4222 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
4223 than a, b. The determinant must always be one, so that M has an
4224 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
4225 bits. */
4226struct hgcd_matrix1
4227{
4228 mp_limb_t u[2][2];
4229};
4230
4231#define mpn_hgcd2 __MPN (hgcd2)
4232__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *);
4233
4234#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
4235__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4236
4237#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
4238__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4239
4240#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
4241__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
4242
4243struct hgcd_matrix
4244{
4245 mp_size_t alloc; /* for sanity checking only */
4246 mp_size_t n;
4247 mp_ptr p[2][2];
4248};
4249
4250#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
4251
4252#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
4253__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
4254
4255#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
4256__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
4257
4258#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
4259__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
4260
4261#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
4262__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
4263
4264#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
4265__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4266
4267#define mpn_hgcd_step __MPN(hgcd_step)
4268__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4269
4270#define mpn_hgcd_reduce __MPN(hgcd_reduce)
4271__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
4272
4273#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
4274__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4275
4276#define mpn_hgcd_itch __MPN (hgcd_itch)
4277__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST;
4278
4279#define mpn_hgcd __MPN (hgcd)
4280__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4281
4282#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
4283__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST;
4284
4285#define mpn_hgcd_appr __MPN (hgcd_appr)
4286__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4287
4288#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
4289__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
4290
4291typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
4292
4293/* Needs storage for the quotient */
4294#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
4295
4296#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
4297__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
4298
4299struct gcdext_ctx
4300{
4301 /* Result parameters. */
4302 mp_ptr gp;
4303 mp_size_t gn;
4304 mp_ptr up;
4305 mp_size_t *usize;
4306
4307 /* Cofactors updated in each step. */
4308 mp_size_t un;
4309 mp_ptr u0, u1, tp;
4310};
4311
4312#define mpn_gcdext_hook __MPN (gcdext_hook)
4313gcd_subdiv_step_hook mpn_gcdext_hook;
4314
4315#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
4316
4317#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
4318__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4319
4320/* 4*(an + 1) + 4*(bn + 1) + an */
4321#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
4322
4323#ifndef HGCD_THRESHOLD
4324#define HGCD_THRESHOLD 400
4325#endif
4326
4327#ifndef HGCD_APPR_THRESHOLD
4328#define HGCD_APPR_THRESHOLD 400
4329#endif
4330
4331#ifndef HGCD_REDUCE_THRESHOLD
4332#define HGCD_REDUCE_THRESHOLD 1000
4333#endif
4334
4335#ifndef GCD_DC_THRESHOLD
4336#define GCD_DC_THRESHOLD 1000
4337#endif
4338
4339#ifndef GCDEXT_DC_THRESHOLD
4340#define GCDEXT_DC_THRESHOLD 600
4341#endif
4342
4343/* Definitions for mpn_set_str and mpn_get_str */
4344struct powers
4345{
4346 mp_ptr p; /* actual power value */
4347 mp_size_t n; /* # of limbs at p */
4348 mp_size_t shift; /* weight of lowest limb, in limb base B */
4349 size_t digits_in_base; /* number of corresponding digits */
4350 int base;
4351};
4352typedef struct powers powers_t;
4353#define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */
4354#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
4355#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
4356
4357#define mpn_compute_powtab __MPN(compute_powtab)
4358__GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
4359#define mpn_dc_set_str __MPN(dc_set_str)
4360__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
4361#define mpn_bc_set_str __MPN(bc_set_str)
4362__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
4363
4364
4365/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
4366 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
4367 hence giving back the user's size in bits rounded up. Notice that
4368 converting prec->bits->prec gives an unchanged value. */
4369#define __GMPF_BITS_TO_PREC(n) \
4370 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
4371#define __GMPF_PREC_TO_BITS(n) \
4372 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
4373
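/* Worked example, assuming GMP_NUMB_BITS == 64: __GMPF_BITS_TO_PREC(100)
   is (100 + 127) / 64 = 3 limbs, __GMPF_PREC_TO_BITS(3) gives back
   2*64 = 128 bits, and __GMPF_BITS_TO_PREC(128) is (128 + 127) / 64 = 3
   again, so prec -> bits -> prec is indeed unchanged. */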
4374__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
4375
4376/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
4377 down. */
4378#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \
4379 do { \
4380 mp_limb_t _ph, _dummy; \
4381 umul_ppmm (_ph, _dummy, \
4382 mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
4383 res = _ph; \
4384 } while (0)
4385
4386/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
4387 up. */
4388#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \
4389 do { \
4390 mp_limb_t _ph, _dummy; \
4391 umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
4392 res = 8 * _ph / GMP_NUMB_BITS + 2; \
4393 } while (0)
4394
4395
4396/* Set n to the number of significant digits of an mpf with the given _mp_prec
4397 field, in the given base. This is a rounded up value, designed to ensure
4398 there's enough digits to reproduce all the guaranteed part of the value.
4399
4400 There are prec many limbs, but the high might be only "1" so forget it
4401 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
4402 further +1 is because the limbs usually won't fall on digit boundaries.
4403
4404 FIXME: If base is a power of 2 and the bits per digit divides
4405 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
4406 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
4407
4408#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
4409 do { \
4410 size_t rawn; \
4411 ASSERT (base >= 2 && base < numberof (mp_bases)); \
4412 DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \
4413 n = rawn + 2; \
4414 } while (0)
4415
4416
4417/* Decimal point string, from the current C locale. Needs <langinfo.h> for
4418 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
4419 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
4420 their respective #if HAVE_FOO_H.
4421
4422 GLIBC recommends nl_langinfo because getting only one facet can be
4423 faster, apparently. */
4424
4425/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
4426#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
4427#define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
4428#endif
4429/* RADIXCHAR is deprecated, still in unix98 or some such. */
4430#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
4431#define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
4432#endif
4433/* localeconv is slower since it returns all locale stuff */
4434#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
4435#define GMP_DECIMAL_POINT (localeconv()->decimal_point)
4436#endif
4437#if ! defined (GMP_DECIMAL_POINT)
4438#define GMP_DECIMAL_POINT (".")
4439#endif
4440
4441
4442#define DOPRNT_CONV_FIXED 1
4443#define DOPRNT_CONV_SCIENTIFIC 2
4444#define DOPRNT_CONV_GENERAL 3
4445
4446#define DOPRNT_JUSTIFY_NONE 0
4447#define DOPRNT_JUSTIFY_LEFT 1
4448#define DOPRNT_JUSTIFY_RIGHT 2
4449#define DOPRNT_JUSTIFY_INTERNAL 3
4450
4451#define DOPRNT_SHOWBASE_YES 1
4452#define DOPRNT_SHOWBASE_NO 2
4453#define DOPRNT_SHOWBASE_NONZERO 3
4454
4455struct doprnt_params_t {
4456 int base; /* negative for upper case */
4457 int conv; /* choices above */
4458 const char *expfmt; /* exponent format */
4459 int exptimes4; /* exponent multiply by 4 */
4460 char fill; /* character */
4461 int justify; /* choices above */
4462 int prec; /* prec field, or -1 for all digits */
4463 int showbase; /* choices above */
4464 int showpoint; /* if radix point always shown */
4465 int showtrailing; /* if trailing zeros wanted */
4466 char sign; /* '+', ' ', or '\0' */
4467 int width; /* width field */
4468};
4469
4470#if _GMP_H_HAVE_VA_LIST
4471
4472typedef int (*doprnt_format_t) (void *, const char *, va_list);
4473typedef int (*doprnt_memory_t) (void *, const char *, size_t);
4474typedef int (*doprnt_reps_t) (void *, int, int);
4475typedef int (*doprnt_final_t) (void *);
4476
4477struct doprnt_funs_t {
4478 doprnt_format_t format;
4479 doprnt_memory_t memory;
4480 doprnt_reps_t reps;
4481 doprnt_final_t final; /* NULL if not required */
4482};
4483
4484extern const struct doprnt_funs_t __gmp_fprintf_funs;
4485extern const struct doprnt_funs_t __gmp_sprintf_funs;
4486extern const struct doprnt_funs_t __gmp_snprintf_funs;
4487extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
4488extern const struct doprnt_funs_t __gmp_ostream_funs;
4489
4490/* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
4491 "size" of these have been written. "alloc > size" is maintained, so
4492 there's room to store a '\0' at the end. "result" is where the
4493 application wants the final block pointer. */
4494struct gmp_asprintf_t {
4495 char **result;
4496 char *buf;
4497 size_t size;
4498 size_t alloc;
4499};
4500
4501#define GMP_ASPRINTF_T_INIT(d, output) \
4502 do { \
4503 (d).result = (output); \
4504 (d).alloc = 256; \
4505 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
4506 (d).size = 0; \
4507 } while (0)
4508
4509/* If a realloc is necessary, use twice the size actually required, so as to
4510 avoid repeated small reallocs. */
4511#define GMP_ASPRINTF_T_NEED(d, n) \
4512 do { \
4513 size_t alloc, newsize, newalloc; \
4514 ASSERT ((d)->alloc >= (d)->size + 1); \
4515 \
4516 alloc = (d)->alloc; \
4517 newsize = (d)->size + (n); \
4518 if (alloc <= newsize) \
4519 { \
4520 newalloc = 2*newsize; \
4521 (d)->alloc = newalloc; \
4522 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
4523 alloc, newalloc, char); \
4524 } \
4525 } while (0)
4526
4527__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
4528__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
4529__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
4530
4531/* buf is where to write the next output, and size is how much space is left
4532 there. If the application passed size==0 then that's what we'll have
4533 here, and nothing at all should be written. */
4534struct gmp_snprintf_t {
4535 char *buf;
4536 size_t size;
4537};
4538
4539/* Add the bytes printed by the call to the total retval, or bail out on an
4540 error. */
4541#define DOPRNT_ACCUMULATE(call) \
4542 do { \
4543 int __ret; \
4544 __ret = call; \
4545 if (__ret == -1) \
4546 goto error; \
4547 retval += __ret; \
4548 } while (0)
4549#define DOPRNT_ACCUMULATE_FUN(fun, params) \
4550 do { \
4551 ASSERT ((fun) != NULL); \
4552 DOPRNT_ACCUMULATE ((*(fun)) params); \
4553 } while (0)
4554
4555#define DOPRNT_FORMAT(fmt, ap) \
4556 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4557#define DOPRNT_MEMORY(ptr, len) \
4558 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4559#define DOPRNT_REPS(c, n) \
4560 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4561
4562#define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
4563
4564#define DOPRNT_REPS_MAYBE(c, n) \
4565 do { \
4566 if ((n) != 0) \
4567 DOPRNT_REPS (c, n); \
4568 } while (0)
4569#define DOPRNT_MEMORY_MAYBE(ptr, len) \
4570 do { \
4571 if ((len) != 0) \
4572 DOPRNT_MEMORY (ptr, len); \
4573 } while (0)
4574
4575__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
4576__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
4577
4578#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4579__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
4580
4581__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
4582#endif /* _GMP_H_HAVE_VA_LIST */
4583
4584
4585typedef int (*gmp_doscan_scan_t) (void *, const char *, ...);
4586typedef void *(*gmp_doscan_step_t) (void *, int);
4587typedef int (*gmp_doscan_get_t) (void *);
4588typedef int (*gmp_doscan_unget_t) (int, void *);
4589
4590struct gmp_doscan_funs_t {
4591 gmp_doscan_scan_t scan;
4592 gmp_doscan_step_t step;
4593 gmp_doscan_get_t get;
4594 gmp_doscan_unget_t unget;
4595};
4596extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
4597extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
4598
4599#if _GMP_H_HAVE_VA_LIST
4600__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
4601#endif
4602
4603
4604/* For testing and debugging. */
4605#define MPZ_CHECK_FORMAT(z) \
4606 do { \
4607 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
4608 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
4609 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
4610 } while (0)
4611
4612#define MPQ_CHECK_FORMAT(q) \
4613 do { \
4614 MPZ_CHECK_FORMAT (mpq_numref (q)); \
4615 MPZ_CHECK_FORMAT (mpq_denref (q)); \
4616 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
4617 \
4618 if (SIZ(mpq_numref(q)) == 0) \
4619 { \
4620 /* should have zero as 0/1 */ \
4621 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
4622 && PTR(mpq_denref(q))[0] == 1); \
4623 } \
4624 else \
4625 { \
4626 /* should have no common factors */ \
4627 mpz_t g; \
4628 mpz_init (g); \
4629 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
4630 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
4631 mpz_clear (g); \
4632 } \
4633 } while (0)
4634
4635#define MPF_CHECK_FORMAT(f) \
4636 do { \
4637 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
4638 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
4639 if (SIZ(f) == 0) \
4640 ASSERT_ALWAYS (EXP(f) == 0); \
4641 if (SIZ(f) != 0) \
4642 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
4643 } while (0)
4644
4645
4646/* Enhancement: The "mod" and "gcd_1" functions below could have
4647 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
4648 function pointers, only actual functions. It probably doesn't make much
4649 difference to the gmp code, since hopefully we arrange calls so there's
4650 no great need for the compiler to move things around. */
4651
4652#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4653/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4654 in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4. Be sure to update
4655 those when changing here. */
4656struct cpuvec_t {
4657 DECL_add_n ((*add_n));
4658 DECL_addlsh1_n ((*addlsh1_n));
4659 DECL_addlsh2_n ((*addlsh2_n));
4660 DECL_addmul_1 ((*addmul_1));
4661 DECL_addmul_2 ((*addmul_2));
4662 DECL_bdiv_dbm1c ((*bdiv_dbm1c));
4663 DECL_cnd_add_n ((*cnd_add_n));
4664 DECL_cnd_sub_n ((*cnd_sub_n));
4665 DECL_com ((*com));
4666 DECL_copyd ((*copyd));
4667 DECL_copyi ((*copyi));
4668 DECL_divexact_1 ((*divexact_1));
4669 DECL_divrem_1 ((*divrem_1));
4670 DECL_gcd_11 ((*gcd_11));
4671 DECL_lshift ((*lshift));
4672 DECL_lshiftc ((*lshiftc));
4673 DECL_mod_1 ((*mod_1));
4674 DECL_mod_1_1p ((*mod_1_1p));
4675 DECL_mod_1_1p_cps ((*mod_1_1p_cps));
4676 DECL_mod_1s_2p ((*mod_1s_2p));
4677 DECL_mod_1s_2p_cps ((*mod_1s_2p_cps));
4678 DECL_mod_1s_4p ((*mod_1s_4p));
4679 DECL_mod_1s_4p_cps ((*mod_1s_4p_cps));
4680 DECL_mod_34lsub1 ((*mod_34lsub1));
4681 DECL_modexact_1c_odd ((*modexact_1c_odd));
4682 DECL_mul_1 ((*mul_1));
4683 DECL_mul_basecase ((*mul_basecase));
4684 DECL_mullo_basecase ((*mullo_basecase));
4685 DECL_preinv_divrem_1 ((*preinv_divrem_1));
4686 DECL_preinv_mod_1 ((*preinv_mod_1));
4687 DECL_redc_1 ((*redc_1));
4688 DECL_redc_2 ((*redc_2));
4689 DECL_rshift ((*rshift));
4690 DECL_sqr_basecase ((*sqr_basecase));
4691 DECL_sub_n ((*sub_n));
4692 DECL_sublsh1_n ((*sublsh1_n));
4693 DECL_submul_1 ((*submul_1));
4694 mp_size_t mul_toom22_threshold;
4695 mp_size_t mul_toom33_threshold;
4696 mp_size_t sqr_toom2_threshold;
4697 mp_size_t sqr_toom3_threshold;
4698 mp_size_t bmod_1_to_mod_1_threshold;
4699};
4700__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4701__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
4702#endif /* x86 fat binary */
4703
4704__GMP_DECLSPEC void __gmpn_cpuvec_init (void);
4705
4706/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4707 if that hasn't yet been done (to establish the right values). */
4708#define CPUVEC_THRESHOLD(field) \
4709 ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
4710 __gmpn_cpuvec.field)
4711
4712
4713#if HAVE_NATIVE_mpn_add_nc
4714#define mpn_add_nc __MPN(add_nc)
4715__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4716#else
4717static inline
4718mp_limb_t
4719mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4720{
4721 mp_limb_t co;
4722 co = mpn_add_n (rp, up, vp, n);
4723 co += mpn_add_1 (rp, rp, n, ci);
4724 return co;
4725}
4726#endif
4727
4728#if HAVE_NATIVE_mpn_sub_nc
4729#define mpn_sub_nc __MPN(sub_nc)
4730__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4731#else
4732static inline mp_limb_t
4733mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4734{
4735 mp_limb_t co;
4736 co = mpn_sub_n (rp, up, vp, n);
4737 co += mpn_sub_1 (rp, rp, n, ci);
4738 return co;
4739}
4740#endif
4741
4742#if TUNE_PROGRAM_BUILD
4743/* Some extras wanted when recompiling some .c files for use by the tune
4744 program. Not part of a normal build.
4745
4746 It's necessary to keep these thresholds as #defines (just to an
4747 identically named variable), since various defaults are established based
4748 on #ifdef in the .c files. For some this is not so (the defaults are
4749 instead established above), but all are done this way for consistency. */
4750
4751#undef MUL_TOOM22_THRESHOLD
4752#define MUL_TOOM22_THRESHOLD mul_toom22_threshold
4753extern mp_size_t mul_toom22_threshold;
4754
4755#undef MUL_TOOM33_THRESHOLD
4756#define MUL_TOOM33_THRESHOLD mul_toom33_threshold
4757extern mp_size_t mul_toom33_threshold;
4758
4759#undef MUL_TOOM44_THRESHOLD
4760#define MUL_TOOM44_THRESHOLD mul_toom44_threshold
4761extern mp_size_t mul_toom44_threshold;
4762
4763#undef MUL_TOOM6H_THRESHOLD
4764#define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
4765extern mp_size_t mul_toom6h_threshold;
4766
4767#undef MUL_TOOM8H_THRESHOLD
4768#define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
4769extern mp_size_t mul_toom8h_threshold;
4770
4771#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
4772#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
4773extern mp_size_t mul_toom32_to_toom43_threshold;
4774
4775#undef MUL_TOOM32_TO_TOOM53_THRESHOLD
4776#define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
4777extern mp_size_t mul_toom32_to_toom53_threshold;
4778
4779#undef MUL_TOOM42_TO_TOOM53_THRESHOLD
4780#define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
4781extern mp_size_t mul_toom42_to_toom53_threshold;
4782
4783#undef MUL_TOOM42_TO_TOOM63_THRESHOLD
4784#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
4785extern mp_size_t mul_toom42_to_toom63_threshold;
4786
4787#undef MUL_TOOM43_TO_TOOM54_THRESHOLD
4788#define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold
4789extern mp_size_t mul_toom43_to_toom54_threshold;
4790
4791#undef MUL_FFT_THRESHOLD
4792#define MUL_FFT_THRESHOLD mul_fft_threshold
4793extern mp_size_t mul_fft_threshold;
4794
4795#undef MUL_FFT_MODF_THRESHOLD
4796#define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
4797extern mp_size_t mul_fft_modf_threshold;
4798
4799#undef MUL_FFT_TABLE
4800#define MUL_FFT_TABLE { 0 }
4801
4802#undef MUL_FFT_TABLE3
4803#define MUL_FFT_TABLE3 { {0,0} }
4804
4805/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4806 remain as zero (always use it). */
4807#if ! HAVE_NATIVE_mpn_sqr_basecase
4808#undef SQR_BASECASE_THRESHOLD
4809#define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
4810extern mp_size_t sqr_basecase_threshold;
4811#endif
4812
4813#if TUNE_PROGRAM_BUILD_SQR
4814#undef SQR_TOOM2_THRESHOLD
4815#define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
4816#else
4817#undef SQR_TOOM2_THRESHOLD
4818#define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
4819extern mp_size_t sqr_toom2_threshold;
4820#endif
4821
4822#undef SQR_TOOM3_THRESHOLD
4823#define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
4824extern mp_size_t sqr_toom3_threshold;
4825
4826#undef SQR_TOOM4_THRESHOLD
4827#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
4828extern mp_size_t sqr_toom4_threshold;
4829
4830#undef SQR_TOOM6_THRESHOLD
4831#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
4832extern mp_size_t sqr_toom6_threshold;
4833
4834#undef SQR_TOOM8_THRESHOLD
4835#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
4836extern mp_size_t sqr_toom8_threshold;
4837
4838#undef SQR_FFT_THRESHOLD
4839#define SQR_FFT_THRESHOLD sqr_fft_threshold
4840extern mp_size_t sqr_fft_threshold;
4841
4842#undef SQR_FFT_MODF_THRESHOLD
4843#define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
4844extern mp_size_t sqr_fft_modf_threshold;
4845
4846#undef SQR_FFT_TABLE
4847#define SQR_FFT_TABLE { 0 }
4848
4849#undef SQR_FFT_TABLE3
4850#define SQR_FFT_TABLE3 { {0,0} }
4851
4852#undef MULLO_BASECASE_THRESHOLD
4853#define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
4854extern mp_size_t mullo_basecase_threshold;
4855
4856#undef MULLO_DC_THRESHOLD
4857#define MULLO_DC_THRESHOLD mullo_dc_threshold
4858extern mp_size_t mullo_dc_threshold;
4859
4860#undef MULLO_MUL_N_THRESHOLD
4861#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
4862extern mp_size_t mullo_mul_n_threshold;
4863
4864#undef SQRLO_BASECASE_THRESHOLD
4865#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold
4866extern mp_size_t sqrlo_basecase_threshold;
4867
4868#undef SQRLO_DC_THRESHOLD
4869#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold
4870extern mp_size_t sqrlo_dc_threshold;
4871
4872#undef SQRLO_SQR_THRESHOLD
4873#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold
4874extern mp_size_t sqrlo_sqr_threshold;
4875
4876#undef MULMID_TOOM42_THRESHOLD
4877#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
4878extern mp_size_t mulmid_toom42_threshold;
4879
4880#undef DIV_QR_2_PI2_THRESHOLD
4881#define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold
4882extern mp_size_t div_qr_2_pi2_threshold;
4883
4884#undef DC_DIV_QR_THRESHOLD
4885#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
4886extern mp_size_t dc_div_qr_threshold;
4887
4888#undef DC_DIVAPPR_Q_THRESHOLD
4889#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
4890extern mp_size_t dc_divappr_q_threshold;
4891
4892#undef DC_BDIV_Q_THRESHOLD
4893#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
4894extern mp_size_t dc_bdiv_q_threshold;
4895
4896#undef DC_BDIV_QR_THRESHOLD
4897#define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
4898extern mp_size_t dc_bdiv_qr_threshold;
4899
4900#undef MU_DIV_QR_THRESHOLD
4901#define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
4902extern mp_size_t mu_div_qr_threshold;
4903
4904#undef MU_DIVAPPR_Q_THRESHOLD
4905#define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
4906extern mp_size_t mu_divappr_q_threshold;
4907
4908#undef MUPI_DIV_QR_THRESHOLD
4909#define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
4910extern mp_size_t mupi_div_qr_threshold;
4911
4912#undef MU_BDIV_QR_THRESHOLD
4913#define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
4914extern mp_size_t mu_bdiv_qr_threshold;
4915
4916#undef MU_BDIV_Q_THRESHOLD
4917#define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
4918extern mp_size_t mu_bdiv_q_threshold;
4919
4920#undef INV_MULMOD_BNM1_THRESHOLD
4921#define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
4922extern mp_size_t inv_mulmod_bnm1_threshold;
4923
4924#undef INV_NEWTON_THRESHOLD
4925#define INV_NEWTON_THRESHOLD inv_newton_threshold
4926extern mp_size_t inv_newton_threshold;
4927
4928#undef INV_APPR_THRESHOLD
4929#define INV_APPR_THRESHOLD inv_appr_threshold
4930extern mp_size_t inv_appr_threshold;
4931
4932#undef BINV_NEWTON_THRESHOLD
4933#define BINV_NEWTON_THRESHOLD binv_newton_threshold
4934extern mp_size_t binv_newton_threshold;
4935
4936#undef REDC_1_TO_REDC_2_THRESHOLD
4937#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
4938extern mp_size_t redc_1_to_redc_2_threshold;
4939
4940#undef REDC_2_TO_REDC_N_THRESHOLD
4941#define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
4942extern mp_size_t redc_2_to_redc_n_threshold;
4943
4944#undef REDC_1_TO_REDC_N_THRESHOLD
4945#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
4946extern mp_size_t redc_1_to_redc_n_threshold;
4947
4948#undef MATRIX22_STRASSEN_THRESHOLD
4949#define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
4950extern mp_size_t matrix22_strassen_threshold;
4951
4952typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
4953 struct hgcd_matrix1 *);
4954extern hgcd2_func_t *hgcd2_func;
4955
4956#undef HGCD_THRESHOLD
4957#define HGCD_THRESHOLD hgcd_threshold
4958extern mp_size_t hgcd_threshold;
4959
4960#undef HGCD_APPR_THRESHOLD
4961#define HGCD_APPR_THRESHOLD hgcd_appr_threshold
4962extern mp_size_t hgcd_appr_threshold;
4963
4964#undef HGCD_REDUCE_THRESHOLD
4965#define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
4966extern mp_size_t hgcd_reduce_threshold;
4967
4968#undef GCD_DC_THRESHOLD
4969#define GCD_DC_THRESHOLD gcd_dc_threshold
4970extern mp_size_t gcd_dc_threshold;
4971
4972#undef GCDEXT_DC_THRESHOLD
4973#define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
4974extern mp_size_t gcdext_dc_threshold;
4975
4976#undef DIV_QR_1N_PI1_METHOD
4977#define DIV_QR_1N_PI1_METHOD div_qr_1n_pi1_method
4978extern int div_qr_1n_pi1_method;
4979
4980#undef DIV_QR_1_NORM_THRESHOLD
4981#define DIV_QR_1_NORM_THRESHOLD div_qr_1_norm_threshold
4982extern mp_size_t div_qr_1_norm_threshold;
4983
4984#undef DIV_QR_1_UNNORM_THRESHOLD
4985#define DIV_QR_1_UNNORM_THRESHOLD div_qr_1_unnorm_threshold
4986extern mp_size_t div_qr_1_unnorm_threshold;
4987
4988#undef DIVREM_1_NORM_THRESHOLD
4989#define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
4990extern mp_size_t divrem_1_norm_threshold;
4991
4992#undef DIVREM_1_UNNORM_THRESHOLD
4993#define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
4994extern mp_size_t divrem_1_unnorm_threshold;
4995
4996#undef MOD_1_NORM_THRESHOLD
4997#define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
4998extern mp_size_t mod_1_norm_threshold;
4999
5000#undef MOD_1_UNNORM_THRESHOLD
5001#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
5002extern mp_size_t mod_1_unnorm_threshold;
5003
5004#undef MOD_1_1P_METHOD
5005#define MOD_1_1P_METHOD mod_1_1p_method
5006extern int mod_1_1p_method;
5007
5008#undef MOD_1N_TO_MOD_1_1_THRESHOLD
5009#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
5010extern mp_size_t mod_1n_to_mod_1_1_threshold;
5011
5012#undef MOD_1U_TO_MOD_1_1_THRESHOLD
5013#define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
5014extern mp_size_t mod_1u_to_mod_1_1_threshold;
5015
5016#undef MOD_1_1_TO_MOD_1_2_THRESHOLD
5017#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
5018extern mp_size_t mod_1_1_to_mod_1_2_threshold;
5019
5020#undef MOD_1_2_TO_MOD_1_4_THRESHOLD
5021#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
5022extern mp_size_t mod_1_2_to_mod_1_4_threshold;
5023
5024#undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
5025#define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
5026extern mp_size_t preinv_mod_1_to_mod_1_threshold;
5027
5028#if ! UDIV_PREINV_ALWAYS
5029#undef DIVREM_2_THRESHOLD
5030#define DIVREM_2_THRESHOLD divrem_2_threshold
5031extern mp_size_t divrem_2_threshold;
5032#endif
5033
5034#undef MULMOD_BNM1_THRESHOLD
5035#define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
5036extern mp_size_t mulmod_bnm1_threshold;
5037
5038#undef SQRMOD_BNM1_THRESHOLD
5039#define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
5040extern mp_size_t sqrmod_bnm1_threshold;
5041
5042#undef GET_STR_DC_THRESHOLD
5043#define GET_STR_DC_THRESHOLD get_str_dc_threshold
5044extern mp_size_t get_str_dc_threshold;
5045
5046#undef GET_STR_PRECOMPUTE_THRESHOLD
5047#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
5048extern mp_size_t get_str_precompute_threshold;
5049
5050#undef SET_STR_DC_THRESHOLD
5051#define SET_STR_DC_THRESHOLD set_str_dc_threshold
5052extern mp_size_t set_str_dc_threshold;
5053
5054#undef SET_STR_PRECOMPUTE_THRESHOLD
5055#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
5056extern mp_size_t set_str_precompute_threshold;
5057
5058#undef FAC_ODD_THRESHOLD
5059#define FAC_ODD_THRESHOLD fac_odd_threshold
5060extern mp_size_t fac_odd_threshold;
5061
5062#undef FAC_DSC_THRESHOLD
5063#define FAC_DSC_THRESHOLD fac_dsc_threshold
5064extern mp_size_t fac_dsc_threshold;
5065
5066#undef FFT_TABLE_ATTRS
5067#define FFT_TABLE_ATTRS
5068extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
5069#define FFT_TABLE3_SIZE 2000 /* generous space for tuning */
5070extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
5071
5072/* Sizes the tune program tests up to, used in a couple of recompilations. */
5073#undef MUL_TOOM22_THRESHOLD_LIMIT
5074#undef MUL_TOOM33_THRESHOLD_LIMIT
5075#undef MULLO_BASECASE_THRESHOLD_LIMIT
5076#undef SQRLO_BASECASE_THRESHOLD_LIMIT
5077#undef SQRLO_DC_THRESHOLD_LIMIT
5078#undef SQR_TOOM3_THRESHOLD_LIMIT
5079#define SQR_TOOM2_MAX_GENERIC 200
5080#define MUL_TOOM22_THRESHOLD_LIMIT 700
5081#define MUL_TOOM33_THRESHOLD_LIMIT 700
5082#define SQR_TOOM3_THRESHOLD_LIMIT 400
5083#define MUL_TOOM44_THRESHOLD_LIMIT 1000
5084#define SQR_TOOM4_THRESHOLD_LIMIT 1000
5085#define MUL_TOOM6H_THRESHOLD_LIMIT 1100
5086#define SQR_TOOM6_THRESHOLD_LIMIT 1100
5087#define MUL_TOOM8H_THRESHOLD_LIMIT 1200
5088#define SQR_TOOM8_THRESHOLD_LIMIT 1200
5089#define MULLO_BASECASE_THRESHOLD_LIMIT 200
5090#define SQRLO_BASECASE_THRESHOLD_LIMIT 200
5091#define SQRLO_DC_THRESHOLD_LIMIT 400
5092#define GET_STR_THRESHOLD_LIMIT 150
5093#define FAC_DSC_THRESHOLD_LIMIT 2048
5094
5095#endif /* TUNE_PROGRAM_BUILD */
5096
5097#if defined (__cplusplus)
5098}
5099#endif
5100
5101/* FIXME: Make these itch functions less conservative. Also consider making
5102 them dependent on just 'an', and compute the allocation directly from 'an'
5103 instead of via n. */
5104
5105/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
5106 k is the smallest k such that
5107 ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
5108 which implies that
5109 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
5110 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
5111*/
5112#define mpn_toom22_mul_itch(an, bn) \
5113 (2 * ((an) + GMP_NUMB_BITS))
5114#define mpn_toom2_sqr_itch(an) \
5115 (2 * ((an) + GMP_NUMB_BITS))
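/* Worked instance of the bound above (illustrative numbers only): with
   an = 1000 and MUL_TOOM22_THRESHOLD = 30,
   floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)) = floor (999/29) = 34, whose
   bitsize is 6, so the exact need is 2*(1000 + 6) = 2012 limbs, while the
   macros above allocate 2*(1000 + GMP_NUMB_BITS) = 2128 limbs with 64-bit
   limbs.  */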
5116
5117/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
5118 We use 3an + C, so that we can use a smaller constant.
5119 */
5120#define mpn_toom33_mul_itch(an, bn) \
5121 (3 * (an) + GMP_NUMB_BITS)
5122#define mpn_toom3_sqr_itch(an) \
5123 (3 * (an) + GMP_NUMB_BITS)
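/* Illustrative comparison (hypothetical sizes): for an = 1000 and
   MUL_TOOM33_THRESHOLD = 100 the pieces shrink as 1000 -> 334 -> 112 -> 38,
   so the recursion depth is k = 3 and the stated need is
   5*1000/2 + 10*3 = 2530 limbs, while 3*1000 + GMP_NUMB_BITS = 3064
   (64-bit limbs) is allocated.  */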
5124
5125/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
5126 We use 3an + C, so that we can use a smaller constant.
5127 */
5128#define mpn_toom44_mul_itch(an, bn) \
5129 (3 * (an) + GMP_NUMB_BITS)
5130#define mpn_toom4_sqr_itch(an) \
5131 (3 * (an) + GMP_NUMB_BITS)
5132
5133#define mpn_toom6_sqr_itch(n) \
5134 (((n) - SQR_TOOM6_THRESHOLD)*2 + \
5135 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
5136 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
5137
5138#define MUL_TOOM6H_MIN \
5139 ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ? \
5140 MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
5141#define mpn_toom6_mul_n_itch(n) \
5142 (((n) - MUL_TOOM6H_MIN)*2 + \
5143 MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6, \
5144 mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
5145
5146static inline mp_size_t
5147mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
5148 mp_size_t estimatedN;
5149 estimatedN = (an + bn) / (size_t) 10 + 1;
5150 return mpn_toom6_mul_n_itch (estimatedN * 6);
5151}
5152
5153#define mpn_toom8_sqr_itch(n) \
5154 ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
5155 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
5156 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
5157
5158#define MUL_TOOM8H_MIN \
5159 ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ? \
5160 MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
5161#define mpn_toom8_mul_n_itch(n) \
5162 ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) + \
5163 MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6, \
5164 mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
5165
5166static inline mp_size_t
5167mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
5168 mp_size_t estimatedN;
5169 estimatedN = (an + bn) / (size_t) 14 + 1;
5170 return mpn_toom8_mul_n_itch (estimatedN * 8);
5171}
5172
5173static inline mp_size_t
5174mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
5175{
5176 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
5177 mp_size_t itch = 2 * n + 1;
5178
5179 return itch;
5180}
5181
5182static inline mp_size_t
5183mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
5184{
5185 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
5186 return 6 * n + 3;
5187}
5188
5189static inline mp_size_t
5190mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
5191{
5192 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
5193
5194 return 6*n + 4;
5195}
5196
5197static inline mp_size_t
5198mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
5199{
5200 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
5201 return 6*n + 4;
5202}
5203
5204static inline mp_size_t
5205mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
5206{
5207 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
5208 return 10 * n + 10;
5209}
5210
5211static inline mp_size_t
5212mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
5213{
5214 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
5215 return 10 * n + 10;
5216}
5217
5218static inline mp_size_t
5219mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
5220{
5221 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
5222 return 9 * n + 3;
5223}
5224
5225static inline mp_size_t
5226mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
5227{
5228 mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
5229 return 9 * n + 3;
5230}
5231
5232/* let S(n) = space required for input size n,
5233 then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)). */
5234#define mpn_toom42_mulmid_itch(n) \
5235 (3 * (n) + GMP_NUMB_BITS)
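/* Unrolling that recurrence (illustrative reasoning): S(n) <= 3*(n/2 + n/4
   + n/8 + ...) + log2(n) <= 3*n + log2(n), so the 3*(n) + GMP_NUMB_BITS
   above is a safe overestimate for any n representable in a limb.  */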
5236
5237#if 0
5238#define mpn_fft_mul mpn_mul_fft_full
5239#else
5240#define mpn_fft_mul mpn_nussbaumer_mul
5241#endif
5242
5243#ifdef __cplusplus
5244
5245/* A little helper for a null-terminated __gmp_allocate_func string.
5246 The destructor ensures it's freed even if an exception is thrown.
5247 The len field is needed by the destructor, and can be used by anyone else
5248 to avoid a second strlen pass over the data.
5249
5250 Since our input is a C string, using strlen is correct. Perhaps it'd be
5251 more C++-ish style to use std::char_traits<char>::length, but char_traits
5252 isn't available in gcc 2.95.4. */
5253
5254class gmp_allocated_string {
5255 public:
5256 char *str;
5257 size_t len;
5258 gmp_allocated_string(char *arg)
5259 {
5260 str = arg;
5261 len = std::strlen (str);
5262 }
5263 ~gmp_allocated_string()
5264 {
5265 (*__gmp_free_func) (str, len+1);
5266 }
5267};
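/* Usage sketch (illustrative only; example_print is a hypothetical helper):
   wrap a string obtained from mpz_get_str, which allocates through
   __gmp_allocate_func, so that it is released even if the insertion throws.  */
#if 0
std::ostream &
example_print (std::ostream &o, mpz_srcptr z)
{
  gmp_allocated_string s (mpz_get_str (NULL, 10, z));
  return o << s.str;
}
#endif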
5268
5269std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
5270int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
5271void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
5272void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
5273std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
5274extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
5275
5276#endif /* __cplusplus */
5277
5278#endif /* __GMP_IMPL_H__ */