1/* Reference mpn functions, designed to be simple, portable and independent
2 of the normal gmp code. Speed isn't a consideration.
3
4Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
5
6This file is part of the GNU MP Library test suite.
7
8The GNU MP Library test suite is free software; you can redistribute it
9and/or modify it under the terms of the GNU General Public License as
10published by the Free Software Foundation; either version 3 of the License,
11or (at your option) any later version.
12
13The GNU MP Library test suite is distributed in the hope that it will be
14useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16Public License for more details.
17
18You should have received a copy of the GNU General Public License along with
19the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
20
21
22/* Most routines have assertions representing what the mpn routines are
23 supposed to accept. Many of these reference routines do sensible things
24 outside these ranges (eg. for size==0), but the assertions are present to
25 pick up bad parameters passed here that are about to be passed the same
26 to a real mpn routine being compared. */
27
28/* always do assertion checking */
29#define WANT_ASSERT 1
30
31#include <stdio.h> /* for NULL */
32#include <stdlib.h> /* for malloc */
33
34#include "gmp-impl.h"
35#include "longlong.h"
36
37#include "tests.h"
38
39
40
41/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
42 in bytes. */
43int
44byte_overlap_p (const void *v_xp, mp_size_t xsize,
45 const void *v_yp, mp_size_t ysize)
46{
47 const char *xp = (const char *) v_xp;
48 const char *yp = (const char *) v_yp;
49
50 ASSERT (xsize >= 0);
51 ASSERT (ysize >= 0);
52
53 /* no wraparounds */
54 ASSERT (xp+xsize >= xp);
55 ASSERT (yp+ysize >= yp);
56
57 if (xp + xsize <= yp)
58 return 0;
59
60 if (yp + ysize <= xp)
61 return 0;
62
63 return 1;
64}
65
66/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
67int
68refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
69{
70 return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
71 yp, ysize * GMP_LIMB_BYTES);
72}
73
74/* Check overlap for a routine defined to work low to high. */
75int
76refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
77{
78 return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
79}
80
81/* Check overlap for a routine defined to work high to low. */
82int
83refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
84{
85 return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
86}
87
88/* Check overlap for a standard routine requiring equal or separate. */
89int
90refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
91{
92 return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
93}
94int
95refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
96 mp_size_t size)
97{
98 return (refmpn_overlap_fullonly_p (dst, src1, size)
99 && refmpn_overlap_fullonly_p (dst, src2, size));
100}
101
102
103mp_ptr
104refmpn_malloc_limbs (mp_size_t size)
105{
106 mp_ptr p;
107 ASSERT (size >= 0);
108 if (size == 0)
109 size = 1;
110 p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
111 ASSERT (p != NULL);
112 return p;
113}
114
115/* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
116 * memory allocated by refmpn_malloc_limbs_aligned. */
117void
118refmpn_free_limbs (mp_ptr p)
119{
120 free (p);
121}
122
123mp_ptr
124refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
125{
126 mp_ptr p;
127 p = refmpn_malloc_limbs (size);
128 refmpn_copyi (p, ptr, size);
129 return p;
130}
131
132/* malloc n limbs on a boundary that is a multiple of m bytes */
133mp_ptr
134refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
135{
136 return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
137}
138
139
140void
141refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
142{
143 mp_size_t i;
144 ASSERT (size >= 0);
145 for (i = 0; i < size; i++)
146 ptr[i] = value;
147}
148
149void
150refmpn_zero (mp_ptr ptr, mp_size_t size)
151{
152 refmpn_fill (ptr, size, CNST_LIMB(0));
153}
154
155void
156refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
157{
158 ASSERT (newsize >= oldsize);
159 refmpn_zero (ptr+oldsize, newsize-oldsize);
160}
161
162int
163refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
164{
165 mp_size_t i;
166 for (i = 0; i < size; i++)
167 if (ptr[i] != 0)
168 return 0;
169 return 1;
170}
171
172mp_size_t
173refmpn_normalize (mp_srcptr ptr, mp_size_t size)
174{
175 ASSERT (size >= 0);
176 while (size > 0 && ptr[size-1] == 0)
177 size--;
178 return size;
179}
180
181/* the highest one bit in x */
182mp_limb_t
183refmpn_msbone (mp_limb_t x)
184{
185 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
186
187 while (n != 0)
188 {
189 if (x & n)
190 break;
191 n >>= 1;
192 }
193 return n;
194}
195
196/* a mask of the highest one bit and all bits below */
197mp_limb_t
198refmpn_msbone_mask (mp_limb_t x)
199{
200 if (x == 0)
201 return 0;
202
203 return (refmpn_msbone (x) << 1) - 1;
204}
205
206/* How many digits in the given base will fit in a limb.
207 Notice that the product b is allowed to be equal to the limit
208   2^GMP_NUMB_BITS; this ensures the result for base==2 will be
209 GMP_NUMB_BITS (and similarly other powers of 2). */
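/* A sanity-check example (assuming 64-bit limbs and no nails):
   refmpn_chars_per_limb (10) is 19, since 10^19 <= 2^64 but 10^20 > 2^64. */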
210int
211refmpn_chars_per_limb (int base)
212{
213 mp_limb_t limit[2], b[2];
214 int chars_per_limb;
215
216 ASSERT (base >= 2);
217
218 limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */
219 limit[1] = 1;
220 b[0] = 1; /* b = 1 */
221 b[1] = 0;
222
223 chars_per_limb = 0;
224 for (;;)
225 {
226 if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
227 break;
228 if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
229 break;
230 chars_per_limb++;
231 }
232 return chars_per_limb;
233}
234
235/* The biggest value base**n which fits in GMP_NUMB_BITS bits. */
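/* E.g. with 64-bit limbs and no nails, refmpn_big_base (10) is 10^19, the
   largest power of 10 that fits in a limb. */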
236mp_limb_t
237refmpn_big_base (int base)
238{
239 int chars_per_limb = refmpn_chars_per_limb (base);
240 int i;
241 mp_limb_t bb;
242
243 ASSERT (base >= 2);
244 bb = 1;
245 for (i = 0; i < chars_per_limb; i++)
246 bb *= base;
247 return bb;
248}
249
250
251void
252refmpn_setbit (mp_ptr ptr, unsigned long bit)
253{
254 ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
255}
256
257void
258refmpn_clrbit (mp_ptr ptr, unsigned long bit)
259{
260 ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
261}
262
263#define REFMPN_TSTBIT(ptr,bit) \
264 (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
265
266int
267refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
268{
269 return REFMPN_TSTBIT (ptr, bit);
270}
271
272unsigned long
273refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
274{
275 while (REFMPN_TSTBIT (ptr, bit) != 0)
276 bit++;
277 return bit;
278}
279
280unsigned long
281refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
282{
283 while (REFMPN_TSTBIT (ptr, bit) == 0)
284 bit++;
285 return bit;
286}
287
288void
289refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
290{
291 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
292 refmpn_copyi (rp, sp, size);
293}
294
295void
296refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
297{
298 mp_size_t i;
299
300 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
301 ASSERT (size >= 0);
302
303 for (i = 0; i < size; i++)
304 rp[i] = sp[i];
305}
306
307void
308refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
309{
310 mp_size_t i;
311
312 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
313 ASSERT (size >= 0);
314
315 for (i = size-1; i >= 0; i--)
316 rp[i] = sp[i];
317}
318
319/* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low
320 zeros to wsize. If x is longer, then copy just the high wsize limbs. */
321void
322refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
323{
324 ASSERT (wsize >= 0);
325 ASSERT (xsize >= 0);
326
327 /* high part of x if x bigger than w */
328 if (xsize > wsize)
329 {
330 xp += xsize - wsize;
331 xsize = wsize;
332 }
333
334 refmpn_copy (wp + wsize-xsize, xp, xsize);
335 refmpn_zero (wp, wsize-xsize);
336}
337
338int
339refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
340{
341 mp_size_t i;
342
343 ASSERT (size >= 1);
344 ASSERT_MPN (xp, size);
345 ASSERT_MPN (yp, size);
346
347 for (i = size-1; i >= 0; i--)
348 {
349 if (xp[i] > yp[i]) return 1;
350 if (xp[i] < yp[i]) return -1;
351 }
352 return 0;
353}
354
355int
356refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
357{
358 if (size == 0)
359 return 0;
360 else
361 return refmpn_cmp (xp, yp, size);
362}
363
364int
365refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
366 mp_srcptr yp, mp_size_t ysize)
367{
368 int opp, cmp;
369
370 ASSERT_MPN (xp, xsize);
371 ASSERT_MPN (yp, ysize);
372
373 opp = (xsize < ysize);
374 if (opp)
375 MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
376
377 if (! refmpn_zero_p (xp+ysize, xsize-ysize))
378 cmp = 1;
379 else
380 cmp = refmpn_cmp (xp, yp, ysize);
381
382 return (opp ? -cmp : cmp);
383}
384
385int
386refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
387{
388 mp_size_t i;
389 ASSERT (size >= 0);
390
391 for (i = 0; i < size; i++)
392 if (xp[i] != yp[i])
393 return 0;
394 return 1;
395}
396
397
398#define LOGOPS(operation) \
399 { \
400 mp_size_t i; \
401 \
402 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
403 ASSERT (size >= 1); \
404 ASSERT_MPN (s1p, size); \
405 ASSERT_MPN (s2p, size); \
406 \
407 for (i = 0; i < size; i++) \
408 rp[i] = operation; \
409 }
410
411void
412refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
413{
414 LOGOPS (s1p[i] & s2p[i]);
415}
416void
417refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
418{
419 LOGOPS (s1p[i] & ~s2p[i]);
420}
421void
422refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
423{
424 LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
425}
426void
427refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
428{
429 LOGOPS (s1p[i] | s2p[i]);
430}
431void
432refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
433{
434 LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
435}
436void
437refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
438{
439 LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
440}
441void
442refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
443{
444 LOGOPS (s1p[i] ^ s2p[i]);
445}
446void
447refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
448{
449 LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
450}
451
452
453/* set *dh,*dl to mh:ml - sh:sl, in full limbs */
454void
455refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
456 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
457{
458 *dl = ml - sl;
459 *dh = mh - sh - (ml < sl);
460}
461
462
463/* set *w to x+y, return 0 or 1 carry */
464mp_limb_t
465ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
466{
467 mp_limb_t sum, cy;
468
469 ASSERT_LIMB (x);
470 ASSERT_LIMB (y);
471
472 sum = x + y;
473#if GMP_NAIL_BITS == 0
474 *w = sum;
475 cy = (sum < x);
476#else
477 *w = sum & GMP_NUMB_MASK;
478 cy = (sum >> GMP_NUMB_BITS);
479#endif
480 return cy;
481}
482
483/* set *w to x-y, return 0 or 1 borrow */
484mp_limb_t
485ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
486{
487 mp_limb_t diff, cy;
488
489 ASSERT_LIMB (x);
490 ASSERT_LIMB (y);
491
492 diff = x - y;
493#if GMP_NAIL_BITS == 0
494 *w = diff;
495 cy = (diff > x);
496#else
497 *w = diff & GMP_NUMB_MASK;
498 cy = (diff >> GMP_NUMB_BITS) & 1;
499#endif
500 return cy;
501}
502
503/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
504mp_limb_t
505adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
506{
507 mp_limb_t r;
508
509 ASSERT_LIMB (x);
510 ASSERT_LIMB (y);
511 ASSERT (c == 0 || c == 1);
512
513 r = ref_addc_limb (w, x, y);
514 return r + ref_addc_limb (w, *w, c);
515}
516
517/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
518mp_limb_t
519sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
520{
521 mp_limb_t r;
522
523 ASSERT_LIMB (x);
524 ASSERT_LIMB (y);
525 ASSERT (c == 0 || c == 1);
526
527 r = ref_subc_limb (w, x, y);
528 return r + ref_subc_limb (w, *w, c);
529}
530
531
532#define AORS_1(operation) \
533 { \
534 mp_size_t i; \
535 \
536 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
537 ASSERT (size >= 1); \
538 ASSERT_MPN (sp, size); \
539 ASSERT_LIMB (n); \
540 \
541 for (i = 0; i < size; i++) \
542 n = operation (&rp[i], sp[i], n); \
543 return n; \
544 }
545
546mp_limb_t
547refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
548{
549 AORS_1 (ref_addc_limb);
550}
551mp_limb_t
552refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
553{
554 AORS_1 (ref_subc_limb);
555}
556
557#define AORS_NC(operation) \
558 { \
559 mp_size_t i; \
560 \
561 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
562 ASSERT (carry == 0 || carry == 1); \
563 ASSERT (size >= 1); \
564 ASSERT_MPN (s1p, size); \
565 ASSERT_MPN (s2p, size); \
566 \
567 for (i = 0; i < size; i++) \
568 carry = operation (&rp[i], s1p[i], s2p[i], carry); \
569 return carry; \
570 }
571
572mp_limb_t
573refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
574 mp_limb_t carry)
575{
576 AORS_NC (adc);
577}
578mp_limb_t
579refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
580 mp_limb_t carry)
581{
582 AORS_NC (sbb);
583}
584
585
586mp_limb_t
587refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
588{
589 return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
590}
591mp_limb_t
592refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
593{
594 return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
595}
596
597mp_limb_t
598refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
599{
600 if (cnd != 0)
601 return refmpn_add_n (rp, s1p, s2p, size);
602 else
603 {
604 refmpn_copyi (rp, s1p, size);
605 return 0;
606 }
607}
608mp_limb_t
609refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
610{
611 if (cnd != 0)
612 return refmpn_sub_n (rp, s1p, s2p, size);
613 else
614 {
615 refmpn_copyi (rp, s1p, size);
616 return 0;
617 }
618}
619
620
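/* For the _err1_n routines below, ep accumulates a two-limb error term: each
   time a carry (or borrow) comes out of position i, yp[size-1-i] is added
   into ep.  The _err2_n and _err3_n variants do the same with two and three
   y vectors respectively. */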
621#define AORS_ERR1_N(operation) \
622 { \
623 mp_size_t i; \
624 mp_limb_t carry2; \
625 \
626 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
627 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
628 ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \
629 ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \
630 ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \
631 ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \
632 ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \
633 \
634 ASSERT (carry == 0 || carry == 1); \
635 ASSERT (size >= 1); \
636 ASSERT_MPN (s1p, size); \
637 ASSERT_MPN (s2p, size); \
638 ASSERT_MPN (yp, size); \
639 \
640 ep[0] = ep[1] = CNST_LIMB(0); \
641 \
642 for (i = 0; i < size; i++) \
643 { \
644 carry = operation (&rp[i], s1p[i], s2p[i], carry); \
645 if (carry == 1) \
646 { \
647 carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \
648 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
649 ASSERT (carry2 == 0); \
650 } \
651 } \
652 return carry; \
653 }
654
655mp_limb_t
656refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
657 mp_ptr ep, mp_srcptr yp,
658 mp_size_t size, mp_limb_t carry)
659{
660 AORS_ERR1_N (adc);
661}
662mp_limb_t
663refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
664 mp_ptr ep, mp_srcptr yp,
665 mp_size_t size, mp_limb_t carry)
666{
667 AORS_ERR1_N (sbb);
668}
669
670
671#define AORS_ERR2_N(operation) \
672 { \
673 mp_size_t i; \
674 mp_limb_t carry2; \
675 \
676 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
677 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
678 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
679 ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
680 ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \
681 ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \
682 ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \
683 ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \
684 ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \
685 \
686 ASSERT (carry == 0 || carry == 1); \
687 ASSERT (size >= 1); \
688 ASSERT_MPN (s1p, size); \
689 ASSERT_MPN (s2p, size); \
690 ASSERT_MPN (y1p, size); \
691 ASSERT_MPN (y2p, size); \
692 \
693 ep[0] = ep[1] = CNST_LIMB(0); \
694 ep[2] = ep[3] = CNST_LIMB(0); \
695 \
696 for (i = 0; i < size; i++) \
697 { \
698 carry = operation (&rp[i], s1p[i], s2p[i], carry); \
699 if (carry == 1) \
700 { \
701 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
702 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
703 ASSERT (carry2 == 0); \
704 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
705 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
706 ASSERT (carry2 == 0); \
707 } \
708 } \
709 return carry; \
710 }
711
712mp_limb_t
713refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
714 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
715 mp_size_t size, mp_limb_t carry)
716{
717 AORS_ERR2_N (adc);
718}
719mp_limb_t
720refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
721 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
722 mp_size_t size, mp_limb_t carry)
723{
724 AORS_ERR2_N (sbb);
725}
726
727
728#define AORS_ERR3_N(operation) \
729 { \
730 mp_size_t i; \
731 mp_limb_t carry2; \
732 \
733 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
734 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
735 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
736 ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
737 ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \
738 ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \
739 ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \
740 ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \
741 ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \
742 ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \
743 ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \
744 \
745 ASSERT (carry == 0 || carry == 1); \
746 ASSERT (size >= 1); \
747 ASSERT_MPN (s1p, size); \
748 ASSERT_MPN (s2p, size); \
749 ASSERT_MPN (y1p, size); \
750 ASSERT_MPN (y2p, size); \
751 ASSERT_MPN (y3p, size); \
752 \
753 ep[0] = ep[1] = CNST_LIMB(0); \
754 ep[2] = ep[3] = CNST_LIMB(0); \
755 ep[4] = ep[5] = CNST_LIMB(0); \
756 \
757 for (i = 0; i < size; i++) \
758 { \
759 carry = operation (&rp[i], s1p[i], s2p[i], carry); \
760 if (carry == 1) \
761 { \
762 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
763 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
764 ASSERT (carry2 == 0); \
765 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
766 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
767 ASSERT (carry2 == 0); \
768 carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \
769 carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \
770 ASSERT (carry2 == 0); \
771 } \
772 } \
773 return carry; \
774 }
775
776mp_limb_t
777refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
778 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
779 mp_size_t size, mp_limb_t carry)
780{
781 AORS_ERR3_N (adc);
782}
783mp_limb_t
784refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
785 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
786 mp_size_t size, mp_limb_t carry)
787{
788 AORS_ERR3_N (sbb);
789}
790
791
792mp_limb_t
793refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
794 mp_size_t n, unsigned int s)
795{
796 mp_limb_t cy;
797 mp_ptr tp;
798
799 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
800 ASSERT (n >= 1);
801 ASSERT (0 < s && s < GMP_NUMB_BITS);
802 ASSERT_MPN (up, n);
803 ASSERT_MPN (vp, n);
804
805 tp = refmpn_malloc_limbs (n);
806 cy = refmpn_lshift (tp, vp, n, s);
807 cy += refmpn_add_n (rp, up, tp, n);
808 free (tp);
809 return cy;
810}
811mp_limb_t
812refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
813{
814 return refmpn_addlsh_n (rp, up, vp, n, 1);
815}
816mp_limb_t
817refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
818{
819 return refmpn_addlsh_n (rp, up, vp, n, 2);
820}
821mp_limb_t
822refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
823{
824 return refmpn_addlsh_n (rp, rp, vp, n, s);
825}
826mp_limb_t
827refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
828{
829 return refmpn_addlsh_n (rp, rp, vp, n, 1);
830}
831mp_limb_t
832refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
833{
834 return refmpn_addlsh_n (rp, rp, vp, n, 2);
835}
836mp_limb_t
837refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
838{
839 return refmpn_addlsh_n (rp, vp, rp, n, s);
840}
841mp_limb_t
842refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
843{
844 return refmpn_addlsh_n (rp, vp, rp, n, 1);
845}
846mp_limb_t
847refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
848{
849 return refmpn_addlsh_n (rp, vp, rp, n, 2);
850}
851mp_limb_t
852refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
853 mp_size_t n, unsigned int s, mp_limb_t carry)
854{
855 mp_limb_t cy;
856
857 ASSERT (carry <= (CNST_LIMB(1) << s));
858
859 cy = refmpn_addlsh_n (rp, up, vp, n, s);
860 cy += refmpn_add_1 (rp, rp, n, carry);
861 return cy;
862}
863mp_limb_t
864refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
865{
866 return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
867}
868mp_limb_t
869refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
870{
871 return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
872}
873
874mp_limb_t
875refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
876 mp_size_t n, unsigned int s)
877{
878 mp_limb_t cy;
879 mp_ptr tp;
880
881 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
882 ASSERT (n >= 1);
883 ASSERT (0 < s && s < GMP_NUMB_BITS);
884 ASSERT_MPN (up, n);
885 ASSERT_MPN (vp, n);
886
887 tp = refmpn_malloc_limbs (n);
888 cy = mpn_lshift (tp, vp, n, s);
889 cy += mpn_sub_n (rp, up, tp, n);
890 free (tp);
891 return cy;
892}
893mp_limb_t
894refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
895{
896 return refmpn_sublsh_n (rp, up, vp, n, 1);
897}
898mp_limb_t
899refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
900{
901 return refmpn_sublsh_n (rp, up, vp, n, 2);
902}
903mp_limb_t
904refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
905{
906 return refmpn_sublsh_n (rp, rp, vp, n, s);
907}
908mp_limb_t
909refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
910{
911 return refmpn_sublsh_n (rp, rp, vp, n, 1);
912}
913mp_limb_t
914refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
915{
916 return refmpn_sublsh_n (rp, rp, vp, n, 2);
917}
918mp_limb_t
919refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
920{
921 return refmpn_sublsh_n (rp, vp, rp, n, s);
922}
923mp_limb_t
924refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
925{
926 return refmpn_sublsh_n (rp, vp, rp, n, 1);
927}
928mp_limb_t
929refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
930{
931 return refmpn_sublsh_n (rp, vp, rp, n, 2);
932}
933mp_limb_t
934refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
935 mp_size_t n, unsigned int s, mp_limb_t carry)
936{
937 mp_limb_t cy;
938
939 ASSERT (carry <= (CNST_LIMB(1) << s));
940
941 cy = refmpn_sublsh_n (rp, up, vp, n, s);
942 cy += refmpn_sub_1 (rp, rp, n, carry);
943 return cy;
944}
945mp_limb_t
946refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
947{
948 return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
949}
950mp_limb_t
951refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
952{
953 return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
954}
955
956mp_limb_signed_t
957refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
958 mp_size_t n, unsigned int s)
959{
960 mp_limb_signed_t cy;
961 mp_ptr tp;
962
963 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
964 ASSERT (n >= 1);
965 ASSERT (0 < s && s < GMP_NUMB_BITS);
966 ASSERT_MPN (up, n);
967 ASSERT_MPN (vp, n);
968
969 tp = refmpn_malloc_limbs (n);
970 cy = mpn_lshift (tp, vp, n, s);
971 cy -= mpn_sub_n (rp, tp, up, n);
972 free (tp);
973 return cy;
974}
975mp_limb_signed_t
976refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
977{
978 return refmpn_rsblsh_n (rp, up, vp, n, 1);
979}
980mp_limb_signed_t
981refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
982{
983 return refmpn_rsblsh_n (rp, up, vp, n, 2);
984}
985mp_limb_signed_t
986refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
987 mp_size_t n, unsigned int s, mp_limb_signed_t carry)
988{
989 mp_limb_signed_t cy;
990
991 ASSERT (carry == -1 || (carry >> s) == 0);
992
993 cy = refmpn_rsblsh_n (rp, up, vp, n, s);
994 if (carry > 0)
995 cy += refmpn_add_1 (rp, rp, n, carry);
996 else
997 cy -= refmpn_sub_1 (rp, rp, n, -carry);
998 return cy;
999}
1000mp_limb_signed_t
1001refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1002{
1003 return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
1004}
1005mp_limb_signed_t
1006refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1007{
1008 return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
1009}
1010
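/* rsh1add/rsh1sub below: form the sum (or difference) of {up,n} and {vp,n},
   shift it right one bit storing n limbs at {rp,n}, with the carry (or
   borrow) of the add (or subtract) becoming the new most significant bit,
   and return the bit shifted out. */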
1011mp_limb_t
1012refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1013{
1014 mp_limb_t cya, cys;
1015
1016 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1017 ASSERT (n >= 1);
1018 ASSERT_MPN (up, n);
1019 ASSERT_MPN (vp, n);
1020
1021 cya = mpn_add_n (rp, up, vp, n);
1022 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1023 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1024 return cys;
1025}
1026mp_limb_t
1027refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1028{
1029 mp_limb_t cya, cys;
1030
1031 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1032 ASSERT (n >= 1);
1033 ASSERT_MPN (up, n);
1034 ASSERT_MPN (vp, n);
1035
1036 cya = mpn_sub_n (rp, up, vp, n);
1037 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1038 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1039 return cys;
1040}
1041
1042/* Twos complement negate: dst = -src.  Return borrow, ie. 1 if src is non-zero. */
1043mp_limb_t
1044refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1045{
1046 mp_ptr zeros;
1047 mp_limb_t ret;
1048
1049 ASSERT (size >= 1);
1050
1051 zeros = refmpn_malloc_limbs (size);
1052 refmpn_fill (zeros, size, CNST_LIMB(0));
1053 ret = refmpn_sub_n (dst, zeros, src, size);
1054 free (zeros);
1055 return ret;
1056}
1057
1058
1059#define AORS(aors_n, aors_1) \
1060 { \
1061 mp_limb_t c; \
1062 ASSERT (s1size >= s2size); \
1063 ASSERT (s2size >= 1); \
1064 c = aors_n (rp, s1p, s2p, s2size); \
1065 if (s1size-s2size != 0) \
1066 c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
1067 return c; \
1068 }
1069mp_limb_t
1070refmpn_add (mp_ptr rp,
1071 mp_srcptr s1p, mp_size_t s1size,
1072 mp_srcptr s2p, mp_size_t s2size)
1073{
1074 AORS (refmpn_add_n, refmpn_add_1);
1075}
1076mp_limb_t
1077refmpn_sub (mp_ptr rp,
1078 mp_srcptr s1p, mp_size_t s1size,
1079 mp_srcptr s2p, mp_size_t s2size)
1080{
1081 AORS (refmpn_sub_n, refmpn_sub_1);
1082}
1083
1084
1085#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
1086#define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2)
1087
1088#define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
1089#define HIGHMASK SHIFTHIGH(LOWMASK)
1090
1091#define LOWPART(x) ((x) & LOWMASK)
1092#define HIGHPART(x) SHIFTLOW((x) & HIGHMASK)
1093
1094/* Set the return value (high) and *lo (low) to x*y, using full limbs not nails. */
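/* A sketch of the half-limb schoolbook method used here: with
   H = 2^(GMP_LIMB_BITS/2), write x = xh*H + xl and y = yh*H + yl, so

       x*y = xh*yh*H^2 + (xh*yl + xl*yh)*H + xl*yl

   xh*yh contributes to the high limb, xl*yl to the low limb, and each cross
   product is split with HIGHPART/LOWPART and added into both halves, the
   carries out of *lo being folded into hi by the (*lo < s) tests. */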
1095mp_limb_t
1096refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
1097{
1098 mp_limb_t hi, s;
1099
1100 *lo = LOWPART(x) * LOWPART(y);
1101 hi = HIGHPART(x) * HIGHPART(y);
1102
1103 s = LOWPART(x) * HIGHPART(y);
1104 hi += HIGHPART(s);
1105 s = SHIFTHIGH(LOWPART(s));
1106 *lo += s;
1107 hi += (*lo < s);
1108
1109 s = HIGHPART(x) * LOWPART(y);
1110 hi += HIGHPART(s);
1111 s = SHIFTHIGH(LOWPART(s));
1112 *lo += s;
1113 hi += (*lo < s);
1114
1115 return hi;
1116}
1117
1118mp_limb_t
1119refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
1120{
1121 return refmpn_umul_ppmm (lo, x, y);
1122}
1123
1124mp_limb_t
1125refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
1126 mp_limb_t carry)
1127{
1128 mp_size_t i;
1129 mp_limb_t hi, lo;
1130
1131 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1132 ASSERT (size >= 1);
1133 ASSERT_MPN (sp, size);
1134 ASSERT_LIMB (multiplier);
1135 ASSERT_LIMB (carry);
1136
1137 multiplier <<= GMP_NAIL_BITS;
1138 for (i = 0; i < size; i++)
1139 {
1140 hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
1141 lo >>= GMP_NAIL_BITS;
1142 ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
1143 rp[i] = lo;
1144 carry = hi;
1145 }
1146 return carry;
1147}
1148
1149mp_limb_t
1150refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1151{
1152 return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1153}
1154
1155
1156mp_limb_t
1157refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1158 mp_srcptr mult, mp_size_t msize)
1159{
1160 mp_ptr src_copy;
1161 mp_limb_t ret;
1162 mp_size_t i;
1163
1164 ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1165 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1166 ASSERT (size >= msize);
1167 ASSERT_MPN (mult, msize);
1168
1169 /* in case dst==src */
1170 src_copy = refmpn_malloc_limbs (size);
1171 refmpn_copyi (src_copy, src, size);
1172 src = src_copy;
1173
1174 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1175 for (i = 1; i < msize-1; i++)
1176 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1177 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1178
1179 free (src_copy);
1180 return ret;
1181}
1182
1183mp_limb_t
1184refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1185{
1186 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
1187}
1188mp_limb_t
1189refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1190{
1191 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
1192}
1193mp_limb_t
1194refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1195{
1196 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
1197}
1198mp_limb_t
1199refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1200{
1201 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
1202}
1203mp_limb_t
1204refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1205{
1206 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
1207}
1208
1209#define AORSMUL_1C(operation_n) \
1210 { \
1211 mp_ptr p; \
1212 mp_limb_t ret; \
1213 \
1214 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
1215 \
1216 p = refmpn_malloc_limbs (size); \
1217 ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \
1218 ret += operation_n (rp, rp, p, size); \
1219 \
1220 free (p); \
1221 return ret; \
1222 }
1223
1224mp_limb_t
1225refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1226 mp_limb_t multiplier, mp_limb_t carry)
1227{
1228 AORSMUL_1C (refmpn_add_n);
1229}
1230mp_limb_t
1231refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1232 mp_limb_t multiplier, mp_limb_t carry)
1233{
1234 AORSMUL_1C (refmpn_sub_n);
1235}
1236
1237
1238mp_limb_t
1239refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1240{
1241 return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1242}
1243mp_limb_t
1244refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1245{
1246 return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1247}
1248
1249
1250mp_limb_t
1251refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1252 mp_srcptr mult, mp_size_t msize)
1253{
1254 mp_ptr src_copy;
1255 mp_limb_t ret;
1256 mp_size_t i;
1257
1258 ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
1259 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1260 ASSERT (size >= msize);
1261 ASSERT_MPN (mult, msize);
1262
1263 /* in case dst==src */
1264 src_copy = refmpn_malloc_limbs (size);
1265 refmpn_copyi (src_copy, src, size);
1266 src = src_copy;
1267
1268 for (i = 0; i < msize-1; i++)
1269 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1270 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1271
1272 free (src_copy);
1273 return ret;
1274}
1275
1276mp_limb_t
1277refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1278{
1279 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
1280}
1281mp_limb_t
1282refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1283{
1284 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
1285}
1286mp_limb_t
1287refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1288{
1289 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
1290}
1291mp_limb_t
1292refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1293{
1294 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
1295}
1296mp_limb_t
1297refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1298{
1299 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
1300}
1301mp_limb_t
1302refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1303{
1304 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
1305}
1306mp_limb_t
1307refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1308{
1309 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
1310}
1311
1312mp_limb_t
1313refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
1314 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
1315 mp_limb_t carry)
1316{
1317 mp_ptr p;
1318 mp_limb_t acy, scy;
1319
1320 /* Destinations can't overlap. */
1321 ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
1322 ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
1323 ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
1324 ASSERT (size >= 1);
1325
1326 /* in case r1p==s1p or r1p==s2p */
1327 p = refmpn_malloc_limbs (size);
1328
1329 acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
1330 scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
1331 refmpn_copyi (r1p, p, size);
1332
1333 free (p);
1334 return 2 * acy + scy;
1335}
1336
1337mp_limb_t
1338refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1339 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1340{
1341 return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1342}
1343
1344
1345/* Right shift hi,lo and return the low limb of the result.
1346 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1347mp_limb_t
1348rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1349{
1350 ASSERT (shift < GMP_NUMB_BITS);
1351 if (shift == 0)
1352 return lo;
1353 else
1354 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1355}
1356
1357/* Left shift hi,lo and return the high limb of the result.
1358 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1359mp_limb_t
1360lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1361{
1362 ASSERT (shift < GMP_NUMB_BITS);
1363 if (shift == 0)
1364 return hi;
1365 else
1366 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1367}
1368
1369
1370mp_limb_t
1371refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1372{
1373 mp_limb_t ret;
1374 mp_size_t i;
1375
1376 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1377 ASSERT (size >= 1);
1378 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1379 ASSERT_MPN (sp, size);
1380
1381 ret = rshift_make (sp[0], CNST_LIMB(0), shift);
1382
1383 for (i = 0; i < size-1; i++)
1384 rp[i] = rshift_make (sp[i+1], sp[i], shift);
1385
1386 rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
1387 return ret;
1388}
1389
1390mp_limb_t
1391refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1392{
1393 mp_limb_t ret;
1394 mp_size_t i;
1395
1396 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1397 ASSERT (size >= 1);
1398 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1399 ASSERT_MPN (sp, size);
1400
1401 ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
1402
1403 for (i = size-2; i >= 0; i--)
1404 rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
1405
1406 rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
1407 return ret;
1408}
1409
1410void
1411refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1412{
1413 mp_size_t i;
1414
1415 /* We work downwards since mpn_lshiftc needs that. */
1416 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1417
1418 for (i = size - 1; i >= 0; i--)
1419 rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1420}
1421
1422mp_limb_t
1423refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1424{
1425 mp_limb_t res;
1426
1427 /* No asserts here, refmpn_lshift will assert what we need. */
1428
1429 res = refmpn_lshift (rp, sp, size, shift);
1430 refmpn_com (rp, rp, size);
1431 return res;
1432}
1433
1434/* accepting shift==0 and doing a plain copyi or copyd in that case */
1435mp_limb_t
1436refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1437{
1438 if (shift == 0)
1439 {
1440 refmpn_copyi (rp, sp, size);
1441 return 0;
1442 }
1443 else
1444 {
1445 return refmpn_rshift (rp, sp, size, shift);
1446 }
1447}
1448mp_limb_t
1449refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1450{
1451 if (shift == 0)
1452 {
1453 refmpn_copyd (rp, sp, size);
1454 return 0;
1455 }
1456 else
1457 {
1458 return refmpn_lshift (rp, sp, size, shift);
1459 }
1460}
1461
1462/* accepting size==0 too */
1463mp_limb_t
1464refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1465 unsigned shift)
1466{
1467 return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1468}
1469mp_limb_t
1470refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1471 unsigned shift)
1472{
1473 return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1474}
1475
1476/* Divide h,l by d, return quotient, store remainder to *rp.
1477 Operates on full limbs, not nails.
1478 Must have h < d.
1479 __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
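/* The approach below: normalize the divisor by shifting it left until its
   high bit is set, shift the two-limb numerator h:l up by the same count,
   let __udiv_qrnnd_c produce the quotient and a (scaled) remainder, then
   shift the remainder back down.  The quotient is unaffected by the common
   scaling. */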
1480mp_limb_t
1481refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
1482{
1483 mp_limb_t q, r;
1484 int n;
1485
1486 ASSERT (d != 0);
1487 ASSERT (h < d);
1488
1489#if 0
1490 udiv_qrnnd (q, r, h, l, d);
1491 *rp = r;
1492 return q;
1493#endif
1494
1495 n = refmpn_count_leading_zeros (d);
1496 d <<= n;
1497
1498 if (n != 0)
1499 {
1500 h = (h << n) | (l >> (GMP_LIMB_BITS - n));
1501 l <<= n;
1502 }
1503
1504 __udiv_qrnnd_c (q, r, h, l, d);
1505 r >>= n;
1506 *rp = r;
1507 return q;
1508}
1509
1510mp_limb_t
1511refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1512{
1513 return refmpn_udiv_qrnnd (rp, h, l, d);
1514}
1515
1516/* This little subroutine avoids some bad code generation from i386 gcc 3.0
1517 -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */
1518static mp_limb_t
1519refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1520 mp_limb_t divisor, mp_limb_t carry)
1521{
1522 mp_size_t i;
1523 mp_limb_t rem[1];
1524 for (i = size-1; i >= 0; i--)
1525 {
1526 rp[i] = refmpn_udiv_qrnnd (rem, carry,
1527 sp[i] << GMP_NAIL_BITS,
1528 divisor << GMP_NAIL_BITS);
1529 carry = *rem >> GMP_NAIL_BITS;
1530 }
1531 return carry;
1532}
1533
1534mp_limb_t
1535refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1536 mp_limb_t divisor, mp_limb_t carry)
1537{
1538 mp_ptr sp_orig;
1539 mp_ptr prod;
1540 mp_limb_t carry_orig;
1541
1542 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1543 ASSERT (size >= 0);
1544 ASSERT (carry < divisor);
1545 ASSERT_MPN (sp, size);
1546 ASSERT_LIMB (divisor);
1547 ASSERT_LIMB (carry);
1548
1549 if (size == 0)
1550 return carry;
1551
1552 sp_orig = refmpn_memdup_limbs (sp, size);
1553 prod = refmpn_malloc_limbs (size);
1554 carry_orig = carry;
1555
1556 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1557
1558 /* check by multiplying back */
1559#if 0
1560 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1561 size, divisor, carry_orig, carry);
1562  mpn_trace("s",sp_orig,size);
1563 mpn_trace("r",rp,size);
1564 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1565 mpn_trace("p",prod,size);
1566#endif
1567 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1568 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1569 free (sp_orig);
1570 free (prod);
1571
1572 return carry;
1573}
1574
1575mp_limb_t
1576refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1577{
1578 return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1579}
1580
1581
1582mp_limb_t
1583refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1584 mp_limb_t carry)
1585{
1586 mp_ptr p = refmpn_malloc_limbs (size);
1587 carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1588 free (p);
1589 return carry;
1590}
1591
1592mp_limb_t
1593refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1594{
1595 return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1596}
1597
1598mp_limb_t
1599refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1600 mp_limb_t inverse)
1601{
1602 ASSERT (divisor & GMP_NUMB_HIGHBIT);
1603 ASSERT (inverse == refmpn_invert_limb (divisor));
1604 return refmpn_mod_1 (sp, size, divisor);
1605}
1606
1607/* This implementation will be rather slow, but has the advantage of being
1608   in a different style from the libgmp versions.  */
1609mp_limb_t
1610refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
1611{
1612 ASSERT ((GMP_NUMB_BITS % 4) == 0);
1613 return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
1614}
1615
1616
1617mp_limb_t
1618refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
1619 mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1620 mp_limb_t carry)
1621{
1622 mp_ptr z;
1623
1624 z = refmpn_malloc_limbs (xsize);
1625 refmpn_fill (z, xsize, CNST_LIMB(0));
1626
1627 carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
1628 carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
1629
1630 free (z);
1631 return carry;
1632}
1633
1634mp_limb_t
1635refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1636 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1637{
1638 return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1639}
1640
1641mp_limb_t
1642refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
1643 mp_srcptr sp, mp_size_t size,
1644 mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
1645{
1646 ASSERT (size >= 0);
1647 ASSERT (shift == refmpn_count_leading_zeros (divisor));
1648 ASSERT (inverse == refmpn_invert_limb (divisor << shift));
1649
1650 return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
1651}
1652
1653mp_limb_t
1654refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
1655 mp_ptr np, mp_size_t nn,
1656 mp_srcptr dp)
1657{
1658 mp_ptr tp;
1659 mp_limb_t qh;
1660
1661 tp = refmpn_malloc_limbs (nn + qxn);
1662 refmpn_zero (tp, qxn);
1663 refmpn_copyi (tp + qxn, np, nn);
1664 qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
1665 refmpn_copyi (np, tp, 2);
1666 free (tp);
1667 return qh;
1668}
1669
1670/* Inverse is floor((b*(b-d)-1) / d), per the "Division by Invariant Integers
1671   using Multiplication" paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.
1672   Note that -d-1, ie. b-d-1, is < d since d has the high bit set. */
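/* Equivalently, with b = 2^GMP_LIMB_BITS the call below divides
   (b-d-1)*b + (b-1) == b*(b-d) - 1 by d, so the result is
   floor((b^2-1)/d) - b, the usual single-limb reciprocal. */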
1673
1674mp_limb_t
1675refmpn_invert_limb (mp_limb_t d)
1676{
1677 mp_limb_t r;
1678 ASSERT (d & GMP_LIMB_HIGHBIT);
1679 return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
1680}
1681
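/* A sketch of what refmpn_invert computes, assuming {up,n} has its high bit
   set (the usual precondition for an n-limb approximate reciprocal): tp is
   set to B^(2n)-1, ie. all ones, the quotient floor((B^(2n)-1) / {up,n})
   then has n+1 limbs with a top limb of 1, and its low n limbs are returned
   in {rp,n}. */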
1682void
1683refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1684{
1685 mp_ptr qp, tp;
1686 TMP_DECL;
1687 TMP_MARK;
1688
1689 tp = TMP_ALLOC_LIMBS (2 * n);
1690 qp = TMP_ALLOC_LIMBS (n + 1);
1691
1692 MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1);
1693
1694 refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
1695 refmpn_copyi (rp, qp, n);
1696
1697 TMP_FREE;
1698}
1699
1700void
1701refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1702{
1703 mp_ptr tp;
1704 mp_limb_t binv;
1705 TMP_DECL;
1706 TMP_MARK;
1707
1708 /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
1709 code. To make up for it, we check that the inverse is correct using a
1710 multiply. */
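  /* The computed rp satisfies rp * up == 1 (mod B^n), B = 2^GMP_NUMB_BITS,
     which is exactly what the product check below verifies: low limb 1 and
     the remaining n-1 low limbs zero. */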
1711
1712 tp = TMP_ALLOC_LIMBS (2 * n);
1713
1714 MPN_ZERO (tp, n);
1715 tp[0] = 1;
1716 binvert_limb (binv, up[0]);
1717 mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
1718
1719 refmpn_mul_n (tp, rp, up, n);
1720 ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
1721
1722 TMP_FREE;
1723}
1724
1725/* The aim is to produce a dst quotient and return a remainder c, satisfying
1726 c*b^n + src-i == 3*dst, where i is the incoming carry.
1727
1728 Some value c==0, c==1 or c==2 will satisfy, so just try each.
1729
1730 If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
1731 remainder from the first division attempt determines the correct
1732 remainder (3-c), but don't bother with that, since we can't guarantee
1733 anything about GMP_NUMB_BITS when using nails.
1734
1735 If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1736   complement negative, ie. b^n+src-i, and the calculation produces c1
1737 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This
1738 means it's enough to just add any borrow back at the end.
1739
1740   A borrow only occurs when src==0 or src==1, and, by the same reasoning as in
1741 mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
1742 or 1 respectively, so with 1 added the final return value is still in the
1743 prescribed range 0 to 2. */
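/* A small worked example (assuming GMP_NUMB_BITS is even, so b == 1 mod 3):
   with n == 1, src == 7 and i == 0, the loop below tries c == 0 (7 mod 3 is
   non-zero), c == 1 (b+7 == 8 mod 3, non-zero), then c == 2, for which
   2*b+7 == 9 == 0 mod 3; so dst = (2*b+7)/3, the returned remainder is 2,
   and indeed 2*b^n + src - i == 3*dst. */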
1744
1745mp_limb_t
1746refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
1747{
1748 mp_ptr spcopy;
1749 mp_limb_t c, cs;
1750
1751 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1752 ASSERT (size >= 1);
1753 ASSERT (carry <= 2);
1754 ASSERT_MPN (sp, size);
1755
1756 spcopy = refmpn_malloc_limbs (size);
1757 cs = refmpn_sub_1 (spcopy, sp, size, carry);
1758
1759 for (c = 0; c <= 2; c++)
1760 if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
1761 goto done;
1762 ASSERT_FAIL (no value of c satisfies);
1763
1764 done:
1765 c += cs;
1766 ASSERT (c <= 2);
1767
1768 free (spcopy);
1769 return c;
1770}
1771
1772mp_limb_t
1773refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1774{
1775 return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1776}
1777
1778
1779/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
1780void
1781refmpn_mul_basecase (mp_ptr prodp,
1782 mp_srcptr up, mp_size_t usize,
1783 mp_srcptr vp, mp_size_t vsize)
1784{
1785 mp_size_t i;
1786
1787 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1788 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1789 ASSERT (usize >= vsize);
1790 ASSERT (vsize >= 1);
1791 ASSERT_MPN (up, usize);
1792 ASSERT_MPN (vp, vsize);
1793
1794 prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
1795 for (i = 1; i < vsize; i++)
1796 prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
1797}
1798
1799
1800/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
1801void
1802refmpn_mulmid_basecase (mp_ptr rp,
1803 mp_srcptr up, mp_size_t un,
1804 mp_srcptr vp, mp_size_t vn)
1805{
1806 mp_limb_t cy;
1807 mp_size_t i;
1808
1809 ASSERT (un >= vn);
1810 ASSERT (vn >= 1);
1811 ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
1812 ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
1813 ASSERT_MPN (up, un);
1814 ASSERT_MPN (vp, vn);
1815
1816 rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
1817 rp[un - vn + 2] = CNST_LIMB (0);
1818 for (i = 1; i < vn; i++)
1819 {
1820 cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
1821 cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
1822 cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
1823 ASSERT (cy == 0);
1824 }
1825}
1826
1827void
1828refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
1829 mp_ptr scratch)
1830{
1831 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1832}
1833
1834void
1835refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1836{
1837 /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1838 off products near the middle product region boundary */
1839 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1840}
1841
1842void
1843refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
1844 mp_srcptr vp, mp_size_t vn)
1845{
1846 /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1847 off products near the middle product region boundary */
1848 refmpn_mulmid_basecase (rp, up, un, vp, vn);
1849}
1850
1851
1852
1853#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
1854#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
1855#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
1856#if WANT_FFT
1857#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
1858#else
1859#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
1860#endif
1861
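/* Strategy of refmpn_mul below: the product is accumulated vn limbs at a
   time, and each vn x vn block uses a toom routine one level below where the
   library would normally switch to it (mpn_toom22_mul in the toom3 range,
   and so on), presumably so that the reference multiply does not simply
   mirror the code paths of the routines it is used to check. */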
1862void
1863refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
1864{
1865 mp_ptr tp, rp;
1866 mp_size_t tn;
1867
1868 if (vn < TOOM3_THRESHOLD)
1869 {
1870 /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
1871 if (vn != 0)
1872 refmpn_mul_basecase (wp, up, un, vp, vn);
1873 else
1874 MPN_ZERO (wp, un);
1875 return;
1876 }
1877
1878 MPN_ZERO (wp, vn);
1879 rp = refmpn_malloc_limbs (2 * vn);
1880
1881 if (vn < TOOM4_THRESHOLD)
1882 tn = mpn_toom22_mul_itch (vn, vn);
1883 else if (vn < TOOM6_THRESHOLD)
1884 tn = mpn_toom33_mul_itch (vn, vn);
1885 else if (vn < FFT_THRESHOLD)
1886 tn = mpn_toom44_mul_itch (vn, vn);
1887 else
1888 tn = mpn_toom6h_mul_itch (vn, vn);
1889 tp = refmpn_malloc_limbs (tn);
1890
1891 while (un >= vn)
1892 {
1893 if (vn < TOOM4_THRESHOLD)
1894 /* In the toom3 range, use mpn_toom22_mul. */
1895 mpn_toom22_mul (rp, up, vn, vp, vn, tp);
1896 else if (vn < TOOM6_THRESHOLD)
1897 /* In the toom4 range, use mpn_toom33_mul. */
1898 mpn_toom33_mul (rp, up, vn, vp, vn, tp);
1899 else if (vn < FFT_THRESHOLD)
1900 /* In the toom6 range, use mpn_toom44_mul. */
1901 mpn_toom44_mul (rp, up, vn, vp, vn, tp);
1902 else
1903 /* For the largest operands, use mpn_toom6h_mul. */
1904 mpn_toom6h_mul (rp, up, vn, vp, vn, tp);
1905
1906 ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
1907 wp += vn;
1908
1909 up += vn;
1910 un -= vn;
1911 }
1912
1913 free (tp);
1914
1915 if (un != 0)
1916 {
1917 refmpn_mul (rp, vp, vn, up, un);
1918 ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
1919 }
1920 free (rp);
1921}
1922
1923void
1924refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1925{
1926 refmpn_mul (prodp, up, size, vp, size);
1927}
1928
1929void
1930refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1931{
1932 mp_ptr tp = refmpn_malloc_limbs (2*size);
1933 refmpn_mul (tp, up, size, vp, size);
1934 refmpn_copyi (prodp, tp, size);
1935 free (tp);
1936}
1937
1938void
1939refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1940{
1941 refmpn_mul (dst, src, size, src, size);
1942}
1943
1944void
1945refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
1946{
1947 refmpn_mullo_n (dst, src, src, size);
1948}
1949
1950/* Allowing usize<vsize, usize==0 or vsize==0. */
1951void
1952refmpn_mul_any (mp_ptr prodp,
1953 mp_srcptr up, mp_size_t usize,
1954 mp_srcptr vp, mp_size_t vsize)
1955{
1956 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1957 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1958 ASSERT (usize >= 0);
1959 ASSERT (vsize >= 0);
1960 ASSERT_MPN (up, usize);
1961 ASSERT_MPN (vp, vsize);
1962
1963 if (usize == 0)
1964 {
1965 refmpn_fill (prodp, vsize, CNST_LIMB(0));
1966 return;
1967 }
1968
1969 if (vsize == 0)
1970 {
1971 refmpn_fill (prodp, usize, CNST_LIMB(0));
1972 return;
1973 }
1974
1975 if (usize >= vsize)
1976 refmpn_mul (prodp, up, usize, vp, vsize);
1977 else
1978 refmpn_mul (prodp, vp, vsize, up, usize);
1979}
1980
1981
1982mp_limb_t
1983refmpn_gcd_11 (mp_limb_t x, mp_limb_t y)
1984{
1985 /* The non-ref function also requires input operands to be odd, but
1986     refmpn_gcd_1 below doesn't guarantee that. */
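  /* This is the classical binary gcd: strip factors of 2 from each operand,
     subtract the smaller from the larger, and repeat until the difference
     reaches zero. */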
1987 ASSERT (x > 0);
1988 ASSERT (y > 0);
1989 do
1990 {
1991 while ((x & 1) == 0) x >>= 1;
1992 while ((y & 1) == 0) y >>= 1;
1993
1994 if (x < y)
1995 MP_LIMB_T_SWAP (x, y);
1996
1997 x -= y;
1998 }
1999 while (x != 0);
2000
2001 return y;
2002}
2003
2004mp_double_limb_t
2005refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0)
2006{
2007 ASSERT ((x0 & 1) != 0);
2008 ASSERT ((y0 & 1) != 0);
2009 mp_double_limb_t g;
2010 mp_limb_t cy;
2011
2012 do
2013 {
2014 while ((x0 & 1) == 0)
2015 {
2016 x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1);
2017 x1 >>= 1;
2018 }
2019 while ((y0 & 1) == 0)
2020 {
2021 y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1);
2022 y1 >>= 1;
2023 }
2024
2025
2026 if (x1 < y1 || (x1 == y1 && x0 < y0))
2027 {
2028 mp_limb_t t;
2029 t = x1; x1 = y1; y1 = t;
2030 t = x0; x0 = y0; y0 = t;
2031 }
2032
2033 cy = (x0 < y0);
2034 x0 -= y0;
2035 x1 -= y1 + cy;
2036 }
2037 while ((x1 | x0) != 0);
2038
2039 g.d1 = y1;
2040 g.d0 = y0;
2041 return g;
2042}
2043
2044mp_limb_t
2045refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
2046{
2047 mp_limb_t x;
2048 int twos;
2049
2050 ASSERT (y != 0);
2051 ASSERT (! refmpn_zero_p (xp, xsize));
2052 ASSERT_MPN (xp, xsize);
2053 ASSERT_LIMB (y);
2054
2055 x = refmpn_mod_1 (xp, xsize, y);
2056 if (x == 0)
2057 return y;
2058
2059 twos = 0;
2060 while ((x & 1) == 0 && (y & 1) == 0)
2061 {
2062 x >>= 1;
2063 y >>= 1;
2064 twos++;
2065 }
2066
2067 return refmpn_gcd_11 (x, y) << twos;
2068}
2069
2070
2071/* Based on the full limb x, not nails. */
2072unsigned
2073refmpn_count_leading_zeros (mp_limb_t x)
2074{
2075 unsigned n = 0;
2076
2077 ASSERT (x != 0);
2078
2079 while ((x & GMP_LIMB_HIGHBIT) == 0)
2080 {
2081 x <<= 1;
2082 n++;
2083 }
2084 return n;
2085}
2086
2087/* Full limbs allowed, not limited to nails. */
2088unsigned
2089refmpn_count_trailing_zeros (mp_limb_t x)
2090{
2091 unsigned n = 0;
2092
2093 ASSERT (x != 0);
2094 ASSERT_LIMB (x);
2095
2096 while ((x & 1) == 0)
2097 {
2098 x >>= 1;
2099 n++;
2100 }
2101 return n;
2102}
2103
2104/* Strip factors of two (low zero bits) from {p,size} by right shifting.
2105 The return value is the number of twos stripped. */
2106mp_size_t
2107refmpn_strip_twos (mp_ptr p, mp_size_t size)
2108{
2109 mp_size_t limbs;
2110 unsigned shift;
2111
2112 ASSERT (size >= 1);
2113 ASSERT (! refmpn_zero_p (p, size));
2114 ASSERT_MPN (p, size);
2115
2116 for (limbs = 0; p[0] == 0; limbs++)
2117 {
2118 refmpn_copyi (p, p+1, size-1);
2119 p[size-1] = 0;
2120 }
2121
2122 shift = refmpn_count_trailing_zeros (p[0]);
2123 if (shift)
2124 refmpn_rshift (p, p, size, shift);
2125
2126 return limbs*GMP_NUMB_BITS + shift;
2127}
2128
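/* Subtraction-based gcd for odd operands: factors of 2 are stripped from x
   after each subtraction, the smaller operand is subtracted from the larger,
   and the loop ends when the two are equal; that common value is the gcd,
   copied to {gp,xsize} with xsize as the return value. */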
2129mp_limb_t
2130refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
2131{
2132 int cmp;
2133
2134 ASSERT (ysize >= 1);
2135 ASSERT (xsize >= ysize);
2136 ASSERT ((xp[0] & 1) != 0);
2137 ASSERT ((yp[0] & 1) != 0);
2138 /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */
2139 ASSERT (yp[ysize-1] != 0);
2140 ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
2141 ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
2142 ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
2143 if (xsize == ysize)
2144 ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
2145 ASSERT_MPN (xp, xsize);
2146 ASSERT_MPN (yp, ysize);
2147
2148 refmpn_strip_twos (xp, xsize);
2149 MPN_NORMALIZE (xp, xsize);
2150 MPN_NORMALIZE (yp, ysize);
2151
2152 for (;;)
2153 {
2154 cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
2155 if (cmp == 0)
2156 break;
2157 if (cmp < 0)
2158 MPN_PTR_SWAP (xp,xsize, yp,ysize);
2159
2160 ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
2161
2162 refmpn_strip_twos (xp, xsize);
2163 MPN_NORMALIZE (xp, xsize);
2164 }
2165
2166 refmpn_copyi (gp, xp, xsize);
2167 return xsize;
2168}
2169

unsigned long
ref_popc_limb (mp_limb_t src)
{
  unsigned long count;
  int i;

  count = 0;
  for (i = 0; i < GMP_LIMB_BITS; i++)
    {
      count += (src & 1);
      src >>= 1;
    }
  return count;
}

unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long count = 0;
  mp_size_t i;

  ASSERT (size >= 0);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    count += ref_popc_limb (sp[i]);
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr d;
  unsigned long count;

  ASSERT (size >= 0);
  ASSERT_MPN (s1p, size);
  ASSERT_MPN (s2p, size);

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}

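/* Illustrative only, not part of the test suite: the hamming distance is
   just the population count of the xor, exactly as computed above.  A
   hypothetical, compiled-out single-limb example: 0xF0 ^ 0x96 = 0x66,
   which has 4 bits set.  */
#if 0
static void
example_hamdist (void)
{
  mp_limb_t a[1], b[1];
  a[0] = CNST_LIMB(0xF0);
  b[0] = CNST_LIMB(0x96);
  ASSERT_ALWAYS (refmpn_hamdist (a, b, (mp_size_t) 1) == 4);
}
#endif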

/* set r to a%d */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];
  int n;

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}

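/* Illustrative only, not part of the test suite: refmpn_mod2 is plain
   shift-and-subtract, d is shifted up until it would exceed a, then
   trial-subtracted on the way back down.  A hypothetical, compiled-out
   example with small values: 100 mod 7 = 2.  */
#if 0
static void
example_mod2 (void)
{
  mp_limb_t a[2], d[2], r[2];
  a[0] = CNST_LIMB(100);  a[1] = 0;
  d[0] = CNST_LIMB(7);    d[1] = 0;
  refmpn_mod2 (r, a, d);
  ASSERT_ALWAYS (r[0] == 2 && r[1] == 0);
}
#endif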


/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big. */
mp_limb_t
refmpn_sb_div_qr (mp_ptr qp,
                  mp_ptr np, mp_size_t nsize,
                  mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t retval = 0;
  mp_size_t i;
  mp_limb_t d1 = dp[dsize-1];
  mp_ptr np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t n0 = np[i+dsize];
      mp_limb_t n1 = np[i+dsize-1];
      mp_limb_t q, dummy_r;

      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = GMP_NUMB_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
                               d1 << GMP_NAIL_BITS);

      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
#if 0 /* ASSERT triggers gcc 4.2.1 bug */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
#endif

  /* multiply back to original */
  {
    mp_ptr mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize, mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}

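/* Illustrative only, not part of the test suite: a sketch of why the trial
   quotient above is at most 2 too big.  Writing B = 2^GMP_NUMB_BITS, the
   divisor is normalized so d1 = dp[dsize-1] >= B/2, and the trial digit

       q = min (floor ((n0*B + n1) / d1), B-1)

   satisfies Q <= q <= Q + 2, where Q = floor ({np+i, dsize+1} / {dp,dsize})
   is the true quotient digit (Knuth, Seminumerical Algorithms, section
   4.3.1, Theorems A and B).  That is why at most the two "q--" add-back
   corrections in the loop are ever needed.  */
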
/* Divide {np,nsize} by {dp,dsize}: a single limb divisor goes through
   refmpn_divmod_1, otherwise the divisor is normalized so its high limb has
   the top bit set and the work is done by refmpn_sb_div_qr above. */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);
  ASSERT (dsize > 0);
  ASSERT (dp[dsize-1] != 0);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      mp_ptr n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr d2p = refmpn_malloc_limbs (dsize);
      int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}

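/* Illustrative only, not part of the test suite: the normalizing shift
   does not change the quotient, since floor(n/d) = floor((n*2^k)/(d*2^k)),
   and it scales the remainder by 2^k, which is what the final
   refmpn_rshift_or_copy undoes.  A hypothetical, compiled-out example,
   with B = 2^GMP_NUMB_BITS: dividing n = B^2 + 7 by d = B + 1 gives
   quotient B - 1 and remainder 8, since (B+1)*(B-1) + 8 = B^2 + 7.  */
#if 0
static void
example_tdiv_qr (void)
{
  mp_limb_t np[3], dp[2], qp[2], rp[2];
  np[0] = CNST_LIMB(7);  np[1] = 0;  np[2] = CNST_LIMB(1);
  dp[0] = CNST_LIMB(1);  dp[1] = CNST_LIMB(1);
  refmpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, (mp_size_t) 3, dp, (mp_size_t) 2);
  ASSERT_ALWAYS (qp[0] == GMP_NUMB_MAX && qp[1] == 0);
  ASSERT_ALWAYS (rp[0] == 8 && rp[1] == 0);
}
#endif
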
mp_limb_t
refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
  mp_size_t j;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);
  /* ASSERT about directed overlap rp, up */
  /* ASSERT about overlap rp, mp */
  /* ASSERT about overlap up, mp */

  for (j = n - 1; j >= 0; j--)
    {
      up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
      up++;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  return cy;
}

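/* Illustrative only, not part of the test suite: in the loop above each
   pass adds q*m with q = up[0]*invm mod B chosen so the low limb becomes
   zero, which requires invm = -1/mp[0] mod B; the carry limb returned by
   refmpn_addmul_1 is parked in the now-unused low position and folded back
   in by the final add.  The result cy*B^n + {rp,n} is congruent to
   {up,2n} * B^-n mod m, and is below 2*m whenever {up,2n} < m*B^n, so one
   conditional subtraction canonicalizes it.  A hypothetical, compiled-out
   sketch of that final reduction: */
#if 0
static void
example_redc_1_use (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n,
                    mp_limb_t invm)
{
  mp_limb_t cy = refmpn_redc_1 (rp, up, mp, n, invm);
  if (cy != 0 || refmpn_cmp (rp, mp, n) >= 0)
    refmpn_sub_n (rp, rp, mp, n);   /* the borrow, if any, cancels cy */
}
#endif
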
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char *d;
  size_t dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);
  /* Move the result back and decrement dsize if we didn't generate the
     maximum possible number of digits. */
  if (d != dst)
    {
      size_t i;
      dsize -= d - dst;
      for (i = 0; i < dsize; i++)
        dst[i] = d[i];
    }

  if (POW2_P (base))
    free (src);

  return dsize;
}

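/* Illustrative only, not part of the test suite: the digits produced above
   are raw values 0..base-1 (not ASCII), generated least significant first
   by repeated refmpn_divrem_1 and written from the end of the buffer, then
   moved down.  A hypothetical, compiled-out example; the buffer just has to
   cover the MPN_SIZEINBASE estimate for one limb: */
#if 0
static void
example_get_str (void)
{
  mp_limb_t n[1];
  unsigned char digits[64];
  size_t len;
  n[0] = CNST_LIMB(1234);
  len = refmpn_get_str (digits, 10, n, (mp_size_t) 1);   /* clobbers n */
  ASSERT_ALWAYS (len == 4);
  ASSERT_ALWAYS (digits[0] == 1 && digits[1] == 2
                 && digits[2] == 3 && digits[3] == 4);
}
#endif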

mp_limb_t
ref_bswap_limb (mp_limb_t src)
{
  mp_limb_t dst;
  int i;

  dst = 0;
  for (i = 0; i < GMP_LIMB_BYTES; i++)
    {
      dst = (dst << 8) + (src & 0xFF);
      src >>= 8;
    }
  return dst;
}


/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times. */

#if GMP_LIMB_BITS == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if GMP_LIMB_BITS == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

mp_limb_t refmpn_random_seed;

mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> GMP_LIMB_BITS/2);
}

mp_limb_t
refmpn_random_limb (void)
{
  return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
          | refmpn_random_half ()) & GMP_NUMB_MASK;
}

void
refmpn_random (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  if (GMP_NAIL_BITS == 0)
    {
      mpn_random (ptr, size);
      return;
    }

  for (i = 0; i < size; i++)
    ptr[i] = refmpn_random_limb ();
}

void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t bit, mask, limb;
  int run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}

/* This is a simple bitwise algorithm working high to low across "s" and
   testing each time whether setting the bit would make s^2 exceed n. */
mp_size_t
refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
{
  mp_ptr tp, dp;
  mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs;
  unsigned ibit;
  long i;
  mp_limb_t c;

  ASSERT (nsize >= 0);

  /* If n==0, then s=0 and r=0. */
  if (nsize == 0)
    return 0;

  ASSERT (np[nsize - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));

  /* root */
  ssize = (nsize+1)/2;
  refmpn_zero (sp, ssize);

  /* the remainder so far */
  dp = refmpn_memdup_limbs (np, nsize);
  dsize = nsize;

  /* temporary */
  talloc = 2*ssize + 1;
  tp = refmpn_malloc_limbs (talloc);

  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
    {
      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
         is added to it */

      ilimbs = (i+1) / GMP_NUMB_BITS;
      ibit = (i+1) % GMP_NUMB_BITS;
      refmpn_zero (tp, ilimbs);
      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
      tsize = ilimbs + ssize;
      tp[tsize] = c;
      tsize += (c != 0);

      ilimbs = (2*i) / GMP_NUMB_BITS;
      ibit = (2*i) % GMP_NUMB_BITS;
      if (ilimbs + 1 > tsize)
        {
          refmpn_zero_extend (tp, tsize, ilimbs + 1);
          tsize = ilimbs + 1;
        }
      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
                        CNST_LIMB(1) << ibit);
      ASSERT (tsize < talloc);
      tp[tsize] = c;
      tsize += (c != 0);

      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
        {
          /* set this bit in s and subtract from the remainder */
          refmpn_setbit (sp, i);

          ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
          dsize = refmpn_normalize (dp, dsize);
        }
    }

  if (rp == NULL)
    {
      ret = ! refmpn_zero_p (dp, dsize);
    }
  else
    {
      ASSERT (dsize == 0 || dp[dsize-1] != 0);
      refmpn_copy (rp, dp, dsize);
      ret = dsize;
    }

  free (dp);
  free (tp);
  return ret;
}
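
/* Illustrative only, not part of the test suite: the quantity t in the
   loop above comes from (s + 2^i)^2 = s^2 + 2*s*2^i + 2^(2*i), so t is
   exactly how much the square grows if bit i is set, and the bit is kept
   whenever the remaining difference n - s^2 can absorb it.  A
   hypothetical, compiled-out example: for n = 85 the root is 9 and the
   remainder is 85 - 81 = 4.  */
#if 0
static void
example_sqrtrem (void)
{
  mp_limb_t n[1], s[1], r[1];
  mp_size_t rsize;
  n[0] = CNST_LIMB(85);
  rsize = refmpn_sqrtrem (s, r, n, (mp_size_t) 1);
  ASSERT_ALWAYS (s[0] == 9);
  ASSERT_ALWAYS (rsize == 1 && r[0] == 4);
}
#endif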