blob: 4213bb790c9549423f778a5e369a73d8b2b86784 [file] [log] [blame]
Austin Schuhbb1338c2024-06-15 19:31:16 -07001/* mpz_lucnum_ui -- calculate Lucas number.
2
3Copyright 2001, 2003, 2005, 2011, 2012, 2015, 2016 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library. If not,
29see https://www.gnu.org/licenses/. */
30
31#include <stdio.h>
32#include "gmp-impl.h"
33
34
35/* change this to "#define TRACE(x) x" for diagnostics */
36#define TRACE(x)
37
38
39/* Notes:
40
41 For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
42 there can't be an overflow applying +4 to just the low limb (since that
43 would leave 0, 1, 2 or 3 mod 8).
44
45 For the -4 in L[2k+1] when k is even, it seems (no proof) that
46 L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
47 L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
48 low limb. Obviously L[0xBFFFFFFD] is a huge number, but it's at least
49 conceivable to calculate it, so it probably should be handled.
50
51 For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
52 2^b, so for instance in 32-bits L[0x80000000] has a low limb of
53 0xFFFFFFFF so there would have been a borrow. Again L[0x80000000] is
54 obviously huge, but probably should be made to work. */
55
56void
57mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
58{
59 mp_size_t lalloc, xalloc, lsize, xsize;
60 mp_ptr lp, xp;
61 mp_limb_t c;
62 int zeros;
63 TMP_DECL;
64
65 TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
66
67 if (n <= FIB_TABLE_LUCNUM_LIMIT)
68 {
69 /* L[n] = F[n] + 2F[n-1] */
70 MPZ_NEWALLOC (ln, 1)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
71 SIZ(ln) = 1;
72 return;
73 }
74
75 /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
76 since square or mul used below might need an extra limb over the true
77 size */
78 lalloc = MPN_FIB2_SIZE (n) + 2;
79 lp = MPZ_NEWALLOC (ln, lalloc);
80
81 TMP_MARK;
82 xalloc = lalloc;
83 xp = TMP_ALLOC_LIMBS (xalloc);
84
85 /* Strip trailing zeros from n, until either an odd number is reached
86 where the L[2k+1] formula can be used, or until n fits within the
87 FIB_TABLE data. The table is preferred of course. */
88 zeros = 0;
89 for (;;)
90 {
91 if (n & 1)
92 {
93 /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
94
95 mp_size_t yalloc, ysize;
96 mp_ptr yp;
97
98 TRACE (printf (" initial odd n=%lu\n", n));
99
100 yalloc = MPN_FIB2_SIZE (n/2);
101 yp = TMP_ALLOC_LIMBS (yalloc);
102 ASSERT (xalloc >= yalloc);
103
104 xsize = mpn_fib2_ui (xp, yp, n/2);
105
106 /* possible high zero on F[k-1] */
107 ysize = xsize;
108 ysize -= (yp[ysize-1] == 0);
109 ASSERT (yp[ysize-1] != 0);
110
111 /* xp = 2*F[k] + F[k-1] */
112#if HAVE_NATIVE_mpn_addlsh1_n
113 c = mpn_addlsh1_n (xp, yp, xp, xsize);
114#else
115 c = mpn_lshift (xp, xp, xsize, 1);
116 c += mpn_add_n (xp, xp, yp, xsize);
117#endif
118 ASSERT (xalloc >= xsize+1);
119 xp[xsize] = c;
120 xsize += (c != 0);
121 ASSERT (xp[xsize-1] != 0);
122
123 ASSERT (lalloc >= xsize + ysize);
124 c = mpn_mul (lp, xp, xsize, yp, ysize);
125 lsize = xsize + ysize;
126 lsize -= (c == 0);
127
128 /* lp = 5*lp */
129#if HAVE_NATIVE_mpn_addlsh2_n
130 c = mpn_addlsh2_n (lp, lp, lp, lsize);
131#else
132 /* FIXME: Is this faster than mpn_mul_1 ? */
133 c = mpn_lshift (xp, lp, lsize, 2);
134 c += mpn_add_n (lp, lp, xp, lsize);
135#endif
136 ASSERT (lalloc >= lsize+1);
137 lp[lsize] = c;
138 lsize += (c != 0);
139
140 /* lp = lp - 4*(-1)^k */
141 if (n & 2)
142 {
143 /* no overflow, see comments above */
144 ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
145 lp[0] += 4;
146 }
147 else
148 {
149 /* won't go negative */
150 MPN_DECR_U (lp, lsize, CNST_LIMB(4));
151 }
152
153 TRACE (mpn_trace (" l",lp, lsize));
154 break;
155 }
156
157 MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
158 zeros++;
159 n /= 2;
160
161 if (n <= FIB_TABLE_LUCNUM_LIMIT)
162 {
163 /* L[n] = F[n] + 2F[n-1] */
164 lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
165 lsize = 1;
166
167 TRACE (printf (" initial small n=%lu\n", n);
168 mpn_trace (" l",lp, lsize));
169 break;
170 }
171 }
172
173 for ( ; zeros != 0; zeros--)
174 {
175 /* L[2k] = L[k]^2 + 2*(-1)^k */
176
177 TRACE (printf (" zeros=%d\n", zeros));
178
179 ASSERT (xalloc >= 2*lsize);
180 mpn_sqr (xp, lp, lsize);
181 lsize *= 2;
182 lsize -= (xp[lsize-1] == 0);
183
184 /* First time around the loop k==n determines (-1)^k, after that k is
185 always even and we set n=0 to indicate that. */
186 if (n & 1)
187 {
188 /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
189 ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
190 xp[0] += 2;
191 n = 0;
192 }
193 else
194 {
195 /* won't go negative */
196 MPN_DECR_U (xp, lsize, CNST_LIMB(2));
197 }
198
199 MP_PTR_SWAP (xp, lp);
200 ASSERT (lp[lsize-1] != 0);
201 }
202
203 /* should end up in the right spot after all the xp/lp swaps */
204 ASSERT (lp == PTR(ln));
205 SIZ(ln) = lsize;
206
207 TMP_FREE;
208}