Austin Schuh | dace2a6 | 2020-08-18 10:56:48 -0700 | [diff] [blame] | 1 | /* mpn_perfect_power_p -- mpn perfect power detection. |
| 2 | |
| 3 | Contributed to the GNU project by Martin Boij. |
| 4 | |
| 5 | Copyright 2009, 2010, 2012, 2014 Free Software Foundation, Inc. |
| 6 | |
| 7 | This file is part of the GNU MP Library. |
| 8 | |
| 9 | The GNU MP Library is free software; you can redistribute it and/or modify |
| 10 | it under the terms of either: |
| 11 | |
| 12 | * the GNU Lesser General Public License as published by the Free |
| 13 | Software Foundation; either version 3 of the License, or (at your |
| 14 | option) any later version. |
| 15 | |
| 16 | or |
| 17 | |
| 18 | * the GNU General Public License as published by the Free Software |
| 19 | Foundation; either version 2 of the License, or (at your option) any |
| 20 | later version. |
| 21 | |
| 22 | or both in parallel, as here. |
| 23 | |
| 24 | The GNU MP Library is distributed in the hope that it will be useful, but |
| 25 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| 26 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 27 | for more details. |
| 28 | |
| 29 | You should have received copies of the GNU General Public License and the |
| 30 | GNU Lesser General Public License along with the GNU MP Library. If not, |
| 31 | see https://www.gnu.org/licenses/. */ |
| 32 | |
| 33 | #include "gmp-impl.h" |
| 34 | #include "longlong.h" |
| 35 | |
| 36 | #define SMALL 20 |
| 37 | #define MEDIUM 100 |
| 38 | |
| 39 | /* Return non-zero if {np,nn} == {xp,xn} ^ k. |
| 40 | Algorithm: |
| 41 | For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of |
| 42 | {xp,xn}^k. Stop if they don't match the s least significant limbs of |
| 43 | {np,nn}. |
| 44 | |
| 45 | FIXME: Low xn limbs can be expected to always match, if computed as a mod |
| 46 | B^{xn} root. So instead of using mpn_powlo, compute an approximation of the |
| 47 | most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and |
| 48 | compare to {np, nn}. Or use an even cruder approximation based on fix-point |
| 49 | base 2 logarithm. */ |
| 50 | static int |
| 51 | pow_equals (mp_srcptr np, mp_size_t n, |
| 52 | mp_srcptr xp,mp_size_t xn, |
| 53 | mp_limb_t k, mp_bitcnt_t f, |
| 54 | mp_ptr tp) |
| 55 | { |
| 56 | mp_bitcnt_t y, z; |
| 57 | mp_size_t bn; |
| 58 | mp_limb_t h, l; |
| 59 | |
| 60 | ASSERT (n > 1 || (n == 1 && np[0] > 1)); |
| 61 | ASSERT (np[n - 1] > 0); |
| 62 | ASSERT (xn > 0); |
| 63 | |
| 64 | if (xn == 1 && xp[0] == 1) |
| 65 | return 0; |
| 66 | |
| 67 | z = 1 + (n >> 1); |
| 68 | for (bn = 1; bn < z; bn <<= 1) |
| 69 | { |
| 70 | mpn_powlo (tp, xp, &k, 1, bn, tp + bn); |
| 71 | if (mpn_cmp (tp, np, bn) != 0) |
| 72 | return 0; |
| 73 | } |
| 74 | |
| 75 | /* Final check. Estimate the size of {xp,xn}^k before computing the power |
| 76 | with full precision. Optimization: It might pay off to make a more |
| 77 | accurate estimation of the logarithm of {xp,xn}, rather than using the |
| 78 | index of the MSB. */ |
| 79 | |
| 80 | MPN_SIZEINBASE_2EXP(y, xp, xn, 1); |
| 81 | y -= 1; /* msb_index (xp, xn) */ |
| 82 | |
| 83 | umul_ppmm (h, l, k, y); |
| 84 | h -= l == 0; --l; /* two-limb decrement */ |
| 85 | |
| 86 | z = f - 1; /* msb_index (np, n) */ |
| 87 | if (h == 0 && l <= z) |
| 88 | { |
| 89 | mp_limb_t *tp2; |
| 90 | mp_size_t i; |
| 91 | int ans; |
| 92 | mp_limb_t size; |
| 93 | TMP_DECL; |
| 94 | |
| 95 | size = l + k; |
| 96 | ASSERT_ALWAYS (size >= k); |
| 97 | |
| 98 | TMP_MARK; |
| 99 | y = 2 + size / GMP_LIMB_BITS; |
| 100 | tp2 = TMP_ALLOC_LIMBS (y); |
| 101 | |
| 102 | i = mpn_pow_1 (tp, xp, xn, k, tp2); |
| 103 | if (i == n && mpn_cmp (tp, np, n) == 0) |
| 104 | ans = 1; |
| 105 | else |
| 106 | ans = 0; |
| 107 | TMP_FREE; |
| 108 | return ans; |
| 109 | } |
| 110 | |
| 111 | return 0; |
| 112 | } |
| 113 | |
| 114 | |
| 115 | /* Return non-zero if N = {np,n} is a kth power. |
| 116 | I = {ip,n} = N^(-1) mod B^n. */ |
| 117 | static int |
| 118 | is_kth_power (mp_ptr rp, mp_srcptr np, |
| 119 | mp_limb_t k, mp_srcptr ip, |
| 120 | mp_size_t n, mp_bitcnt_t f, |
| 121 | mp_ptr tp) |
| 122 | { |
| 123 | mp_bitcnt_t b; |
| 124 | mp_size_t rn, xn; |
| 125 | |
| 126 | ASSERT (n > 0); |
| 127 | ASSERT ((k & 1) != 0 || k == 2); |
| 128 | ASSERT ((np[0] & 1) != 0); |
| 129 | |
| 130 | if (k == 2) |
| 131 | { |
| 132 | b = (f + 1) >> 1; |
| 133 | rn = 1 + b / GMP_LIMB_BITS; |
| 134 | if (mpn_bsqrtinv (rp, ip, b, tp) != 0) |
| 135 | { |
| 136 | rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1; |
| 137 | xn = rn; |
| 138 | MPN_NORMALIZE (rp, xn); |
| 139 | if (pow_equals (np, n, rp, xn, k, f, tp) != 0) |
| 140 | return 1; |
| 141 | |
| 142 | /* Check if (2^b - r)^2 == n */ |
| 143 | mpn_neg (rp, rp, rn); |
| 144 | rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1; |
| 145 | MPN_NORMALIZE (rp, rn); |
| 146 | if (pow_equals (np, n, rp, rn, k, f, tp) != 0) |
| 147 | return 1; |
| 148 | } |
| 149 | } |
| 150 | else |
| 151 | { |
| 152 | b = 1 + (f - 1) / k; |
| 153 | rn = 1 + (b - 1) / GMP_LIMB_BITS; |
| 154 | mpn_brootinv (rp, ip, rn, k, tp); |
| 155 | if ((b % GMP_LIMB_BITS) != 0) |
| 156 | rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1; |
| 157 | MPN_NORMALIZE (rp, rn); |
| 158 | if (pow_equals (np, n, rp, rn, k, f, tp) != 0) |
| 159 | return 1; |
| 160 | } |
| 161 | MPN_ZERO (rp, rn); /* Untrash rp */ |
| 162 | return 0; |
| 163 | } |
| 164 | |
| 165 | static int |
| 166 | perfpow (mp_srcptr np, mp_size_t n, |
| 167 | mp_limb_t ub, mp_limb_t g, |
| 168 | mp_bitcnt_t f, int neg) |
| 169 | { |
| 170 | mp_ptr ip, tp, rp; |
| 171 | mp_limb_t k; |
| 172 | int ans; |
| 173 | mp_bitcnt_t b; |
| 174 | gmp_primesieve_t ps; |
| 175 | TMP_DECL; |
| 176 | |
| 177 | ASSERT (n > 0); |
| 178 | ASSERT ((np[0] & 1) != 0); |
| 179 | ASSERT (ub > 0); |
| 180 | |
| 181 | TMP_MARK; |
| 182 | gmp_init_primesieve (&ps); |
| 183 | b = (f + 3) >> 1; |
| 184 | |
| 185 | TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n); |
| 186 | |
| 187 | MPN_ZERO (rp, n); |
| 188 | |
| 189 | /* FIXME: It seems the inverse in ninv is needed only to get non-inverted |
| 190 | roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and |
| 191 | similarly for nth roots. It should be more efficient to compute n^{1/2} as |
| 192 | n * n^{-1/2}, with a mullo instead of a binvert. And we can do something |
| 193 | similar for kth roots if we switch to an iteration converging to n^{1/k - |
| 194 | 1}, and we can then eliminate this binvert call. */ |
| 195 | mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp); |
| 196 | if (b % GMP_LIMB_BITS) |
| 197 | ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1; |
| 198 | |
| 199 | if (neg) |
| 200 | gmp_nextprime (&ps); |
| 201 | |
| 202 | ans = 0; |
| 203 | if (g > 0) |
| 204 | { |
| 205 | ub = MIN (ub, g + 1); |
| 206 | while ((k = gmp_nextprime (&ps)) < ub) |
| 207 | { |
| 208 | if ((g % k) == 0) |
| 209 | { |
| 210 | if (is_kth_power (rp, np, k, ip, n, f, tp) != 0) |
| 211 | { |
| 212 | ans = 1; |
| 213 | goto ret; |
| 214 | } |
| 215 | } |
| 216 | } |
| 217 | } |
| 218 | else |
| 219 | { |
| 220 | while ((k = gmp_nextprime (&ps)) < ub) |
| 221 | { |
| 222 | if (is_kth_power (rp, np, k, ip, n, f, tp) != 0) |
| 223 | { |
| 224 | ans = 1; |
| 225 | goto ret; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | ret: |
| 230 | TMP_FREE; |
| 231 | return ans; |
| 232 | } |
| 233 | |
| 234 | static const unsigned short nrtrial[] = { 100, 500, 1000 }; |
| 235 | |
| 236 | /* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime |
| 237 | number. */ |
| 238 | static const double logs[] = |
| 239 | { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 }; |
| 240 | |
| 241 | int |
| 242 | mpn_perfect_power_p (mp_srcptr np, mp_size_t n) |
| 243 | { |
| 244 | mp_limb_t *nc, factor, g; |
| 245 | mp_limb_t exp, d; |
| 246 | mp_bitcnt_t twos, count; |
| 247 | int ans, where, neg, trial; |
| 248 | TMP_DECL; |
| 249 | |
| 250 | neg = n < 0; |
| 251 | if (neg) |
| 252 | { |
| 253 | n = -n; |
| 254 | } |
| 255 | |
| 256 | if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like |
| 257 | (n <= (np[0] == 1)) */ |
| 258 | return 1; |
| 259 | |
| 260 | TMP_MARK; |
| 261 | |
| 262 | count = 0; |
| 263 | |
| 264 | twos = mpn_scan1 (np, 0); |
| 265 | if (twos != 0) |
| 266 | { |
| 267 | mp_size_t s; |
| 268 | if (twos == 1) |
| 269 | { |
| 270 | return 0; |
| 271 | } |
| 272 | s = twos / GMP_LIMB_BITS; |
| 273 | if (s + 1 == n && POW2_P (np[s])) |
| 274 | { |
| 275 | return ! (neg && POW2_P (twos)); |
| 276 | } |
| 277 | count = twos % GMP_LIMB_BITS; |
| 278 | n -= s; |
| 279 | np += s; |
| 280 | if (count > 0) |
| 281 | { |
| 282 | nc = TMP_ALLOC_LIMBS (n); |
| 283 | mpn_rshift (nc, np, n, count); |
| 284 | n -= (nc[n - 1] == 0); |
| 285 | np = nc; |
| 286 | } |
| 287 | } |
| 288 | g = twos; |
| 289 | |
| 290 | trial = (n > SMALL) + (n > MEDIUM); |
| 291 | |
| 292 | where = 0; |
| 293 | factor = mpn_trialdiv (np, n, nrtrial[trial], &where); |
| 294 | |
| 295 | if (factor != 0) |
| 296 | { |
| 297 | if (count == 0) /* We did not allocate nc yet. */ |
| 298 | { |
| 299 | nc = TMP_ALLOC_LIMBS (n); |
| 300 | } |
| 301 | |
| 302 | /* Remove factors found by trialdiv. Optimization: If remove |
| 303 | define _itch, we can allocate its scratch just once */ |
| 304 | |
| 305 | do |
| 306 | { |
| 307 | binvert_limb (d, factor); |
| 308 | |
| 309 | /* After the first round we always have nc == np */ |
| 310 | exp = mpn_remove (nc, &n, np, n, &d, 1, ~(mp_bitcnt_t)0); |
| 311 | |
| 312 | if (g == 0) |
| 313 | g = exp; |
| 314 | else |
| 315 | g = mpn_gcd_1 (&g, 1, exp); |
| 316 | |
| 317 | if (g == 1) |
| 318 | { |
| 319 | ans = 0; |
| 320 | goto ret; |
| 321 | } |
| 322 | |
| 323 | if ((n == 1) & (nc[0] == 1)) |
| 324 | { |
| 325 | ans = ! (neg && POW2_P (g)); |
| 326 | goto ret; |
| 327 | } |
| 328 | |
| 329 | np = nc; |
| 330 | factor = mpn_trialdiv (np, n, nrtrial[trial], &where); |
| 331 | } |
| 332 | while (factor != 0); |
| 333 | } |
| 334 | |
| 335 | MPN_SIZEINBASE_2EXP(count, np, n, 1); /* log (np) + 1 */ |
| 336 | d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1; |
| 337 | ans = perfpow (np, n, d, g, count, neg); |
| 338 | |
| 339 | ret: |
| 340 | TMP_FREE; |
| 341 | return ans; |
| 342 | } |