Add libgmp 6.2.0 to third_party

Don't build it yet.  That will come in the next review.

Change-Id: Idf3266558165e5ab45f4a41c98cc8c838c8244d5
diff --git a/third_party/gmp/mpn/generic/add.c b/third_party/gmp/mpn/generic/add.c
new file mode 100644
index 0000000..4a6e3ba
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add.c
@@ -0,0 +1,36 @@
+/* mpn_add - add mpn to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
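+/* Defining __GMP_FORCE_mpn_add makes gmp.h (via gmp-impl.h) emit its
+   definition of mpn_add as a normal external function here, rather than
+   as the usual extern inline, so the library exports a real symbol.  */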
+#define __GMP_FORCE_mpn_add 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/add_1.c b/third_party/gmp/mpn/generic/add_1.c
new file mode 100644
index 0000000..1745aed
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_1.c
@@ -0,0 +1,33 @@
+/* mpn_add_1 - add limb to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_add_1 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/add_err1_n.c b/third_party/gmp/mpn/generic/add_err1_n.c
new file mode 100644
index 0000000..b247f19
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_err1_n.c
@@ -0,0 +1,105 @@
+/* mpn_add_err1_n -- add_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
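+
+/* For example, with n = 2, cy = 0, up = {5, 7}, vp = {GMP_NUMB_MAX, 1}:
+   limb 0 carries (c[1] = 1) and limb 1 does not (c[2] = 0), so the error
+   sum is c[1]*yp[1] + c[2]*yp[0] = yp[1]; the function stores ep[0] = yp[1],
+   ep[1] = 0 and returns 0.  */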
+mp_limb_t
+mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  yp += n - 1;
+  el = eh = 0;
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh:el) */
+      zl = (-cy) & yl;
+      el += zl;
+      eh += el < zl;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;
+  ep[1] = eh;
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/add_err2_n.c b/third_party/gmp/mpn/generic/add_err2_n.c
new file mode 100644
index 0000000..d584d6d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_add_err2_n -- add_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  yp1 += n - 1;
+  yp2 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/add_err3_n.c b/third_party/gmp/mpn/generic/add_err3_n.c
new file mode 100644
index 0000000..a6ed4dc
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_add_err3_n -- add_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  yp1 += n - 1;
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+  el3 = eh3 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3) */
+      zl3 = (-cy) & yl3;
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+  ep[4] = el3;
+  ep[5] = eh3;
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/add_n.c b/third_party/gmp/mpn/generic/add_n.c
new file mode 100644
index 0000000..f62ac87
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_n.c
@@ -0,0 +1,91 @@
+/* mpn_add_n -- Add equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
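+  /* cy1 records the carry out of ul + vl, cy2 the carry from then adding
+     the incoming cy; at most one of the two can be set per iteration.  */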
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      sl = ul + vl;
+      cy1 = sl < ul;
+      rl = sl + cy;
+      cy2 = rl < sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+    }
+  while (--n != 0);
+
+  return cy;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, rl, cy;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      rl = ul + vl + cy;
+      cy = rl >> GMP_NUMB_BITS;
+      *rp++ = rl & GMP_NUMB_MASK;
+    }
+  while (--n != 0);
+
+  return cy;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/add_n_sub_n.c b/third_party/gmp/mpn/generic/add_n_sub_n.c
new file mode 100644
index 0000000..1e72b5d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/add_n_sub_n.c
@@ -0,0 +1,174 @@
+/* mpn_add_n_sub_n -- Add and subtract two limb vectors of equal, non-zero length.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999-2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192	/* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / GMP_LIMB_BYTES / 6)
+
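+/* With 8-byte limbs and the default L1_CACHE_SIZE this gives 170-limb
+   chunks, keeping the half dozen limb vectors a chunk touches in cache.  */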
+
+/* mpn_add_n_sub_n.
+   r1[] = s1[] + s2[]
+   r2[] = s1[] - s2[]
+   All operands have n limbs.
+   In-place operations allowed.  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t acyn, acyo;		/* carry for add */
+  mp_limb_t scyn, scyo;		/* carry for subtract */
+  mp_size_t off;		/* offset in operands */
+  mp_size_t this_n;		/* size of current chunk */
+
+  /* We alternately add and subtract in chunks that fit into the (L1)
+     cache.  Since the chunks are several hundred limbs, the function call
+     overhead is insignificant, but we get much better locality.  */
+
+  /* We have three variants of the inner loop; the proper one is chosen
+     depending on whether r1 or r2 is the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 is not identical to either input operand.  We can therefore write
+	 to r1 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	}
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 is not identical to either input operand.  We can therefore write
+	 to r2 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+	}
+    }
+  else
+    {
+      /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice
+	 versa).  Need temporary storage.  */
+      mp_limb_t tp[PART_SIZE];
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	  MPN_COPY (r1p + off, tp, this_n);
+	}
+    }
+
+  return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n;
+
+  n = strtol (argv[1], 0, 0);
+
+  r1p = malloc (n * GMP_LIMB_BYTES);
+  r2p = malloc (n * GMP_LIMB_BYTES);
+  s1p = malloc (n * GMP_LIMB_BYTES);
+  s2p = malloc (n * GMP_LIMB_BYTES);
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+  printf ("              separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  printf ("        combined addsub r1 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,r2p,n));
+  printf ("        combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+  printf ("          combined addsub in-place: %.3f\n", t);
+
+  return 0;
+}
+#endif
diff --git a/third_party/gmp/mpn/generic/addmul_1.c b/third_party/gmp/mpn/generic/addmul_1.c
new file mode 100644
index 0000000..6140e8e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/addmul_1.c
@@ -0,0 +1,148 @@
+/* mpn_addmul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   add the N least significant limbs of the product to the limb vector
+   pointed to by RP.  Return the most significant limb of the product,
+   adjusted for carry-out from the addition.
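+
+   Equivalently, with B = 2^GMP_NUMB_BITS and n-limb operands R and U:
+   {rp,n} <- (R + U*VL) mod B^n, and (R + U*VL) div B^n is returned.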
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004, 2016 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t u0, crec, c, p1, p0, r0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  crec = 0;
+  do
+    {
+      u0 = *up++;
+      umul_ppmm (p1, p0, u0, v0);
+
+      r0 = *rp;
+
+      p0 = r0 + p0;
+      c = r0 > p0;
+
+      p1 = p1 + c;
+
+      r0 = p0 + crec;		/* cycle 0, 3, ... */
+      c = p0 > r0;		/* cycle 1, 4, ... */
+
+      crec = p1 + c;		/* cycle 2, 5, ... */
+
+      *rp++ = r0;
+    }
+  while (--n != 0);
+
+  return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, crec, xl, c1, c2, c3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  crec = 0;
+  prev_p1 = 0;
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      ADDC_LIMB (c1, xl, prev_p1, p0);
+      ADDC_LIMB (c2, xl, xl, r0);
+      ADDC_LIMB (c3, xl, xl, crec);
+      crec = c1 + c2 + c3;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, crec, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  crec = 0;
+  prev_p1 = 0;
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      xw = prev_p1 + p0 + r0 + crec;
+      crec = xw >> GMP_NUMB_BITS;
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + crec;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/bdiv_dbm1c.c b/third_party/gmp/mpn/generic/bdiv_dbm1c.c
new file mode 100644
index 0000000..543bb6e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bdiv_dbm1c.c
@@ -0,0 +1,63 @@
+/* mpn_bdiv_dbm1c -- divide an mpn number by a divisor of B-1, where B is the
+   limb base.  The dbm1c moniker means "Divisor of B Minus 1 with Carry".
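+
+   For example, with 64-bit limbs, B-1 = 2^64-1 =
+   3 * 5 * 17 * 257 * 641 * 65537 * 6700417; mpn_divexact_by3 is built on
+   this function, one mpn_bdiv_dbm1 call with bd = (B-1)/3, i.e. a single
+   multiply per limb.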
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n, mp_limb_t bd, mp_limb_t h)
+{
+  mp_limb_t a, p0, p1, cy;
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      a = ap[i];
+      umul_ppmm (p1, p0, a, bd << GMP_NAIL_BITS);
+      p0 >>= GMP_NAIL_BITS;
+      cy = h < p0;
+      h = (h - p0) & GMP_NUMB_MASK;
+      qp[i] = h;
+      h = h - p1 - cy;
+    }
+
+  return h;
+}
diff --git a/third_party/gmp/mpn/generic/bdiv_q.c b/third_party/gmp/mpn/generic/bdiv_q.c
new file mode 100644
index 0000000..52aa473
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bdiv_q.c
@@ -0,0 +1,76 @@
+/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n. */
+
+void
+mpn_bdiv_q (mp_ptr qp,
+	    mp_srcptr np, mp_size_t nn,
+	    mp_srcptr dp, mp_size_t dn,
+	    mp_ptr tp)
+{
+  mp_limb_t di;
+
+  if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, nn);
+      binvert_limb (di, dp[0]);  di = -di;
+      mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+    }
+  else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, nn);
+      binvert_limb (di, dp[0]);  di = -di;
+      mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+    }
+  else
+    {
+      mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
+    }
+  return;
+}
+
+mp_size_t
+mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+    return nn;
+  else
+    return mpn_mu_bdiv_q_itch (nn, dn);
+}
diff --git a/third_party/gmp/mpn/generic/bdiv_q_1.c b/third_party/gmp/mpn/generic/bdiv_q_1.c
new file mode 100644
index 0000000..6beb9a0
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bdiv_q_1.c
@@ -0,0 +1,125 @@
+/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
+   divisor, returning quotient only.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
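+/* For mpn_pi1_bdiv_q_1, d must be odd, di = d^{-1} mod B (as computed by
+   binvert_limb), and shift is the count of low zero bits removed from the
+   original divisor; mpn_bdiv_q_1 below derives di and shift itself.  */
+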
+mp_limb_t
+mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
+		  mp_limb_t di, int shift)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, u, u_next, dummy;
+
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  d <<= GMP_NAIL_BITS;
+
+  if (shift != 0)
+    {
+      c = 0;
+
+      u = up[0];
+      rp--;
+      for (i = 1; i < n; i++)
+	{
+	  u_next = up[i];
+	  u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+
+	  SUBC_LIMB (c, l, u, c);
+
+	  l = (l * di) & GMP_NUMB_MASK;
+	  rp[i] = l;
+
+	  umul_ppmm (h, dummy, l, d);
+	  c += h;
+	  u = u_next;
+	}
+
+      u = u >> shift;
+      SUBC_LIMB (c, l, u, c);
+
+      l = (l * di) & GMP_NUMB_MASK;
+      rp[n] = l;
+    }
+  else
+    {
+      u = up[0];
+      l = (u * di) & GMP_NUMB_MASK;
+      rp[0] = l;
+      c = 0;
+
+      for (i = 1; i < n; i++)
+	{
+	  umul_ppmm (h, dummy, l, d);
+	  c += h;
+
+	  u = up[i];
+	  SUBC_LIMB (c, l, u, c);
+
+	  l = (l * di) & GMP_NUMB_MASK;
+	  rp[i] = l;
+	}
+    }
+
+  return c;
+}
+
+mp_limb_t
+mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
+{
+  mp_limb_t di;
+  int shift;
+
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  count_trailing_zeros (shift, d);
+  d >>= shift;
+
+  binvert_limb (di, d);
+  return mpn_pi1_bdiv_q_1 (rp, up, n, d, di, shift);
+}
diff --git a/third_party/gmp/mpn/generic/bdiv_qr.c b/third_party/gmp/mpn/generic/bdiv_qr.c
new file mode 100644
index 0000000..a4f0f39
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bdiv_qr.c
@@ -0,0 +1,84 @@
+/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
+   and remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n,
+	    R = N - QD.  */
+
+mp_limb_t
+mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
+	     mp_srcptr np, mp_size_t nn,
+	     mp_srcptr dp, mp_size_t dn,
+	     mp_ptr tp)
+{
+  mp_limb_t di;
+  mp_limb_t rh;
+
+  ASSERT (nn > dn);
+  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+      BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
+    {
+      MPN_COPY (tp, np, nn);
+      binvert_limb (di, dp[0]);  di = -di;
+      rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+      MPN_COPY (rp, tp + nn - dn, dn);
+    }
+  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    {
+      MPN_COPY (tp, np, nn);
+      binvert_limb (di, dp[0]);  di = -di;
+      rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+      MPN_COPY (rp, tp + nn - dn, dn);
+    }
+  else
+    {
+      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
+    }
+
+  return rh;
+}
+
+mp_size_t
+mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    return nn;
+  else
+    return mpn_mu_bdiv_qr_itch (nn, dn);
+}
diff --git a/third_party/gmp/mpn/generic/binvert.c b/third_party/gmp/mpn/generic/binvert.c
new file mode 100644
index 0000000..6c24ab7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/binvert.c
@@ -0,0 +1,106 @@
+/* Compute {up,n}^(-1) mod B^n.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2004-2007, 2009, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/*
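+  Newton iteration for r = u^{-1} mod B^n; each step roughly doubles the
+  number of correct low limbs:
+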
+  r[k+1] = r[k] - r[k] * (u*r[k] - 1)
+  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
+#endif
+
+mp_size_t
+mpn_binvert_itch (mp_size_t n)
+{
+  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
+  return itch_local + itch_out;
+}
+
+void
+mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr xp;
+  mp_size_t rn, newrn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_limb_t di;
+
+  /* Determine the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
+    *sizp++ = rn;
+
+  xp = scratch;
+
+  /* Compute a base value of rn limbs.  */
+  MPN_ZERO (xp, rn);
+  xp[0] = 1;
+  binvert_limb (di, up[0]);
+  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
+    mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+  else
+    mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+
+  mpn_neg (rp, rp, rn);
+
+  /* Use Newton iterations to get the desired precision.  */
+  for (; rn < n; rn = newrn)
+    {
+      mp_size_t m;
+      newrn = *--sizp;
+
+      /* X <- UR. */
+      m = mpn_mulmod_bnm1_next_size (newrn);
+      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
+      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);
+
+      /* R = R(X/B^rn) */
+      mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
+      mpn_neg (rp + rn, rp + rn, newrn - rn);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/broot.c b/third_party/gmp/mpn/generic/broot.c
new file mode 100644
index 0000000..02fe75a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/broot.c
@@ -0,0 +1,195 @@
+/* mpn_broot -- Compute Hensel k-th root
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r = 1;
+  mp_limb_t s = a;
+
+  for (; e > 0; e >>= 1, s *= s)
+    if (e & 1)
+      r *= s;
+
+  return r;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+   Iterates
+
+     r' <-- r - r * (a^{k-1} r^k - 1) / k
+
+   If
+
+     a^{k-1} r^k = 1 (mod 2^m),
+
+   then
+
+     a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+   Compute the update term as
+
+     r' = r - (a^{k-1} r^{k+1} - r) / k
+
+   where we still have cancellation of low limbs.
+
+ */
+void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_size_t sizes[GMP_LIMB_BITS * 2];
+  mp_ptr akm1, tp, rnp, ep;
+  mp_limb_t a0, r0, km1, kp1h, kinv;
+  mp_size_t rn;
+  unsigned i;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+  ASSERT (k >= 3);
+
+  TMP_MARK;
+
+  akm1 = TMP_ALLOC_LIMBS (4*n);
+  tp = akm1 + n;
+
+  km1 = k-1;
+  /* FIXME: Could arrange the iteration so we don't need to compute
+     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+     that we can use wraparound also for a*r, since the low half is
+     unchanged from the previous iteration. Or possibly mulmid. Also,
+     a r = a^{1/k}, so we get that value too, for free? */
+  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+  a0 = ap[0];
+  binvert_limb (kinv, k);
+
+  /* 4 bits: a^{1/k - 1} (mod 16):
+
+	a % 8
+	1 3 5 7
+   k%4 +-------
+     1 |1 1 1 1
+     3 |1 9 9 1
+  */
+  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (n == 1)
+    {
+      TMP_FREE;
+      return;
+    }
+
+  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+  kp1h = k/2 + 1;
+
+  /* FIXME: Special case for two limb iteration. */
+  rnp = TMP_ALLOC_LIMBS (2*n + 1);
+  ep = rnp + n;
+
+  /* FIXME: Possible to do this on the fly with some bit fiddling. */
+  for (i = 0; n > 1; n = (n + 1)/2)
+    sizes[i++] = n;
+
+  rn = 1;
+
+  while (i-- > 0)
+    {
+      /* Compute x^{k+1}. */
+      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+			       final iteration. */
+      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
+
+      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
+
+      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+      ASSERT (mpn_cmp (ep, rp, rn) == 0);
+
+      ASSERT (sizes[i] <= 2*rn);
+      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
+      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
+      rn = sizes[i];
+    }
+  TMP_FREE;
+}
+
+/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+
+  if (k == 1)
+    {
+      MPN_COPY (rp, ap, n);
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+
+  mpn_broot_invm1 (tp, ap, n, k);
+  mpn_mullo_n (rp, tp, ap, n);
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/brootinv.c b/third_party/gmp/mpn/generic/brootinv.c
new file mode 100644
index 0000000..e91b597
--- /dev/null
+++ b/third_party/gmp/mpn/generic/brootinv.c
@@ -0,0 +1,161 @@
+/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes a^{2e} (mod B). Uses a right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powsquaredlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r;
+
+  r = 1;
+  /* if (LIKELY (e != 0)) */
+  do {
+    a *= a;
+    if (e & 1)
+      r *= a;
+    e >>= 1;
+  } while (e != 0);
+
+  return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
+
+   Iterates
+     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+   using Hensel lifting, each time doubling the number of known bits in r.
+
+   Works only for odd k; for even k the Hensel lifting degenerates.
+
+   FIXME:
+
+     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
+
+     (2) Rewrite iteration as
+	   r' <-- r - k^{-1} r (r^k y - 1)
+	 and take advantage of the zero low part of r^k y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+
+   Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo
+   Currently mpn_powlo requires 3*bn
+   so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
+
+void
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
+{
+  mp_ptr tp2, tp3;
+  mp_limb_t kinv, k2, r0, y0;
+  mp_size_t order[GMP_LIMB_BITS + 1];
+  int d;
+
+  ASSERT (bn > 0);
+  ASSERT ((k & 1) != 0);
+
+  tp2 = tp + bn;
+  tp3 = tp + bn + ((bn + 3) >> 1);
+  k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */
+
+  binvert_limb (kinv, k);
+
+  /* 4-bit initial approximation:
+
+   y%16 | 1  3  5  7  9 11 13 15,
+    k%4 +-------------------------+k2%2
+     1  | 1 11 13  7  9  3  5 15  |  1
+     3  | 1  3  5  7  9 11 13 15  |  0
+
+  */
+  y0 = yp[0];
+
+  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8);			/* 4 bits */
+  r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f));	/* 8 bits */
+  r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff));	/* 16 bits */
+#if GMP_NUMB_BITS > 16
+  {
+    unsigned prec = 16;
+    do
+      {
+	r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (bn == 1)
+    return;
+
+  d = 0;
+  for (; bn != 2; bn = (bn + 1) >> 1)
+    order[d++] = bn;
+
+  order[d] = 2;
+  bn = 1;
+
+  do
+    {
+      mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */
+      tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);
+
+      bn = order[d];
+
+      mpn_powlo (rp, tp, &k2, 1, bn, tp3);
+      mpn_mullo_n (tp, yp, rp, bn);
+
+      /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */
+      /* mpn_sub as above needs un >= vn, but ((bn + 1) >> 1) + 1 <= bn.  */
+      {
+	mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */
+	int borrow;
+	borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;
+	if (bn > pbn) /* 3 < bn */
+	  {
+	    if (borrow)
+	      mpn_com (tp + pbn, tp + pbn, bn - pbn);
+	    else
+	      mpn_neg (tp + pbn, tp + pbn, bn - pbn);
+	  }
+      }
+      mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);
+    }
+  while (--d >= 0);
+}
diff --git a/third_party/gmp/mpn/generic/bsqrt.c b/third_party/gmp/mpn/generic/bsqrt.c
new file mode 100644
index 0000000..27184f0
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bsqrt.c
@@ -0,0 +1,47 @@
+/* mpn_bsqrt, a^{1/2} (mod 2^n).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
+{
+  mp_ptr sp;
+  mp_size_t n;
+
+  ASSERT (nb > 0);
+
+  n = nb / GMP_NUMB_BITS;
+  sp = tp + n;
+
+  mpn_bsqrtinv (tp, ap, nb, sp);
+  mpn_mullo_n (rp, tp, ap, n);
+}
diff --git a/third_party/gmp/mpn/generic/bsqrtinv.c b/third_party/gmp/mpn/generic/bsqrtinv.c
new file mode 100644
index 0000000..c286773
--- /dev/null
+++ b/third_party/gmp/mpn/generic/bsqrtinv.c
@@ -0,0 +1,103 @@
+/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
+   Return non-zero if such an integer r exists.
+
+   Iterates
+     r' <-- (3r - r^3 y) / 2
+   using Hensel lifting.  Since we divide by two, the Hensel lifting is
+   somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.
+
+   FIXME:
+     (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+     (2) Rewrite iteration as
+	   r' <-- r - r (r^2 y - 1) / 2
+	 and take advantage of zero low part of r^2 y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+*/
+int
+mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
+{
+  mp_ptr tp2;
+  mp_size_t bn, order[GMP_LIMB_BITS + 1];
+  int i, d;
+
+  ASSERT (bnb > 0);
+
+  bn = 1 + bnb / GMP_LIMB_BITS;
+
+  tp2 = tp + bn;
+
+  rp[0] = 1;
+  if (bnb == 1)
+    {
+      if ((yp[0] & 3) != 1)
+	return 0;
+    }
+  else
+    {
+      if ((yp[0] & 7) != 1)
+	return 0;
+
+      d = 0;
+      for (; bnb != 2; bnb = (bnb + 2) >> 1)
+	order[d++] = bnb;
+
+      for (i = d - 1; i >= 0; i--)
+	{
+	  bnb = order[i];
+	  bn = 1 + bnb / GMP_LIMB_BITS;
+
+	  mpn_sqrlo (tp, rp, bn);
+	  mpn_mullo_n (tp2, rp, tp, bn); /* tp2 <- rp ^ 3 */
+
+	  mpn_mul_1 (tp, rp, bn, 3);
+
+	  mpn_mullo_n (rp, yp, tp2, bn);
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+	  mpn_rsh1sub_n (rp, tp, rp, bn);
+#else
+	  mpn_sub_n (tp2, tp, rp, bn);
+	  mpn_rshift (rp, tp2, bn, 1);
+#endif
+	}
+    }
+  return 1;
+}
diff --git a/third_party/gmp/mpn/generic/cmp.c b/third_party/gmp/mpn/generic/cmp.c
new file mode 100644
index 0000000..940314b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/cmp.c
@@ -0,0 +1,33 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_cmp 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/cnd_add_n.c b/third_party/gmp/mpn/generic/cnd_add_n.c
new file mode 100644
index 0000000..e6b1373
--- /dev/null
+++ b/third_party/gmp/mpn/generic/cnd_add_n.c
@@ -0,0 +1,71 @@
+/* mpn_cnd_add_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);
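+  /* mask is all ones when cnd is nonzero and zero otherwise; masking vl
+     below selects U + V or U + 0 without a condition-dependent branch.  */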
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+      sl = ul + vl;
+      cy1 = sl < ul;
+      rl = sl + cy;
+      cy2 = rl < sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+#else
+      rl = ul + vl;
+      rl += cy;
+      cy = rl >> GMP_NUMB_BITS;
+      *rp++ = rl & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/cnd_sub_n.c b/third_party/gmp/mpn/generic/cnd_sub_n.c
new file mode 100644
index 0000000..d04ad8a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/cnd_sub_n.c
@@ -0,0 +1,69 @@
+/* mpn_cnd_sub_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+      sl = ul - vl;
+      cy1 = sl > ul;
+      rl = sl - cy;
+      cy2 = rl > sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+#else
+      rl = ul - vl;
+      rl -= cy;
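+      /* Limbs hold GMP_NUMB_BITS < GMP_LIMB_BITS significant bits here, so a
+         negative difference sets the top bit of rl, and the shift below
+         recovers the borrow.  */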
+      cy = rl >> (GMP_LIMB_BITS - 1);
+      *rp++ = rl & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/cnd_swap.c b/third_party/gmp/mpn/generic/cnd_swap.c
new file mode 100644
index 0000000..83d856d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/cnd_swap.c
@@ -0,0 +1,50 @@
+/* mpn_cnd_swap
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp,
+	      mp_size_t n)
+{
+  volatile mp_limb_t mask = - (mp_limb_t) (cnd != 0);
+  mp_size_t i;
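+  /* (a ^ b) & mask is a ^ b when swapping and 0 when not; the XOR stores
+     below therefore either exchange the two limbs or rewrite them unchanged,
+     performing the same memory accesses in both cases.  */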
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a, b, t;
+      a = ap[i];
+      b = bp[i];
+      t = (a ^ b) & mask;
+      ap[i] = a ^ t;
+      bp[i] = b ^ t;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/com.c b/third_party/gmp/mpn/generic/com.c
new file mode 100644
index 0000000..4de5824
--- /dev/null
+++ b/third_party/gmp/mpn/generic/com.c
@@ -0,0 +1,44 @@
+/* mpn_com - complement an mpn.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#undef mpn_com
+#define mpn_com __MPN(com)
+
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_limb_t ul;
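+  /* The AND with GMP_NUMB_MASK keeps any nail bits zero; in non-nail builds
+     the mask is all ones and the AND costs nothing.  */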
+  do
+    {
+      ul = *up++;
+      *rp++ = ~ul & GMP_NUMB_MASK;
+    }
+  while (--n != 0);
+}
diff --git a/third_party/gmp/mpn/generic/comb_tables.c b/third_party/gmp/mpn/generic/comb_tables.c
new file mode 100644
index 0000000..dedb77b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/comb_tables.c
@@ -0,0 +1,47 @@
+/* Const tables shared among combinatoric functions.
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
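+/* For example, 5! = 120 = 15 * 2^3, so entry 5 of the table below is 15.  */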
+const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
+
+/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
+const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
+
+/* Entry i contains 2i-popc(2i). */
+const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
+
+const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/third_party/gmp/mpn/generic/compute_powtab.c b/third_party/gmp/mpn/generic/compute_powtab.c
new file mode 100644
index 0000000..f4fbc64
--- /dev/null
+++ b/third_party/gmp/mpn/generic/compute_powtab.c
@@ -0,0 +1,373 @@
+/* mpn_compute_powtab.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+  CAVEATS:
+  * The exptab and powtab vectors are in opposite orders.  Probably OK.
+  * Consider getting rid of exptab, doing bit ops on the un argument instead.
+  * Consider rounding greatest power slightly upwards to save adjustments.
+  * In powtab_decide, consider computing cost from just the 2-3 largest
+    operands, since smaller operands contribute little.  This makes most sense
+    if exptab is suppressed.
+*/
+
+#include "gmp-impl.h"
+
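+/* Relative cost of a one-limb division versus a one-limb multiplication, in
+   percent.  The default of 150 lies between the cutoffs of 120 and 275 used
+   below, so both the mul-based and the div-based table builders are compiled
+   and powtab_decide chooses between them at run time.  */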
+#ifndef DIV_1_VS_MUL_1_PERCENT
+#define DIV_1_VS_MUL_1_PERCENT 150
+#endif
+
+#define SET_powers_t(dest, ptr, size, dib, b, sh)	\
+  do {							\
+    dest.p = ptr;					\
+    dest.n = size;					\
+    dest.digits_in_base = dib;				\
+    dest.base = b;					\
+    dest.shift = sh;					\
+  } while (0)
+
+#if DIV_1_VS_MUL_1_PERCENT > 120
+#define HAVE_mpn_compute_powtab_mul 1
+static void
+mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+			int base, const size_t *exptab, size_t n_pows)
+{
+  mp_size_t n;
+  mp_ptr p, t;
+  mp_limb_t cy;
+  long start_idx;
+  int c;
+
+  mp_limb_t big_base = mp_bases[base].big_base;
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+
+  mp_ptr powtab_mem_ptr = powtab_mem;
+
+  size_t digits_in_base = chars_per_limb;
+
+  powers_t *pt = powtab;
+
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+  p[0] = big_base;
+
+  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+  pt++;
+
+  t = powtab_mem_ptr;
+  powtab_mem_ptr += 2;
+  t[1] = mpn_mul_1 (t, p, 1, big_base);
+  n = 2;
+
+  digits_in_base *= 2;
+
+  c = t[0] == 0;
+  t += c;
+  n -= c;
+  mp_size_t shift = c;
+
+  SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+  p = t;
+  pt++;
+
+  if (exptab[0] == ((size_t) chars_per_limb << n_pows))
+    {
+      start_idx = n_pows - 2;
+    }
+  else
+    {
+      if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
+	{
+	  /* The new power takes 3 limbs, sometimes adjusted to 4.  */
+	  t = powtab_mem_ptr;
+	  powtab_mem_ptr += 4;
+	  t[n] = cy = mpn_mul_1 (t, p, n, big_base);
+	  n += cy != 0;
+
+	  digits_in_base += chars_per_limb;
+
+	  c  = t[0] == 0;
+	  t += c;
+	  n -= c;
+	  shift += c;
+	}
+      else
+	{
+	  /* Copy the 2-limb power; back-multiplication will always grow it
+	     to 3 limbs.  */
+	  t = powtab_mem_ptr;
+	  powtab_mem_ptr += 3;
+	  t[0] = p[0];
+	  t[1] = p[1];
+	}
+
+      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+      p = t;
+      pt++;
+      start_idx = n_pows - 3;
+    }
+
+  for (long pi = start_idx; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n + 2;
+
+      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+      mpn_sqr (t, p, n);
+
+      digits_in_base *= 2;
+      n *= 2;
+      n -= t[n - 1] == 0;
+      shift *= 2;
+
+      c = t[0] == 0;
+      t += c;
+      n -= c;
+      shift += c;
+
+      /* Adjust new value if it is too small as input to the next squaring.  */
+      if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
+	{
+	  t[n] = cy = mpn_mul_1 (t, t, n, big_base);
+	  n += cy != 0;
+
+	  digits_in_base += chars_per_limb;
+
+	  c  = t[0] == 0;
+	  t += c;
+	  n -= c;
+	  shift += c;
+	}
+
+      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+
+      /* Adjust previous value if it is not at its target power.  */
+      if (pt[-1].digits_in_base < exptab[pi + 1])
+	{
+	  mp_size_t n = pt[-1].n;
+	  mp_ptr p = pt[-1].p;
+	  p[n] = cy = mpn_mul_1 (p, p, n, big_base);
+	  n += cy != 0;
+
+	  ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
+	  pt[-1].digits_in_base = exptab[pi + 1];
+
+	  c = p[0] == 0;
+	  pt[-1].p = p + c;
+	  pt[-1].n = n - c;
+	  pt[-1].shift += c;
+	}
+
+      p = t;
+      pt++;
+    }
+}
+#endif
+
+#if DIV_1_VS_MUL_1_PERCENT < 275
+#define HAVE_mpn_compute_powtab_div 1
+static void
+mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+			int base, const size_t *exptab, size_t n_pows)
+{
+  mp_ptr p, t;
+
+  mp_limb_t big_base = mp_bases[base].big_base;
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+
+  mp_ptr powtab_mem_ptr = powtab_mem;
+
+  size_t digits_in_base = chars_per_limb;
+
+  powers_t *pt = powtab;
+
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+  p[0] = big_base;
+
+  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+  pt++;
+
+  mp_size_t n = 1;
+  mp_size_t shift = 0;
+  for (long pi = n_pows - 1; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n;
+
+      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+      mpn_sqr (t, p, n);
+      n = 2 * n - 1;
+      n += t[n] != 0;
+      digits_in_base *= 2;
+
+      if (digits_in_base != exptab[pi])	/* if ((((un - 1) >> pi) & 2) == 0) */
+	{
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
+	  if (__GMP_LIKELY (base == 10))
+	    mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
+			      MP_BASES_BIG_BASE_BINVERTED_10,
+			      MP_BASES_BIG_BASE_CTZ_10);
+	  else
+#endif
+	    /* FIXME: We could use _pi1 here if we add big_base_binverted and
+	       big_base_ctz fields to struct bases.  That would add about 2 KiB
+	       to mp_bases.c.
+	       FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
+	       mpn_bdiv_q_1 for more machines. */
+	    mpn_divexact_1 (t, t, n, big_base);
+
+	  n -= t[n - 1] == 0;
+	  digits_in_base -= chars_per_limb;
+	}
+
+      shift *= 2;
+      /* Strip low zero limbs, but be careful to keep the result divisible by
+	 big_base.  */
+      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
+	{
+	  t++;
+	  n--;
+	  shift++;
+	}
+      p = t;
+
+      SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
+      pt++;
+    }
+
+  /* Strip any remaining low zero limbs.  */
+  pt -= n_pows + 1;
+  for (long pi = n_pows; pi >= 0; pi--)
+    {
+      mp_ptr t = pt[pi].p;
+      mp_size_t shift = pt[pi].shift;
+      mp_size_t n = pt[pi].n;
+      int c;
+      c = t[0] == 0;
+      t += c;
+      n -= c;
+      shift += c;
+      pt[pi].p = t;
+      pt[pi].shift = shift;
+      pt[pi].n = n;
+    }
+}
+#endif
+
+static long
+powtab_decide (size_t *exptab, size_t un, int base)
+{
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+  long n_pows = 0;
+  for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
+    {
+      exptab[n_pows] = pn * chars_per_limb;
+      n_pows++;
+    }
+  exptab[n_pows] = chars_per_limb;
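+
+  /* exptab[i] holds the digit count the power at level i must reach; the
+     counts roughly halve from the full operand down to a single big_base
+     limb.  Note exptab is indexed in the opposite order from powtab, as the
+     CAVEATS above mention.  */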
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+  size_t pn = un - 1;
+  size_t xn = (un + 1) >> 1;
+  unsigned mcost = 1;
+  unsigned dcost = 1;
+  for (long i = n_pows - 2; i >= 0; i--)
+    {
+      size_t pow = (pn >> (i + 1)) + 1;
+
+      if (pow & 1)
+	dcost += pow;
+
+      if (xn != (pow << i))
+	{
+	  if (pow > 2 && (pow & 1) == 0)
+	    mcost += 2 * pow;
+	  else
+	    mcost += pow;
+	}
+      else
+	{
+	  if (pow & 1)
+	    mcost += pow;
+	}
+    }
+
+  dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
+
+  if (mcost <= dcost)
+    return n_pows;
+  else
+    return -n_pows;
+#elif HAVE_mpn_compute_powtab_mul
+  return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+  return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
+
+size_t
+mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+  size_t exptab[GMP_LIMB_BITS];
+
+  long n_pows = powtab_decide (exptab, un, base);
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+  if (n_pows >= 0)
+    {
+      mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+      return n_pows;
+    }
+  else
+    {
+      mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+      return -n_pows;
+    }
+#elif HAVE_mpn_compute_powtab_mul
+  ASSERT (n_pows > 0);
+  mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+  return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+  ASSERT (n_pows < 0);
+  mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+  return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
diff --git a/third_party/gmp/mpn/generic/copyd.c b/third_party/gmp/mpn/generic/copyd.c
new file mode 100644
index 0000000..7def007
--- /dev/null
+++ b/third_party/gmp/mpn/generic/copyd.c
@@ -0,0 +1,40 @@
+/* mpn_copyd
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
+  for (i = n - 1; i >= 0; i--)
+    rp[i] = up[i];
+}
diff --git a/third_party/gmp/mpn/generic/copyi.c b/third_party/gmp/mpn/generic/copyi.c
new file mode 100644
index 0000000..736e0b5
--- /dev/null
+++ b/third_party/gmp/mpn/generic/copyi.c
@@ -0,0 +1,42 @@
+/* mpn_copyi
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
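+  /* Offset both pointers past the block and count a negative index up to
+     zero, so the loop test compares against a constant rather than n.  */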
+  up += n;
+  rp += n;
+  for (i = -n; i != 0; i++)
+    rp[i] = up[i];
+}
diff --git a/third_party/gmp/mpn/generic/dcpi1_bdiv_q.c b/third_party/gmp/mpn/generic/dcpi1_bdiv_q.c
new file mode 100644
index 0000000..1a4bd2a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dcpi1_bdiv_q.c
@@ -0,0 +1,159 @@
+/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009-2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+static mp_size_t
+mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
+{
+  /* NOTE: Depends on mullo_n and mpn_dcpi1_bdiv_qr_n interface */
+  return n;
+}
+
+/* Computes Q = - N / D mod B^n, destroys N.
+
+   N = {np,n}
+   D = {dp,n}
+*/
+
+static void
+mpn_dcpi1_bdiv_q_n (mp_ptr qp,
+		    mp_ptr np, mp_srcptr dp, mp_size_t n,
+		    mp_limb_t dinv, mp_ptr tp)
+{
+  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+    {
+      mp_size_t lo, hi;
+      mp_limb_t cy;
+
+      lo = n >> 1;			/* floor(n/2) */
+      hi = n - lo;			/* ceil(n/2) */
+
+      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+      mpn_mullo_n (tp, qp, dp + hi, lo);
+      mpn_add_n (np + hi, np + hi, tp, lo);
+
+      if (lo < hi)
+	{
+	  cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
+	  np[n - 1] += cy;
+	}
+      qp += lo;
+      np += lo;
+      n -= lo;
+    }
+  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
+}
+
+/* Computes Q = - N / D mod B^nn, destroys N.
+
+   N = {np,nn}
+   D = {dp,dn}
+*/
+
+void
+mpn_dcpi1_bdiv_q (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);
+  ASSERT (nn - dn >= 0);
+  ASSERT (dp[0] & 1);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+	  else
+	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+	  mpn_incr_u (tp + qn, cy);
+
+	  mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+	  cy = 0;
+	}
+
+      np += qn;
+      qp += qn;
+
+      qn = nn - qn;
+      while (qn > dn)
+	{
+	  mpn_add_1 (np + dn, np + dn, qn - dn, cy);
+	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  qp += dn;
+	  np += dn;
+	  qn -= dn;
+	}
+      mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+	mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
+      else
+	mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+    }
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/dcpi1_bdiv_qr.c b/third_party/gmp/mpn/generic/dcpi1_bdiv_qr.c
new file mode 100644
index 0000000..11da44f
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dcpi1_bdiv_qr.c
@@ -0,0 +1,176 @@
+/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Hensel binary division of N = {np, 2*n} by D = {dp, n}.
+
+   Output:
+
+      q = -N * D^{-1} mod 2^{n * GMP_NUMB_BITS},
+
+      r = (N + q * D) * 2^{-n * GMP_NUMB_BITS}
+
+   Stores q at qp.  Stores the n least significant limbs of r at the high half
+   of np, and returns the carry from the addition N + q*D.
+
+   D must be odd.  dinv is (-D)^-1 mod 2^GMP_NUMB_BITS. */
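+
+/* Toy illustration with 4-bit limbs (B = 16) and n = 1: for N = 7, D = 3,
+   q must satisfy 7 + 3*q == 0 (mod 16), so q = 3, and dinv = (-3)^-1
+   = 5 (mod 16).  Then r = (7 + 3*3)/16 = 1 is left in the high limb of np,
+   and the returned carry is 0.  */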
+
+mp_size_t
+mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+{
+  return n;
+}
+
+mp_limb_t
+mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		     mp_limb_t dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy;
+  mp_limb_t rh;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+  mpn_mul (tp, dp + lo, hi, qp, lo);
+
+  mpn_incr_u (tp + lo, cy);
+  rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
+
+  if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp + hi, lo);
+
+  mpn_incr_u (tp + hi, cy);
+  rh += mpn_add_n (np + n, np + n, tp, n);
+
+  return rh;
+}
+
+mp_limb_t
+mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t rr, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[0] & 1);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn - dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      rr = 0;
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+	  else
+	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+	  mpn_incr_u (tp + qn, cy);
+
+	  rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+	  cy = 0;
+	}
+
+      np += qn;
+      qp += qn;
+
+      qn = nn - dn - qn;
+      do
+	{
+	  rr += mpn_add_1 (np + dn, np + dn, qn, cy);
+	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  qp += dn;
+	  np += dn;
+	  qn -= dn;
+	}
+      while (qn > 0);
+      TMP_FREE;
+      return rr + cy;
+    }
+
+  if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+  rr = 0;
+  if (qn != dn)
+    {
+      if (qn > dn - qn)
+	mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+      else
+	mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+      mpn_incr_u (tp + qn, cy);
+
+      rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+      cy = 0;
+    }
+
+  TMP_FREE;
+  return rr + cy;
+}
diff --git a/third_party/gmp/mpn/generic/dcpi1_div_q.c b/third_party/gmp/mpn/generic/dcpi1_div_q.c
new file mode 100644
index 0000000..1905c98
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dcpi1_div_q.c
@@ -0,0 +1,86 @@
+/* mpn_dcpi1_div_q -- divide-and-conquer division, returning exact quotient
+   only.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_ptr tp, wp;
+  mp_limb_t qh;
+  mp_size_t qn;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn - dn >= 3);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+  MPN_COPY (tp + 1, np, nn);
+  tp[0] = 0;
+
+  qn = nn - dn;
+  wp = TMP_ALLOC_LIMBS (qn + 1);
+
+  qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
+
+  if (wp[0] == 0)
+    {
+      mp_limb_t cy;
+
+      if (qn > dn)
+	mpn_mul (tp, wp + 1, qn, dp, dn);
+      else
+	mpn_mul (tp, dp, dn, wp + 1, qn);
+
+      cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
+
+      if (cy || mpn_cmp (tp, np, nn) > 0) /* Quotient is at most one too large, so no loop is needed. */
+	qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
+      else /* Same as below */
+	MPN_COPY (qp, wp + 1, qn);
+    }
+  else
+    MPN_COPY (qp, wp + 1, qn);
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/dcpi1_div_qr.c b/third_party/gmp/mpn/generic/dcpi1_div_qr.c
new file mode 100644
index 0000000..d7a65f8
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dcpi1_div_qr.c
@@ -0,0 +1,248 @@
+/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
+   size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		    gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
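+
+  /* Develop the high half of the quotient from the most significant 2*hi
+     limbs, multiply it by the low part of D and subtract to update the
+     partial remainder (stepping the quotient down if that went negative),
+     then repeat the same scheme on the low half.  */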
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
+    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  mpn_mul (tp, dp, hi, qp, lo);
+
+  cy = mpn_sub_n (np, np, tp, n);
+  if (ql != 0)
+    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+
+  while (cy != 0)
+    {
+      mpn_sub_1 (qp, qp, lo, 1);
+      cy -= mpn_add_n (np, np, dp, n);
+    }
+
+  return qh;
+}
+
+mp_limb_t
+mpn_dcpi1_div_qr (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  tp = TMP_ALLOC_LIMBS (dn);
+
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+	{
+	  mp_limb_t q, n2, n1, n0, d1, d0;
+
+	  /* Handle qh up front, for simplicity. */
+	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+	  if (qh)
+	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+	  /* A single iteration of schoolbook: One 3/2 division,
+	     followed by the bignum update and adjustment. */
+	  n2 = np[0];
+	  n1 = np[-1];
+	  n0 = np[-2];
+	  d1 = dp[-1];
+	  d0 = dp[-2];
+
+	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+	  if (UNLIKELY (n2 == d1) && n1 == d0)
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+	      if (dn > 2)
+		{
+		  mp_limb_t cy, cy1;
+		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+		  cy1 = n0 < cy;
+		  n0 = (n0 - cy) & GMP_NUMB_MASK;
+		  cy = n1 < cy1;
+		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+		  np[-2] = n0;
+
+		  if (UNLIKELY (cy != 0))
+		    {
+		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+		      qh -= (q == 0);
+		      q = (q - 1) & GMP_NUMB_MASK;
+		    }
+		}
+	      else
+		np[-2] = n0;
+
+	      np[-1] = n1;
+	    }
+	  qp[0] = q;
+	}
+      else
+	{
+	  /* Do a 2qn / qn division */
+	  if (qn == 2)
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
+	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+	  else
+	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+	  if (qn != dn)
+	    {
+	      if (qn > dn - qn)
+		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	      else
+		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	      if (qh != 0)
+		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	      while (cy != 0)
+		{
+		  qh -= mpn_sub_1 (qp, qp, qn, 1);
+		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+		}
+	    }
+	}
+
+      qn = nn - dn - qn;
+      do
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+	  qn -= dn;
+	}
+      while (qn > 0);
+    }
+  else
+    {
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+      else
+	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	  else
+	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	  if (qh != 0)
+	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	  while (cy != 0)
+	    {
+	      qh -= mpn_sub_1 (qp, qp, qn, 1);
+	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	    }
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/dcpi1_divappr_q.c b/third_party/gmp/mpn/generic/dcpi1_divappr_q.c
new file mode 100644
index 0000000..0abe04e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dcpi1_divappr_q.c
@@ -0,0 +1,256 @@
+/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
+   quotient.  The quotient returned is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static mp_limb_t
+mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		       gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
+    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  if (UNLIKELY (ql != 0))
+    {
+      mp_size_t i;
+      for (i = 0; i < lo; i++)
+	qp[i] = GMP_NUMB_MASK;
+    }
+
+  return qh;
+}
+
+mp_limb_t
+mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy, qsave;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn > dn);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn >= dn)
+    {
+      qn++;			/* pretend we'll need an extra limb */
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      tp = TMP_SALLOC_LIMBS (dn);
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+	{
+	  mp_limb_t q, n2, n1, n0, d1, d0;
+
+	  /* Handle qh up front, for simplicity. */
+	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+	  if (qh)
+	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+	  /* A single iteration of schoolbook: One 3/2 division,
+	     followed by the bignum update and adjustment. */
+	  n2 = np[0];
+	  n1 = np[-1];
+	  n0 = np[-2];
+	  d1 = dp[-1];
+	  d0 = dp[-2];
+
+	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+	  if (UNLIKELY (n2 == d1) && n1 == d0)
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+	      if (dn > 2)
+		{
+		  mp_limb_t cy, cy1;
+		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+		  cy1 = n0 < cy;
+		  n0 = (n0 - cy) & GMP_NUMB_MASK;
+		  cy = n1 < cy1;
+		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+		  np[-2] = n0;
+
+		  if (UNLIKELY (cy != 0))
+		    {
+		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+		      qh -= (q == 0);
+		      q = (q - 1) & GMP_NUMB_MASK;
+		    }
+		}
+	      else
+		np[-2] = n0;
+
+	      np[-1] = n1;
+	    }
+	  qp[0] = q;
+	}
+      else
+	{
+	  if (qn == 2)
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+	  else
+	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+	  if (qn != dn)
+	    {
+	      if (qn > dn - qn)
+		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	      else
+		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	      if (qh != 0)
+		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	      while (cy != 0)
+		{
+		  qh -= mpn_sub_1 (qp, qp, qn, 1);
+		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+		}
+	    }
+	}
+      qn = nn - dn - qn + 1;
+      while (qn > dn)
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+	  qn -= dn;
+	}
+
+      /* Since we pretended we'd need an extra quotient limb before, we now
+	 have made sure the code above left just dn-1=qn quotient limbs to
+	 develop.  Develop that plus a guard limb. */
+      qn--;
+      qp -= qn;
+      np -= dn;
+      qsave = qp[qn];
+      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
+      MPN_COPY_INCR (qp, qp + 1, qn);
+      qp[qn] = qsave;
+    }
+  else    /* (qn < dn) */
+    {
+      mp_ptr q2p;
+#if 0				/* not possible since we demand nn > dn */
+      if (qn == 0)
+	{
+	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+	  if (qh)
+	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+	  TMP_FREE;
+	  return qh;
+	}
+#endif
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      q2p = TMP_SALLOC_LIMBS (qn + 1);
+      /* Should we check DC_DIVAPPR_Q_THRESHOLD here at all, or rely on
+	 callers not to be silly?  */
+      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+	{
+	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
+				    dp - (qn + 1), qn + 1, dinv->inv32);
+	}
+      else
+	{
+	  /* It is tempting to use qp for recursive scratch and put quotient in
+	     tp, but the recursive scratch needs one limb too many.  */
+	  tp = TMP_SALLOC_LIMBS (qn + 1);
+	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
+	}
+      MPN_COPY (qp, q2p + 1, qn);
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/div_q.c b/third_party/gmp/mpn/generic/div_q.c
new file mode 100644
index 0000000..18c4ecf
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_q.c
@@ -0,0 +1,313 @@
+/* mpn_div_q -- division for arbitrary size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Compute Q = N/D with truncation.
+     N = {np,nn}
+     D = {dp,dn}
+     Q = {qp,nn-dn+1}
+     T = {scratch,nn+1} is scratch space
+   N and D are both untouched by the computation.
+   N and T may overlap; pass the same space if N is irrelevant after the call,
+   but note that the scratch area needs an extra limb.
+
+   Operand requirements:
+     N >= D > 0
+     dp[dn-1] != 0
+     No overlap between the N, D, and Q areas.
+
+   This division function does not clobber its input operands, since it is
+   intended to support average-O(qn) division, and for that to be effective, it
+   cannot put requirements on callers to copy a O(nn) operand.
+
+   If a caller does not care about the value of {np,nn+1} after calling this
+   function, it should pass np also for the scratch argument.  This function
+   will then save some time and space by avoiding allocation and copying.
+   (FIXME: Is this a good design?  We only really save any copying for
+   already-normalised divisors, which should be rare.  It also prevents us from
+   reasonably asking for all scratch space we need.)
+
+   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
+   the most significant quotient limb?  Look at the 4 main code blocks below
+   (consisting of an outer if-else where each arm contains an if-else). It is
+   tricky for the first code block, since the mpn_*_div_q calls will typically
+   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
+   we generate the most significant quotient limb here, before calling
+   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
+   critical division case (the SB sub-case in particular) copying is not a good
+   idea.
+
+   It might make sense to split the if-else parts of the (qn + FUDGE
+   >= dn) blocks into separate functions, since we could promise quite
+   different things to callers in these two cases.  The 'then' case
+   benefits from np=scratch, and it could perhaps even tolerate qp=np,
+   saving some headache for many callers.
+
+   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
+   operands, we do not reuse the huge scratch for adjustments.  This can be a
+   serious waste of memory for the largest operands.
+*/
+
+/* FUDGE determines when to try getting an approximate quotient from the upper
+   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
+   for the code to be correct.  */
+#define FUDGE 5			/* FIXME: tune this */
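+/* For instance, nn = 100 and dn = 60 give qn = 41; since qn + FUDGE < dn,
+   an approximate quotient is computed from the top 2*qn+1 limbs of N and the
+   top qn+1 limbs of D, then adjusted.  */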
+
+#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
+#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
+#ifndef MUPI_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
+#endif
+
+void
+mpn_div_q (mp_ptr qp,
+	   mp_srcptr np, mp_size_t nn,
+	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+  mp_ptr new_dp, new_np, tp, rp;
+  mp_limb_t cy, dh, qh;
+  mp_size_t new_nn, qn;
+  gmp_pi1_t dinv;
+  int cnt;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (nn >= dn);
+  ASSERT (dn > 0);
+  ASSERT (dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
+
+  ASSERT_ALWAYS (FUDGE >= 2);
+
+  dh = dp[dn - 1];
+  if (dn == 1)
+    {
+      mpn_divrem_1 (qp, 0L, np, nn, dh);
+      return;
+    }
+
+  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
+
+  if (qn + FUDGE >= dn)
+    {
+      /* |________________________|
+                          |_______|  */
+      new_np = scratch;
+
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+	{
+	  count_leading_zeros (cnt, dh);
+
+	  cy = mpn_lshift (new_np, np, nn, cnt);
+	  new_np[nn] = cy;
+	  new_nn = nn + (cy != 0);
+
+	  new_dp = TMP_ALLOC_LIMBS (dn);
+	  mpn_lshift (new_dp, dp, dn, cnt);
+
+	  if (dn == 2)
+	    {
+	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
+	    {
+	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+	    {
+	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
+	    }
+	  if (cy == 0)
+	    qp[qn - 1] = qh;
+	  else
+	    ASSERT (qh == 0);
+	}
+      else  /* divisor is already normalised */
+	{
+	  if (new_np != np)
+	    MPN_COPY (new_np, np, nn);
+
+	  if (dn == 2)
+	    {
+	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
+	    }
+	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
+	    {
+	      invert_pi1 (dinv, dh, dp[dn - 2]);
+	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+	    {
+	      invert_pi1 (dinv, dh, dp[dn - 2]);
+	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+	    }
+	  qp[nn - dn] = qh;
+	}
+    }
+  else
+    {
+      /* |________________________|
+                |_________________|  */
+      tp = TMP_ALLOC_LIMBS (qn + 1);
+
+      new_np = scratch;
+      new_nn = 2 * qn + 1;
+      if (new_np == np)
+	/* We need {np,nn} to remain untouched until the final adjustment, so
+	   we must allocate separate space for new_np.  */
+	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
+
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+	{
+	  count_leading_zeros (cnt, dh);
+
+	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
+	  new_np[new_nn] = cy;
+
+	  new_nn += (cy != 0);
+
+	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
+	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
+	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
+
+	  if (qn + 1 == 2)
+	    {
+	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+	    }
+	  if (cy == 0)
+	    tp[qn] = qh;
+	  else if (UNLIKELY (qh != 0))
+	    {
+	      /* This happens only when the quotient is close to B^n and
+		 mpn_*_divappr_q returned B^n.  */
+	      mp_size_t i, n;
+	      n = new_nn - (qn + 1);
+	      for (i = 0; i < n; i++)
+		tp[i] = GMP_NUMB_MAX;
+	      qh = 0;		/* currently ignored */
+	    }
+	}
+      else  /* divisor is already normalised */
+	{
+	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
+
+	  new_dp = (mp_ptr) dp + dn - (qn + 1);
+
+	  if (qn == 2 - 1)
+	    {
+	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
+	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
+	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+	    }
+	  tp[qn] = qh;
+	}
+
+      MPN_COPY (qp, tp + 1, qn);
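+      /* tp[0] is a guard limb that absorbed the approximation error.  Only
+         when it is very small can {qp,qn} be one too large, so only then do
+         we pay for the multiply-back-and-compare check.  */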
+      if (tp[0] <= 4)
+	{
+	  mp_size_t rn;
+
+	  rp = TMP_ALLOC_LIMBS (dn + qn);
+	  mpn_mul (rp, dp, dn, tp + 1, qn);
+	  rn = dn + qn;
+	  rn -= rp[rn - 1] == 0;
+
+	  if (rn > nn || mpn_cmp (np, rp, nn) < 0)
+	    MPN_DECR_U (qp, qn, 1);
+	}
+    }
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_1.c b/third_party/gmp/mpn/generic/div_qr_1.c
new file mode 100644
index 0000000..8f80d37
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_1.c
@@ -0,0 +1,125 @@
+/* mpn_div_qr_1 -- mpn by limb division.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD 3
+#endif
+#ifndef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD 3
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
+ * n-1}, and the high quotient limb at *qh. Returns remainder. */
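+
+/* A minimal usage sketch (illustrative only, not part of upstream GMP),
+   assuming a 3-limb numerator stored least significant limb first:
+
+     mp_limb_t u[3] = { 5, 6, 7 };
+     mp_limb_t q[2], qh, r;
+     r = mpn_div_qr_1 (q, &qh, u, 3, 10);
+
+   Afterwards {q,2} holds the two low quotient limbs, qh the high
+   quotient limb, and r the remainder of {u,3} divided by 10.  */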
+mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
+	      mp_limb_t d)
+{
+  unsigned cnt;
+  mp_limb_t uh;
+
+  ASSERT (n > 0);
+  ASSERT (d > 0);
+
+  if (d & GMP_NUMB_HIGHBIT)
+    {
+      /* Normalized case */
+      mp_limb_t dinv, q;
+
+      uh = up[--n];
+
+      q = (uh >= d);
+      *qh = q;
+      uh -= (-q) & d;
+
+      if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
+	{
+	  cnt = 0;
+	plain:
+	  while (n > 0)
+	    {
+	      mp_limb_t ul = up[--n];
+	      udiv_qrnnd (qp[n], uh, uh, ul, d);
+	    }
+	  return uh >> cnt;
+	}
+      invert_limb (dinv, d);
+      return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
+    }
+  else
+    {
+      /* Unnormalized case */
+      mp_limb_t dinv, ul;
+
+      if (! UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  uh = up[--n];
+	  udiv_qrnnd (*qh, uh, CNST_LIMB(0), uh, d);
+	  cnt = 0;
+	  goto plain;
+	}
+
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+
+#if HAVE_NATIVE_mpn_div_qr_1u_pi1
+      /* FIXME: Call loop doing on-the-fly normalization */
+#endif
+
+      /* Shift up front, use qp area for shifted copy. A bit messy,
+	 since we have only n-1 limbs available, and shift the high
+	 limb manually. */
+      uh = up[--n];
+      ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
+      uh >>= (GMP_LIMB_BITS - cnt);
+
+      if (UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  udiv_qrnnd (*qh, uh, uh, ul, d);
+	  up = qp;
+	  goto plain;
+	}
+      invert_limb (dinv, d);
+
+      udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
+      return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_1n_pi1.c b/third_party/gmp/mpn/generic/div_qr_1n_pi1.c
new file mode 100644
index 0000000..5c32810
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_1n_pi1.c
@@ -0,0 +1,277 @@
+/* mpn_div_qr_1n_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+#ifndef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#endif
+
+/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %k2\n\t"					\
+	     "adc	%4, %k1\n\t"					\
+	     "sbb	%k0, %k0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %q2\n\t"					\
+	     "adc	%4, %q1\n\t"					\
+	     "sbb	%q0, %q0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxcc	%r3, %4, %1\n\t"				\
+	     "subx	%%g0, %%g0, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addccc	%r7, %8, %%g0\n\t"				\
+	     "addccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
+	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
+	 __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
+	     "adde	%1, %3, %4\n\t"					\
+	     "subfe	%0, %0, %0\n\t"					\
+	     "nor	%0, %0, %0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)			\
+	   __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "algr	%2, %6\n\t"					\
+	     "alcgr	%1, %4\n\t"					\
+	     "lghi	%0, 0\n\t"					\
+	     "alcgr	%0, %0\n\t"					\
+	     "lcgr	%0, %0"						\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
+	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %5, %6\n\t"					\
+	     "adcs	%1, %3, %4\n\t"					\
+	     "movcc	%0, #0\n\t"					\
+	     "movcs	%0, #-1"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (m) = - (__c1 + (__s1 < __c0));					\
+  } while (0)
+#endif
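+
+/* Illustrative note (not part of upstream GMP): all variants of
+   add_mssaaaa above compute the two-limb sum (s1,s0) = (a1,a0) + (b1,b0)
+   and set m to the negated carry-out, i.e. 0 when there is no overflow
+   and ~(mp_limb_t) 0 when there is.  For instance, with 8-bit limbs,
+   (0xff,0xff) + (0x00,0x01) gives (s1,s0) = (0x00,0x00) and m = 0xff,
+   the mask consumed by the quotient loops below.  */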
+
+#if DIV_QR_1N_METHOD == 1
+
+/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
+   Requires that uh < d. */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  ASSERT (n > 0);
+  ASSERT (uh < d);
+  ASSERT (d & GMP_NUMB_HIGHBIT);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
+
+  do
+    {
+      mp_limb_t q, ul;
+
+      ul = up[--n];
+      udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
+      qp[n] = q;
+    }
+  while (n > 0);
+
+  return uh;
+}
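+
+/* Illustrative note (not part of upstream GMP): the loop above is plain
+   schoolbook long division, one limb per step.  With B = 2^GMP_NUMB_BITS,
+   each udiv_qrnnd_preinv call maintains
+
+       uh_in * B + up[n] = qp[n] * d + uh_out,  with 0 <= uh_out < d,
+
+   so the final uh is the remainder of the complete division.  */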
+
+#elif DIV_QR_1N_METHOD == 2
+
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  mp_limb_t B2;
+  mp_limb_t u0, u2;
+  mp_limb_t q0, q1;
+  mp_limb_t p0, p1;
+  mp_limb_t t;
+  mp_size_t j;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+  ASSERT (n > 0);
+  ASSERT (u1 < d);
+
+  if (n == 1)
+    {
+      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+      return u1;
+    }
+
+  /* FIXME: Could be precomputed */
+  B2 = -d*dinv;
+
+  umul_ppmm (q1, q0, dinv, u1);
+  umul_ppmm (p1, p0, B2, u1);
+  q1 += u1;
+  ASSERT (q1 >= u1);
+  u0 = up[n-1];	/* Early read, to allow qp == up. */
+  qp[n-1] = q1;
+
+  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
+
+  /* FIXME: Keep q1 in a variable between iterations, to reduce number
+     of memory accesses. */
+  for (j = n-2; j-- > 0; )
+    {
+      mp_limb_t q2, cy;
+
+      /* Additions for the q update:
+       *	+-------+
+       *        |u1 * v |
+       *        +---+---+
+       *        | u1|
+       *    +---+---+
+       *    | 1 | v |  (conditional on u2)
+       *    +---+---+
+       *        | 1 |  (conditional on u0 + u2 B2 carry)
+       *        +---+
+       * +      | q0|
+       *   -+---+---+---+
+       *    | q2| q1| q0|
+       *    +---+---+---+
+      */
+      umul_ppmm (p1, t, u1, dinv);
+      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
+      q0 = t;
+
+      umul_ppmm (p1, p0, u1, B2);
+      ADDC_LIMB (cy, u0, u0, u2 & B2);
+      u0 -= (-cy) & d;
+
+      /* Final q update */
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
+      qp[j+1] = q1;
+      MPN_INCR_U (qp+j+2, n-j-2, q2);
+
+      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
+    }
+
+  q1 = (u2 > 0);
+  u1 -= (-q1) & d;
+
+  t = (u1 >= d);
+  q1 += t;
+  u1 -= (-t) & d;
+
+  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+  MPN_INCR_U (qp+1, n-1, q1);
+
+  qp[0] = q0;
+  return u0;
+}
+
+#else
+#error Unknown DIV_QR_1N_METHOD
+#endif
diff --git a/third_party/gmp/mpn/generic/div_qr_1n_pi2.c b/third_party/gmp/mpn/generic/div_qr_1n_pi2.c
new file mode 100644
index 0000000..d8834ea
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_1n_pi2.c
@@ -0,0 +1,203 @@
+/* mpn_div_qr_1n_pi2.
+
+   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* ISSUES:
+
+   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+   * Are there any problems with generating n quotient limbs in the q area?  It
+     surely simplifies things.
+
+   * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
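+
+/* Illustrative note (not part of upstream GMP): unlike add_mssaaaa in
+   div_qr_1n_pi1.c, add_sssaaaa accumulates its carry-out: (s1,s0)
+   receives (a1,a0) + (b1,b0) and s2 is incremented by the carry.
+   With 8-bit limbs and s2 = 0, adding (0xff,0xff) and (0x00,0x01)
+   leaves (s1,s0) = (0x00,0x00) and s2 = 1.  */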
+
+struct precomp_div_1_pi2
+{
+  mp_limb_t dip[2];
+  mp_limb_t d;
+  int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1n_pi2 (mp_ptr qp,
+		   mp_srcptr up, mp_size_t un,
+		   struct precomp_div_1_pi2 *pd)
+{
+  mp_limb_t most_significant_q_limb;
+  mp_size_t i;
+  mp_limb_t r, u2, u1, u0;
+  mp_limb_t d0, di1, di0;
+  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+  mp_limb_t cnd;
+
+  ASSERT (un >= 2);
+  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+  ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+  up += un - 3;
+  r = up[2];
+  d0 = pd->d;
+
+  most_significant_q_limb = (r >= d0);
+  r -= d0 & -most_significant_q_limb;
+
+  qp += un - 3;
+  qp[2] = most_significant_q_limb;
+
+  di1 = pd->dip[1];
+  di0 = pd->dip[0];
+
+  for (i = un - 3; i >= 0; i -= 2)
+    {
+      u2 = r;
+      u1 = up[1];
+      u0 = up[0];
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+
+      up -= 2;
+      qp -= 2;
+    }
+
+  if ((un & 1) == 0)
+    {
+      u2 = r;
+      u1 = up[1];
+
+      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+      qp[1] = q3;
+    }
+
+  return r;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_1u_pi2.c b/third_party/gmp/mpn/generic/div_qr_1u_pi2.c
new file mode 100644
index 0000000..047662f
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_1u_pi2.c
@@ -0,0 +1,235 @@
+/* mpn_div_qr_1u_pi2.
+
+   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* ISSUES:
+
+   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+   * Are there any problems with generating n quotient limbs in the q area?  It
+     surely simplifies things.
+
+   * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+  mp_limb_t dip[2];
+  mp_limb_t d;
+  int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1u_pi2 (mp_ptr qp,
+		   mp_srcptr up, mp_size_t un,
+		   struct precomp_div_1_pi2 *pd)
+{
+  mp_size_t i;
+  mp_limb_t r, u2, u1, u0;
+  mp_limb_t d0, di1, di0;
+  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+  mp_limb_t cnd;
+  int cnt;
+
+  ASSERT (un >= 2);
+  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
+  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+  ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+  up += un - 3;
+  cnt = pd->norm_cnt;
+  r = up[2] >> (GMP_NUMB_BITS - cnt);
+  d0 = pd->d << cnt;
+
+  qp += un - 2;
+
+  di1 = pd->dip[1];
+  di0 = pd->dip[0];
+
+  for (i = un - 3; i >= 0; i -= 2)
+    {
+      u2 = r;
+      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+      u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+
+      up -= 2;
+      qp -= 2;
+    }
+
+  if ((un & 1) != 0)
+    {
+      u2 = r;
+      u1 = (up[2] << cnt);
+
+      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+      qp[1] = q3;
+    }
+  else
+    {
+      u2 = r;
+      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+      u0 = (up[1] << cnt);
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+    }
+
+  return r >> cnt;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_2.c b/third_party/gmp/mpn/generic/div_qr_2.c
new file mode 100644
index 0000000..f7add44
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_2.c
@@ -0,0 +1,314 @@
+/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_2_PI2_THRESHOLD
+/* Disabled unless explicitly tuned. */
+#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
+#endif
+
+#ifndef SANITY_CHECK
+#define SANITY_CHECK 0
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+   * add_csaac accepts two addends and a carry in, and generates a sum and a
+     carry out.  A little like a "full adder".
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
+
+/* Typically used with r1, r0 same as n3, n2. Other types of overlap
+   between inputs and outputs are not supported. */
+#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0)		\
+  do {									\
+    mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0;		\
+    mp_limb_t _t1, _t0;							\
+    mp_limb_t _mask;							\
+									\
+    /* [q3,q2,q1,q0] = [n3,n2]*[di1,di0] + [n3,n2,n1,n0] + [0,1,0,0] */	\
+    umul_ppmm (_q2,_q1, n2, di1);					\
+    umul_ppmm (_q3,_q2a, n3, di1);					\
+    ++_q2;	/* _q2 cannot overflow */				\
+    add_ssaaaa (_q3,_q2, _q3,_q2, n3,_q2a);				\
+    umul_ppmm (_q2c,_q1c, n3, di0);					\
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, n2,_q1c);			\
+    umul_ppmm (_q1d,_q0, n2, di0);					\
+    add_sssaaaa (_q2c,_q1,_q0, _q1,_q0, n1,n0); /* _q2c cannot overflow */ \
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1d);			\
+									\
+    umul_ppmm (_t1,_t0, _q2, d0);					\
+    _t1 += _q2 * d1 + _q3 * d0;						\
+									\
+    sub_ddmmss (r1, r0, n1, n0, _t1, _t0);				\
+									\
+    _mask = -(mp_limb_t) ((r1 >= _q1) & ((r1 > _q1) | (r0 >= _q0)));  /* (r1,r0) >= (q1,q0) */  \
+    add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask);		\
+    sub_ddmmss (_q3, _q2, _q3, _q2, CNST_LIMB(0), -_mask);		\
+									\
+    if (UNLIKELY (r1 >= d1))						\
+      {									\
+	if (r1 > d1 || r0 >= d0)					\
+	  {								\
+	    sub_ddmmss (r1, r0, r1, r0, d1, d0);			\
+	    add_ssaaaa (_q3, _q2, _q3, _q2, CNST_LIMB(0), CNST_LIMB(1));\
+	  }								\
+      }									\
+    (q1) = _q3;								\
+    (q0) = _q2;								\
+  } while (0)
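+
+/* Illustrative note (not part of upstream GMP): given the 2-limb inverse
+   <di1,di0> produced by invert_4by2 below, udiv_qr_4by2 divides the
+   4-limb value <n3,n2,n1,n0> by the normalized 2-limb divisor <d1,d0>,
+   yielding a 2-limb quotient <q1,q0> and remainder <r1,r0>.  The loop
+   in mpn_div_qr_2n_pi2 feeds its running remainder back in as <n3,n2>,
+   which keeps the quotient within two limbs.  */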
+
+static void
+invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t v1, v0, p1, t1, t0, p0, mask;
+  invert_limb (v1, d1);
+  p1 = d1 * v1;
+  /* <1, v1> * d1 = <B-1, p1> */
+  p1 += d0;
+  if (p1 < d0)
+    {
+      v1--;
+      mask = -(mp_limb_t) (p1 >= d1);
+      p1 -= d1;
+      v1 += mask;
+      p1 -= mask & d1;
+    }
+  /* <1, v1> * d1 + d0 = <B-1, p1> */
+  umul_ppmm (t1, p0, d0, v1);
+  p1 += t1;
+  if (p1 < t1)
+    {
+      if (UNLIKELY (p1 >= d1))
+	{
+	  if (p1 > d1 || p0 >= d0)
+	    {
+	      sub_ddmmss (p1, p0, p1, p0, d1, d0);
+	      v1--;
+	    }
+	}
+      sub_ddmmss (p1, p0, p1, p0, d1, d0);
+      v1--;
+    }
+  /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
+   * with <p1, p0> + <d1, d0> >= B^2.
+   *
+   * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
+   * partial remainder after <1, v1> is
+   *
+   * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
+   *                              = <~p1, ~p0, B-1>
+   */
+  udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
+  di[0] = v0;
+  di[1] = v1;
+
+#if SANITY_CHECK
+  {
+    mp_limb_t tp[4];
+    mp_limb_t dp[2];
+    dp[0] = d0;
+    dp[1] = d1;
+    mpn_mul_n (tp, dp, di, 2);
+    ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
+    ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
+  }
+#endif
+}
+
+static mp_limb_t
+mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  r1 = np[nn-1];
+  r0 = np[nn-2];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> (GMP_LIMB_BITS - 1));
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1;
+    }
+
+  for (i = nn - 2; i >= 2; i -= 2)
+    {
+      mp_limb_t n1, n0, q1, q0;
+      n1 = np[i-1];
+      n0 = np[i-2];
+      udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
+      qp[i-1] = q1;
+      qp[i-2] = q0;
+    }
+
+  if (i > 0)
+    {
+      mp_limb_t q;
+      udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
+      qp[0] = q;
+    }
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
+   significant quotient limbs at qp and the 2-limb remainder at {rp,2}.
+   Return the most significant limb of the quotient.
+
+   Preconditions:
+   1. qp must either not overlap with the other operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   2. nn >= 2.  */
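+
+/* A minimal usage sketch (illustrative only, not part of upstream GMP),
+   assuming a 4-limb numerator and a 2-limb divisor with dp[1] != 0:
+
+     mp_limb_t n[4] = { 1, 2, 3, 4 };
+     mp_limb_t d[2] = { 5, 6 };
+     mp_limb_t q[2], r[2], qh;
+     qh = mpn_div_qr_2 (q, r, n, 4, d);
+
+   The quotient is qh * B^2 + {q,2} and the remainder {r,2}, with
+   B = 2^GMP_NUMB_BITS.  */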
+
+mp_limb_t
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp)
+{
+  mp_limb_t d1;
+  mp_limb_t d0;
+  gmp_pi1_t dinv;
+
+  ASSERT (nn >= 2);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  d1 = dp[1]; d0 = dp[0];
+
+  ASSERT (d1 > 0);
+
+  if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
+    {
+      if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
+	{
+	  gmp_pi1_t dinv;
+	  invert_pi1 (dinv, d1, d0);
+	  return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
+	}
+      else
+	{
+	  mp_limb_t di[2];
+	  invert_4by2 (di, d1, d0);
+	  return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
+	}
+    }
+  else
+    {
+      int shift;
+      count_leading_zeros (shift, d1);
+      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+      d0 <<= shift;
+      invert_pi1 (dinv, d1, d0);
+      return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_2n_pi1.c b/third_party/gmp/mpn/generic/div_qr_2n_pi1.c
new file mode 100644
index 0000000..131a811
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_2n_pi1.c
@@ -0,0 +1,84 @@
+/* mpn_div_qr_2n_pi1
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for normalized divisor */
+mp_limb_t
+mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  np += nn - 2;
+  r1 = np[1];
+  r0 = np[0];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> (GMP_LIMB_BITS - 1));
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1;
+    }
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+      np--;
+      qp[i] = q;
+    }
+
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/div_qr_2u_pi1.c b/third_party/gmp/mpn/generic/div_qr_2u_pi1.c
new file mode 100644
index 0000000..70e617b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/div_qr_2u_pi1.c
@@ -0,0 +1,76 @@
+/* mpn_div_qr_2u_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
+   d0, while {np,nn} is shifted on the fly. */
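+
+/* Illustrative note (not part of upstream GMP): the quotient of the
+   shifted operands equals the quotient of the original ones, while the
+   remainder comes out of the loop shifted left by shift and is
+   therefore shifted back down into rp[1],rp[0] at the end.  */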
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_limb_t r2, r1, r0;
+  mp_size_t i;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+  ASSERT (shift > 0);
+
+  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+  r0 = np[nn-2] << shift;
+
+  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      r0 = np[i];
+      r1 |= r0 >> (GMP_LIMB_BITS - shift);
+      r0 <<= shift;
+      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+      qp[i] = q;
+    }
+
+  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
+  rp[1] = r2 >> shift;
+
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/dive_1.c b/third_party/gmp/mpn/generic/dive_1.c
new file mode 100644
index 0000000..056f5b9
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dive_1.c
@@ -0,0 +1,146 @@
+/* mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
+   q will only be correct if d divides a exactly.
+
+   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+   give zero on all CPUs (for instance it doesn't on the x86s).  This
+   separate loop might run faster too, helping odd divisors.
+
+   Possibilities:
+
+   mpn_divexact_1c could be created, accepting and returning c.  This would
+   let a long calculation be done piece by piece.  Currently there's no
+   particular need for that, and not returning c means that a final umul can
+   be skipped.
+
+   Another use for returning c would be letting the caller know whether the
+   division was in fact exact.  It would work just to return the carry bit
+   "c=(l>s)" and let the caller do a final umul if interested.
+
+   When the divisor is even, the factors of two could be handled with a
+   separate mpn_rshift, instead of shifting on the fly.  That might be
+   faster on some CPUs and would mean just the shift==0 style loop would be
+   needed.
+
+   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+   shift==0 loop is unnecessary, and could be eliminated if there's no great
+   speed difference.
+
+   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
+   2.95 for instance has a poor "/" and might prefer the modular method.
+   Perhaps a tuned parameter should control this.
+
+   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
+   step could be skipped.  A test at last step for s<divisor (or ls in the
+   even case) might be a good way to do that.  But if this code is often
+   used with small divisors then it might not be worth bothering.  */
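+
+/* Illustrative note (not part of upstream GMP): for an odd divisor the
+   loops below implement the modular recurrence
+
+       q[i] = (s[i] - c) * inverse  (mod B)
+       c'   = borrow (s[i] - c) + high (divisor * q[i])
+
+   where inverse = 1/divisor mod B comes from binvert_limb; when the
+   divisor divides the input exactly, {dst,size} is the true quotient.  */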
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (shift, divisor);
+      divisor >>= shift;
+    }
+  else
+    shift = 0;
+
+  binvert_limb (inverse, divisor);
+  divisor <<= GMP_NAIL_BITS;
+
+  if (shift != 0)
+    {
+      c = 0;
+
+      s = src[0];
+
+      for (i = 1; i < size; i++)
+	{
+	  s_next = src[i];
+	  ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+	  s = s_next;
+
+	  SUBC_LIMB (c, l, ls, c);
+
+	  l = (l * inverse) & GMP_NUMB_MASK;
+	  dst[i - 1] = l;
+
+	  umul_ppmm (h, dummy, l, divisor);
+	  c += h;
+	}
+
+      ls = s >> shift;
+      l = ls - c;
+      l = (l * inverse) & GMP_NUMB_MASK;
+      dst[size - 1] = l;
+    }
+  else
+    {
+      s = src[0];
+
+      l = (s * inverse) & GMP_NUMB_MASK;
+      dst[0] = l;
+      c = 0;
+
+      for (i = 1; i < size; i++)
+	{
+	  umul_ppmm (h, dummy, l, divisor);
+	  c += h;
+
+	  s = src[i];
+	  SUBC_LIMB (c, l, s, c);
+
+	  l = (l * inverse) & GMP_NUMB_MASK;
+	  dst[i] = l;
+	}
+    }
+}
diff --git a/third_party/gmp/mpn/generic/diveby3.c b/third_party/gmp/mpn/generic/diveby3.c
new file mode 100644
index 0000000..7dee0bc
--- /dev/null
+++ b/third_party/gmp/mpn/generic/diveby3.c
@@ -0,0 +1,173 @@
+/* mpn_divexact_by3c -- mpn exact division by 3.
+
+Copyright 2000-2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if DIVEXACT_BY3_METHOD == 0
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t c)
+{
+  mp_limb_t r;
+  r = mpn_bdiv_dbm1c (rp, up, un, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * c);
+
+  /* Possible bdiv_dbm1 return values are C * (GMP_NUMB_MASK / 3), 0 <= C < 3.
+     We want to return C.  We compute the remainder mod 4 and notice that the
+     inverse of (2^(2k)-1)/3 mod 4 is 1.  */
+  return r & 3;
+}
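+
+/* Worked check (illustrative, assuming 64-bit limbs and no nails):
+   GMP_NUMB_MASK / 3 is 0x5555555555555555, so r above is one of 0,
+   0x55...55 or 0xaa...aa for C = 0, 1, 2; the low two bits of those
+   values are 0, 1 and 2, hence r & 3 recovers C directly.  */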
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 1
+
+/* The algorithm here is basically the same as mpn_divexact_1, as described
+   in the manual.  Namely at each step q = (src[i]-c)*inverse, and new c =
+   borrow(src[i]-c) + high(divisor*q).  But because the divisor is just 3,
+   high(divisor*q) can be determined with two comparisons instead of a
+   multiply.
+
+   The "c += ..."s add the high limb of 3*l to c.  That high limb will be 0,
+   1 or 2.  Doing two separate "+="s seems to give better code on gcc (as of
+   2.95.2 at least).
+
+   It will be noted that the new c is formed by adding three values each 0
+   or 1.  But the total is only 0, 1 or 2.  When the subtraction src[i]-c
+   causes a borrow, that leaves a limb value of either 0xFF...FF or
+   0xFF...FE.  The multiply by MODLIMB_INVERSE_3 gives 0x55...55 or
+   0xAA...AA respectively, and in those cases high(3*q) is only 0 or 1
+   respectively, hence a total of no more than 2.
+
+   Alternatives:
+
+   This implementation has each multiply on the dependent chain, due to
+   "l=s-c".  See below for alternative code which avoids that.  */
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t c)
+{
+  mp_limb_t  l, q, s;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (c == 0 || c == 1 || c == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  i = 0;
+  do
+    {
+      s = up[i];
+      SUBC_LIMB (c, l, s, c);
+
+      q = (l * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+      rp[i] = q;
+
+      c += (q >= GMP_NUMB_CEIL_MAX_DIV3);
+      c += (q >= GMP_NUMB_CEIL_2MAX_DIV3);
+    }
+  while (++i < un);
+
+  ASSERT (c == 0 || c == 1 || c == 2);
+  return c;
+}
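+
+/* Worked example (illustrative, assuming 64-bit limbs): for the single
+   limb 12 with c == 0 we get l = 12 and q = 12 * MODLIMB_INVERSE_3
+   mod 2^64 = 4, since 12 = 3 * 4.  q is below both ceiling constants,
+   so no carry is produced and the returned c == 0 reflects an exact
+   division.  */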
+
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 2
+
+/* The following alternative code re-arranges the quotient calculation from
+   (src[i]-c)*inverse to instead
+
+       q = src[i]*inverse - c*inverse
+
+   thereby allowing src[i]*inverse to be scheduled back as far as desired,
+   making full use of multiplier throughput and leaving just some carry
+   handling on the dependent chain.
+
+   The carry handling consists of determining the c for the next iteration.
+   This is the same as described above, namely look for any borrow from
+   src[i]-c, and at the high of 3*q.
+
+   high(3*q) is done with two comparisons as above (in c2 and c3).  The
+   borrow from src[i]-c is incorporated into those by noting that if there's
+   a borrow then we have src[i]-c == 0xFF..FF or 0xFF..FE, in turn
+   giving q = 0x55..55 or 0xAA..AA.  Adding 1 to either of those q values is
+   enough to make high(3*q) come out 1 bigger, as required.
+
+   l = -c*inverse is calculated at the same time as c, since for most chips
+   it can be more conveniently derived from separate c1/c2/c3 values than
+   from a combined c equal to 0, 1 or 2.
+
+   The net effect is that with good pipelining this loop should be able to
+   run at perhaps 4 cycles/limb, depending on available execute resources
+   etc.
+
+   Usage:
+
+   This code is not used by default, since we really can't rely on the
+   compiler generating a good software pipeline, nor on such an approach
+   even being worthwhile on all CPUs.
+
+   Itanium is one chip where this algorithm helps though, see
+   mpn/ia64/diveby3.asm.  */
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t cy)
+{
+  mp_limb_t  s, sm, cl, q, qx, c2, c3;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (cy == 0 || cy == 1 || cy == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  cl = cy == 0 ? 0 : cy == 1 ? -MODLIMB_INVERSE_3 : -2*MODLIMB_INVERSE_3;
+
+  for (i = 0; i < un; i++)
+    {
+      s = up[i];
+      sm = (s * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+
+      q = (cl + sm) & GMP_NUMB_MASK;
+      rp[i] = q;
+      qx = q + (s < cy);
+
+      c2 = qx >= GMP_NUMB_CEIL_MAX_DIV3;
+      c3 = qx >= GMP_NUMB_CEIL_2MAX_DIV3;
+
+      cy = c2 + c3;
+      cl = (-c2 & -MODLIMB_INVERSE_3) + (-c3 & -MODLIMB_INVERSE_3);
+    }
+
+  return cy;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/divexact.c b/third_party/gmp/mpn/generic/divexact.c
new file mode 100644
index 0000000..ec417df
--- /dev/null
+++ b/third_party/gmp/mpn/generic/divexact.c
@@ -0,0 +1,296 @@
+/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
+   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
+   between Q and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if 1
+void
+mpn_divexact (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn)
+{
+  unsigned shift;
+  mp_size_t qn;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn-1] > 0);
+
+  while (dp[0] == 0)
+    {
+      ASSERT (np[0] == 0);
+      dp++;
+      np++;
+      dn--;
+      nn--;
+    }
+
+  if (dn == 1)
+    {
+      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
+      return;
+    }
+
+  TMP_MARK;
+
+  qn = nn + 1 - dn;
+  count_trailing_zeros (shift, dp[0]);
+
+  if (shift > 0)
+    {
+      mp_ptr wp;
+      mp_size_t ss;
+      ss = (dn > qn) ? qn + 1 : dn;
+
+      tp = TMP_ALLOC_LIMBS (ss);
+      mpn_rshift (tp, dp, ss, shift);
+      dp = tp;
+
+      /* Since we have excluded dn == 1, we have nn > qn, and we need
+	 to shift one limb beyond qn. */
+      wp = TMP_ALLOC_LIMBS (qn + 1);
+      mpn_rshift (wp, np, qn + 1, shift);
+      np = wp;
+    }
+
+  if (dn > qn)
+    dn = qn;
+
+  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
+  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
+  TMP_FREE;
+
+  /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
+  mpn_neg (qp, qp, qn);
+}
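+
+/* A minimal usage sketch (illustrative only, not part of upstream GMP).
+   With d = {7,9} and n = {14,39,27}, i.e. n = d * {2,3} limb-wise with
+   no carries,
+
+     mp_limb_t n[3] = { 14, 39, 27 };
+     mp_limb_t d[2] = { 7, 9 };
+     mp_limb_t q[2];
+     mpn_divexact (q, n, 3, d, 2);
+
+   leaves {q,2} = {2,3}.  The result is meaningless when the division
+   is not exact.  */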
+
+#else
+
+/* We use Jebelean's bidirectional exact division algorithm.  This is
+   somewhat naively implemented, with equal quotient parts done by 2-adic
+   division and truncating division.  Since 2-adic division is faster, it
+   should be used for a larger chunk.
+
+   This code is horrendously ugly, in all sorts of ways.
+
+   * It was hacked without much care or thought, but with a testing program.
+   * It handles scratch space frivolously, and furthermore the itch function
+     is broken.
+   * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
+     have yet to provoke an error due to this, though.
+   * Algorithm selection leaves a lot to be desired.  In particular, the choice
+     between DC and MU isn't a single sharp point, but we treat it like one.
+   * It makes the msb part 1 or 2 limbs larger than the lsb part, even though
+     the latter is faster.  We should at least reverse this, but perhaps
+     we should make the lsb part considerably larger.  (How do we tune this?)
+*/
+
+mp_size_t
+mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
+{
+  return nn + dn;		/* FIXME this is not right */
+}
+
+void
+mpn_divexact (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t nn0, qn0;
+  mp_size_t nn1, qn1;
+  mp_ptr tp;
+  mp_limb_t qml;
+  mp_limb_t qh;
+  int cnt;
+  mp_ptr xdp;
+  mp_limb_t di;
+  mp_limb_t cy;
+  gmp_pi1_t dinv;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn + 1;
+
+  /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
+  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
+    {
+      tp = scratch;
+      MPN_COPY (tp, np, qn);
+      binvert_limb (di, dp[0]);  di = -di;
+      dn = MIN (dn, qn);
+      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+      TMP_FREE;
+      return;
+    }
+
+  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */
+
+  /* If quotient is much larger than the divisor, the bidirectional algorithm
+     does not work as currently implemented.  Fall back to plain bdiv.  */
+  if (qn0 > dn)
+    {
+      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+	{
+	  tp = scratch;
+	  MPN_COPY (tp, np, qn);
+	  binvert_limb (di, dp[0]);  di = -di;
+	  dn = MIN (dn, qn);
+	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	}
+      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+	{
+	  tp = scratch;
+	  MPN_COPY (tp, np, qn);
+	  binvert_limb (di, dp[0]);  di = -di;
+	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	}
+      else
+	{
+	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
+	}
+      TMP_FREE;
+      return;
+    }
+
+  nn0 = qn0 + qn0;
+
+  nn1 = nn0 - 1 + ((nn-dn) & 1);
+  qn1 = qn0;
+  if (LIKELY (qn0 != dn))
+    {
+      nn1 = nn1 + 1;
+      qn1 = qn1 + 1;
+      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
+	{
+	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
+	     to include an extra limb in order to get the needed overlap.  */
+	  /* FIXME: Now with the mu_divappr_q function, we should really need
+	     more overlap. That indicates one of two things: (1) The test code
+	     is not good. (2) We actually overlap too much by default.  */
+	  nn1 = nn1 + 1;
+	  qn1 = qn1 + 1;
+	}
+    }
+
+  tp = TMP_ALLOC_LIMBS (nn1 + 1);
+
+  count_leading_zeros (cnt, dp[dn - 1]);
+
+  /* Normalize divisor, store into tmp area.  */
+  if (cnt != 0)
+    {
+      xdp = TMP_ALLOC_LIMBS (qn1);
+      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
+    }
+  else
+    {
+      xdp = (mp_ptr) dp + dn - qn1;
+    }
+
+  /* Shift dividend according to the divisor normalization.  */
+  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
+     interfaces want a pointer to the imaginative least significant limb, not
+     to the least significant *used* limb.  Of course, we could leave nn1-qn1
+     rubbish limbs in the low part, to save some time.  */
+  if (cnt != 0)
+    {
+      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
+      if (cy != 0)
+	{
+	  tp[nn1] = cy;
+	  nn1++;
+	}
+    }
+  else
+    {
+      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
+	 mpn_sub_n right before is executed.  */
+      MPN_COPY (tp, np + nn - nn1, nn1);
+    }
+
+  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
+  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+    }
+  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+    }
+  else
+    {
+      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
+	 conditional subtraction here.  */
+      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
+      if (qh)
+	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
+      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
+      qp[qn0 - 1 + nn1 - qn1] = qh;
+    }
+  qml = qp[qn0 - 1];
+
+  binvert_limb (di, dp[0]);  di = -di;
+
+  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else
+    {
+      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
+    }
+
+  if (qml < qp[qn0 - 1])
+    mpn_decr_u (qp + qn0, 1);
+
+  TMP_FREE;
+}
+#endif
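
Annotation: the small-size branch above reduces exact division to a "bdiv" step: binvert_limb produces an inverse of the (odd) low divisor limb modulo B = 2^GMP_NUMB_BITS, and mpn_sbpi1_bdiv_q then develops the quotient from the least significant end. A single-limb sketch of that idea, with uint64_t standing in for mp_limb_t (the helper name and the pure Newton iteration are illustrative; GMP's binvert_limb starts from a table):

  #include <assert.h>
  #include <stdint.h>

  /* Inverse of an odd d modulo 2^64 by Newton iteration; each step
     doubles the number of correct low bits (3 -> 6 -> 12 -> 24 -> 48 -> 96). */
  static uint64_t
  binvert_u64 (uint64_t d)
  {
    uint64_t inv = d;                   /* d*d == 1 (mod 8): 3 bits correct */
    for (int i = 0; i < 5; i++)
      inv *= 2 - d * inv;
    return inv;                         /* d * inv == 1 (mod 2^64) */
  }

  int
  main (void)
  {
    uint64_t d = 1000003;               /* odd divisor */
    uint64_t q = 123456789;
    uint64_t n = q * d;                 /* a multiple of d */
    assert (n * binvert_u64 (d) == q);  /* exact quotient, no divide insn */
    return 0;
  }

When d divides n exactly, multiplying by the modular inverse recovers the full quotient with no division instruction at all, which is why the bdiv family needs no adjustment steps.
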
diff --git a/third_party/gmp/mpn/generic/divis.c b/third_party/gmp/mpn/generic/divis.c
new file mode 100644
index 0000000..f989ddb
--- /dev/null
+++ b/third_party/gmp/mpn/generic/divis.c
@@ -0,0 +1,194 @@
+/* mpn_divisible_p -- mpn by mpn divisibility test
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2014, 2017, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Determine whether A={ap,an} is divisible by D={dp,dn}.  Must have both
+   operands normalized, meaning high limbs non-zero, except that an==0 is
+   allowed.
+
+   There usually won't be many low zero bits on D, but the checks for this
+   are fast and might pick up a few operand combinations, in particular they
+   might reduce D to fit the single-limb mod_1/modexact_1 code.
+
+   Future:
+
+   Getting the remainder limb by limb would make an early exit possible on
+   finding a non-zero.  This would probably have to be bdivmod style so
+   there's no addback, but it would need a multi-precision inverse and so
+   might be slower than the plain method (on small sizes at least).
+
+   When D must be normalized (shifted to low bit set), it's possible to
+   suppress the bit-shifting of A down, as long as it's already been checked
+   that A has at least as many trailing zero bits as D.  */
+
+int
+mpn_divisible_p (mp_srcptr ap, mp_size_t an,
+		 mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t  alow, dlow, dmask;
+  mp_ptr     qp, rp, tp;
+  mp_limb_t di;
+  unsigned  twos;
+  int c;
+  TMP_DECL;
+
+  ASSERT (an >= 0);
+  ASSERT (an == 0 || ap[an-1] != 0);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] != 0);
+  ASSERT_MPN (ap, an);
+  ASSERT_MPN (dp, dn);
+
+  /* When a<d only a==0 is divisible.
+     Notice this test covers all cases of an==0. */
+  if (an < dn)
+    return (an == 0);
+
+  /* Strip low zero limbs from d, requiring a==0 on those. */
+  for (;;)
+    {
+      alow = *ap;
+      dlow = *dp;
+
+      if (dlow != 0)
+	break;
+
+      if (alow != 0)
+	return 0;  /* a has fewer low zero limbs than d, so not divisible */
+
+      /* a!=0 and d!=0 so won't get to n==0 */
+      an--; ASSERT (an >= 1);
+      dn--; ASSERT (dn >= 1);
+      ap++;
+      dp++;
+    }
+
+  /* a must have at least as many low zero bits as d */
+  dmask = LOW_ZEROS_MASK (dlow);
+  if ((alow & dmask) != 0)
+    return 0;
+
+  if (dn == 1)
+    {
+      if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
+	return mpn_mod_1 (ap, an, dlow) == 0;
+
+      count_trailing_zeros (twos, dlow);
+      dlow >>= twos;
+      return mpn_modexact_1_odd (ap, an, dlow) == 0;
+    }
+
+  count_trailing_zeros (twos, dlow);
+  if (dn == 2)
+    {
+      mp_limb_t  dsecond = dp[1];
+      if (dsecond <= dmask)
+	{
+	  dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+	  ASSERT_LIMB (dlow);
+	  return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+	}
+    }
+
+  /* Should we compute Q = A * D^(-1) mod B^k,
+                       R = A - Q * D  mod B^k
+     here, for some small values of k?  Then check if R = 0 (mod B^k).  */
+
+  /* We could also compute A' = A mod T and D' = D mod P, for some
+     P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
+     dividing D' also divides A'.  */
+
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (rp, an + 1,
+		     qp, an - dn + 1); /* FIXME: Could we avoid this? */
+
+  if (twos != 0)
+    {
+      tp = TMP_ALLOC_LIMBS (dn);
+      ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
+      dp = tp;
+
+      ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
+    }
+  else
+    {
+      MPN_COPY (rp, ap, an);
+    }
+  if (rp[an - 1] >= dp[dn - 1])
+    {
+      rp[an] = 0;
+      an++;
+    }
+  else if (an == dn)
+    {
+      TMP_FREE;
+      return 0;
+    }
+
+  ASSERT (an > dn);		/* requirement of functions below */
+
+  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+      BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else
+    {
+      tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
+      mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
+    }
+
+  /* In general, bdiv may return either R = 0 or R = D when D divides
+     A. But R = 0 can happen only when A = 0, which we already have
+     excluded. Furthermore, R == D (mod B^{dn}) implies no carry, so
+     we don't need to check the carry returned from bdiv. */
+
+  MPN_CMP (c, rp, dp, dn);
+
+  TMP_FREE;
+  return c == 0;
+}
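
Annotation: for a single odd divisor limb, the modexact idea above admits a remainder-free constant-time test (Granlund and Montgomery): d divides n exactly when n * d^{-1} mod B is at most floor((B-1)/d). A uint64_t sketch; the bound would be precomputed when the divisor is reused:

  #include <assert.h>
  #include <stdint.h>

  static uint64_t
  binvert_u64 (uint64_t d)              /* inverse of odd d mod 2^64 */
  {
    uint64_t inv = d;
    for (int i = 0; i < 5; i++)
      inv *= 2 - d * inv;
    return inv;
  }

  /* d odd:  d | n  iff  n * d^{-1} (mod 2^64) <= (2^64 - 1) / d. */
  static int
  divisible_by_odd (uint64_t n, uint64_t d)
  {
    return n * binvert_u64 (d) <= UINT64_MAX / d;
  }

  int
  main (void)
  {
    assert (divisible_by_odd (91, 13));
    assert (! divisible_by_odd (92, 13));
    return 0;
  }
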
diff --git a/third_party/gmp/mpn/generic/divrem.c b/third_party/gmp/mpn/generic/divrem.c
new file mode 100644
index 0000000..1da84a8
--- /dev/null
+++ b/third_party/gmp/mpn/generic/divrem.c
@@ -0,0 +1,103 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+   quotient.  This is now just a middle layer calling mpn_tdiv_qr.
+
+Copyright 1993-1997, 1999-2002, 2005, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+	    mp_ptr np, mp_size_t nn,
+	    mp_srcptr dp, mp_size_t dn)
+{
+  ASSERT (qxn >= 0);
+  ASSERT (nn >= dn);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  if (dn == 1)
+    {
+      mp_limb_t ret;
+      mp_ptr q2p;
+      mp_size_t qn;
+      TMP_DECL;
+
+      TMP_MARK;
+      q2p = TMP_ALLOC_LIMBS (nn + qxn);
+
+      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
+      qn = nn + qxn - 1;
+      MPN_COPY (qp, q2p, qn);
+      ret = q2p[qn];
+
+      TMP_FREE;
+      return ret;
+    }
+  else if (dn == 2)
+    {
+      return mpn_divrem_2 (qp, qxn, np, nn, dp);
+    }
+  else
+    {
+      mp_ptr q2p;
+      mp_limb_t qhl;
+      mp_size_t qn;
+      TMP_DECL;
+
+      TMP_MARK;
+      if (UNLIKELY (qxn != 0))
+	{
+	  mp_ptr n2p;
+	  TMP_ALLOC_LIMBS_2 (n2p, nn + qxn,
+			     q2p, nn - dn + qxn + 1);
+	  MPN_ZERO (n2p, qxn);
+	  MPN_COPY (n2p + qxn, np, nn);
+	  mpn_tdiv_qr (q2p, np, 0L, n2p, nn + qxn, dp, dn);
+	  qn = nn - dn + qxn;
+	  MPN_COPY (qp, q2p, qn);
+	  qhl = q2p[qn];
+	}
+      else
+	{
+	  q2p = TMP_ALLOC_LIMBS (nn - dn + 1);
+	  mpn_tdiv_qr (q2p, np, 0L, np, nn, dp, dn);
+	  qn = nn - dn;
+	  MPN_COPY (qp, q2p, qn);
+	  qhl = q2p[qn];
+	}
+      TMP_FREE;
+      return qhl;
+    }
+}
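
Annotation: the qxn "fraction limbs" amount to computing floor(N * B^qxn / D); the qxn != 0 branch above literally prepends qxn zero limbs to the dividend before calling mpn_tdiv_qr. The effect in miniature, with base 10 standing in for the limb base:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint64_t n = 1, d = 7;
    int qxn = 3;
    for (int i = 0; i < qxn; i++)
      n *= 10;                  /* append qxn zero "limbs", base 10 */
    assert (n / d == 142);      /* three fraction digits of 1/7 = 0.142... */
    return 0;
  }
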
diff --git a/third_party/gmp/mpn/generic/divrem_1.c b/third_party/gmp/mpn/generic/divrem_1.c
new file mode 100644
index 0000000..c13aa79
--- /dev/null
+++ b/third_party/gmp/mpn/generic/divrem_1.c
@@ -0,0 +1,254 @@
+/* mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD  0
+#endif
+#ifndef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD  0
+#endif
+
+
+
+/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
+   and UNNORM thresholds are 0 and only the inversion code is included.
+
+   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
+   will be MP_SIZE_T_MAX and only the plain division code is included.
+
+   Otherwise mul-by-inverse is better than plain division above some
+   threshold, and best results are obtained by having code for both present.
+
+   The main reason for separating the norm and unnorm cases is that not all
+   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
+   code used on an already normalized divisor.
+
+   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
+   non-shifting code for both the norm and unnorm cases, though with
+   different criteria for skipping a division, and with different thresholds
+   of course.  And in fact if inversion is never viable, then that simple
+   non-shifting division would be all that's left.
+
+   The NORM and UNNORM thresholds might not differ much, but if there's
+   going to be separate code for norm and unnorm then it makes sense to have
+   separate thresholds.  One thing that's possible is that the
+   mul-by-inverse might be better only for normalized divisors, due to that
+   case not needing variable bit shifts.
+
+   Notice that the thresholds are tested after the decision to possibly skip
+   one divide step, so they're based on the actual number of divisions done.
+
+   For the unnorm case, it would be possible to call mpn_lshift to adjust
+   the dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.  */
+
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+	      mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  n;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  r = 0;
+
+  ASSERT (qxn >= 0);
+  ASSERT (un >= 0);
+  ASSERT (d != 0);
+  /* FIXME: What's the correct overlap rule when qxn!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
+
+  n = un + qxn;
+  if (n == 0)
+    return 0;
+
+  d <<= GMP_NAIL_BITS;
+
+  qp += (n - 1);   /* Make qp point at most significant quotient limb */
+
+  if ((d & GMP_LIMB_HIGHBIT) != 0)
+    {
+      if (un != 0)
+	{
+	  /* High quotient limb is 0 or 1, skip a divide step. */
+	  mp_limb_t q;
+	  r = up[un - 1] << GMP_NAIL_BITS;
+	  q = (r >= d);
+	  *qp-- = q;
+	  r -= (d & -q);
+	  r >>= GMP_NAIL_BITS;
+	  n--;
+	  un--;
+	}
+
+      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
+	{
+	plain:
+	  for (i = un - 1; i >= 0; i--)
+	    {
+	      n0 = up[i] << GMP_NAIL_BITS;
+	      udiv_qrnnd (*qp, r, r, n0, d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r;
+	}
+      else
+	{
+	  /* Multiply-by-inverse, divisor already normalized. */
+	  mp_limb_t dinv;
+	  invert_limb (dinv, d);
+
+	  for (i = un - 1; i >= 0; i--)
+	    {
+	      n0 = up[i] << GMP_NAIL_BITS;
+	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r;
+	}
+    }
+  else
+    {
+      /* Most significant bit of divisor == 0.  */
+      int cnt;
+
+      /* Skip a division if high < divisor (high quotient 0).  Testing here
+	 before normalizing will still skip as often as possible.  */
+      if (un != 0)
+	{
+	  n1 = up[un - 1] << GMP_NAIL_BITS;
+	  if (n1 < d)
+	    {
+	      r = n1 >> GMP_NAIL_BITS;
+	      *qp-- = 0;
+	      n--;
+	      if (n == 0)
+		return r;
+	      un--;
+	    }
+	}
+
+      if (! UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+	goto plain;
+
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+      r <<= cnt;
+
+      if (UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+	{
+	  mp_limb_t nshift;
+	  if (un != 0)
+	    {
+	      n1 = up[un - 1] << GMP_NAIL_BITS;
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      for (i = un - 2; i >= 0; i--)
+		{
+		  n0 = up[i] << GMP_NAIL_BITS;
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd (*qp, r, r, nshift, d);
+		  r >>= GMP_NAIL_BITS;
+		  qp--;
+		  n1 = n0;
+		}
+	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r >> cnt;
+	}
+      else
+	{
+	  mp_limb_t  dinv, nshift;
+	  invert_limb (dinv, d);
+	  if (un != 0)
+	    {
+	      n1 = up[un - 1] << GMP_NAIL_BITS;
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      for (i = un - 2; i >= 0; i--)
+		{
+		  n0 = up[i] << GMP_NAIL_BITS;
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
+		  r >>= GMP_NAIL_BITS;
+		  qp--;
+		  n1 = n0;
+		}
+	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r >> cnt;
+	}
+    }
+}
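
Annotation: every loop above reduces to the 2/1 primitive udiv_qrnnd: divide a two-limb value (n1,n0), with n1 < d, by one limb, giving a one-limb quotient and remainder. With 32-bit limbs that primitive is just a native 64-bit divide, which makes the "plain" loop easy to sketch; the preinv variants replace the hardware divide with a multiply by a precomputed reciprocal of the normalized d. Names here are illustrative, not GMP's:

  #include <assert.h>
  #include <stdint.h>

  /* (n1,n0) / d with n1 < d, so the quotient fits one 32-bit limb. */
  static uint32_t
  udiv_qrnnd32 (uint32_t *r, uint32_t n1, uint32_t n0, uint32_t d)
  {
    uint64_t n = ((uint64_t) n1 << 32) | n0;
    *r = (uint32_t) (n % d);
    return (uint32_t) (n / d);
  }

  /* Schoolbook {up,un} / d, most significant limb first; the invariant
     r < d keeps every udiv_qrnnd32 call legal. */
  static uint32_t
  divrem1 (uint32_t *qp, const uint32_t *up, int un, uint32_t d)
  {
    uint32_t r = 0;
    for (int i = un - 1; i >= 0; i--)
      qp[i] = udiv_qrnnd32 (&r, r, up[i], d);
    return r;
  }

  int
  main (void)
  {
    uint32_t u[2] = { 0x89abcdefu, 0x01234567u };  /* 0x0123456789abcdef */
    uint32_t q[2];
    uint32_t r = divrem1 (q, u, 2, 10);
    assert (r == (uint32_t) (0x0123456789abcdefull % 10));
    return 0;
  }
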
diff --git a/third_party/gmp/mpn/generic/divrem_2.c b/third_party/gmp/mpn/generic/divrem_2.c
new file mode 100644
index 0000000..217f2f6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/divrem_2.c
@@ -0,0 +1,118 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+   quotient limbs at qp and the 2-limb remainder at np.  If qxn is non-zero,
+   generate that many fraction bits and append them after the other quotient
+   limbs.  Return the most significant limb of the quotient, this is always 0
+   or 1.
+
+   Preconditions:
+   1. The most significant bit of the divisor must be set.
+   2. qp must either not overlap with the input operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.
+   3. nn >= 2, even if qxn is non-zero.  */
+
+mp_limb_t
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+	      mp_ptr np, mp_size_t nn,
+	      mp_srcptr dp)
+{
+  mp_limb_t most_significant_q_limb;
+  mp_size_t i;
+  mp_limb_t r1, r0, d1, d0;
+  gmp_pi1_t di;
+
+  ASSERT (nn >= 2);
+  ASSERT (qxn >= 0);
+  ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  np += nn - 2;
+  d1 = dp[1];
+  d0 = dp[0];
+  r1 = np[1];
+  r0 = np[0];
+
+  most_significant_q_limb = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
+#endif
+      most_significant_q_limb = 1;
+    }
+
+  invert_pi1 (di, d1, d0);
+
+  qp += qxn;
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
+      np--;
+      qp[i] = q;
+    }
+
+  if (UNLIKELY (qxn != 0))
+    {
+      qp -= qxn;
+      for (i = qxn - 1; i >= 0; i--)
+	{
+	  mp_limb_t q;
+	  udiv_qr_3by2 (q, r1, r0, r1, r0, CNST_LIMB(0), d1, d0, di.inv32);
+	  qp[i] = q;
+	}
+    }
+
+  np[1] = r1;
+  np[0] = r0;
+
+  return most_significant_q_limb;
+}
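
Annotation: precondition 1 is what makes the initial comparison sufficient. With the top bit of d1 set, D >= B^2/2, so the two high dividend limbs divided by D give at most 1, and one compare plus at most one subtraction replaces a whole divide step. A 32-bit-limb sketch of that reasoning:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint32_t d1 = 0x80000000u, d0 = 5;  /* normalized: top bit of d1 set */
    uint32_t r1 = 0x90000000u, r0 = 3;  /* two high dividend limbs */
    uint64_t d = ((uint64_t) d1 << 32) | d0;
    uint64_t r = ((uint64_t) r1 << 32) | r0;

    assert (r / d <= 1);                /* guaranteed: d >= 2^63, r < 2^64 */

    int q = r1 > d1 || (r1 == d1 && r0 >= d0);  /* the comparison above */
    if (q)
      r -= d;
    assert (q == 1 && r == 0x0ffffffffffffffeull);
    return 0;
  }
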
diff --git a/third_party/gmp/mpn/generic/dump.c b/third_party/gmp/mpn/generic/dump.c
new file mode 100644
index 0000000..9a4ddf4
--- /dev/null
+++ b/third_party/gmp/mpn/generic/dump.c
@@ -0,0 +1,102 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS % 4 == 0
+void
+mpn_dump (mp_srcptr ptr, mp_size_t n)
+{
+  MPN_NORMALIZE (ptr, n);
+
+  if (n == 0)
+    printf ("0\n");
+  else
+    {
+      n--;
+#if _LONG_LONG_LIMB
+      if ((ptr[n] >> GMP_LIMB_BITS / 2) != 0)
+	{
+	  printf ("%lX", (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+	  printf ("%0*lX", (GMP_LIMB_BITS / 2 / 4), (unsigned long) ptr[n]);
+	}
+      else
+#endif
+	printf ("%lX", (unsigned long) ptr[n]);
+
+      while (n)
+	{
+	  n--;
+#if _LONG_LONG_LIMB
+	  printf ("%0*lX", (GMP_NUMB_BITS - GMP_LIMB_BITS / 2) / 4,
+		  (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+	  printf ("%0*lX", GMP_LIMB_BITS / 2 / 4, (unsigned long) ptr[n]);
+#else
+	  printf ("%0*lX", GMP_NUMB_BITS / 4, (unsigned long) ptr[n]);
+#endif
+	}
+      printf ("\n");
+    }
+}
+
+#else
+
+static void
+mpn_recdump (mp_ptr p, mp_size_t n)
+{
+  mp_limb_t lo;
+  MPN_NORMALIZE (p, n);  /* drop cleared high limbs so the recursion terminates */
+  if (n != 0)
+    {
+      lo = p[0] & 0xf;
+      mpn_rshift (p, p, n, 4);
+      mpn_recdump (p, n);
+      printf ("%lX", lo);
+    }
+}
+
+void
+mpn_dump (mp_srcptr p, mp_size_t n)
+{
+  mp_ptr tp;
+  TMP_DECL;
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+  MPN_COPY (tp, p, n);  /* mpn_recdump shifts its operand in place */
+  mpn_recdump (tp, n);
+  printf ("\n");
+  TMP_FREE;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/fib2_ui.c b/third_party/gmp/mpn/generic/fib2_ui.c
new file mode 100644
index 0000000..0b81571
--- /dev/null
+++ b/third_party/gmp/mpn/generic/fib2_ui.c
@@ -0,0 +1,174 @@
+/* mpn_fib2_ui -- calculate Fibonacci numbers.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
+   MPN_FIB2_SIZE(n) limbs.
+
+   The return value is the actual number of limbs stored, this will be at
+   least 1.  fp[size-1] will be non-zero, except when n==0, in which case
+   fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
+   (for n>0).
+
+   Notes: F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+
+   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+   low limb.
+
+   In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+   low limb.
+*/
+
+mp_size_t
+mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
+{
+  mp_size_t      size;
+  unsigned long  nfirst, mask;
+
+  TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
+
+  ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
+
+  /* Take a starting pair from the table. */
+  mask = 1;
+  for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
+    mask <<= 1;
+  TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
+
+  f1p[0] = FIB_TABLE ((int) nfirst - 1);
+  fp[0]  = FIB_TABLE (nfirst);
+  size = 1;
+
+  /* Skip to the end if the table lookup gives the final answer. */
+  if (mask != 1)
+    {
+      mp_size_t  alloc;
+      mp_ptr        xp;
+      TMP_DECL;
+
+      TMP_MARK;
+      alloc = MPN_FIB2_SIZE (n);
+      xp = TMP_ALLOC_LIMBS (alloc);
+
+      do
+	{
+	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+	     n&mask upwards.
+
+	     The next bit of n is n&(mask>>1) and we'll double to the pair
+	     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+	     that bit is 0 or 1 respectively.  */
+
+	  TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
+			 n >> refmpn_count_trailing_zeros(mask),
+			 mask, size, alloc);
+		 mpn_trace ("fp ", fp, size);
+		 mpn_trace ("f1p", f1p, size));
+
+	  /* fp normalized, f1p at most one high zero */
+	  ASSERT (fp[size-1] != 0);
+	  ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
+
+	  /* f1p[size-1] might be zero, but this occurs rarely, so it's not
+	     worth bothering checking for it */
+	  ASSERT (alloc >= 2*size);
+	  mpn_sqr (xp, fp,  size);
+	  mpn_sqr (fp, f1p, size);
+	  size *= 2;
+
+	  /* Shrink if possible.  Since fp was normalized there'll be at
+	     most one high zero on xp (and if there is then there's one on
+	     fp too).  */
+	  ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+	  size -= (xp[size-1] == 0);
+	  ASSERT (xp[size-1] != 0);  /* only one xp high zero */
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+	  f1p[size] = mpn_add_n (f1p, xp, fp, size);
+
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+	     n&mask is the low bit of our implied k.  */
+
+	  ASSERT ((fp[0] & 2) == 0);
+	  /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+	  fp[0] |= (n & mask ? 2 : 0);			/* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
+	  MPN_INCR_U(fp, size + 1, (n & mask ? 0 : 2));	/* possible +2 */
+#else
+	  {
+	    mp_limb_t  c;
+
+	    c = mpn_lshift (xp, xp, size, 2);
+	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
+	    c -= mpn_sub_n (fp, xp, fp, size);
+	    fp[size] = c;
+	  }
+#endif
+	  ASSERT (alloc >= size+1);
+	  size += (fp[size] != 0);
+
+	  /* now n&mask is the new bit of n being considered */
+	  mask >>= 1;
+
+	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+	     F[2k+1] and F[2k-1].  */
+	  if (n & mask)
+	    ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
+	  else {
+	    ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
+
+	    /* Can have a high zero after replacing F[2k+1] with F[2k].
+	       f1p will have a high zero if fp does. */
+	    ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+	    size -= (fp[size-1] == 0);
+	  }
+	}
+      while (mask != 1);
+
+      TMP_FREE;
+    }
+
+  TRACE (printf ("done size=%ld\n", size);
+	 mpn_trace ("fp ", fp, size);
+	 mpn_trace ("f1p", f1p, size));
+
+  return size;
+}
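
Annotation: the identities the doubling loop relies on are easy to check numerically. A uint64_t sketch verifying F[2k-1] = F[k]^2 + F[k-1]^2, F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k, and the final subtraction F[2k] = F[2k+1] - F[2k-1]:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint64_t f[64];
    f[0] = 0; f[1] = 1;
    for (int i = 2; i < 64; i++)
      f[i] = f[i-1] + f[i-2];           /* reference values; F[63] fits */

    for (int k = 1; k <= 31; k++)
      {
        int64_t two = (k & 1) ? -2 : 2;         /* 2*(-1)^k */
        assert (f[2*k-1] == f[k]*f[k] + f[k-1]*f[k-1]);
        assert (f[2*k+1] == 4*f[k]*f[k] - f[k-1]*f[k-1] + two);
        assert (f[2*k] == f[2*k+1] - f[2*k-1]);
      }
    return 0;
  }

The +/-2 term is exactly what the loop applies by ORing a 2 into the low limb of whichever square is 0 or 1 mod 4.
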
diff --git a/third_party/gmp/mpn/generic/fib2m.c b/third_party/gmp/mpn/generic/fib2m.c
new file mode 100644
index 0000000..89d2b86
--- /dev/null
+++ b/third_party/gmp/mpn/generic/fib2m.c
@@ -0,0 +1,252 @@
+/* mpn_fib2m -- calculate Fibonacci numbers, modulo m.
+
+Contributed to the GNU project by Marco Bodrato, based on the previous
+fib2_ui.c file.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+   returns the sign of {ap,n}-{bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t  x, y;
+  while (--n >= 0)
+    {
+      x = ap[n];
+      y = bp[n];
+      if (x != y)
+        {
+          ++n;
+          if (x > y)
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, n));
+              return 1;
+            }
+          else
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, n));
+              return -1;
+            }
+        }
+      rp[n] = 0;
+    }
+  return 0;
+}
+
+/* Store F[n] at fp and F[n-1] at f1p.  Both are computed modulo m.
+   fp and f1p should have room for mn*2+1 limbs.
+
+   The sign of one or both of the values may be flipped (m-F instead of F);
+   the return value is 0 (zero) if the signs are coherent (both positive
+   or both negative) and 1 (one) otherwise.
+
+   Notes:
+
+   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+   low limb.
+
+   In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+   low limb.
+
+   TODO: Should {tp, 2 * mn} be passed as a scratch pointer?
+   Should the call to mpn_fib2_ui() obtain (up to) 2*mn limbs?
+*/
+
+int
+mpn_fib2m (mp_ptr fp, mp_ptr f1p, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn)
+{
+  unsigned long	nfirst;
+  mp_limb_t	nh;
+  mp_bitcnt_t	nbi;
+  mp_size_t	sn, fn;
+  int		fcnt, ncnt;
+
+  ASSERT (! MPN_OVERLAP_P (fp, MAX(2*mn+1,5), f1p, MAX(2*mn+1,5)));
+  ASSERT (nn > 0 && np[nn - 1] != 0);
+
+  /* Estimate the maximal n such that fibonacci(n) fits in mn limbs. */
+#if GMP_NUMB_BITS % 16 == 0
+  if (UNLIKELY (ULONG_MAX / (23 * (GMP_NUMB_BITS / 16)) <= mn))
+    nfirst = ULONG_MAX;
+  else
+    nfirst = mn * (23 * (GMP_NUMB_BITS / 16));
+#else
+  {
+    mp_bitcnt_t	mbi;
+    mbi = (mp_bitcnt_t) mn * GMP_NUMB_BITS;
+
+    if (UNLIKELY (ULONG_MAX / 23 < mbi))
+      {
+	if (UNLIKELY (ULONG_MAX / 23 * 16 <= mbi))
+	  nfirst = ULONG_MAX;
+	else
+	  nfirst = mbi / 16 * 23;
+      }
+    else
+      nfirst = mbi * 23 / 16;
+  }
+#endif
+
+  sn = nn - 1;
+  nh = np[sn];
+  count_leading_zeros (ncnt, nh);
+  count_leading_zeros (fcnt, nfirst);
+
+  if (fcnt >= ncnt)
+    {
+      ncnt = fcnt - ncnt;
+      nh >>= ncnt;
+    }
+  else if (sn > 0)
+    {
+      ncnt -= fcnt;
+      nh <<= ncnt;
+      ncnt = GMP_NUMB_BITS - ncnt;
+      --sn;
+      nh |= np[sn] >> ncnt;
+    }
+  else
+    ncnt = 0;
+
+  nbi = sn * GMP_NUMB_BITS + ncnt;
+  if (nh > nfirst)
+    {
+      nh >>= 1;
+      ++nbi;
+    }
+
+  ASSERT (nh <= nfirst);
+  /* Take a starting pair from mpn_fib2_ui. */
+  fn = mpn_fib2_ui (fp, f1p, nh);
+  MPN_ZERO (fp + fn, mn - fn);
+  MPN_ZERO (f1p + fn, mn - fn);
+
+  if (nbi == 0)
+    {
+      if (fn == mn)
+	{
+	  mp_limb_t qp[2];
+	  mpn_tdiv_qr (qp, fp, 0, fp, fn, mp, mn);
+	  mpn_tdiv_qr (qp, f1p, 0, f1p, fn, mp, mn);
+	}
+
+      return 0;
+    }
+  else
+    {
+      mp_ptr	tp;
+      unsigned	pb = nh & 1;
+      int	neg;
+      TMP_DECL;
+
+      TMP_MARK;
+
+      tp = TMP_ALLOC_LIMBS (2 * mn + (mn < 2));
+
+      do
+	{
+	  mp_ptr	rp;
+	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+	     nbi upwards.
+
+	     Based on the next bit of n, we'll double to the pair
+	     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+	     that bit is 0 or 1 respectively.  */
+
+	  mpn_sqr (tp, fp,  mn);
+	  mpn_sqr (fp, f1p, mn);
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+	  f1p[2 * mn] = mpn_add_n (f1p, tp, fp, 2 * mn);
+
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+	     pb is the low bit of our implied k.  */
+
+	  /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+	  ASSERT ((fp[0] & 2) == 0);
+	  ASSERT (pb == (pb & 1));
+	  ASSERT ((fp[0] + (pb ? 2 : 0)) == (fp[0] | (pb << 1)));
+	  fp[0] |= pb << 1;		/* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+	  fp[2 * mn] = 1 + mpn_rsblsh2_n (fp, fp, tp, 2 * mn);
+	  MPN_INCR_U(fp, 2 * mn + 1, (1 ^ pb) << 1);	/* possible +2 */
+	  fp[2 * mn] = (fp[2 * mn] - 1) & GMP_NUMB_MAX;
+#else
+	  {
+	    mp_limb_t  c;
+
+	    c = mpn_lshift (tp, tp, 2 * mn, 2);
+	    tp[0] |= (1 ^ pb) << 1;	/* possible +2 */
+	    c -= mpn_sub_n (fp, tp, fp, 2 * mn);
+	    fp[2 * mn] = c & GMP_NUMB_MAX;
+	  }
+#endif
+	  neg = fp[2 * mn] == GMP_NUMB_MAX;
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2 */
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k */
+
+	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+	     F[2k+1] and F[2k-1].  */
+	  --nbi;
+	  pb = (np [nbi / GMP_NUMB_BITS] >> (nbi % GMP_NUMB_BITS)) & 1;
+	  rp = pb ? f1p : fp;
+	  if (neg)
+	    {
+	      /* Calculate -(F[2k+1] - F[2k-1]) */
+	      rp[2 * mn] = f1p[2 * mn] + 1 - mpn_sub_n (rp, f1p, fp, 2 * mn);
+	      neg = ! pb;
+	      if (pb) /* fp not overwritten, negate it. */
+		fp [2 * mn] = 1 ^ mpn_neg (fp, fp, 2 * mn);
+	    }
+	  else
+	    {
+	      neg = abs_sub_n (rp, fp, f1p, 2 * mn + 1) < 0;
+	    }
+
+	  mpn_tdiv_qr (tp, fp, 0, fp, 2 * mn + 1, mp, mn);
+	  mpn_tdiv_qr (tp, f1p, 0, f1p, 2 * mn + 1, mp, mn);
+	}
+      while (nbi != 0);
+
+      TMP_FREE;
+
+      return neg;
+    }
+}
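
Annotation: mpn_fib2m consumes the remaining bits of n exactly like fib2_ui's loop, but reduces modulo m after every doubling (and tracks signs so the subtraction never needs full magnitudes). A self-contained word-size sketch of the same bit-by-bit doubling, using the standard identities F(2k) = F(k)*(2F(k+1)-F(k)) and F(2k+1) = F(k)^2 + F(k+1)^2 rather than GMP's sign-tracking variant; m is kept below 2^31 so all products fit in 64 bits:

  #include <assert.h>
  #include <stdint.h>

  /* Returns F(n) mod m; stores F(n-1) mod m in *f1 (with F(-1) = 1). */
  static uint32_t
  fib2m_u32 (uint32_t *f1, uint64_t n, uint32_t m)
  {
    uint64_t a = 0, b = 1;                      /* F(k), F(k+1); k = 0 */
    for (int i = 63; i >= 0; i--)
      {
        uint64_t c = a * ((2 * b + m - a) % m) % m;     /* F(2k) */
        uint64_t d = (a * a + b * b) % m;               /* F(2k+1) */
        if ((n >> i) & 1)
          { a = d; b = (c + d) % m; }                   /* k <- 2k+1 */
        else
          { a = c; b = d; }                             /* k <- 2k */
      }
    *f1 = (uint32_t) ((b + m - a) % m);         /* F(n-1) = F(n+1) - F(n) */
    return (uint32_t) a;
  }

  int
  main (void)
  {
    uint32_t m = 1000000007, f1;
    uint64_t x = 0, y = 1;                      /* slow reference */
    for (int i = 0; i < 90; i++)
      { uint64_t t = (x + y) % m; x = y; y = t; }       /* x = F(90) mod m */
    assert (fib2m_u32 (&f1, 90, m) == x);
    return 0;
  }
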
diff --git a/third_party/gmp/mpn/generic/gcd.c b/third_party/gmp/mpn/generic/gcd.c
new file mode 100644
index 0000000..3f92cbf
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd.c
@@ -0,0 +1,266 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     computation, Math. Comp. 77 (2008), 589-607.
+
+  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+  then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
+
+struct gcd_ctx
+{
+  mp_ptr gp;
+  mp_size_t gn;
+};
+
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	  mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
+  MPN_COPY (ctx->gp, gp, gn);
+  ctx->gn = gn;
+}
+
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+
+  struct gcd_ctx ctx;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (usize >= n);
+  ASSERT (n > 0);
+  ASSERT (vp[n-1] > 0);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  /* For initial division */
+  scratch = usize - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+#if TUNE_GCD_P
+  if (CHOOSE_P (n) > 0)
+#else
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t scratch;
+#if TUNE_GCD_P
+      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+	 is increasing */
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+      hgcd_scratch = mpn_hgcd_itch (n);
+      update_scratch = 2*(n - 1);
+#else
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+#endif
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+	talloc = scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (usize > n)
+    {
+      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+      if (mpn_zero_p (up, n))
+	{
+	  MPN_COPY (gp, vp, n);
+	  ctx.gn = n;
+	  goto done;
+	}
+    }
+
+  ctx.gp = gp;
+
+#if TUNE_GCD_P
+  while (CHOOSE_P (n) > 0)
+#else
+  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+	}
+      else
+	{
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t uh, ul, vh, vl;
+      mp_limb_t mask;
+
+      mask = up[n-1] | vp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  uh = up[n-1]; ul = up[n-2];
+	  vh = vp[n-1]; vl = vp[n-2];
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+	  MP_PTR_SWAP (up, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  ASSERT(up[n-1] | vp[n-1]);
+
+  /* Due to the calling convention for mpn_gcd, at most one can be even. */
+  if ((up[0] & 1) == 0)
+    MP_PTR_SWAP (up, vp);
+  ASSERT ((up[0] & 1) != 0);
+
+  {
+    mp_limb_t u0, u1, v0, v1;
+    mp_double_limb_t g;
+
+    u0 = up[0];
+    v0 = vp[0];
+
+    if (n == 1)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	*gp = mpn_gcd_11 (u0, v0 >> cnt);
+	ctx.gn = 1;
+	goto done;
+      }
+
+    v1 = vp[1];
+    if (UNLIKELY (v0 == 0))
+      {
+	v0 = v1;
+	v1 = 0;
+	/* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
+	   when this situation occurs internally.  */
+      }
+    if ((v0 & 1) == 0)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
+	v1 >>= cnt;
+      }
+
+    u1 = up[1];
+    g = mpn_gcd_22 (u1, u0, v1, v0);
+    gp[0] = g.d0;
+    gp[1] = g.d1;
+    ctx.gn = 1 + (g.d1 > 0);
+  }
+done:
+  TMP_FREE;
+  return ctx.gn;
+}
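
Annotation: the hgcd2 feed above needs the top two limbs' worth of bits of u and v shifted identically, so the larger operand becomes normalized; MPN_EXTRACT_NUMB does the window extraction. A sketch with 64-bit limbs, __builtin_clzll standing in for count_leading_zeros (the helper name is illustrative):

  #include <assert.h>
  #include <stdint.h>

  /* 64-bit window starting at the highest set bit of the two top limbs;
     shift is the leading-zero count of the high limb (0..63). */
  static uint64_t
  extract_high64 (uint64_t hi, uint64_t lo, int shift)
  {
    return shift == 0 ? hi : (hi << shift) | (lo >> (64 - shift));
  }

  int
  main (void)
  {
    uint64_t u[3] = { 0x1111111111111111ull, 0xdeadbeefdeadbeefull, 0x1ull };
    int shift = __builtin_clzll (u[2]);         /* 63 here */
    uint64_t uh = extract_high64 (u[2], u[1], shift);
    uint64_t ul = extract_high64 (u[1], u[0], shift);
    assert (uh >> 63 == 1);             /* normalized, as mpn_hgcd2 wants */
    assert (uh == ((1ull << 63) | (0xdeadbeefdeadbeefull >> 1)));
    assert (ul == 0x8888888888888888ull);
    return 0;
  }
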
diff --git a/third_party/gmp/mpn/generic/gcd_1.c b/third_party/gmp/mpn/generic/gcd_1.c
new file mode 100644
index 0000000..22b1422
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd_1.c
@@ -0,0 +1,103 @@
+/* mpn_gcd_1 -- mpn and limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0.  It would be tough to make it work for
+   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
+
+   The threshold for doing u%v when size==1 will vary by CPU according to
+   the speed of a division and the code generated for the main loop.  Any
+   tuning for this is left to a CPU specific implementation.  */
+
+mp_limb_t
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+{
+  mp_limb_t      ulimb;
+  unsigned long  zero_bits, u_low_zero_bits;
+  int c;
+
+  ASSERT (size >= 1);
+  ASSERT (vlimb != 0);
+  ASSERT_MPN_NONZERO_P (up, size);
+
+  ulimb = up[0];
+
+  /* Need vlimb odd for modexact, want it odd to get common zeros. */
+  count_trailing_zeros (zero_bits, vlimb);
+  vlimb >>= zero_bits;
+
+  if (size > 1)
+    {
+      /* Must get common zeros before the mod reduction.  If ulimb==0 then
+	 vlimb already gives the common zeros.  */
+      if (ulimb != 0)
+	{
+	  count_trailing_zeros (u_low_zero_bits, ulimb);
+	  zero_bits = MIN (zero_bits, u_low_zero_bits);
+	}
+
+      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
+      if (ulimb == 0)
+	goto done;
+
+      count_trailing_zeros (c, ulimb);
+      ulimb >>= c;
+    }
+  else
+    {
+      /* size==1, so up[0]!=0 */
+      count_trailing_zeros (u_low_zero_bits, ulimb);
+      ulimb >>= u_low_zero_bits;
+      zero_bits = MIN (zero_bits, u_low_zero_bits);
+
+      /* make u bigger */
+      if (vlimb > ulimb)
+	MP_LIMB_T_SWAP (ulimb, vlimb);
+
+      /* if u is much bigger than v, reduce using a division rather than
+	 chipping away at it bit-by-bit */
+      if ((ulimb >> 16) > vlimb)
+	{
+	  ulimb %= vlimb;
+	  if (ulimb == 0)
+	    goto done;
+
+	  count_trailing_zeros (c, ulimb);
+	  ulimb >>= c;
+	}
+    }
+
+  vlimb = mpn_gcd_11 (ulimb, vlimb);
+
+ done:
+  return vlimb << zero_bits;
+}
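
Annotation: the zero_bits bookkeeping above implements gcd(u, v) = 2^t * gcd(u', v'), where t counts the twos common to u and v and u', v' are their odd parts; once both sides are odd, only the odd-odd kernel remains. A word-size sketch with plain Euclid as that kernel (GMP calls mpn_gcd_11 instead):

  #include <assert.h>
  #include <stdint.h>

  static uint64_t
  gcd_u64 (uint64_t u, uint64_t v)
  {
    if (u == 0) return v;
    if (v == 0) return u;
    int tu = __builtin_ctzll (u), tv = __builtin_ctzll (v);
    int t = tu < tv ? tu : tv;          /* common factor 2^t */
    u >>= tu;                           /* odd parts: twos beyond the   */
    v >>= tv;                           /* common t share nothing with  */
                                        /* the other, odd, operand      */
    while (v != 0)
      { uint64_t r = u % v; u = v; v = r; }
    return u << t;
  }

  int
  main (void)
  {
    assert (gcd_u64 (48, 36) == 12);
    assert (gcd_u64 (1ull << 20, 3ull << 10) == 1ull << 10);
    return 0;
  }
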
diff --git a/third_party/gmp/mpn/generic/gcd_11.c b/third_party/gmp/mpn/generic/gcd_11.c
new file mode 100644
index 0000000..214e45c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd_11.c
@@ -0,0 +1,74 @@
+/* mpn_gcd_11 -- limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  ASSERT (u & v & 1);
+
+  /* In this loop, we represent the odd numbers u and v without the
+     redundant least significant one bit. This reduction in size by one
+     bit ensures that the high bit of t, below, is set if and only if
+     v > u. */
+
+  u >>= 1;
+  v >>= 1;
+
+  while (u != v)
+    {
+      mp_limb_t t;
+      mp_limb_t vgtu;
+      int c;
+
+      t = u - v;
+      vgtu = LIMB_HIGHBIT_TO_MASK (t);
+
+      /* v <-- min (u, v) */
+      v += (vgtu & t);
+
+      /* u <-- |u - v| */
+      u = (t ^ vgtu) - vgtu;
+
+      count_trailing_zeros (c, t);
+      /* We have c <= GMP_LIMB_BITS - 2 here, so that
+
+	   u >>= (c + 1);
+
+	 would be safe. But unlike the addition c + 1, a separate
+	 shift by 1 is independent of c, and can be executed in
+	 parallel with count_trailing_zeros. */
+      u = (u >> 1) >> c;
+    }
+  return (u << 1) + 1;
+}
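
Annotation: the same loop written for uint64_t, with __builtin_ctzll standing in for count_trailing_zeros, makes the branch-free selection explicit: the sign bit of t, smeared into a full-word mask, picks min(u,v) and |u-v| without a conditional. A hedged sketch, not GMP's code:

  #include <assert.h>
  #include <stdint.h>

  static uint64_t
  gcd_11_u64 (uint64_t u, uint64_t v)   /* u, v odd */
  {
    assert (u & v & 1);
    u >>= 1;                            /* drop the known low 1 bit */
    v >>= 1;
    while (u != v)
      {
        uint64_t t = u - v;
        uint64_t vgtu = (uint64_t) 0 - (t >> 63);  /* all ones iff v > u */
        v += vgtu & t;                  /* v <-- min (u, v) */
        u = (t ^ vgtu) - vgtu;          /* u <-- |u - v| */
        u = (u >> 1) >> __builtin_ctzll (t);       /* strip the new twos */
      }
    return (u << 1) + 1;
  }

  int
  main (void)
  {
    assert (gcd_11_u64 (21, 35) == 7);
    assert (gcd_11_u64 (1000003, 2000005) == 1);
    return 0;
  }

Because the low 1 bit is implicit, the final shift strips c+1 factors of two in one go, exactly as the comment above describes.
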
diff --git a/third_party/gmp/mpn/generic/gcd_22.c b/third_party/gmp/mpn/generic/gcd_22.c
new file mode 100644
index 0000000..d97f096
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd_22.c
@@ -0,0 +1,131 @@
+/* mpn_gcd_22 -- double limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+mp_double_limb_t
+mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
+{
+  mp_double_limb_t g;
+  ASSERT (u0 & v0 & 1);
+
+  /* Implicit least significant bit */
+  u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1));
+  u1 >>= 1;
+
+  v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1));
+  v1 >>= 1;
+
+  while (u1 || v1) /* u1 == 0 can happen at most twice per call */
+    {
+      mp_limb_t vgtu, t1, t0;
+      sub_ddmmss (t1, t0, u1, u0, v1, v0);
+      vgtu = LIMB_HIGHBIT_TO_MASK(t1);
+
+      if (UNLIKELY (t0 == 0))
+	{
+	  if (t1 == 0)
+	    {
+	      g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1));
+	      g.d0 = (u0 << 1) | 1;
+	      return g;
+	    }
+	  int c;
+	  count_trailing_zeros (c, t1);
+
+	  /* v1 = min (u1, v1) */
+	  v1 += (vgtu & t1);
+	  /* u0 = |u1 - v1| */
+	  u0 = (t1 ^ vgtu) - vgtu;
+	  ASSERT (c < GMP_LIMB_BITS - 1);
+	  u0 >>= c + 1;
+	  u1 = 0;
+	}
+      else
+	{
+	  int c;
+	  count_trailing_zeros (c, t0);
+	  c++;
+	  /* V <-- min (U, V).
+
+	     Assembly version should use cmov. Another alternative,
+	     avoiding carry propagation, would be
+
+	     v0 += vgtu & t0; v1 += vgtu & (u1 - v1);
+	  */
+	  add_ssaaaa (v1, v0, v1, v0, vgtu & t1, vgtu & t0);
+	  /* U  <--  |U - V|
+	     No carry handling needed in this conditional negation,
+	     since t0 != 0. */
+	  u0 = (t0 ^ vgtu) - vgtu;
+	  u1 = t1 ^ vgtu;
+	  if (UNLIKELY (c == GMP_LIMB_BITS))
+	    {
+	      u0 = u1;
+	      u1 = 0;
+	    }
+	  else
+	    {
+	      u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c));
+	      u1 >>= c;
+	    }
+	}
+    }
+  while ((v0 | u0) & GMP_LIMB_HIGHBIT)
+    { /* At most two iterations */
+      mp_limb_t vgtu, t0;
+      int c;
+      sub_ddmmss (vgtu, t0, 0, u0, 0, v0);
+      if (UNLIKELY (t0 == 0))
+	{
+	  g.d1 = u0 >> (GMP_LIMB_BITS - 1);
+	  g.d0 = (u0 << 1) | 1;
+	  return g;
+	}
+
+      /* v <-- min (u, v) */
+      v0 += (vgtu & t0);
+
+      /* u <-- |u - v| */
+      u0 = (t0 ^ vgtu) - vgtu;
+
+      count_trailing_zeros (c, t0);
+      u0 = (u0 >> 1) >> c;
+    }
+
+  g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1);
+  g.d1 = 0;
+  return g;
+}
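
Annotation: the double-limb steps lean on the two-word primitives sub_ddmmss and add_ssaaaa from longlong.h. Their semantics in a uint64_t sketch (GMP's macros expand to carry-flag instructions where the CPU provides them):

  #include <assert.h>
  #include <stdint.h>

  /* (d1,d0) = (m1,m0) - (s1,s0), propagating the borrow. */
  static void
  sub_ddmmss_u64 (uint64_t *d1, uint64_t *d0,
                  uint64_t m1, uint64_t m0, uint64_t s1, uint64_t s0)
  {
    *d0 = m0 - s0;
    *d1 = m1 - s1 - (m0 < s0);
  }

  /* (s1,s0) = (a1,a0) + (b1,b0), propagating the carry. */
  static void
  add_ssaaaa_u64 (uint64_t *s1, uint64_t *s0,
                  uint64_t a1, uint64_t a0, uint64_t b1, uint64_t b0)
  {
    *s0 = a0 + b0;
    *s1 = a1 + b1 + (*s0 < a0);
  }

  int
  main (void)
  {
    uint64_t h, l;
    sub_ddmmss_u64 (&h, &l, 1, 0, 0, 1);        /* 2^64 - 1 */
    assert (h == 0 && l == UINT64_MAX);
    add_ssaaaa_u64 (&h, &l, h, l, 0, 1);        /* back to 2^64 */
    assert (h == 1 && l == 0);
    return 0;
  }
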
diff --git a/third_party/gmp/mpn/generic/gcd_subdiv_step.c b/third_party/gmp/mpn/generic/gcd_subdiv_step.c
new file mode 100644
index 0000000..9c3b88d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd_subdiv_step.c
@@ -0,0 +1,204 @@
+/* gcd_subdiv_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>		/* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+   b is small, or the difference is small. Perform one subtraction
+   followed by one division. The normal case is to compute the reduced
+   a and b, and return the new size.
+
+   If s == 0 (used for gcd and gcdext), returns zero if the gcd is
+   found.
+
+   If s > 0, don't reduce to size <= s, and return zero if no
+   reduction is possible (if either a, b or |a-b| is of size <= s). */
+
+/* The hook function is called as
+
+     hook(ctx, gp, gn, qp, qn, d)
+
+   in the following cases:
+
+   + If A = B at the start, G is the gcd, Q is NULL, d = -1.
+
+   + If one input is zero at the start, G is the gcd, Q is NULL,
+     d = 0 if A = G and d = 1 if B = G.
+
+   Otherwise, if d = 0 we have just subtracted a multiple of A from B,
+   and if d = 1 we have subtracted a multiple of B from A.
+
+   + If A = B after subtraction, G is the gcd, Q is NULL.
+
+   + If we get a zero remainder after division, G is the gcd, Q is the
+     quotient.
+
+   + Otherwise, G is NULL, Q is the quotient (often 1).
+
+ */
+
+mp_size_t
+mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
+		     gcd_subdiv_step_hook *hook, void *ctx,
+		     mp_ptr tp)
+{
+  static const mp_limb_t one = CNST_LIMB(1);
+  mp_size_t an, bn, qn;
+
+  int swapped;
+
+  ASSERT (n > 0);
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+  an = bn = n;
+  MPN_NORMALIZE (ap, an);
+  MPN_NORMALIZE (bp, bn);
+
+  swapped = 0;
+
+  /* Arrange so that a < b, subtract b -= a, and maintain
+     normalization. */
+  if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	{
+	  /* For gcdext, return the smallest of the two cofactors, so
+	     pass d = -1. */
+	  if (s == 0)
+	    hook (ctx, ap, an, NULL, 0, -1);
+	  return 0;
+	}
+      else if (c > 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  swapped ^= 1;
+	}
+    }
+  else
+    {
+      if (an > bn)
+	{
+	  MPN_PTR_SWAP (ap, an, bp, bn);
+	  swapped ^= 1;
+	}
+    }
+  if (an <= s)
+    {
+      if (s == 0)
+	hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
+      return 0;
+    }
+
+  ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (bn > 0);
+
+  if (bn <= s)
+    {
+      /* Undo subtraction. */
+      mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+      if (cy > 0)
+	bp[an] = cy;
+      return 0;
+    }
+
+  /* Arrange so that a < b */
+  if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	{
+	  if (s > 0)
+	    /* Just record subtraction and return */
+	    hook (ctx, NULL, 0, &one, 1, swapped);
+	  else
+	    /* Found gcd. */
+	    hook (ctx, bp, bn, NULL, 0, swapped);
+	  return 0;
+	}
+
+      hook (ctx, NULL, 0, &one, 1, swapped);
+
+      if (c > 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  swapped ^= 1;
+	}
+    }
+  else
+    {
+      hook (ctx, NULL, 0, &one, 1, swapped);
+
+      if (an > bn)
+	{
+	  MPN_PTR_SWAP (ap, an, bp, bn);
+	  swapped ^= 1;
+	}
+    }
+
+  mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
+  qn = bn - an + 1;
+  bn = an;
+  MPN_NORMALIZE (bp, bn);
+
+  if (UNLIKELY (bn <= s))
+    {
+      if (s == 0)
+	{
+	  hook (ctx, ap, an, tp, qn, swapped);
+	  return 0;
+	}
+
+      /* Quotient is one too large, so decrement it and add back A. */
+      if (bn > 0)
+	{
+	  mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+	  if (cy)
+	    bp[an++] = cy;
+	}
+      else
+	MPN_COPY (bp, ap, an);
+
+      MPN_DECR_U (tp, qn, 1);
+    }
+
+  hook (ctx, NULL, 0, tp, qn, swapped);
+  return an;
+}
diff --git a/third_party/gmp/mpn/generic/gcdext.c b/third_party/gmp/mpn/generic/gcdext.c
new file mode 100644
index 0000000..5501480
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcdext.c
@@ -0,0 +1,557 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
+   the size is returned (if inputs are non-normalized, result may be
+   non-normalized too). Temporary space needed is M->n + n.
+ */
+static mp_size_t
+hgcd_mul_matrix_vector (struct hgcd_matrix *M,
+			mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     t  = u00 * a
+     r  = u10 * b
+     r += t;
+
+     t  = u11 * b
+     b  = u01 * a
+     b += t;
+  */
+
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[0][0], M->n, ap, n);
+      mpn_mul (rp, M->p[1][0], M->n, bp, n);
+    }
+  else
+    {
+      mpn_mul (tp, ap, n, M->p[0][0], M->n);
+      mpn_mul (rp, bp, n, M->p[1][0], M->n);
+    }
+
+  ah = mpn_add_n (rp, rp, tp, n + M->n);
+
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[1][1], M->n, bp, n);
+      mpn_mul (bp, M->p[0][1], M->n, ap, n);
+    }
+  else
+    {
+      mpn_mul (tp, bp, n, M->p[1][1], M->n);
+      mpn_mul (bp, ap, n, M->p[0][1], M->n);
+    }
+  bh = mpn_add_n (bp, bp, tp, n + M->n);
+
+  n += M->n;
+  if ( (ah | bh) > 0)
+    {
+      rp[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* Normalize */
+      while ( (rp[n-1] | bp[n-1]) == 0)
+	n--;
+    }
+
+  return n;
+}
+
+#define COMPUTE_V_ITCH(n) (2*(n))
+
+/* Computes |v| = |(g - u a)| / b, where u may be positive or
+   negative, and v is of the opposite sign. max(a, b) is of size n, u and
+   v at most size n, and v must have space for n+1 limbs. */
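+
+/* Small numeric check of the identity above (illustrative only): with
+   a = 12, b = 8, g = 4 and cofactor u = 1 (usize > 0), we get
+   |v| = (u a - g) / b = (12 - 4) / 8 = 1, and v = -1, of opposite
+   sign, indeed satisfies g = u a + v b = 12 - 8 = 4.  */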
+static mp_size_t
+compute_v (mp_ptr vp,
+	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+	   mp_srcptr gp, mp_size_t gn,
+	   mp_srcptr up, mp_size_t usize,
+	   mp_ptr tp)
+{
+  mp_size_t size;
+  mp_size_t an;
+  mp_size_t bn;
+  mp_size_t vn;
+
+  ASSERT (n > 0);
+  ASSERT (gn > 0);
+  ASSERT (usize != 0);
+
+  size = ABS (usize);
+  ASSERT (size <= n);
+  ASSERT (up[size-1] > 0);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+  ASSERT (gn <= an);
+
+  if (an >= size)
+    mpn_mul (tp, ap, an, up, size);
+  else
+    mpn_mul (tp, up, size, ap, an);
+
+  size += an;
+
+  if (usize > 0)
+    {
+      /* |v| = -v = (u a - g) / b */
+
+      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
+      MPN_NORMALIZE (tp, size);
+      if (size == 0)
+	return 0;
+    }
+  else
+    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
+	 (g + |u| a) always fits in (|usize| + an) limbs. */
+
+      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
+      size -= (tp[size - 1] == 0);
+    }
+
+  /* Now divide t / b. There must be no remainder */
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (size >= bn);
+
+  vn = size + 1 - bn;
+  ASSERT (vn <= n + 1);
+
+  mpn_divexact (vp, tp, size, bp, bn);
+  vn -= (vp[vn-1] == 0);
+
+  return vn;
+}
+
+/* Temporary storage:
+
+   Initial division: Quotient of at most an - n + 1 <= an limbs.
+
+   Storage for u0 and u1: 2(n+1).
+
+   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
+
+   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
+
+   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
+
+   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.
+
+   For the lehmer call after the loop, let T denote
+   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
+   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
+   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
+   sufficient for both operations.
+
+*/
+
+/* Optimal choice of p seems difficult. In each iteration the division
+ * of work between hgcd and the updates of u0 and u1 depends on the
+ * current size of the u. It may be desirable to use a different
+ * choice of p in each iteration. Also the input size seems to matter;
+ * choosing p = n / 3 in the first iteration seems to improve
+ * performance slightly for input size just above the threshold, but
+ * degrade performance for larger inputs. */
+#define CHOOSE_P_1(n) ((n) / 2)
+#define CHOOSE_P_2(n) ((n) / 3)
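+
+/* Caller's-eye sketch of mpn_gcdext (illustrative; the operand values
+   are arbitrary, chosen to satisfy the asserts below, and both
+   operands are clobbered):
+
+     mp_limb_t a[2] = { 0, 5 };            a = 5 * 2^GMP_NUMB_BITS
+     mp_limb_t b[2] = { 0, 3 };            b = 3 * 2^GMP_NUMB_BITS
+     mp_limb_t g[2], u[3];
+     mp_size_t usize;
+     mp_size_t gn = mpn_gcdext (g, u, &usize, a, 2, b, 2);
+
+   leaves {g,gn} = gcd(a,b) = 2^GMP_NUMB_BITS (so gn == 2 here) and
+   {u,|usize|} the cofactor of a, to be negated when usize < 0; u has
+   room for n + 1 limbs in this sketch.  */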
+
+mp_size_t
+mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
+	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_size_t ualloc = n + 1;
+
+  struct gcdext_ctx ctx;
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (an >= n);
+  ASSERT (n > 0);
+  ASSERT (bp[n-1] > 0);
+
+  TMP_MARK;
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
+
+  /* For initial division */
+  scratch = an - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      /* For hgcd loop. */
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p1 = CHOOSE_P_1 (n);
+      mp_size_t p2 = CHOOSE_P_2 (n);
+      mp_size_t min_p = MIN(p1, p2);
+      mp_size_t max_p = MAX(p1, p2);
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
+      hgcd_scratch = mpn_hgcd_itch (n - min_p);
+      update_scratch = max_p + n - 1;
+
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+	talloc = scratch;
+
+      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
+	 copies of a and b. */
+      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
+	+ 3*GCDEXT_DC_THRESHOLD;
+
+      if (scratch > talloc)
+	talloc = scratch;
+
+      /* Cofactors u0 and u1 */
+      talloc += 2*(n+1);
+    }
+
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (an > n)
+    {
+      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
+
+      if (mpn_zero_p (ap, n))
+	{
+	  MPN_COPY (gp, bp, n);
+	  *usizep = 0;
+	  TMP_FREE;
+	  return n;
+	}
+    }
+
+  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+
+  MPN_ZERO (tp, 2*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usizep;
+
+  {
+    /* For the first hgcd call, there are no u updates, and it makes
+       some sense to use a different choice for p. */
+
+    /* FIXME: We could trim use of temporary storage, since u0 and u1
+       are not used yet. For the hgcd call, we could swap in the u0
+       and u1 pointers for the relevant matrix elements. */
+
+    struct hgcd_matrix M;
+    mp_size_t p = CHOOSE_P_1 (n);
+    mp_size_t nn;
+
+    mpn_hgcd_matrix_init (&M, n - p, tp);
+    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+    if (nn > 0)
+      {
+	ASSERT (M.n <= (n - p - 1)/2);
+	ASSERT (M.n + p <= (p + n - 1) / 2);
+
+	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
+	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+
+	MPN_COPY (u0, M.p[1][0], M.n);
+	MPN_COPY (u1, M.p[1][1], M.n);
+	un = M.n;
+	while ( (u0[un-1] | u1[un-1] ) == 0)
+	  un--;
+      }
+    else
+      {
+	/* mpn_hgcd has failed. Then either one of a or b is very
+	   small, or the difference is very small. Perform one
+	   subtraction followed by one division. */
+	u1[0] = 1;
+
+	ctx.u0 = u0;
+	ctx.u1 = u1;
+	ctx.tp = tp + n; /* ualloc */
+	ctx.un = 1;
+
+	/* Temporary storage n */
+	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	if (n == 0)
+	  {
+	    TMP_FREE;
+	    return ctx.gn;
+	  }
+
+	un = ctx.un;
+	ASSERT (un < ualloc);
+      }
+  }
+
+  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P_2 (n);
+      mp_size_t nn;
+
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  mp_ptr t0;
+
+	  t0 = tp + matrix_scratch;
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
+
+	  /* By the same analysis as for mpn_hgcd_matrix_mul */
+	  ASSERT (M.n + un <= ualloc);
+
+	  /* FIXME: This copying could be avoided by some swapping of
+	   * pointers. May need more temporary storage, though. */
+	  MPN_COPY (t0, u0, un);
+
+	  /* Temporary storage ualloc */
+	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
+
+	  ASSERT (un < ualloc);
+	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
+	}
+      else
+	{
+	  /* mpn_hgcd has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  ctx.u0 = u0;
+	  ctx.u1 = u1;
+	  ctx.tp = tp + n; /* ualloc */
+	  ctx.un = un;
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  if (n == 0)
+	    {
+	      TMP_FREE;
+	      return ctx.gn;
+	    }
+
+	  un = ctx.un;
+	  ASSERT (un < ualloc);
+	}
+    }
+  /* We have A = ... a + ... b
+	     B =  u0 a +  u1 b
+
+	     a = u1  A + ... B
+	     b = -u0 A + ... B
+
+     with bounds
+
+       |u0|, |u1| <= B / min(a, b)
+
+     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
+     in which case the only reduction done so far is a = A - k B for
+     some k.
+
+     Compute g = u a + v b = (u u1 - v u0) A + (...) B
+     Here, u, v are bounded by
+
+       |u| <= b,
+       |v| <= a
+  */
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
+    {
+      /* Must return the smallest cofactor, +u1 or -u0 */
+      int c;
+
+      MPN_COPY (gp, ap, n);
+
+      MPN_CMP (c, u0, u1, un);
+      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
+	 this case we choose the cofactor + 1, corresponding to G = A
+	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+	{
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usizep = -un;
+	}
+      else
+	{
+	  MPN_NORMALIZE_NOT_ZERO (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usizep = un;
+	}
+
+      TMP_FREE;
+      return n;
+    }
+  else if (UNLIKELY (u0[0] == 0) && un == 1)
+    {
+      mp_size_t gn;
+      ASSERT (u1[0] == 1);
+
+      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
+      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+  else
+    {
+      mp_size_t u0n;
+      mp_size_t u1n;
+      mp_size_t lehmer_un;
+      mp_size_t lehmer_vn;
+      mp_size_t gn;
+
+      mp_ptr lehmer_up;
+      mp_ptr lehmer_vp;
+      int negate;
+
+      lehmer_up = tp; tp += n;
+
+      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
+      MPN_COPY (tp, ap, n);
+      MPN_COPY (tp + n, bp, n);
+      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
+
+      u0n = un;
+      MPN_NORMALIZE (u0, u0n);
+      ASSERT (u0n > 0);
+
+      if (lehmer_un == 0)
+	{
+	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
+	  MPN_COPY (up, u0, u0n);
+	  *usizep = -u0n;
+
+	  TMP_FREE;
+	  return gn;
+	}
+
+      lehmer_vp = tp;
+      /* Compute v = (g - u a) / b */
+      lehmer_vn = compute_v (lehmer_vp,
+			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
+
+      if (lehmer_un > 0)
+	negate = 0;
+      else
+	{
+	  lehmer_un = -lehmer_un;
+	  negate = 1;
+	}
+
+      u1n = un;
+      MPN_NORMALIZE (u1, u1n);
+      ASSERT (u1n > 0);
+
+      ASSERT (lehmer_un + u1n <= ualloc);
+      ASSERT (lehmer_vn + u0n <= ualloc);
+
+      /* We may still have v == 0 */
+
+      /* Compute u u1 */
+      if (lehmer_un <= u1n)
+	/* Should be the common case */
+	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
+      else
+	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
+
+      un = u1n + lehmer_un;
+      un -= (up[un - 1] == 0);
+
+      if (lehmer_vn > 0)
+	{
+	  mp_limb_t cy;
+
+	  /* Overwrites old u1 value */
+	  if (lehmer_vn <= u0n)
+	    /* Should be the common case */
+	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
+	  else
+	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
+
+	  u1n = u0n + lehmer_vn;
+	  u1n -= (u1[u1n - 1] == 0);
+
+	  if (u1n <= un)
+	    {
+	      cy = mpn_add (up, up, un, u1, u1n);
+	    }
+	  else
+	    {
+	      cy = mpn_add (up, u1, u1n, up, un);
+	      un = u1n;
+	    }
+	  up[un] = cy;
+	  un += (cy != 0);
+
+	  ASSERT (un < ualloc);
+	}
+      *usizep = negate ? -un : un;
+
+      TMP_FREE;
+      return gn;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/gcdext_1.c b/third_party/gmp/mpn/generic/gcdext_1.c
new file mode 100644
index 0000000..b221a92
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcdext_1.c
@@ -0,0 +1,275 @@
+/* mpn_gcdext_1 -- Extended Greatest Common Divisor, single-limb operands.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_1_USE_BINARY
+#define GCDEXT_1_USE_BINARY 0
+#endif
+
+#ifndef GCDEXT_1_BINARY_METHOD
+#define GCDEXT_1_BINARY_METHOD 2
+#endif
+
+#if GCDEXT_1_USE_BINARY
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
+	      mp_limb_t u, mp_limb_t v)
+{
+  /* Maintain
+
+     U = t1 u + t0 v
+     V = s1 u + s0 v
+
+     where U, V are the inputs (without any shared power of two),
+     and the matrix has determinant ± 2^{shift}.
+  */
+  mp_limb_t s0 = 1;
+  mp_limb_t t0 = 0;
+  mp_limb_t s1 = 0;
+  mp_limb_t t1 = 1;
+  mp_limb_t ug;
+  mp_limb_t vg;
+  mp_limb_t ugh;
+  mp_limb_t vgh;
+  unsigned zero_bits;
+  unsigned shift;
+  unsigned i;
+#if GCDEXT_1_BINARY_METHOD == 2
+  mp_limb_t det_sign;
+#endif
+
+  ASSERT (u > 0);
+  ASSERT (v > 0);
+
+  count_trailing_zeros (zero_bits, u | v);
+  u >>= zero_bits;
+  v >>= zero_bits;
+
+  if ((u & 1) == 0)
+    {
+      count_trailing_zeros (shift, u);
+      u >>= shift;
+      t1 <<= shift;
+    }
+  else if ((v & 1) == 0)
+    {
+      count_trailing_zeros (shift, v);
+      v >>= shift;
+      s0 <<= shift;
+    }
+  else
+    shift = 0;
+
+#if GCDEXT_1_BINARY_METHOD == 1
+  while (u != v)
+    {
+      unsigned count;
+      if (u > v)
+	{
+	  u -= v;
+
+	  count_trailing_zeros (count, u);
+	  u >>= count;
+
+	  t0 += t1; t1 <<= count;
+	  s0 += s1; s1 <<= count;
+	}
+      else
+	{
+	  v -= u;
+
+	  count_trailing_zeros (count, v);
+	  v >>= count;
+
+	  t1 += t0; t0 <<= count;
+	  s1 += s0; s0 <<= count;
+	}
+      shift += count;
+    }
+#else
+# if GCDEXT_1_BINARY_METHOD == 2
+  u >>= 1;
+  v >>= 1;
+
+  det_sign = 0;
+
+  while (u != v)
+    {
+      unsigned count;
+      mp_limb_t d =  u - v;
+      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
+      mp_limb_t sx;
+      mp_limb_t tx;
+
+      /* When v <= u (vgtu == 0), the updates are:
+
+	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
+	   (t1, t0) <-- (t1 << count, t0 + t1)
+
+	 and when v > u, the updates are
+
+	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
+	   (t1, t0) <-- (t0 << count, t0 + t1)
+
+	 and similarly for s1, s0
+      */
+
+      /* v <-- min (u, v) */
+      v += (vgtu & d);
+
+      /* u <-- |u - v| */
+      u = (d ^ vgtu) - vgtu;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * d or u, but using d gives more parallelism. */
+      count_trailing_zeros (count, d);
+
+      det_sign ^= vgtu;
+
+      tx = vgtu & (t0 - t1);
+      sx = vgtu & (s0 - s1);
+      t0 += t1;
+      s0 += s1;
+      t1 += tx;
+      s1 += sx;
+
+      count++;
+      u >>= count;
+      t1 <<= count;
+      s1 <<= count;
+      shift += count;
+    }
+  u = (u << 1) + 1;
+# else /* GCDEXT_1_BINARY_METHOD == 2 */
+#  error Unknown GCDEXT_1_BINARY_METHOD
+# endif
+#endif
+
+  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
+  ug = t0 + t1;
+  vg = s0 + s1;
+
+  ugh = ug/2 + (ug & 1);
+  vgh = vg/2 + (vg & 1);
+
+  /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
+     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
+  for (i = 0; i < shift; i++)
+    {
+      mp_limb_t mask = - ( (s0 | t0) & 1);
+
+      s0 /= 2;
+      t0 /= 2;
+      s0 += mask & vgh;
+      t0 += mask & ugh;
+    }
+
+  ASSERT_ALWAYS (s0 <= vg);
+  ASSERT_ALWAYS (t0 <= ug);
+
+  if (s0 > vg - s0)
+    {
+      s0 -= vg;
+      t0 -= ug;
+    }
+#if GCDEXT_1_BINARY_METHOD == 2
+  /* Conditional negation. */
+  s0 = (s0 ^ det_sign) - det_sign;
+  t0 = (t0 ^ det_sign) - det_sign;
+#endif
+  *sp = s0;
+  *tp = -t0;
+
+  return u << zero_bits;
+}
+
+#else /* !GCDEXT_1_USE_BINARY */
+
+
+/* FIXME: Takes two single-word limbs. It could be extended to a
+ * function that accepts a bignum for the first input, and only
+ * returns the first co-factor. */
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
+	      mp_limb_t a, mp_limb_t b)
+{
+  /* Maintain
+
+     a =  u0 A + v0 B
+     b =  u1 A + v1 B
+
+     where A, B are the original inputs.
+  */
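+
+  /* Worked example (illustrative): for A = 7, B = 5 the iterations give
+     q = 1: a = 2, (u0,v0) = (1,-1); then q = 2: b = 1, (u1,v1) = (-2,3);
+     then q = 2: a = 0, so we return b = 1 with *up = -2, *vp = 3, and
+     indeed -2*7 + 3*5 = 1.  */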
+  mp_limb_signed_t u0 = 1;
+  mp_limb_signed_t v0 = 0;
+  mp_limb_signed_t u1 = 0;
+  mp_limb_signed_t v1 = 1;
+
+  ASSERT (a > 0);
+  ASSERT (b > 0);
+
+  if (a < b)
+    goto divide_by_b;
+
+  for (;;)
+    {
+      mp_limb_t q;
+
+      q = a / b;
+      a -= q * b;
+
+      if (a == 0)
+	{
+	  *up = u1;
+	  *vp = v1;
+	  return b;
+	}
+      u0 -= q * u1;
+      v0 -= q * v1;
+
+    divide_by_b:
+      q = b / a;
+      b -= q * a;
+
+      if (b == 0)
+	{
+	  *up = u0;
+	  *vp = v0;
+	  return a;
+	}
+      u1 -= q * u0;
+      v1 -= q * v0;
+    }
+}
+#endif /* !GCDEXT_1_USE_BINARY */
diff --git a/third_party/gmp/mpn/generic/gcdext_lehmer.c b/third_party/gmp/mpn/generic/gcdext_lehmer.c
new file mode 100644
index 0000000..ea4e86d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcdext_lehmer.c
@@ -0,0 +1,336 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Here, d is the index of the cofactor to update. FIXME: Could use qn
+   = 0 for the common case q = 1. */
+void
+mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
+		 mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
+  mp_size_t un = ctx->un;
+
+  if (gp)
+    {
+      mp_srcptr up;
+
+      ASSERT (gn > 0);
+      ASSERT (gp[gn-1] > 0);
+
+      MPN_COPY (ctx->gp, gp, gn);
+      ctx->gn = gn;
+
+      if (d < 0)
+	{
+	  int c;
+
+	  /* Must return the smallest cofactor, +u1 or -u0 */
+	  MPN_CMP (c, ctx->u0, ctx->u1, un);
+	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
+
+	  d = c < 0;
+	}
+
+      up = d ? ctx->u0 : ctx->u1;
+
+      MPN_NORMALIZE (up, un);
+      MPN_COPY (ctx->up, up, un);
+
+      *ctx->usize = d ? -un : un;
+    }
+  else
+    {
+      mp_limb_t cy;
+      mp_ptr u0 = ctx->u0;
+      mp_ptr u1 = ctx->u1;
+
+      ASSERT (d >= 0);
+
+      if (d)
+	MP_PTR_SWAP (u0, u1);
+
+      qn -= (qp[qn-1] == 0);
+
+      /* Update u0 += q  * u1 */
+      if (qn == 1)
+	{
+	  mp_limb_t q = qp[0];
+
+	  if (q == 1)
+	    /* A common case. */
+	    cy = mpn_add_n (u0, u0, u1, un);
+	  else
+	    cy = mpn_addmul_1 (u0, u1, un, q);
+	}
+      else
+	{
+	  mp_size_t u1n;
+	  mp_ptr tp;
+
+	  u1n = un;
+	  MPN_NORMALIZE (u1, u1n);
+
+	  if (u1n == 0)
+	    return;
+
+	  /* Should always have u1n == un here, and u1 >= u0. The
+	     reason is that we alternate adding u0 to u1 and u1 to u0
+	     (corresponding to subtractions a - b and b - a), and we
+	     can get a large quotient only just after a switch, which
+	     means that we'll add (a multiple of) the larger u to the
+	     smaller. */
+
+	  tp = ctx->tp;
+
+	  if (qn > u1n)
+	    mpn_mul (tp, qp, qn, u1, u1n);
+	  else
+	    mpn_mul (tp, u1, u1n, qp, qn);
+
+	  u1n += qn;
+	  u1n -= tp[u1n-1] == 0;
+
+	  if (u1n >= un)
+	    {
+	      cy = mpn_add (u0, tp, u1n, u0, un);
+	      un = u1n;
+	    }
+	  else
+	    /* Note: Unlikely case, maybe never happens? */
+	    cy = mpn_add (u0, u0, un, tp, u1n);
+
+	}
+      u0[un] = cy;
+      ctx->un = un + (cy > 0);
+    }
+}
+
+/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
+   the matrix-vector multiplication adjusting a, b. If hgcd2 fails, we
+   need at most n for the quotient and n+1 for the u update (reusing
+   the extra u). In all, 4n + 3. */
+
+mp_size_t
+mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
+		     mp_ptr ap, mp_ptr bp, mp_size_t n,
+		     mp_ptr tp)
+{
+  mp_size_t ualloc = n + 1;
+
+  /* Keeps track of the second row of the reduction matrix
+   *
+   *   M = (v0, v1 ; u0, u1)
+   *
+   * which correspond to the first column of the inverse
+   *
+   *   M^{-1} = (u1, -v1; -u0, v0)
+   *
+   * This implies that
+   *
+   *   a =  u1 A (mod B)
+   *   b = -u0 A (mod B)
+   *
+   * where A, B denotes the input values.
+   */
+
+  struct gcdext_ctx ctx;
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+  mp_ptr u2;
+
+  MPN_ZERO (tp, 3*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+  u2 = tp; tp += ualloc;
+
+  u1[0] = 1; un = 1;
+
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usize;
+
+  /* FIXME: Handle n == 2 differently, after the loop? */
+  while (n >= 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  ah = ap[n-1]; al = ap[n-2];
+	  bh = bp[n-1]; bl = bp[n-2];
+	}
+      else if (n == 2)
+	{
+	  /* We use the full inputs without truncation, so we can
+	     safely shift left. */
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
+	  al = ap[0] << shift;
+	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
+	  bl = bp[0] << shift;
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (ah, al, bh, bl, &M))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+	  MP_PTR_SWAP (ap, tp);
+	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
+	  MP_PTR_SWAP (u0, u2);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  ctx.u0 = u0;
+	  ctx.u1 = u1;
+	  ctx.tp = u2;
+	  ctx.un = un;
+
+	  /* Temporary storage n for the quotient and ualloc for the
+	     new cofactor. */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  if (n == 0)
+	    return ctx.gn;
+
+	  un = ctx.un;
+	}
+    }
+  ASSERT_ALWAYS (ap[0] > 0);
+  ASSERT_ALWAYS (bp[0] > 0);
+
+  if (ap[0] == bp[0])
+    {
+      int c;
+
+      /* Which cofactor to return now? Candidates are +u1 and -u0,
+	 depending on which of a and b was most recently reduced,
+	 which we don't keep track of. So compare and get the smallest
+	 one. */
+
+      gp[0] = ap[0];
+
+      MPN_CMP (c, u0, u1, un);
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+	{
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usize = -un;
+	}
+      else
+	{
+	  MPN_NORMALIZE_NOT_ZERO (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usize = un;
+	}
+      return 1;
+    }
+  else
+    {
+      mp_limb_t uh, vh;
+      mp_limb_signed_t u;
+      mp_limb_signed_t v;
+      int negate;
+
+      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
+
+      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
+	 two limbs. */
+
+      if (u == 0)
+	{
+	  ASSERT (v == 1);
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usize = -un;
+	  return 1;
+	}
+      else if (v == 0)
+	{
+	  ASSERT (u == 1);
+	  MPN_NORMALIZE (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usize = un;
+	  return 1;
+	}
+      else if (u > 0)
+	{
+	  negate = 0;
+	  ASSERT (v < 0);
+	  v = -v;
+	}
+      else
+	{
+	  negate = 1;
+	  ASSERT (v > 0);
+	  u = -u;
+	}
+
+      uh = mpn_mul_1 (up, u1, un, u);
+      vh = mpn_addmul_1 (up, u0, un, v);
+
+      if ( (uh | vh) > 0)
+	{
+	  uh += vh;
+	  up[un++] = uh;
+	  if (uh < vh)
+	    up[un++] = 1;
+	}
+
+      MPN_NORMALIZE_NOT_ZERO (up, un);
+
+      *usize = negate ? -un : un;
+      return 1;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/get_d.c b/third_party/gmp/mpn/generic/get_d.c
new file mode 100644
index 0000000..9784f21
--- /dev/null
+++ b/third_party/gmp/mpn/generic/get_d.c
@@ -0,0 +1,438 @@
+/* mpn_get_d -- limbs to double conversion.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2007, 2009, 2010, 2012, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MANT_DIG and FLT_RADIX */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* To force use of the generic C code for testing, put
+   "#define _GMP_IEEE_FLOATS 0" at this point.  */
+
+
+/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
+   rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
+   wrong if that addition overflows.
+
+   The workaround here avoids this bug by ensuring n is not a literal constant.
+   Note that this is alpha specific.  The offending transformation is/was in
+   alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
+
+   Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
+   and has the same solution.  Don't know why or how.  */
+
+#if HAVE_HOST_CPU_FAMILY_alpha				\
+  && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4))	\
+      || defined (_CRAY))
+static volatile const long CONST_1024 = 1024;
+static volatile const long CONST_NEG_1023 = -1023;
+static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
+#else
+#define CONST_1024	      (1024)
+#define CONST_NEG_1023	      (-1023)
+#define CONST_NEG_1022_SUB_53 (-1022 - 53)
+#endif
+
+
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.  Must have
+   size>=1, and a non-zero high limb ptr[size-1].
+
+   When we know the fp format, the result is truncated towards zero.  This is
+   consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
+   easy to implement and test.
+
+   When we do not know the format, such truncation seems much harder.  One
+   would need to defeat any rounding mode, including round-up.
+
+   It's felt that GMP is not primarily concerned with hardware floats, and
+   really isn't enhanced by getting involved with hardware rounding modes
+   (which could even be some weird unknown style), so something unambiguous and
+   straightforward is best.
+
+
+   The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
+   limb and is done with shifts and masks.  The 64-bit case in particular
+   should come out nice and compact.
+
+   The generic code used to work one bit at a time, which was not only slow,
+   but implicitly relied upon denorms for intermediates, since the lowest bits'
+   weight of a perfectly valid fp number underflows in non-denorm.  Therefore,
+   the generic code now works limb-per-limb, initially creating a number x such
+   that 1 <= x <= BASE.  (BASE is reached only as a result of rounding.)  Then
+   x's exponent is scaled with explicit code (not ldexp to avoid libm
+   dependency).  It is a tap-dance to avoid underflow or overflow, beware!
+
+
+   Traps:
+
+   Hardware traps for overflow to infinity, underflow to zero, or unsupported
+   denorms may or may not be taken.  The IEEE code works bitwise and so
+   probably won't trigger them; the generic code works by float operations and
+   so probably will.  This difference might be thought less than ideal, but
+   again it's felt straightforward code is better than trying to get intimate
+   with hardware exceptions (of perhaps unknown nature).
+
+
+   Not done:
+
+   mpz_get_d in the past handled size==1 with a cast limb->double.  This might
+   still be worthwhile there (for up to the mantissa many bits), but for
+   mpn_get_d here, the cost of applying "exp" to the resulting exponent would
+   probably use up any benefit a cast may have over bit twiddling.  Also, if
+   the exponent is pushed into denorm range then bit twiddling is the only
+   option, to ensure the desired truncation is obtained.
+
+
+   Other:
+
+   For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
+   to the kernel for values >= 2^63.  This makes it slow, and worse the kernel
+   Linux (what versions?) apparently uses untested code in its trap handling
+   routines, and gets the sign wrong.  We don't use such a limb-to-double
+   cast, neither in the IEEE nor the generic code.  */
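+
+/* Quick interface example (illustrative; assumes 64-bit nail-free
+   limbs):
+
+     mp_limb_t u[2] = { 0, 1 };               the value 2^64
+     double d = mpn_get_d (u, 2, 0, 0);       d == 0x1p64
+     double e = mpn_get_d (u, 2, -1, -64);    e == -1.0
+
+   i.e. the result is {ptr,size} scaled by 2^exp, negated when
+   sign < 0.  */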
+
+
+
+#undef FORMAT_RECOGNIZED
+
+double
+mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
+{
+  int lshift, nbits;
+  mp_limb_t x, mhi, mlo;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (up, size);
+  ASSERT (size == 0 || up[size-1] != 0);
+
+  if (size == 0)
+    return 0.0;
+
+  /* Adjust exp to a radix point just above {up,size}, guarding against
+     overflow.  After this exp can of course be reduced to anywhere within
+     the {up,size} region without underflow.  */
+  if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
+		> ((unsigned long) LONG_MAX - exp)))
+    {
+#if _GMP_IEEE_FLOATS
+      goto ieee_infinity;
+#endif
+
+      /* generic */
+      exp = LONG_MAX;
+    }
+  else
+    {
+      exp += GMP_NUMB_BITS * size;
+    }
+
+#if _GMP_IEEE_FLOATS
+    {
+      union ieee_double_extract u;
+
+      up += size;
+
+#if GMP_LIMB_BITS == 64
+      mlo = up[-1];
+      count_leading_zeros (lshift, mlo);
+
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      mlo <<= lshift;
+
+      nbits = GMP_LIMB_BITS - lshift;
+
+      if (nbits < 53 && size > 1)
+	{
+	  x = up[-2];
+	  x <<= GMP_NAIL_BITS;
+	  x >>= nbits;
+	  mlo |= x;
+	  nbits += GMP_NUMB_BITS;
+
+	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+	    {
+	      x = up[-3];
+	      x <<= GMP_NAIL_BITS;
+	      x >>= nbits;
+	      mlo |= x;
+	      nbits += GMP_NUMB_BITS;
+	    }
+	}
+      mhi = mlo >> (32 + 11);
+      mlo = mlo >> 11;		/* later implicitly truncated to 32 bits */
+#endif
+#if GMP_LIMB_BITS == 32
+      x = *--up;
+      count_leading_zeros (lshift, x);
+
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      x <<= lshift;
+      mhi = x >> 11;
+
+      if (lshift < 11)		/* FIXME: never true if NUMB < 20 bits */
+	{
+	  /* All 20 bits in mhi */
+	  mlo = x << 21;
+	  /* >= 1 bit in mlo */
+	  nbits = GMP_LIMB_BITS - lshift - 21;
+	}
+      else
+	{
+	  if (size > 1)
+	    {
+	      nbits = GMP_LIMB_BITS - lshift;
+
+	      x = *--up, size--;
+	      x <<= GMP_NAIL_BITS;
+	      mhi |= x >> nbits >> 11;
+
+	      mlo = x << GMP_LIMB_BITS - nbits - 11;
+	      nbits = nbits + 11 - GMP_NAIL_BITS;
+	    }
+	  else
+	    {
+	      mlo = 0;
+	      goto done;
+	    }
+	}
+
+      /* Now all needed bits in mhi have been accumulated.  Add bits to mlo.  */
+
+      if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
+	{
+	  x = up[-1];
+	  x <<= GMP_NAIL_BITS;
+	  x >>= nbits;
+	  mlo |= x;
+	  nbits += GMP_NUMB_BITS;
+
+	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
+	    {
+	      x = up[-2];
+	      x <<= GMP_NAIL_BITS;
+	      x >>= nbits;
+	      mlo |= x;
+	      nbits += GMP_NUMB_BITS;
+
+	      if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
+		{
+		  x = up[-3];
+		  x <<= GMP_NAIL_BITS;
+		  x >>= nbits;
+		  mlo |= x;
+		  nbits += GMP_NUMB_BITS;
+		}
+	    }
+	}
+
+    done:;
+
+#endif
+      if (UNLIKELY (exp >= CONST_1024))
+	{
+	  /* overflow, return infinity */
+	ieee_infinity:
+	  mhi = 0;
+	  mlo = 0;
+	  exp = 1024;
+	}
+      else if (UNLIKELY (exp <= CONST_NEG_1023))
+	{
+	  int rshift;
+
+	  if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+	    return 0.0;	 /* denorm underflows to zero */
+
+	  rshift = -1022 - exp;
+	  ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+	  mlo >>= rshift;
+	  mhi = mlo >> 32;
+#else
+	  if (rshift >= 32)
+	    {
+	      mlo = mhi;
+	      mhi = 0;
+	      rshift -= 32;
+	    }
+	  lshift = GMP_LIMB_BITS - rshift;
+	  mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+	  mhi >>= rshift;
+#endif
+	  exp = -1023;
+	}
+      u.s.manh = mhi;
+      u.s.manl = mlo;
+      u.s.exp = exp + 1023;
+      u.s.sig = (sign < 0);
+      return u.d;
+    }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if HAVE_DOUBLE_VAX_D
+    {
+      union double_extract u;
+
+      up += size;
+
+      mhi = up[-1];
+
+      count_leading_zeros (lshift, mhi);
+      exp -= lshift;
+      mhi <<= lshift;
+
+      mlo = 0;
+      if (size > 1)
+	{
+	  mlo = up[-2];
+	  if (lshift != 0)
+	    mhi += mlo >> (GMP_LIMB_BITS - lshift);
+	  mlo <<= lshift;
+
+	  if (size > 2 && lshift > 8)
+	    {
+	      x = up[-3];
+	      mlo += x >> (GMP_LIMB_BITS - lshift);
+	    }
+	}
+
+      if (UNLIKELY (exp >= 128))
+	{
+	  /* overflow, return maximum number */
+	  mhi = 0xffffffff;
+	  mlo = 0xffffffff;
+	  exp = 127;
+	}
+      else if (UNLIKELY (exp < -128))
+	{
+	  return 0.0;	 /* underflows to zero */
+	}
+
+      u.s.man3 = mhi >> 24;	/* drop msb, since implicit */
+      u.s.man2 = mhi >> 8;
+      u.s.man1 = (mhi << 8) + (mlo >> 24);
+      u.s.man0 = mlo >> 8;
+      u.s.exp = exp + 128;
+      u.s.sig = sign < 0;
+      return u.d;
+    }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if ! FORMAT_RECOGNIZED
+
+#if !defined(GMP_DBL_MANT_BITS)
+#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
+#define GMP_DBL_MANT_BITS DBL_MANT_DIG
+#else
+/* FIXME: Choose a smarter default value. */
+#define GMP_DBL_MANT_BITS (16 * sizeof (double))
+#endif
+#endif
+
+    { /* Non-IEEE or strange limb size, generically convert
+	 GMP_DBL_MANT_BITS bits. */
+      mp_limb_t l;
+      int m;
+      mp_size_t i;
+      double d, weight;
+      unsigned long uexp;
+
+      /* First generate an fp number disregarding exp, keeping the value
+	 within one numb-base factor of 1; this should prevent overflow and
+	 underflow even for the most exponent-limited fp formats.  */
+      i = size - 1;
+      l = up[i];
+      count_leading_zeros (m, l);
+      m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
+      if (m < 0)
+	l &= GMP_NUMB_MAX << -m;
+      d = l;
+      for (weight = 1/MP_BASE_AS_DOUBLE; m > 0 && --i >= 0;)
+	{
+	  l = up[i];
+	  m -= GMP_NUMB_BITS;
+	  if (m < 0)
+	    l &= GMP_NUMB_MAX << -m;
+	  d += l * weight;
+	  weight /= MP_BASE_AS_DOUBLE;
+	  if (weight == 0)
+	    break;
+	}
+
+      /* Now apply exp.  */
+      exp -= GMP_NUMB_BITS;
+      if (exp > 0)
+	{
+	  weight = 2.0;
+	  uexp = exp;
+	}
+      else
+	{
+	  weight = 0.5;
+	  uexp = NEG_CAST (unsigned long, exp);
+	}
+#if 1
+      /* Square-and-multiply exponentiation.  */
+      if (uexp & 1)
+	d *= weight;
+      while (uexp >>= 1)
+	{
+	  weight *= weight;
+	  if (uexp & 1)
+	    d *= weight;
+	}
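+      /* Illustrative trace: uexp = 13 = 0b1101 multiplies d by weight,
+         weight^4 and weight^8 in turn, i.e. by weight^13, using only
+         three squarings.  */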
+#else
+      /* Plain exponentiation.  */
+      while (uexp > 0)
+	{
+	  d *= weight;
+	  uexp--;
+	}
+#endif
+
+      return sign >= 0 ? d : -d;
+    }
+#endif
+}
diff --git a/third_party/gmp/mpn/generic/get_str.c b/third_party/gmp/mpn/generic/get_str.c
new file mode 100644
index 0000000..19cc581
--- /dev/null
+++ b/third_party/gmp/mpn/generic/get_str.c
@@ -0,0 +1,451 @@
+/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Conversion of U {up,un} to a string in base b.  Internally, we convert to
+   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
+
+  A) Divide U repeatedly by B, generating a quotient and remainder, until the
+     quotient becomes zero.  The remainders hold the converted digits.  Digits
+     come out from right to left.  (Used in mpn_bc_get_str.)
+
+  B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
+     Then develop digits by multiplying the fraction repeatedly by b.  Digits
+     come out from left to right.  (Currently not used herein, except in
+     code for converting single limbs to individual digits.)
+
+  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
+     sqrt(U).  Then divide U by B^s, generating quotient and remainder.
+     Recursively convert the quotient, then the remainder, using the
+     precomputed powers.  Digits come out from left to right.  (Used in
+     mpn_dc_get_str.)
+
+  When using algorithm C, algorithm B might be suitable for basecase code,
+  since the required b^g power will be readily accessible.
+
+  Optimization ideas:
+  1. The recursive function of (C) could use less temporary memory.  The powtab
+     allocation could be trimmed with some computation, and the tmp area could
+     be reduced, or perhaps eliminated if up is reused for both quotient and
+     remainder (it is currently used just for remainder).
+  2. Store the powers of (C) in normalized form, with the normalization count.
+     Quotients will usually need to be left-shifted before each divide, and
+     remainders will need to be either left-shifted or right-shifted.
+  3. In the code for developing digits from a single limb, we could avoid using
+     a full umul_ppmm except for the first (or first few) digits, provided base
+     is even.  Subsequent digits can be developed using plain multiplication.
+     (This saves on register-starved machines (read x86) and on all machines
+     that generate the upper product half using a separate instruction (alpha,
+     powerpc, IA-64) or lack such support altogether (sparc64, hppa64).)
+  4. Separate mpn_dc_get_str basecase code from code for small conversions. The
+     former code will have the exact right power readily available in the
+     powtab parameter for dividing the current number into a fraction.  Convert
+     that using algorithm B.
+  5. Completely avoid division.  Compute the inverses of the powers now in
+     powtab instead of the actual powers.
+  6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
+     about 30% (1-log(5)/log(10)).
+
+  Basic structure of (C):
+    mpn_get_str:
+      if POW2_P (n)
+	...
+      else
+	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
+	  mpn_bx_get_str (str, base, up, un);
+	else
+	  precompute_power_tables
+	  mpn_dc_get_str
+
+    mpn_dc_get_str:
+	mpn_tdiv_qr
+	if (qn < GET_STR_DC_THRESHOLD)
+	  mpn_bc_get_str
+	else
+	  mpn_dc_get_str
+	if (rn < GET_STR_DC_THRESHOLD)
+	  mpn_bc_get_str
+	else
+	  mpn_dc_get_str
+
+
+  The reason for the two threshold values is the cost of
+  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be
+  considerably larger than GET_STR_DC_THRESHOLD.  */
+
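+/* Caller's-eye usage sketch (illustrative): mpn_get_str produces digit
+   values 0..base-1, most significant first, and may clobber {up,un}:
+
+     unsigned char digits[32];
+     mp_limb_t u[1] = { 12345 };
+     size_t n = mpn_get_str (digits, 10, u, 1);
+
+   leaves n == 5 and digits[0..4] == {1,2,3,4,5}; a caller wanting
+   ASCII must still map each value to a character, e.g. by adding '0'.  */
+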
+
+/* The x86s and m68020 have a quotient and remainder "div" instruction and
+   gcc recognises an adjacent "/" and "%" can be combined using that.
+   Elsewhere "/" and "%" are either separate instructions, or separate
+   libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
+   A multiply and subtract should be faster than a "%" in those cases.  */
+#if HAVE_HOST_CPU_FAMILY_x86            \
+  || HAVE_HOST_CPU_m68020               \
+  || HAVE_HOST_CPU_m68030               \
+  || HAVE_HOST_CPU_m68040               \
+  || HAVE_HOST_CPU_m68060               \
+  || HAVE_HOST_CPU_m68360 /* CPU32 */
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) % (d);         \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#else
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) - __q*(d);     \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#endif
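+
+/* e.g. udiv_qrnd_unnorm (q, r, 47, 10) sets q = 4, r = 7; on the CPUs
+   listed above both results come out of a single div instruction
+   (illustrative note).  */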
+
+
+/* Convert {up,un} to a string in base base, and put the result in str.
+   Generate len characters, possibly padding with zeros to the left.  If len is
+   zero, generate as many characters as required.  Return a pointer immediately
+   after the last digit of the result string.  Complexity is O(un^2); intended
+   for small conversions.  */
+static unsigned char *
+mpn_bc_get_str (unsigned char *str, size_t len,
+		mp_ptr up, mp_size_t un, int base)
+{
+  mp_limb_t rl, ul;
+  unsigned char *s;
+  size_t l;
+  /* Allocate memory for largest possible string, given that we only get here
+     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
+     base is 3.  7/11 is an approximation to 1/log2(3).  */
+#if TUNE_PROGRAM_BUILD
+#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
+#else
+#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
+#endif
+  unsigned char buf[BUF_ALLOC];
+#if TUNE_PROGRAM_BUILD
+  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
+#else
+  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
+#endif
+
+  if (base == 10)
+    {
+      /* Special case code for base==10 so that the compiler has a chance to
+	 optimize things.  */
+
+      MPN_COPY (rp + 1, up, un);
+
+      s = buf + BUF_ALLOC;
+      while (un > 1)
+	{
+	  int i;
+	  mp_limb_t frac, digit;
+	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+					 MP_BASES_BIG_BASE_10,
+					 MP_BASES_BIG_BASE_INVERTED_10,
+					 MP_BASES_NORMALIZATION_STEPS_10);
+	  un -= rp[un] == 0;
+	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
+	  s -= MP_BASES_CHARS_PER_LIMB_10;
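+	  /* rp[0] is the one extra "fraction" quotient limb requested
+	     from the divrem call above, i.e. roughly (remainder /
+	     big_base) scaled to a full limb; the +1 offsets truncation,
+	     and each multiply by 10 below then carries the next decimal
+	     digit into the high half (explanatory sketch).  */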
+#if HAVE_HOST_CPU_FAMILY_x86
+	  /* The code below turns out to be a bit slower for x86 using gcc.
+	     Use plain code.  */
+	  i = MP_BASES_CHARS_PER_LIMB_10;
+	  do
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  while (--i);
+#else
+	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
+	     After a few umul_ppmm, we will have accumulated enough low zeros
+	     to use a plain multiply.  */
+	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
+					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
+					     : 0));
+	  frac = (frac + 0xf) >> 4;
+	  do
+	    {
+	      frac *= 10;
+	      digit = frac >> (GMP_LIMB_BITS - 4);
+	      *s++ = digit;
+	      frac &= (~(mp_limb_t) 0) >> 4;
+	    }
+	  while (--i);
+#endif
+	  s -= MP_BASES_CHARS_PER_LIMB_10;
+	}
+
+      ul = rp[1];
+      while (ul != 0)
+	{
+	  udiv_qrnd_unnorm (ul, rl, ul, 10);
+	  *--s = rl;
+	}
+    }
+  else /* not base 10 */
+    {
+      unsigned chars_per_limb;
+      mp_limb_t big_base, big_base_inverted;
+      unsigned normalization_steps;
+
+      chars_per_limb = mp_bases[base].chars_per_limb;
+      big_base = mp_bases[base].big_base;
+      big_base_inverted = mp_bases[base].big_base_inverted;
+      count_leading_zeros (normalization_steps, big_base);
+
+      MPN_COPY (rp + 1, up, un);
+
+      s = buf + BUF_ALLOC;
+      while (un > 1)
+	{
+	  int i;
+	  mp_limb_t frac;
+	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+					 big_base, big_base_inverted,
+					 normalization_steps);
+	  un -= rp[un] == 0;
+	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
+	  s -= chars_per_limb;
+	  i = chars_per_limb;
+	  do
+	    {
+	      mp_limb_t digit;
+	      umul_ppmm (digit, frac, frac, base);
+	      *s++ = digit;
+	    }
+	  while (--i);
+	  s -= chars_per_limb;
+	}
+
+      ul = rp[1];
+      while (ul != 0)
+	{
+	  udiv_qrnd_unnorm (ul, rl, ul, base);
+	  *--s = rl;
+	}
+    }
+
+  l = buf + BUF_ALLOC - s;
+  while (l < len)
+    {
+      *str++ = 0;
+      len--;
+    }
+  while (l != 0)
+    {
+      *str++ = *s++;
+      l--;
+    }
+  return str;
+}
+
+
+/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
+   the string in STR.  Generate LEN characters, possibly padding with zeros to
+   the left.  If LEN is zero, generate as many characters as required.
+   Return a pointer immediately after the last digit of the result string.
+   This uses divide-and-conquer and is intended for large conversions.  */
+static unsigned char *
+mpn_dc_get_str (unsigned char *str, size_t len,
+		mp_ptr up, mp_size_t un,
+		const powers_t *powtab, mp_ptr tmp)
+{
+  if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
+    {
+      if (un != 0)
+	str = mpn_bc_get_str (str, len, up, un, powtab->base);
+      else
+	{
+	  while (len != 0)
+	    {
+	      *str++ = 0;
+	      len--;
+	    }
+	}
+    }
+  else
+    {
+      mp_ptr pwp, qp, rp;
+      mp_size_t pwn, qn;
+      mp_size_t sn;
+
+      pwp = powtab->p;
+      pwn = powtab->n;
+      sn = powtab->shift;
+
+      if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
+	{
+	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
+	}
+      else
+	{
+	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
+	  rp = up;		/* pwn limbs for rp; overwrite up area */
+
+	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
+	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
+
+	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
+
+	  if (len != 0)
+	    len = len - powtab->digits_in_base;
+
+	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
+	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
+	}
+    }
+  return str;
+}
+
+/* There are no leading zeros on the digits generated at str, but that's not
+   currently a documented feature.  The current mpz_out_str and mpz_get_str
+   rely on it.  */
+
+size_t
+mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
+{
+  mp_ptr powtab_mem;
+  powers_t powtab[GMP_LIMB_BITS];
+  int pi;
+  size_t out_len;
+  mp_ptr tmp;
+  TMP_DECL;
+
+  /* Special case zero, as the code below doesn't handle it.  */
+  if (un == 0)
+    {
+      str[0] = 0;
+      return 1;
+    }
+
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Convert from most significant end.  */
+      mp_limb_t n1, n0;
+      int bits_per_digit = mp_bases[base].big_base;
+      int cnt;
+      int bit_pos;
+      mp_size_t i;
+      unsigned char *s = str;
+      mp_bitcnt_t bits;
+
+      n1 = up[un - 1];
+      count_leading_zeros (cnt, n1);
+
+      /* BIT_POS should be R when input ends in least significant nibble,
+	 R + bits_per_digit * n when input ends in nth least significant
+	 nibble. */
+
+      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+      cnt = bits % bits_per_digit;
+      if (cnt != 0)
+	bits += bits_per_digit - cnt;
+      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+
+      /* Fast loop for bit output.  */
+      i = un - 1;
+      for (;;)
+	{
+	  bit_pos -= bits_per_digit;
+	  while (bit_pos >= 0)
+	    {
+	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+	      bit_pos -= bits_per_digit;
+	    }
+	  i--;
+	  if (i < 0)
+	    break;
+	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+	  n1 = up[i];
+	  bit_pos += GMP_NUMB_BITS;
+	  *s++ = n0 | (n1 >> bit_pos);
+	}
+
+      return s - str;
+    }
+
+  /* General case.  The base is not a power of 2.  */
+
+  if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str;
+
+  TMP_MARK;
+
+  /* Allocate one large block for the powers of big_base.  */
+  powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+  /* Compute a table of powers, where the largest power is >= sqrt(U).  */
+  size_t ndig;
+  mp_size_t xn;
+  DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
+  xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
+
+  pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base);
+
+  /* Using our precomputed powers, now in powtab[], convert our number.  */
+  tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
+  out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
+  TMP_FREE;
+
+  return out_len;
+}
diff --git a/third_party/gmp/mpn/generic/gmp-mparam.h b/third_party/gmp/mpn/generic/gmp-mparam.h
new file mode 100644
index 0000000..7dc057a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gmp-mparam.h
@@ -0,0 +1,33 @@
+/* Generic C gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+   in config.h. */
diff --git a/third_party/gmp/mpn/generic/hgcd.c b/third_party/gmp/mpn/generic/hgcd.c
new file mode 100644
index 0000000..e3e9c66
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd.c
@@ -0,0 +1,182 @@
+/* hgcd.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Size analysis for hgcd:
+
+   For the recursive calls, we have n1 <= ceil(n / 2). Then the
+   storage need is determined by the storage for the recursive call
+   computing M1, and by the hgcd_matrix_adjust and hgcd_matrix_mul
+   calls that use M1 (after this, the storage needed for M1 can be
+   recycled).
+
+   Let S(n) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
+   = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
+   and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
+   4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+
+   For the recursive call, we need S(n1) = S(ceil(n/2)).
+
+   S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
+	<= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
+	<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
+	<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+*/
+
+mp_size_t
+mpn_hgcd_itch (mp_size_t n)
+{
+  unsigned k;
+  int count;
+  mp_size_t nscaled;
+
+  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+    return n;
+
+  /* Get the recursion depth. */
+  nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
+  count_leading_zeros (count, nscaled);
+  k = GMP_LIMB_BITS - count;
+
+  return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+}
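+
+/* Illustrative sketch, not part of upstream GMP: the k above is just
+   the bit length of nscaled, i.e. 1 + floor(log2(nscaled)).  Below is
+   the same computation with uint64_t standing in for mp_limb_t and
+   GCC-style __builtin_clzll for count_leading_zeros; disabled so the
+   vendored file is unchanged functionally.  */
+#if 0
+#include <stdint.h>
+static unsigned
+hgcd_recursion_depth (uint64_t n, uint64_t threshold)
+{
+  /* Requires n >= threshold >= 2, so nscaled >= 1. */
+  uint64_t nscaled = (n - 1) / (threshold - 1);
+  return 64 - (unsigned) __builtin_clzll (nscaled);
+}
+#endif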
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+
+mp_size_t
+mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
+	  struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+
+  mp_size_t nn;
+  int success = 0;
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+	{
+	  n = nn;
+	  success = 1;
+	}
+
+      /* NOTE: It appears this loop never runs more than once (at
+	 least when not recursing to hgcd_appr). */
+      while (n > n2)
+	{
+	  /* Needs n + 1 storage */
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    return success ? n : 0;
+
+	  n = nn;
+	  success = 1;
+	}
+
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);
+
+	  /* FIXME: Should use hgcd_reduce, but that may require more
+	     scratch space, which requires review. */
+
+	  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+	  if (nn > 0)
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	      success = 1;
+	    }
+	}
+    }
+
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn)
+	return success ? n : 0;
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/hgcd2.c b/third_party/gmp/mpn/generic/hgcd2.c
new file mode 100644
index 0000000..3fa4012
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd2.c
@@ -0,0 +1,734 @@
+/* hgcd2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 3
+#endif
+
+#ifndef HGCD2_DIV2_METHOD
+#define HGCD2_DIV2_METHOD 2
+#endif
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+#if HAVE_NATIVE_mpn_div_11
+
+#define div1 mpn_div_11
+/* Single-limb division optimized for small quotients.
+   Returned value holds d0 = r, d1 = q. */
+mp_double_limb_t div1 (mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV1_METHOD == 1
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  res.d1 = n0 / d0;
+  res.d0 = n0 - res.d1 * d0;
+
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 2
+
+static mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  int ncnt, dcnt, cnt;
+  mp_limb_t q;
+  mp_limb_t mask;
+
+  ASSERT (n0 >= d0);
+
+  count_leading_zeros (ncnt, n0);
+  count_leading_zeros (dcnt, d0);
+  cnt = dcnt - ncnt;
+
+  d0 <<= cnt;
+
+  q = -(mp_limb_t) (n0 >= d0);
+  n0 -= d0 & q;
+  d0 >>= 1;
+  q = -q;
+
+  while (--cnt >= 0)
+    {
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      d0 >>= 1;
+      q = (q << 1) - mask;
+    }
+
+  res.d0 = n0;
+  res.d1 = q;
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 3
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
+      || UNLIKELY (n0 >= (d0 << 3)))
+    {
+      res.d1 = n0 / d0;
+      res.d0 = n0 - res.d1 * d0;
+    }
+  else
+    {
+      mp_limb_t q, mask;
+
+      d0 <<= 2;
+
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q = 4 & mask;
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q += 2 & mask;
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q -= mask;
+
+      res.d0 = n0;
+      res.d1 = q;
+    }
+  return res;
+}
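+
+/* Illustrative sketch, not part of upstream GMP: the key trick above is
+   the branch-free conditional subtract, where
+   mask = -(mp_limb_t) (n0 >= d0) is all ones exactly when the subtract
+   should happen.  A self-contained version for quotients < 8, with
+   uint64_t standing in for mp_limb_t (disabled):  */
+#if 0
+#include <stdint.h>
+static uint64_t
+div1_small_quotient (uint64_t n, uint64_t d, uint64_t *r)
+{
+  /* Assumes n / d < 8 and that d << 3 cannot overflow. */
+  uint64_t q, mask;
+  d <<= 2;
+  mask = -(uint64_t) (n >= d);  n -= d & mask;  q  = 4 & mask;
+  d >>= 1;
+  mask = -(uint64_t) (n >= d);  n -= d & mask;  q += 2 & mask;
+  d >>= 1;
+  mask = -(uint64_t) (n >= d);  n -= d & mask;  q -= mask;  /* q += 0 or 1 */
+  *r = n;        /* e.g. n = 23, d = 7 yields q = 3, r = 2 */
+  return q;
+}
+#endif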
+
+#elif HGCD2_DIV1_METHOD == 4
+
+/* Table quotients.  We extract the NBITS most significant bits of the
+   numerator limb, and the corresponding bits from the divisor limb, and use
+   these to form an index into the table.  This method is probably only useful
+   for short pipelines with slow multiplication.
+
+   Possible improvements:
+
+   * Perhaps extract the highest NBITS of the divisor instead of the same bits
+     as from the numerator.  That would require another count_leading_zeros,
+     and a post-multiply shift of the quotient.
+
+   * Compress tables?  Their values are tiny, and there are lots of zero
+     entries (which are never used).
+
+   * Round the table entries more cleverly?
+*/
+
+#ifndef NBITS
+#define NBITS 5
+#endif
+
+#if NBITS == 5
+/* This needs full division about 13.2% of the time. */
+static const unsigned char tab[512] = {
+17, 9, 5,4,3,2,2,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18, 9, 6,4,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,10, 6,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+20,10, 6,5,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+21,11, 7,5,4,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+22,11, 7,5,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+23,12, 7,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+24,12, 8,6,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+25,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+26,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+27,14, 9,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+28,14, 9,7,5,4,3,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+29,15,10,7,5,4,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+30,15,10,7,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+31,16,10,7,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+32,16,11,8,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#elif NBITS == 6
+/* This needs full division about 9.8% of the time. */
+static const unsigned char tab[2048] = {
+33,17,11, 8, 6, 5,4,4,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+34,17,11, 8, 6, 5,4,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+35,18,12, 9, 7, 5,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+36,18,12, 9, 7, 6,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+37,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+38,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+39,20,13,10, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+40,20,14,10, 8, 6,5,5,4,3,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+41,21,14,10, 8, 6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+42,21,14,10, 8, 7,6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+43,22,15,11, 8, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+44,22,15,11, 9, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+45,23,15,11, 9, 7,6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+46,23,16,11, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+47,24,16,12, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+48,24,16,12, 9, 8,6,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+49,25,17,12,10, 8,7,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+50,25,17,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+51,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+52,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+53,27,18,13,10, 9,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+54,27,19,14,11, 9,7,6,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+55,28,19,14,11, 9,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+56,28,19,14,11, 9,8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+57,29,20,14,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+58,29,20,15,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+59,30,20,15,12,10,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+60,30,21,15,12,10,8,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+61,31,21,15,12,10,8,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+62,31,22,16,12,10,9,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+63,32,22,16,13,10,9,7,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+64,32,22,16,13,10,9,8,7,6,5,5,4,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#else
+#error No table for provided NBITS
+#endif
+
+static const unsigned char *tabp = tab - (1 << (NBITS - 1) << NBITS);
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  int ncnt;
+  size_t nbi, dbi;
+  mp_limb_t q0;
+  mp_limb_t r0;
+  mp_limb_t mask;
+  mp_double_limb_t res;
+
+  ASSERT (n0 >= d0);		/* Actually only msb position is critical. */
+
+  count_leading_zeros (ncnt, n0);
+  nbi = n0 << ncnt >> (GMP_LIMB_BITS - NBITS);
+  dbi = d0 << ncnt >> (GMP_LIMB_BITS - NBITS);
+
+  q0 = tabp[(nbi << NBITS) + dbi];
+  r0 = n0 - q0 * d0;
+  mask = -(mp_limb_t) (r0 >= d0);
+  q0 -= mask;
+  r0 -= d0 & mask;
+
+  if (UNLIKELY (r0 >= d0))
+    {
+      q0 = n0 / d0;
+      r0 = n0 - q0 * d0;
+    }
+
+  res.d1 = q0;
+  res.d0 = r0;
+  return res;
+}
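+
+/* Worked instance (illustrative, not upstream): both index halves are
+   extracted with the numerator's shift ncnt, so nbi always lies in
+   [2^(NBITS-1), 2^NBITS) while dbi <= nbi, and tabp folds away the
+   constant 2^(NBITS-1) * 2^NBITS offset of the first valid row.  For
+   NBITS == 5, a numerator with top bits 10110 and a divisor with bits
+   00110 at the same positions give nbi = 22, dbi = 6; that is row
+   22 - 16 = 6, column 6 of the table above, whose entry is 3, matching
+   floor(22/6) = 3 as the quotient guess.  */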
+
+#elif HGCD2_DIV1_METHOD == 5
+
+/* Table inverses of divisors.  We don't bother with suppressing the msb from
+   the tables.  We index with the NBITS most significant divisor bits,
+   including the always-set highest bit, but use addressing trickery via tabp
+   to suppress it.
+
+   Possible improvements:
+
+   * Do the first multiply using 32-bit operations on 64-bit computers.  At
+     least on most Arm64 cores, that uses a third of the resources.  It also
+     saves resources on many x86-64 processors.
+*/
+
+#ifndef NBITS
+#define NBITS 7
+#endif
+
+#if NBITS == 5
+/* This needs full division about 1.63% of the time. */
+static const unsigned char tab[16] = {
+ 63, 59, 55, 52, 50, 47, 45, 43, 41, 39, 38, 36, 35, 34, 33, 32
+};
+static const unsigned char *tabp = tab - (1 << (NBITS - 1));
+#elif NBITS == 6
+/* This needs full division about 0.93% of the time. */
+static const unsigned char tab[32] = {
+127,123,119,116,112,109,106,104,101, 98, 96, 94, 92, 90, 88, 86,
+ 84, 82, 80, 79, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64
+};
+static const unsigned char *tabp = tab - (1 << (NBITS - 1));
+#elif NBITS == 7
+/* This needs full division about 0.49% of the time. */
+static const unsigned char tab[64] = {
+255,251,247,243,239,236,233,229,226,223,220,217,214,211,209,206,
+203,201,198,196,194,191,189,187,185,183,181,179,177,175,173,171,
+169,167,166,164,162,161,159,158,156,155,153,152,150,149,147,146,
+145,143,142,141,140,139,137,136,135,134,133,132,131,130,129,128
+};
+static const unsigned char *tabp = tab - (1 << (NBITS - 1));
+#elif NBITS == 8
+/* This needs full division about 0.26% of the time. */
+static const unsigned short tab[128] = {
+511,507,503,499,495,491,488,484,480,477,473,470,467,463,460,457,
+454,450,447,444,441,438,435,433,430,427,424,421,419,416,413,411,
+408,406,403,401,398,396,393,391,389,386,384,382,380,377,375,373,
+371,369,367,365,363,361,359,357,355,353,351,349,347,345,343,342,
+340,338,336,335,333,331,329,328,326,325,323,321,320,318,317,315,
+314,312,311,309,308,306,305,303,302,301,299,298,296,295,294,292,
+291,290,288,287,286,285,283,282,281,280,279,277,276,275,274,273,
+272,270,269,268,267,266,265,264,263,262,261,260,259,258,257,256
+};
+static const unsigned short *tabp = tab - (1 << (NBITS - 1));
+#else
+#error No table for provided NBITS
+#endif
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  int ncnt, dcnt;
+  size_t dbi;
+  mp_limb_t inv;
+  mp_limb_t q0;
+  mp_limb_t r0;
+  mp_limb_t mask;
+  mp_double_limb_t res;
+
+  count_leading_zeros (ncnt, n0);
+  count_leading_zeros (dcnt, d0);
+
+  dbi = d0 << dcnt >> (GMP_LIMB_BITS - NBITS);
+  inv = tabp[dbi];
+  q0 = ((n0 << ncnt) >> (NBITS + 1)) * inv >> (GMP_LIMB_BITS - 1 + ncnt - dcnt);
+  r0 = n0 - q0 * d0;
+  mask = -(mp_limb_t) (r0 >= d0);
+  q0 -= mask;
+  r0 -= d0 & mask;
+
+  if (UNLIKELY (r0 >= d0))
+    {
+      q0 = n0 / d0;
+      r0 = n0 - q0 * d0;
+    }
+
+  res.d1 = q0;
+  res.d0 = r0;
+  return res;
+}
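+
+/* Illustrative sketch, not part of upstream GMP: the table is indexed
+   by the NBITS most significant bits of the divisor, counted from its
+   highest set bit, so the index always lands in [2^(NBITS-1), 2^NBITS)
+   and tabp compensates for the constant offset.  With uint64_t for
+   mp_limb_t and __builtin_clzll for count_leading_zeros (disabled):  */
+#if 0
+#include <assert.h>
+#include <stdint.h>
+static void
+demo_divisor_index (void)
+{
+  uint64_t d = 0x1234;                   /* any nonzero divisor */
+  int cnt = __builtin_clzll (d);
+  uint64_t dbi = d << cnt >> (64 - 7);   /* NBITS == 7 */
+  assert (dbi >= 64 && dbi < 128);       /* normalized top bit is set */
+}
+#endif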
+
+#else
+#error Unknown HGCD2_DIV1_METHOD
+#endif
+
+#if HAVE_NATIVE_mpn_div_22
+
+#define div2 mpn_div_22
+/* Two-limb division optimized for small quotients.  */
+mp_limb_t div2 (mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV2_METHOD == 1
+
+static mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t n1, mp_limb_t n0,
+      mp_limb_t d1, mp_limb_t d0)
+{
+  mp_double_limb_t rq = div1 (n1, d1);
+  if (UNLIKELY (rq.d1 > d1))
+    {
+      mp_limb_t n2, q, t1, t0;
+      int c;
+
+      /* Normalize */
+      count_leading_zeros (c, d1);
+      ASSERT (c > 0);
+
+      n2 = n1 >> (GMP_LIMB_BITS - c);
+      n1 = (n1 << c) | (n0 >> (GMP_LIMB_BITS - c));
+      n0 <<= c;
+      d1 = (d1 << c) | (d0 >> (GMP_LIMB_BITS - c));
+      d0 <<= c;
+
+      udiv_qrnnd (q, n1, n2, n1, d1);
+      umul_ppmm (t1, t0, q, d0);
+      if (t1 > n1 || (t1 == n1 && t0 > n0))
+	{
+	  ASSERT (q > 0);
+	  q--;
+	  sub_ddmmss (t1, t0, t1, t0, d1, d0);
+	}
+      sub_ddmmss (n1, n0, n1, n0, t1, t0);
+
+      /* Undo normalization */
+      rp[0] = (n0 >> c) | (n1 << (GMP_LIMB_BITS - c));
+      rp[1] = n1 >> c;
+
+      return q;
+    }
+  else
+    {
+      mp_limb_t q, t1, t0;
+      n1 = rq.d0;
+      q = rq.d1;
+      umul_ppmm (t1, t0, q, d0);
+      if (UNLIKELY (t1 >= n1) && (t1 > n1 || t0 > n0))
+	{
+	  ASSERT (q > 0);
+	  q--;
+	  sub_ddmmss (t1, t0, t1, t0, d1, d0);
+	}
+      sub_ddmmss (rp[1], rp[0], n1, n0, t1, t0);
+      return q;
+    }
+}
+
+#elif HGCD2_DIV2_METHOD == 2
+
+/* Bit-wise div2. Relies on fast count_leading_zeros. */
+static mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t n1, mp_limb_t n0,
+      mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t q = 0;
+  int ncnt;
+  int dcnt;
+
+  count_leading_zeros (ncnt, n1);
+  count_leading_zeros (dcnt, d1);
+  dcnt -= ncnt;
+
+  d1 = (d1 << dcnt) + (d0 >> 1 >> (GMP_LIMB_BITS - 1 - dcnt));
+  d0 <<= dcnt;
+
+  do
+    {
+      mp_limb_t mask;
+      q <<= 1;
+      if (UNLIKELY (n1 == d1))
+	mask = -(n0 >= d0);
+      else
+	mask = -(n1 > d1);
+
+      q -= mask;
+
+      sub_ddmmss (n1, n0, n1, n0, mask & d1, mask & d0);
+
+      d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
+      d1 = d1 >> 1;
+    }
+  while (dcnt--);
+
+  rp[0] = n0;
+  rp[1] = n1;
+
+  return q;
+}
+#else
+#error Unknown HGCD2_DIV2_METHOD
+#endif
+
+/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
+   matrix M. Returns 1 if we make progress, i.e. can perform at least
+   one subtraction. Otherwise returns zero. */
+
+/* FIXME: Possible optimizations:
+
+   The div2 function starts with checking the most significant bit of
+   the numerator. We could maintain normalized operands here and call
+   hgcd with normalized operands only, which should make the code
+   simpler and possibly faster.
+
+   Experiment with table lookups on the most significant bits.
+
+   This function is also a candidate for assembler implementation.
+*/
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+	   struct hgcd_matrix1 *M)
+{
+  mp_limb_t u00, u01, u10, u11;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+	return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+	return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  break;
+	}
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+	 1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+	goto done;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, ah, al, bh, bl);
+	  al = r[0]; ah = r[1];
+	  if (ah < 2)
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      goto done;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  goto subtract_a1;
+	}
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+	 1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+	goto done;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, bh, bl, ah, al);
+	  bl = r[0]; bh = r[1];
+	  if (bh < 2)
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      goto done;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't get a
+     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS + 1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (ah, bh);
+	  mp_limb_t q = rq.d1;
+	  ah = rq.d0;
+
+	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      break;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a1:
+      ASSERT (bh >= ah);
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (bh, ah);
+	  mp_limb_t q = rq.d1;
+	  bh = rq.d0;
+
+	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      break;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+
+  return 1;
+}
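+
+/* Illustrative usage sketch, not part of upstream GMP: a caller passes
+   the two most significant limbs of each (suitably normalized) operand
+   and, on success, applies M^-1 to the full n-limb operands.  Here ap,
+   bp, tp and n are assumed from the caller's context (disabled):  */
+#if 0
+struct hgcd_matrix1 M;
+if (mpn_hgcd2 (ap[n-1], ap[n-2], bp[n-1], bp[n-2], &M))
+  {
+    MPN_COPY (tp, ap, n);	/* the inverse cannot be applied fully in place */
+    n = mpn_matrix22_mul1_inverse_vector (&M, ap, tp, bp, n);
+  }
+#endif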
+
+/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). The output
+ * vectors must have space for n + 1 limbs. Uses three buffers to
+ * avoid a copy.  */
+mp_size_t
+mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
+			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     r  = u00 * a
+     r += u10 * b
+     b *= u11
+     b += u01 * a
+  */
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
+  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
+#else
+  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
+  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
+
+  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
+  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
+#endif
+  rp[n] = ah;
+  bp[n] = bh;
+
+  n += (ah | bh) > 0;
+  return n;
+}
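+
+/* Worked instance (illustrative, not upstream): for n = 1 with
+   M = (u00, u01; u10, u11) = (2, 1; 1, 1), a = 3 and b = 2:
+     r = u00 * a + u10 * b = 2*3 + 1*2 = 8
+     b = u11 * b + u01 * a = 1*2 + 1*3 = 5
+   so (r;b) = (8;5), and the returned size stays 1 since both high
+   limbs ah and bh are zero.  */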
diff --git a/third_party/gmp/mpn/generic/hgcd2_jacobi.c b/third_party/gmp/mpn/generic/hgcd2_jacobi.c
new file mode 100644
index 0000000..98e079b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd2_jacobi.c
@@ -0,0 +1,365 @@
+/* hgcd2_jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
+   possibly be renamed. */
+static inline mp_limb_t
+div1 (mp_ptr rp,
+      mp_limb_t n0,
+      mp_limb_t d0)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) n0 < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
+	{
+	  d0 = d0 << 1;
+	}
+
+      q = 0;
+      while (cnt)
+	{
+	  q <<= 1;
+	  if (n0 >= d0)
+	    {
+	      n0 = n0 - d0;
+	      q |= 1;
+	    }
+	  d0 = d0 >> 1;
+	  cnt--;
+	}
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; n0 >= d0; cnt++)
+	{
+	  d0 = d0 << 1;
+	}
+
+      q = 0;
+      while (cnt)
+	{
+	  d0 = d0 >> 1;
+	  q <<= 1;
+	  if (n0 >= d0)
+	    {
+	      n0 = n0 - d0;
+	      q |= 1;
+	    }
+	  cnt--;
+	}
+    }
+  *rp = n0;
+  return q;
+}
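+
+/* Worked instance (illustrative, not upstream): n0 = 13, d0 = 3.
+   The alignment loop doubles d0 while n0 >= d0: 3 -> 6 -> 12 -> 24,
+   giving cnt = 3.  The subtract loop then halves d0 back down:
+     d0 = 12: 13 >= 12, so n0 = 1, q = 1 (binary)
+     d0 = 6:  1 < 6,            q = 10 (binary)
+     d0 = 3:  1 < 3,            q = 100 (binary)
+   yielding q = 4 and *rp = 1, i.e. 13 = 4*3 + 1.  */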
+
+/* Two-limb division optimized for small quotients.  */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t nh, mp_limb_t nl,
+      mp_limb_t dh, mp_limb_t dl)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) nh < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
+	{
+	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+	  dl = dl << 1;
+	}
+
+      while (cnt)
+	{
+	  q <<= 1;
+	  if (nh > dh || (nh == dh && nl >= dl))
+	    {
+	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
+	      q |= 1;
+	    }
+	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+	  dh = dh >> 1;
+	  cnt--;
+	}
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
+	{
+	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+	  dl = dl << 1;
+	}
+
+      while (cnt)
+	{
+	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+	  dh = dh >> 1;
+	  q <<= 1;
+	  if (nh > dh || (nh == dh && nl >= dl))
+	    {
+	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
+	      q |= 1;
+	    }
+	  cnt--;
+	}
+    }
+
+  rp[0] = nl;
+  rp[1] = nh;
+
+  return q;
+}
+
+int
+mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+		  struct hgcd_matrix1 *M, unsigned *bitsp)
+{
+  mp_limb_t u00, u01, u10, u11;
+  unsigned bits = *bitsp;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+	return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+      bits = mpn_jacobi_update (bits, 1, 1);
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+	return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+      bits = mpn_jacobi_update (bits, 0, 1);
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  break;
+	}
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+	 1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+	goto done;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	  bits = mpn_jacobi_update (bits, 1, 1);
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, ah, al, bh, bl);
+	  al = r[0]; ah = r[1];
+	  if (ah < 2)
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      bits = mpn_jacobi_update (bits, 1, q & 3);
+	      goto done;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	  bits = mpn_jacobi_update (bits, 1, q & 3);
+	}
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  goto subtract_a1;
+	}
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+	 1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+	goto done;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	  bits = mpn_jacobi_update (bits, 0, 1);
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, bh, bl, ah, al);
+	  bl = r[0]; bh = r[1];
+	  if (bh < 2)
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      bits = mpn_jacobi_update (bits, 0, q & 3);
+	      goto done;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	  bits = mpn_jacobi_update (bits, 0, q & 3);
+	}
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't
+     get a truly maximal M (corresponding to |a - b| <
+     2^{GMP_LIMB_BITS + 1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	break;
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	  bits = mpn_jacobi_update (bits, 1, 1);
+	}
+      else
+	{
+	  mp_limb_t r;
+	  mp_limb_t q = div1 (&r, ah, bh);
+	  ah = r;
+	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      bits = mpn_jacobi_update (bits, 1, q & 3);
+	      break;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	  bits = mpn_jacobi_update (bits, 1, q & 3);
+	}
+    subtract_a1:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	break;
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	  bits = mpn_jacobi_update (bits, 0, 1);
+	}
+      else
+	{
+	  mp_limb_t r;
+	  mp_limb_t q = div1 (&r, bh, ah);
+	  bh = r;
+	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      bits = mpn_jacobi_update (bits, 0, q & 3);
+	      break;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	  bits = mpn_jacobi_update (bits, 0, q & 3);
+	}
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+  *bitsp = bits;
+
+  return 1;
+}
diff --git a/third_party/gmp/mpn/generic/hgcd_appr.c b/third_party/gmp/mpn/generic/hgcd_appr.c
new file mode 100644
index 0000000..bb01738
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd_appr.c
@@ -0,0 +1,267 @@
+/* hgcd_appr.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Analogous to mpn_hgcd_itch, but with HGCD_APPR_THRESHOLD. FIXME: Do
+   we really need to add HGCD_THRESHOLD at the end? */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    return n;
+  else
+    {
+      unsigned k;
+      int count;
+      mp_size_t nscaled;
+
+      /* Get the recursion depth. */
+      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+      count_leading_zeros (count, nscaled);
+      k = GMP_LIMB_BITS - count;
+
+      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+    }
+}
+
+/* Destroys inputs. */
+int
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
+	       struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s;
+  int success = 0;
+
+  ASSERT (n > 0);
+
+  ASSERT ((ap[n-1] | bp[n-1]) != 0);
+
+  if (n <= 2)
+    /* Implies s = n. A fairly uninteresting case but exercised by the
+       random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  /* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
+     we discard some of the least significant limbs, we must keep one
+     additional bit to account for the truncation error. We maintain
+     GMP_NUMB_BITS * s - extra_bits as the current target size. */
+
+  s = n/2 + 1;
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    {
+      unsigned extra_bits = 0;
+
+      while (n > 2)
+	{
+	  mp_size_t nn;
+
+	  ASSERT (n > s);
+	  ASSERT (n <= 2*s);
+
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    break;
+
+	  n = nn;
+	  success = 1;
+
+	  /* We can truncate and discard the lower p bits whenever nbits <=
+	     2*sbits - p. To account for the truncation error, we must
+	     adjust
+
+	     sbits <-- sbits + 1 - p,
+
+	     rather than just sbits <-- sbits - p. This adjustment makes
+	     the produced matrix slightly smaller than it could be. */
+
+	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
+	    {
+	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
+
+	      if (extra_bits == 0)
+		{
+		  /* We cross a limb boundary and bump s. We can't do that
+		     if the result is that it makes min(U, V)
+		     smaller than 2^{GMP_NUMB_BITS} s. */
+		  if (s + 1 == n
+		      || mpn_zero_p (ap + s + 1, n - s - 1)
+		      || mpn_zero_p (bp + s + 1, n - s - 1))
+		    continue;
+
+		  extra_bits = GMP_NUMB_BITS - 1;
+		  s++;
+		}
+	      else
+		{
+		  extra_bits--;
+		}
+
+	      /* Drop the p least significant limbs */
+	      ap += p; bp += p; n -= p; s -= p;
+	    }
+	}
+
+      ASSERT (s > 0);
+
+      if (extra_bits > 0)
+	{
+	  /* We can get here only if we have dropped at least one of the least
+	     significant bits, so we can decrement ap and bp. We can then shift
+	     left by extra_bits using mpn_rshift. */
+	  /* NOTE: In the unlikely case that n is large, it would be preferable
+	     to do an initial subdiv step to reduce the size before shifting,
+	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
+	     count rather than a limb count. */
+	  ap--; bp--;
+	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+	  n += (ap[n] | bp[n]) > 0;
+
+	  ASSERT (success);
+
+	  while (n > 2)
+	    {
+	      mp_size_t nn;
+
+	      ASSERT (n > s);
+	      ASSERT (n <= 2*s);
+
+	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+	      if (!nn)
+		return 1;
+
+	      n = nn;
+	    }
+	}
+
+      if (n == 2)
+	{
+	  struct hgcd_matrix1 M1;
+	  ASSERT (s == 1);
+
+	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
+	    {
+	      /* Multiply M <- M * M1 */
+	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+	      success = 1;
+	    }
+	}
+      return success;
+    }
+  else
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+      mp_size_t nn;
+
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+	{
+	  n = nn;
+	  /* FIXME: Discard some of the low limbs immediately? */
+	  success = 1;
+	}
+
+      while (n > n2)
+	{
+	  mp_size_t nn;
+
+	  /* Needs n + 1 storage */
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    return success;
+
+	  n = nn;
+	  success = 1;
+	}
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);
+	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	      return 1;
+	    }
+	}
+
+      for(;;)
+	{
+	  mp_size_t nn;
+
+	  ASSERT (n > s);
+	  ASSERT (n <= 2*s);
+
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+	  if (!nn)
+	    return success;
+
+	  n = nn;
+	  success = 1;
+	}
+    }
+}
diff --git a/third_party/gmp/mpn/generic/hgcd_jacobi.c b/third_party/gmp/mpn/generic/hgcd_jacobi.c
new file mode 100644
index 0000000..24014ce
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd_jacobi.c
@@ -0,0 +1,243 @@
+/* hgcd_jacobi.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This file is almost a copy of hgcd.c, with some added calls to
+   mpn_jacobi_update. */
+
+struct hgcd_jacobi_ctx
+{
+  struct hgcd_matrix *M;
+  unsigned *bitsp;
+};
+
+static void
+hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+		  mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+	 gcd_subdiv_step, which stores q at the start of that area. We
+	 now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+
+      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
+      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
+    }
+}
+
+/* Perform a few steps, using some combination of mpn_hgcd2,
+   subtraction and division. Reduces the size by almost one limb or
+   more, but never below the given size s. Returns the new size for a
+   and b, or 0 if no more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   resulting size of M.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+   < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+		  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+	goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+  {
+    struct hgcd_jacobi_ctx ctx;
+    ctx.M = M;
+    ctx.bitsp = bitsp;
+
+    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
+  }
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+
+/* Same scratch requirements as for mpn_hgcd. */
+mp_size_t
+mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
+		 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+
+  mp_size_t nn;
+  int success = 0;
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
+      if (nn > 0)
+	{
+	  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+	     = 2 (n - 1) */
+	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+	  success = 1;
+	}
+      while (n > n2)
+	{
+	  /* Needs n + 1 storage */
+	  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+	  if (!nn)
+	    return success ? n : 0;
+	  n = nn;
+	  success = 1;
+	}
+
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);
+	  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
+	  if (nn > 0)
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	      success = 1;
+	    }
+	}
+    }
+
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+      if (!nn)
+	return success ? n : 0;
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/hgcd_matrix.c b/third_party/gmp/mpn/generic/hgcd_matrix.c
new file mode 100644
index 0000000..54c795d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd_matrix.c
@@ -0,0 +1,265 @@
+/* hgcd_matrix.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+   - 1, but we need two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+  mp_size_t s = (n+1)/2 + 1;
+  M->alloc = s;
+  M->n = 1;
+  MPN_ZERO (p, 4 * s);
+  M->p[0][0] = p;
+  M->p[0][1] = p + s;
+  M->p[1][0] = p + 2 * s;
+  M->p[1][1] = p + 3 * s;
+
+  M->p[0][0][0] = M->p[1][1][0] = 1;
+}
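+
+/* Illustrative usage sketch, not part of upstream GMP: for n = 10 we
+   get s = (10+1)/2 + 1 = 6, so the caller supplies one block of
+   4*6 = 24 limbs; the four elements are consecutive 6-limb slices of
+   that block and M starts out as the one-limb identity (disabled):  */
+#if 0
+mp_limb_t block[24];
+struct hgcd_matrix M;
+mpn_hgcd_matrix_init (&M, 10, block);	/* M = (1, 0; 0, 1), M.n = 1 */
+#endif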
+
+/* Update column COL, adding in Q * column (1-COL). Temporary storage:
+ * qn + n <= M->alloc, where n is the size of the largest element in
+ * column 1 - COL. */
+void
+mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+			  unsigned col, mp_ptr tp)
+{
+  ASSERT (col < 2);
+
+  if (qn == 1)
+    {
+      mp_limb_t q = qp[0];
+      mp_limb_t c0, c1;
+
+      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+      M->p[0][col][M->n] = c0;
+      M->p[1][col][M->n] = c1;
+
+      M->n += (c0 | c1) != 0;
+    }
+  else
+    {
+      unsigned row;
+
+      /* Carries for the unlikely case that we get both high words
+	 from the multiplication and carries from the addition. */
+      mp_limb_t c[2];
+      mp_size_t n;
+
+      /* The matrix will not necessarily grow in size by qn, so we
+	 need normalization in order not to overflow M. */
+
+      for (n = M->n; n + qn > M->n; n--)
+	{
+	  ASSERT (n > 0);
+	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+	    break;
+	}
+
+      ASSERT (qn + n <= M->alloc);
+
+      for (row = 0; row < 2; row++)
+	{
+	  if (qn <= n)
+	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+	  else
+	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+	  ASSERT (n + qn >= M->n);
+	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+	}
+
+      n += qn;
+
+      if (c[0] | c[1])
+	{
+	  M->p[0][col][n] = c[0];
+	  M->p[1][col][n] = c[1];
+	  n++;
+	}
+      else
+	{
+	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+	  ASSERT (n >= M->n);
+	}
+      M->n = n;
+    }
+
+  ASSERT (M->n < M->alloc);
+}
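+
+/* Worked instance (illustrative, not upstream): starting from the
+   identity with qn = 1, q = 3 and col = 1, the single-limb branch adds
+   q times column 0 into column 1:
+     (1, 0; 0, 1)  -->  (1, 3; 0, 1)
+   which is exactly multiplication from the right by (1, q; 0, 1).  */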
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+   temporary space M->n */
+void
+mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+		       mp_ptr tp)
+{
+  mp_size_t n0, n1;
+
+  /* Could avoid copy by some swapping of pointers. */
+  MPN_COPY (tp, M->p[0][0], M->n);
+  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+  MPN_COPY (tp, M->p[1][0], M->n);
+  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+  /* Depends on zero initialization */
+  M->n = MAX(n0, n1);
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+   of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+		     mp_ptr tp)
+{
+  mp_size_t n;
+
+  /* About the new size of M's elements. Since M1's diagonal elements
+     are > 0, no element can decrease. The new elements are of size
+     M->n + M1->n, one limb more or less. The computation of the
+     matrix product produces elements of size M->n + M1->n + 1. But the
+     true size, after normalization, may be up to three limbs smaller.
+
+     The reason that the product has normalized size >= M->n + M1->n -
+     2 is subtle. It depends on the fact that M and M1 can be factored
+     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+     M ending with a large power and M1 starting with a large power of
+     the same matrix. */
+
+  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+     multiplication range, this function could be sped up quite a lot
+     using invariance. */
+  ASSERT (M->n + M1->n < M->alloc);
+
+  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+		    M->p[1][0], M->p[1][1], M->n,
+		    M1->p[0][0], M1->p[0][1],
+		    M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+  /* Index of last potentially non-zero limb, size is one greater. */
+  n = M->n + M1->n;
+
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+  M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+   Temporary space needed: 2 * (p + M->n) limbs.  */
+mp_size_t
+mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
+			mp_size_t n, mp_ptr ap, mp_ptr bp,
+			mp_size_t p, mp_ptr tp)
+{
+  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a; b)
+     = (r11 a - r01 b; -r10 a + r00 b) */
+
+  mp_ptr t0 = tp;
+  mp_ptr t1 = tp + p + M->n;
+  mp_limb_t ah, bh;
+  mp_limb_t cy;
+
+  ASSERT (p + M->n  < n);
+
+  /* First compute the two values depending on a, before overwriting a */
+
+  if (M->n >= p)
+    {
+      mpn_mul (t0, M->p[1][1], M->n, ap, p);
+      mpn_mul (t1, M->p[1][0], M->n, ap, p);
+    }
+  else
+    {
+      mpn_mul (t0, ap, p, M->p[1][1], M->n);
+      mpn_mul (t1, ap, p, M->p[1][0], M->n);
+    }
+
+  /* Update a */
+  MPN_COPY (ap, t0, p);
+  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][1], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+  cy = mpn_sub (ap, ap, n, t0, p + M->n);
+  ASSERT (cy <= ah);
+  ah -= cy;
+
+  /* Update b */
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][0], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+  MPN_COPY (bp, t0, p);
+  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+  cy = mpn_sub (bp, bp, n, t1, p + M->n);
+  ASSERT (cy <= bh);
+  bh -= cy;
+
+  if (ah > 0 || bh > 0)
+    {
+      ap[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* The subtraction can reduce the size by at most one limb. */
+      if (ap[n-1] == 0 && bp[n-1] == 0)
+	n--;
+    }
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+  return n;
+}
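+
+/* Worked instance (illustrative, not upstream): the r11/-r01/-r10/r00
+   pattern above is the adjugate of M, which equals M^-1 because the
+   reduction matrices here have determinant 1.  E.g. for
+   M = (2, 1; 1, 1) and (a;b) = M (3;2) = (8;5):
+     M^-1 (8;5) = (1*8 - 1*5; -1*8 + 2*5) = (3;2).  */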
diff --git a/third_party/gmp/mpn/generic/hgcd_reduce.c b/third_party/gmp/mpn/generic/hgcd_reduce.c
new file mode 100644
index 0000000..3aee77d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd_reduce.c
@@ -0,0 +1,242 @@
+/* hgcd_reduce.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes R -= A * B.  The result must be non-negative.  It is
+   normalized down to size an, and the resulting size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (bn > 0);
+  ASSERT (an >= bn);
+  ASSERT (rn >= an);
+  ASSERT (an + bn <= rn + 1);
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (an + bn);
+
+  mpn_mul (tp, ap, an, bp, bn);
+  ASSERT ((an + bn <= rn) || (tp[rn] == 0));
+  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
+  TMP_FREE;
+
+  while (rn > an && (rp[rn-1] == 0))
+    rn--;
+
+  return rn;
+}
+
+/* Computes (a, b)  <--  M^{-1} (a; b) */
+/* FIXME:
+    x Take scratch parameter, and figure out scratch need.
+
+    x Use some fallback for small M->n?
+*/
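+
+/* The code below distinguishes three cases: if an off-diagonal entry of
+   M is zero, M is built from one elementary matrix and a single submul
+   suffices; otherwise both outputs are formed as differences of wrapped
+   products mod B^modn - 1, which is safe since the true results are
+   known to fit below the modulus. */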
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+		   mp_ptr ap, mp_ptr bp,
+		   mp_size_t n)
+{
+  mp_size_t an, bn, un, vn, nn;
+  mp_size_t mn[2][2];
+  mp_size_t modn;
+  mp_ptr tp, sp, scratch;
+  mp_limb_t cy;
+  unsigned i, j;
+
+  TMP_DECL;
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+
+  for (i = 0; i < 2; i++)
+    for (j = 0; j < 2; j++)
+      {
+	mp_size_t k;
+	k = M->n;
+	MPN_NORMALIZE (M->p[i][j], k);
+	mn[i][j] = k;
+      }
+
+  ASSERT (mn[0][0] > 0);
+  ASSERT (mn[1][1] > 0);
+  ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+  TMP_MARK;
+
+  if (mn[0][1] == 0)
+    {
+      /* A unchanged, M = (1, 0; q, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put B <-- B - q A */
+      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+    }
+  else if (mn[1][0] == 0)
+    {
+      /* B unchanged, M = (1, q; 0, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put A  <-- A - q * B */
+      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+    }
+  else
+    {
+      /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
+	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
+      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+      nn = MAX (un, vn);
+      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+      modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+      TMP_ALLOC_LIMBS_3 (tp, modn,
+			 sp, modn,
+			 scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
+
+      ASSERT (n <= 2*modn);
+
+      if (n > modn)
+	{
+	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+	  MPN_INCR_U (ap, modn, cy);
+
+	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+	  MPN_INCR_U (bp, modn, cy);
+
+	  n = modn;
+	}
+
+      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+      /* FIXME: Handle the small n case in some better way. */
+      if (n + mn[1][1] < modn)
+	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+      if (n + mn[0][1] < modn)
+	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+      MPN_COPY (ap, tp, nn);
+      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+      if (n + mn[1][0] < modn)
+	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+      if (n + mn[0][0] < modn)
+	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+      MPN_COPY (bp, tp, nn);
+
+      while ( (ap[nn-1] | bp[nn-1]) == 0)
+	{
+	  nn--;
+	  ASSERT (nn > 0);
+	}
+    }
+  TMP_FREE;
+
+  return nn;
+}
+
+mp_size_t
+mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
+{
+  mp_size_t itch;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      itch = mpn_hgcd_itch (n-p);
+
+      /* For arbitrary p, the storage for _adjust is 2*(p + M->n)
+	 <= 2*(p + ceil((n-p)/2) - 1) <= n + p - 1 */
+      if (itch < n + p - 1)
+	itch = n + p - 1;
+    }
+  else
+    {
+      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
+      /* Currently, hgcd_matrix_apply allocates its own storage. */
+    }
+  return itch;
+}
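+
+/* A sketch of the usual itch/scratch calling pattern (callers such as
+   mpn_gcd do essentially this):
+
+     struct hgcd_matrix M;     /* initialized via mpn_hgcd_matrix_init */
+     mp_size_t itch = mpn_hgcd_reduce_itch (n, p);
+     mp_ptr tp = TMP_ALLOC_LIMBS (itch);
+     mp_size_t nn = mpn_hgcd_reduce (&M, ap, bp, n, p, tp);
+*/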
+
+/* FIXME: Document storage need. */
+mp_size_t
+mpn_hgcd_reduce (struct hgcd_matrix *M,
+		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
+		 mp_ptr tp)
+{
+  mp_size_t nn;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+      if (nn > 0)
+	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+	   = 2 (n - 1) */
+	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+    }
+  else
+    {
+      MPN_COPY (tp, ap + p, n - p);
+      MPN_COPY (tp + n - p, bp + p, n - p);
+      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
+	return hgcd_matrix_apply (M, ap, bp, n);
+    }
+  return 0;
+}
diff --git a/third_party/gmp/mpn/generic/hgcd_step.c b/third_party/gmp/mpn/generic/hgcd_step.c
new file mode 100644
index 0000000..a978a88
--- /dev/null
+++ b/third_party/gmp/mpn/generic/hgcd_step.c
@@ -0,0 +1,127 @@
+/* hgcd_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static void
+hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	   mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+  ASSERT (d <= 1);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_matrix *M = (struct hgcd_matrix *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+	 gcd_subdiv_step, which stores q at the start of that area. We
+	 now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
+    }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   (resulting size of M) + 1.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
+   <= N.
+*/
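+
+/* For reference, a sketch of the limb extraction used below (the exact
+   macro lives in gmp-impl.h): on a nail-less build,
+
+     MPN_EXTRACT_NUMB (shift, xh, xl)
+
+   is ((xh << shift) & GMP_NUMB_MASK) | (xl >> (GMP_NUMB_BITS - shift)),
+   i.e. a full word taken starting at the first set bit of mask, so that
+   hgcd2 sees fully normalized two-limb operands. */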
+
+mp_size_t
+mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+	       struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+	goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+
+  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
+}
diff --git a/third_party/gmp/mpn/generic/invert.c b/third_party/gmp/mpn/generic/invert.c
new file mode 100644
index 0000000..157ff2b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/invert.c
@@ -0,0 +1,86 @@
+/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2014-2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
+    {
+	/* Maximum scratch needed by this branch: 2*n */
+	mp_ptr xp;
+
+	xp = scratch;				/* 2 * n limbs */
+	/* n > 1 here */
+	MPN_FILL (xp, n, GMP_NUMB_MAX);
+	mpn_com (xp + n, dp, n);
+	if (n == 2) {
+	  mpn_divrem_2 (ip, 0, xp, 4, dp);
+	} else {
+	  gmp_pi1_t inv;
+	  invert_pi1 (inv, dp[n-1], dp[n-2]);
+	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
+	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
+	}
+    }
+  else { /* Use approximated inverse; correct the result if needed. */
+      mp_limb_t e; /* The possible error in the approximate inverse */
+
+      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
+      e = mpn_ni_invertappr (ip, dp, n, scratch);
+
+      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
+	/* Code to detect and correct the "off by one" approximation. */
+	mpn_mul_n (scratch, ip, dp, n);
+	e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
+	if (LIKELY(e)) /* The high part can not give a carry by itself. */
+	  e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
+	/* If the value was wrong (no carry), correct it (increment). */
+	e ^= CNST_LIMB (1);
+	MPN_INCR_U (ip, n, e);
+      }
+  }
+}
diff --git a/third_party/gmp/mpn/generic/invertappr.c b/third_party/gmp/mpn/generic/invertappr.c
new file mode 100644
index 0000000..3be5596
--- /dev/null
+++ b/third_party/gmp/mpn/generic/invertappr.c
@@ -0,0 +1,300 @@
+/* mpn_invertappr and helper functions.  Compute I such that
+   floor((B^{2n}-1)/U) - 1 <= I + B^n <= floor((B^{2n}-1)/U).
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The algorithm used here was inspired by ApproximateReciprocal from "Modern
+   Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
+   thanks to Paul Zimmermann for his very valuable suggestions on all the
+   theoretical aspects during the work on this code.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2015, 2016 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: The iterative version splits the operand into two slightly
+   unbalanced parts; the use of log_2 (or counting the bits) underestimates
+   the maximum number of iterations.  */
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#define MAYBE_dcpi1_divappr   1
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
+#define MAYBE_dcpi1_divappr \
+  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
+#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
+    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
+#undef  INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
+#endif
+#endif
+
+/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
+   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
+   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
+
+   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
+   following conditions are satisfied by the output:
+     0 <= e <= 1;
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
+   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
+	e=1 means that the result _may_ be one less than expected.
+
+   The _bc version returns e=1 most of the time.
+   The _ni version should return e=0 most of the time; only about 1% of
+   possible random input should give e=1.
+
+   When the strict result is needed, i.e., e=0 in the relation above:
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
+   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
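+
+/* Usage sketch (illustrative only; requires an enclosing TMP_DECL /
+   TMP_MARK / TMP_FREE context):
+
+     mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
+     mp_limb_t e = mpn_invertappr (ip, dp, n, scratch);
+
+   after which {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e),
+   per the bounds above. */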
+
+/* Maximum scratch needed by this branch (at xp): 2*n */
+static mp_limb_t
+mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr xp)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, xp, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, xp, mpn_invertappr_itch(n)));
+
+  /* Compute a base value of r limbs. */
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else {
+    /* n > 1 here */
+    MPN_FILL (xp, n, GMP_NUMB_MAX);
+    mpn_com (xp + n, dp, n);
+
+    /* Now xp contains B^2n - {dp,n}*B^n - 1 */
+
+    /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
+    if (n == 2) {
+      mpn_divrem_2 (ip, 0, xp, 4, dp);
+    } else {
+      gmp_pi1_t inv;
+      invert_pi1 (inv, dp[n-1], dp[n-2]);
+      if (! MAYBE_dcpi1_divappr
+	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
+	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
+      else
+	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
+      MPN_DECR_U(ip, n, CNST_LIMB (1));
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
+   iterations (at least one).
+
+   Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
+   Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
+   in version 0.4 of the book.
+
+   Some adaptations were introduced, to allow product mod B^m-1 and return the
+   value e.
+
+   We introduced a correction in such a way that "the value of
+   B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
+   "2B^n-1").
+
+   Maximum scratch needed by this branch is <= 2*n, but we have to fit
+   3*rn limbs in the scratch, i.e. 3*rn <= 2*n: hence we require n > 4.
+
+   We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
+   problem for the [0] class?  There shouldn't be: we compute 2*|A*X_h - B^{n+h}| <
+   B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
+   happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
+   B^{n+h} - A, then we get into the "negative" branch, where X_h is not
+   incremented (because A < B^n).
+
+   FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
+   is allocated apart.
+ */
+
+mp_limb_t
+mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_limb_t cy;
+  mp_size_t rn, mn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_ptr tp;
+  TMP_DECL;
+#define xp scratch
+
+  ASSERT (n > 4);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  /* Compute the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  rn = n;
+  do {
+    *sizp = rn;
+    rn = (rn >> 1) + 1;
+    ++sizp;
+  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
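+
+  /* For instance, with n = 1000 and INV_NEWTON_THRESHOLD at (say) 170,
+     the loop above stores 1000, 501, 251 in sizes[] and leaves rn = 126
+     as the base-case size. */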
+
+  /* We seek the inverse of 0.{dp,n}; we compute it as 1.{ip,n} */
+  dp += n;
+  ip += n;
+
+  /* Compute a base value of rn limbs. */
+  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
+
+  TMP_MARK;
+
+  if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
+    {
+      mn = mpn_mulmod_bnm1_next_size (n + 1);
+      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+    }
+  /* Use Newton's iterations to get the desired precision. */
+
+  while (1) {
+    n = *--sizp;
+    /*
+      v    n  v
+      +----+--+
+      ^ rn ^
+    */
+
+    /* Compute i_j d. */
+    if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
+	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
+      /* FIXME: We only need {xp,n+1} */
+      mpn_mul (xp, dp - n, n, ip - rn, rn);
+      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
+      cy = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
+      /* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
+    } else { /* Use B^mn-1 wraparound */
+      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
+      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
+      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
+      /* Add dp*B^rn mod (B^mn-1) */
+      ASSERT (n >= mn - rn);
+      cy = mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
+      cy = mpn_add_nc (xp, xp, dp - (n - (mn - rn)), n - (mn - rn), cy);
+      /* Subtract B^{rn+n}, maybe only compensate the carry*/
+      xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
+      MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
+      MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
+      cy = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
+    }
+
+    if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
+      cy = xp[n]; /* 0 <= cy <= 1 here. */
+#if HAVE_NATIVE_mpn_sublsh1_n
+      if (cy++) {
+	if (mpn_cmp (xp, dp - n, n) > 0) {
+	  mp_limb_t chk;
+	  chk = mpn_sublsh1_n (xp, xp, dp - n, n);
+	  ASSERT (chk == xp[n]);
+	  ++ cy;
+	} else
+	  ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+      }
+#else /* no mpn_sublsh1_n*/
+      if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
+	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
+      }
+#endif
+      /* 1 <= cy <= 3 here. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_rsblsh1_n (xp + n, xp, dp - n, n));
+	++cy;
+      } else
+	ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#else /* no mpn_rsblsh1_n*/
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
+      }
+      ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#endif
+      MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 4 here. */
+    } else { /* "negative" residue class */
+      ASSERT (xp[n] >= GMP_NUMB_MAX - CNST_LIMB(1));
+      MPN_DECR_U(xp, n + 1, cy);
+      if (xp[n] != GMP_NUMB_MAX) {
+	MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
+	ASSERT_CARRY (mpn_add_n (xp, xp, dp - n, n));
+      }
+      mpn_com (xp + 2 * n - rn, xp + n - rn, rn);
+    }
+
+    /* Compute x_j u_j.  FIXME: We need {xp+rn,rn}, mulhi? */
+    mpn_mul_n (xp, xp + 2 * n - rn, ip - rn, rn);
+    cy = mpn_add_n (xp + rn, xp + rn, xp + 2 * n - rn, 2 * rn - n);
+    cy = mpn_add_nc (ip - n, xp + 3 * rn - n, xp + n + rn, n - rn, cy);
+    MPN_INCR_U (ip - rn, rn, cy);
+    if (sizp == sizes) { /* Get out of the cycle */
+      /* Check for possible carry propagation from below. */
+      cy = xp[3 * rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
+      /*    cy = mpn_add_1 (xp + rn, xp + rn, 2*rn - n, 4); */
+      break;
+    }
+    rn = n;
+  }
+  TMP_FREE;
+
+  return cy;
+#undef xp
+}
+
+mp_limb_t
+mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
+    return mpn_bc_invertappr (ip, dp, n, scratch);
+  else
+    return mpn_ni_invertappr (ip, dp, n, scratch);
+}
diff --git a/third_party/gmp/mpn/generic/jacbase.c b/third_party/gmp/mpn/generic/jacbase.c
new file mode 100644
index 0000000..735ad7a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/jacbase.c
@@ -0,0 +1,242 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+   THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+   INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
+
+Copyright 1999-2002, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Use the simple loop by default.  The generic count_trailing_zeros is not
+   very fast, and the extra trickery of method 3 has proven to be of less
+   use than might have been thought.  */
+#ifndef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD  2
+#endif
+
+
+/* Use count_trailing_zeros.  */
+#if JACOBI_BASE_METHOD == 1
+#define PROCESS_TWOS_ANY                                \
+  {                                                     \
+    mp_limb_t  twos;                                    \
+    count_trailing_zeros (twos, a);                     \
+    result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \
+    a >>= twos;                                         \
+  }
+#define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY
+#endif
+
+/* Use a simple loop.  A disadvantage of this is that there's a branch on a
+   50/50 chance of a 0 or 1 low bit.  */
+#if JACOBI_BASE_METHOD == 2
+#define PROCESS_TWOS_EVEN               \
+  {                                     \
+    int  two;                           \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    do                                  \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+    while ((a & 1) == 0);               \
+  }
+#define PROCESS_TWOS_ANY        \
+  if ((a & 1) == 0)             \
+    PROCESS_TWOS_EVEN;
+#endif
+
+/* Process one bit arithmetically, then a simple loop.  This cuts the loop
+   condition down to a 25/75 chance, which should branch predict better.
+   The CPU will need a reasonable variable left shift.  */
+#if JACOBI_BASE_METHOD == 3
+#define PROCESS_TWOS_EVEN               \
+  {                                     \
+    int  two, mask, shift;              \
+					\
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    mask = (~a & 2);                    \
+    a >>= 1;                            \
+					\
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+    result_bit1 ^= two ^ (two & mask);  \
+					\
+    while ((a & 1) == 0)                \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+  }
+#define PROCESS_TWOS_ANY                \
+  {                                     \
+    int  two, mask, shift;              \
+					\
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+					\
+    mask = shift << 1;                  \
+    result_bit1 ^= (two & mask);        \
+					\
+    while ((a & 1) == 0)                \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+  }
+#endif
+
+#if JACOBI_BASE_METHOD < 4
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+   with a restricted range of inputs accepted, namely b>1, b odd.
+
+   The initial result_bit1 is taken as a parameter for the convenience of
+   mpz_kronecker_ui() et al.  The sign changes both here and in those
+   routines accumulate nicely in bit 1, see the JACOBI macros.
+
+   The return value here is the normal +1, 0, or -1.  Note that +1 and -1
+   have bit 1 in the "BIT1" sense, which could be useful if the caller is
+   accumulating it into some extended calculation.
+
+   Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+   possible, but a couple of tests suggest it's not a significant speedup,
+   and may even be a slowdown, so what's here is good enough for now. */
+
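+/* Illustrative values:
+     mpn_jacobi_base (3, 5, 0)  returns -1, since (3|5) = (5|3) = (2|3) = -1;
+     mpn_jacobi_base (2, 15, 0) returns +1, since 15 = 7 (mod 8);
+     mpn_jacobi_base (5, 15, 0) returns  0, since gcd (5, 15) > 1.  */
+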
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+{
+  ASSERT (b & 1);  /* b odd */
+  ASSERT (b != 1);
+
+  if (a == 0)
+    return 0;
+
+  PROCESS_TWOS_ANY;
+  if (a == 1)
+    goto done;
+
+  if (a >= b)
+    goto a_gt_b;
+
+  for (;;)
+    {
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+      MP_LIMB_T_SWAP (a, b);
+
+    a_gt_b:
+      do
+	{
+	  /* working on (a/b), a,b odd, a>=b */
+	  ASSERT (a & 1);
+	  ASSERT (b & 1);
+	  ASSERT (a >= b);
+
+	  if ((a -= b) == 0)
+	    return 0;
+
+	  PROCESS_TWOS_EVEN;
+	  if (a == 1)
+	    goto done;
+	}
+      while (a >= b);
+    }
+
+ done:
+  return JACOBI_BIT1_TO_PN (result_bit1);
+}
+#endif
+
+#if JACOBI_BASE_METHOD == 4
+/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
+ * parameter. We have no need for the convention that the sign is in
+ * bit 1; internally we use bit 0. */
+
+/* FIXME: Could try table-based count_trailing_zeros. */
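+
+/* For orientation, the reduction below in textbook form, without the
+   implicit-shift representation (illustrative sketch, not used here):
+
+     int jacobi_ref (unsigned long a, unsigned long b)   [b odd, b > 1]
+     {
+       int s = 1;
+       for (a %= b; a != 0; a %= b)
+         {
+           while ((a & 1) == 0)
+             {
+               a >>= 1;
+               if ((b & 7) == 3 || (b & 7) == 5)   [(2|b) = -1]
+                 s = -s;
+             }
+           if ((a & 3) == 3 && (b & 3) == 3)       [reciprocity]
+             s = -s;
+           { unsigned long t = a; a = b; b = t; }
+         }
+       return b == 1 ? s : 0;
+     }
+*/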
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
+{
+  int c;
+
+  ASSERT (b & 1);
+  ASSERT (b > 1);
+
+  if (a == 0)
+    /* This is the only line which depends on b > 1 */
+    return 0;
+
+  bit >>= 1;
+
+  /* Below, we represent a and b shifted right so that the least
+     significant one bit is implicit. */
+
+  b >>= 1;
+
+  count_trailing_zeros (c, a);
+  bit ^= c & (b ^ (b >> 1));
+
+  /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
+  a >>= c;
+  a >>= 1;
+
+  do
+    {
+      mp_limb_t t = a - b;
+      mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
+
+      if (t == 0)
+	return 0;
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & a & b);
+
+      /* b <-- min (a, b) */
+      b += (bgta & t);
+
+      /* a <-- |a - b| */
+      a = (t ^ bgta) - bgta;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or a, but using t gives more parallelism. */
+      count_trailing_zeros (c, t);
+      c ++;
+      /* (2/b) = -1 if b = 3 or 5 mod 8 */
+      bit ^= c & (b ^ (b >> 1));
+      a >>= c;
+    }
+  while (b > 0);
+
+  return 1-2*(bit & 1);
+}
+#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/third_party/gmp/mpn/generic/jacobi.c b/third_party/gmp/mpn/generic/jacobi.c
new file mode 100644
index 0000000..d98b126
--- /dev/null
+++ b/third_party/gmp/mpn/generic/jacobi.c
@@ -0,0 +1,294 @@
+/* jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_DC_THRESHOLD
+#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
+#endif
+
+/* Schönhage's rules:
+ *
+ * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
+ *
+ * If r1 is odd, then
+ *
+ *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2 | r1)
+ *
+ * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
+ *
+ * If r1 is even, r2 must be odd. We have
+ *
+ *   (r1 | r0) = (r1 - r0 | r0) = (-1)^{(r0-1)/2} (r0 - r1 | r0)
+ *             = (-1)^{(r0-1)/2} s(r0, r0 - r1) (r0 | r0 - r1)
+ *             = (-1)^{(r0-1)/2} s(r0, r0 - r1) (r1 | r0 - r1)
+ *
+ * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
+ * q1 times gives
+ *
+ *   (r1 | r0) = (r1 | r2) = (r3 | r2)
+ *
+ * On the other hand, if r1 = 2 (mod 4), the sign factor is
+ * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
+ *
+ *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1 - 1)/2
+ *   = q1 (r0-1)/2 + q1 (q1-1)/2
+ *
+ * and we can summarize the even case as
+ *
+ *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
+ *
+ * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
+ *
+ * What about termination? The remainder sequence ends with (0|1) = 1
+ * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
+ * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
+ * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
+ *
+ * Examples: (11|15) = - (15|11) = - (4|11)
+ *            (4|11) =    (4| 3) =   (1| 3)
+ *            (1| 3) = (3|1) = (0|1) = 1
+ *
+ *             (2|7) = (2|1) = (0|1) = 1
+ *
+ * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
+ *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
+ *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
+ *
+ */
+
+/* In principle, the state consists of four variables: e (one bit), a,
+   b (two bits each), d (one bit). Collected factors are (-1)^e. a and
+   b are the least significant bits of the current remainders. d
+   (denominator) is 0 if we're currently subtracting multiples of a
+   from b, and 1 if we're subtracting b from a.
+
+   e is stored in the least significant bit, while a, b and d are
+   coded as only 13 distinct values in bits 1-4, according to the
+   following table. For rows not mentioning d, the value is either
+   implied, or it doesn't matter. */
+
+#if WANT_ASSERT
+static const struct
+{
+  unsigned char a;
+  unsigned char b;
+} decode_table[13] = {
+  /*  0 */ { 0, 1 },
+  /*  1 */ { 0, 3 },
+  /*  2 */ { 1, 1 },
+  /*  3 */ { 1, 3 },
+  /*  4 */ { 2, 1 },
+  /*  5 */ { 2, 3 },
+  /*  6 */ { 3, 1 },
+  /*  7 */ { 3, 3 }, /* d = 1 */
+  /*  8 */ { 1, 0 },
+  /*  9 */ { 1, 2 },
+  /* 10 */ { 3, 0 },
+  /* 11 */ { 3, 2 },
+  /* 12 */ { 3, 3 }, /* d = 0 */
+};
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+#endif /* WANT_ASSERT */
+
+const unsigned char jacobi_table[208] = {
+#include "jacobitab.h"
+};
+
+#define BITS_FAIL 31
+
+static void
+jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	     mp_srcptr qp, mp_size_t qn, int d)
+{
+  unsigned *bitsp = (unsigned *) p;
+
+  if (gp)
+    {
+      ASSERT (gn > 0);
+      if (gn != 1 || gp[0] != 1)
+	{
+	  *bitsp = BITS_FAIL;
+	  return;
+	}
+    }
+
+  if (qp)
+    {
+      ASSERT (qn > 0);
+      ASSERT (d >= 0);
+      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
+    }
+}
+
+#define CHOOSE_P(n) (2*(n) / 3)
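+/* With p = 2n/3, the hgcd recursion below operates on the most
+   significant n - p = n/3 limbs of a and b. */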
+
+int
+mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
+{
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+  ASSERT ( (bp[0] | ap[0]) & 1);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t dc_scratch;
+
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+
+      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (dc_scratch > scratch)
+	scratch = dc_scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(scratch);
+
+  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = 2*n/3;
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
+			    tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+	}
+      else
+	{
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
+	  if (!n)
+	    {
+	      TMP_FREE;
+	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+	    }
+	}
+    }
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  ah = ap[n-1]; al = ap[n-2];
+	  bh = bp[n-1]; bl = bp[n-2];
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2_jacobi step */
+      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+	  MP_PTR_SWAP (ap, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
+	  if (!n)
+	    {
+	      TMP_FREE;
+	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+	    }
+	}
+    }
+
+  if (bits >= 16)
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (bp[0] & 1);
+
+  if (n == 1)
+    {
+      mp_limb_t al, bl;
+      al = ap[0];
+      bl = bp[0];
+
+      TMP_FREE;
+      if (bl == 1)
+	return 1 - 2*(bits & 1);
+      else
+	return mpn_jacobi_base (al, bl, bits << 1);
+    }
+
+  else
+    {
+      int res = mpn_jacobi_2 (ap, bp, bits & 1);
+      TMP_FREE;
+      return res;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/jacobi_2.c b/third_party/gmp/mpn/generic/jacobi_2.c
new file mode 100644
index 0000000..028b8a4
--- /dev/null
+++ b/third_party/gmp/mpn/generic/jacobi_2.c
@@ -0,0 +1,351 @@
+/* jacobi_2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
+   two-limb numbers. */
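+/* E.g. (illustrative, with 64-bit limbs): for ap = {5, 0} and bp = {9, 0},
+   i.e. a = 5 and b = 9, mpn_jacobi_2 (ap, bp, 0) returns +1, since
+   (5|9) = (9|5) = (4|5) = +1. */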
+#if JACOBI_2_METHOD == 1
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+  mp_limb_t ah, al, bh, bl;
+  int c;
+
+  al = ap[0];
+  ah = ap[1];
+  bl = bp[0];
+  bh = bp[1];
+
+  ASSERT (bl & 1);
+
+  bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
+  bh >>= 1;
+
+  if ( (bh | bl) == 0)
+    return 1 - 2*(bit & 1);
+
+  if ( (ah | al) == 0)
+    return 0;
+
+  if (al == 0)
+    {
+      al = ah;
+      ah = 0;
+      bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+    }
+  count_trailing_zeros (c, al);
+  bit ^= c & (bl ^ (bl >> 1));
+
+  c++;
+  if (UNLIKELY (c == GMP_NUMB_BITS))
+    {
+      al = ah;
+      ah = 0;
+    }
+  else
+    {
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+    }
+  while ( (ah | bh) > 0)
+    {
+      mp_limb_t th, tl;
+      mp_limb_t bgta;
+
+      sub_ddmmss (th, tl, ah, al, bh, bl);
+      if ( (tl | th) == 0)
+	return 0;
+
+      bgta = LIMB_HIGHBIT_TO_MASK (th);
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & al & bl);
+
+      /* b <-- min (a, b) */
+      add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
+
+      if ( (bh | bl) == 0)
+	return 1 - 2*(bit & 1);
+
+      /* a <-- |a - b| */
+      al = (bgta ^ tl) - bgta;
+      ah = (bgta ^ th);
+
+      if (UNLIKELY (al == 0))
+	{
+	  /* If b > a, al == 0 implies that we have a carry to
+	     propagate. */
+	  al = ah - bgta;
+	  ah = 0;
+	  bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+	}
+      count_trailing_zeros (c, al);
+      c++;
+      bit ^= c & (bl ^ (bl >> 1));
+
+      if (UNLIKELY (c == GMP_NUMB_BITS))
+	{
+	  al = ah;
+	  ah = 0;
+	}
+      else
+	{
+	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+	  ah >>= c;
+	}
+    }
+
+  ASSERT (bl > 0);
+
+  while ( (al | bl) & GMP_LIMB_HIGHBIT)
+    {
+      /* Need an extra comparison to get the mask. */
+      mp_limb_t t = al - bl;
+      mp_limb_t bgta = - (bl > al);
+
+      if (t == 0)
+	return 0;
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & al & bl);
+
+      /* b <-- min (a, b) */
+      bl += (bgta & t);
+
+      /* a <-- |a - b| */
+      al = (t ^ bgta) - bgta;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or a, but using t gives more parallelism. */
+      count_trailing_zeros (c, t);
+      c ++;
+      /* (2/b) = -1 if b = 3 or 5 mod 8 */
+      bit ^= c & (bl ^ (bl >> 1));
+
+      if (UNLIKELY (c == GMP_NUMB_BITS))
+	return 1 - 2*(bit & 1);
+
+      al >>= c;
+    }
+
+  /* Here we have a little impedance mismatch. Better to inline it? */
+  return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
+}
+#elif JACOBI_2_METHOD == 2
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+  mp_limb_t ah, al, bh, bl;
+  int c;
+
+  al = ap[0];
+  ah = ap[1];
+  bl = bp[0];
+  bh = bp[1];
+
+  ASSERT (bl & 1);
+
+  /* Use bit 1. */
+  bit <<= 1;
+
+  if (bh == 0 && bl == 1)
+    /* (a|1) = 1 */
+    return 1 - (bit & 2);
+
+  if (al == 0)
+    {
+      if (ah == 0)
+	/* (0|b) = 0, b > 1 */
+	return 0;
+
+      count_trailing_zeros (c, ah);
+      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+      al = bl;
+      bl = ah >> c;
+
+      if (bl == 1)
+	/* (1|b) = 1 */
+	return 1 - (bit & 2);
+
+      ah = bh;
+
+      bit ^= al & bl;
+
+      goto b_reduced;
+    }
+  if ( (al & 1) == 0)
+    {
+      count_trailing_zeros (c, al);
+
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+      bit ^= (c << 1) & (bl ^ (bl >> 1));
+    }
+  if (ah == 0)
+    {
+      if (bh > 0)
+	{
+	  bit ^= al & bl;
+	  MP_LIMB_T_SWAP (al, bl);
+	  ah = bh;
+	  goto b_reduced;
+	}
+      goto ab_reduced;
+    }
+
+  while (bh > 0)
+    {
+      /* Compute (a|b) */
+      while (ah > bh)
+	{
+	  sub_ddmmss (ah, al, ah, al, bh, bl);
+	  if (al == 0)
+	    {
+	      count_trailing_zeros (c, ah);
+	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+	      al = bl;
+	      bl = ah >> c;
+	      ah = bh;
+
+	      bit ^= al & bl;
+	      goto b_reduced;
+	    }
+	  count_trailing_zeros (c, al);
+	  bit ^= (c << 1) & (bl ^ (bl >> 1));
+	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+	  ah >>= c;
+	}
+      if (ah == bh)
+	goto cancel_hi;
+
+      if (ah == 0)
+	{
+	  bit ^= al & bl;
+	  MP_LIMB_T_SWAP (al, bl);
+	  ah = bh;
+	  break;
+	}
+
+      bit ^= al & bl;
+
+      /* Compute (b|a) */
+      while (bh > ah)
+	{
+	  sub_ddmmss (bh, bl, bh, bl, ah, al);
+	  if (bl == 0)
+	    {
+	      count_trailing_zeros (c, bh);
+	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
+
+	      bl = bh >> c;
+	      bit ^= al & bl;
+	      goto b_reduced;
+	    }
+	  count_trailing_zeros (c, bl);
+	  bit ^= (c << 1) & (al ^ (al >> 1));
+	  bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
+	  bh >>= c;
+	}
+      bit ^= al & bl;
+
+      /* Compute (a|b) */
+      if (ah == bh)
+	{
+	cancel_hi:
+	  if (al < bl)
+	    {
+	      MP_LIMB_T_SWAP (al, bl);
+	      bit ^= al & bl;
+	    }
+	  al -= bl;
+	  if (al == 0)
+	    return 0;
+
+	  count_trailing_zeros (c, al);
+	  bit ^= (c << 1) & (bl ^ (bl >> 1));
+	  al >>= c;
+
+	  if (al == 1)
+	    return 1 - (bit & 2);
+
+	  MP_LIMB_T_SWAP (al, bl);
+	  bit ^= al & bl;
+	  break;
+	}
+    }
+
+ b_reduced:
+  /* Compute (a|b), with b a single limb. */
+  ASSERT (bl & 1);
+
+  if (bl == 1)
+    /* (a|1) = 1 */
+    return 1 - (bit & 2);
+
+  while (ah > 0)
+    {
+      ah -= (al < bl);
+      al -= bl;
+      if (al == 0)
+	{
+	  if (ah == 0)
+	    return 0;
+	  count_trailing_zeros (c, ah);
+	  bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+	  al = ah >> c;
+	  goto ab_reduced;
+	}
+      count_trailing_zeros (c, al);
+
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+      bit ^= (c << 1) & (bl ^ (bl >> 1));
+    }
+ ab_reduced:
+  ASSERT (bl & 1);
+  ASSERT (bl > 1);
+
+  return mpn_jacobi_base (al, bl, bit);
+}
+#else
+#error Unsupported value for JACOBI_2_METHOD
+#endif
diff --git a/third_party/gmp/mpn/generic/logops_n.c b/third_party/gmp/mpn/generic/logops_n.c
new file mode 100644
index 0000000..3adba2c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/logops_n.c
@@ -0,0 +1,77 @@
+/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifdef OPERATION_and_n
+#define func __MPN(and_n)
+#define call mpn_and_n
+#endif
+
+#ifdef OPERATION_andn_n
+#define func __MPN(andn_n)
+#define call mpn_andn_n
+#endif
+
+#ifdef OPERATION_nand_n
+#define func __MPN(nand_n)
+#define call mpn_nand_n
+#endif
+
+#ifdef OPERATION_ior_n
+#define func __MPN(ior_n)
+#define call mpn_ior_n
+#endif
+
+#ifdef OPERATION_iorn_n
+#define func __MPN(iorn_n)
+#define call mpn_iorn_n
+#endif
+
+#ifdef OPERATION_nior_n
+#define func __MPN(nior_n)
+#define call mpn_nior_n
+#endif
+
+#ifdef OPERATION_xor_n
+#define func __MPN(xor_n)
+#define call mpn_xor_n
+#endif
+
+#ifdef OPERATION_xnor_n
+#define func __MPN(xnor_n)
+#define call mpn_xnor_n
+#endif
+
+void
+func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  call (rp, up, vp, n);
+}
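+
+/* This file is compiled once per operation, with exactly one OPERATION_*
+   symbol defined, e.g. (illustrative compile line; the real flags come
+   from the generated Makefile):
+
+     cc -DOPERATION_and_n -c logops_n.c
+
+   Each object then provides one of mpn_and_n, mpn_ior_n, ..., with `call'
+   expanding to the corresponding inline loop from gmp-impl.h. */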
diff --git a/third_party/gmp/mpn/generic/lshift.c b/third_party/gmp/mpn/generic/lshift.c
new file mode 100644
index 0000000..7e1fdef
--- /dev/null
+++ b/third_party/gmp/mpn/generic/lshift.c
@@ -0,0 +1,72 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+   and store the n least significant limbs of the result at rp.
+   Return the bits shifted out from the most significant limb.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
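+
+/* Example (illustrative, GMP_NUMB_BITS = 64, no nails): for n = 2,
+   {up,2} = {1, 0x8000000000000000} and cnt = 1, the stored result is
+   {rp,2} = {2, 0} and the return value is 1, the bit shifted out of the
+   most significant limb. */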
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = high_limb | (low_limb >> tnc);
+      high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+    }
+  *--rp = high_limb;
+
+  return retval;
+}
diff --git a/third_party/gmp/mpn/generic/lshiftc.c b/third_party/gmp/mpn/generic/lshiftc.c
new file mode 100644
index 0000000..a583602
--- /dev/null
+++ b/third_party/gmp/mpn/generic/lshiftc.c
@@ -0,0 +1,73 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+   and store the one's complement of the n least significant limbs of
+   the result at rp.  Return the (uncomplemented) bits shifted out from
+   the most significant limb.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
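+
+/* Example (illustrative, GMP_NUMB_BITS = 64, no nails): for n = 2,
+   {up,2} = {1, 0x8000000000000000} and cnt = 1, the stored result is
+   {rp,2} = {0xfffffffffffffffd, 0xffffffffffffffff} and the return value
+   is 1; only the stored limbs are complemented, not the returned bits. */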
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt);
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+      high_limb = low_limb << cnt;
+    }
+  *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+  return retval;
+}
diff --git a/third_party/gmp/mpn/generic/matrix22_mul.c b/third_party/gmp/mpn/generic/matrix22_mul.c
new file mode 100644
index 0000000..6a1299a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/matrix22_mul.c
@@ -0,0 +1,321 @@
+/* matrix22_mul.c.
+
+   Contributed by Niels Möller and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define MUL(rp, ap, an, bp, bn) do {		\
+  if (an >= bn)					\
+    mpn_mul (rp, ap, an, bp, bn);		\
+  else						\
+    mpn_mul (rp, bp, bn, ap, an);		\
+} while (0)
+
+/* Inputs are unsigned. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  int c;
+  MPN_CMP (c, ap, bp, n);
+  if (c >= 0)
+    {
+      mpn_sub_n (rp, ap, bp, n);
+      return 0;
+    }
+  else
+    {
+      mpn_sub_n (rp, bp, ap, n);
+      return 1;
+    }
+}
+
+static int
+add_signed_n (mp_ptr rp,
+	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
+{
+  if (as != bs)
+    return as ^ abs_sub_n (rp, ap, bp, n);
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
+      return as;
+    }
+}
+
+mp_size_t
+mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
+{
+  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+    return 3*rn + 2*mn;
+  else
+    return 3*(rn + mn) + 5;
+}
+
+/* Algorithm:
+
+    / s0 \   /  1  0  0  0 \ / r0 \
+    | s1 |   |  0  1  0  1 | | r1 |
+    | s2 |   |  0  0 -1  1 | | r2 |
+    | s3 | = |  0  1 -1  1 | \ r3 /
+    | s4 |   | -1  1 -1  1 |
+    | s5 |   |  0  1  0  0 |
+    \ s6 /   \  0  0  1  0 /
+
+    / t0 \   /  1  0  0  0 \ / m0 \
+    | t1 |   |  0  1  0  1 | | m1 |
+    | t2 |   |  0  0 -1  1 | | m2 |
+    | t3 | = |  0  1 -1  1 | \ m3 /
+    | t4 |   | -1  1 -1  1 |
+    | t5 |   |  0  1  0  0 |
+    \ t6 /   \  0  0  1  0 /
+
+  Note: the two matrices above are the same, but s_i is multiplied by
+  t_i of the same index only for i < 4; for i >= 4 the pairing is
+  permuted, as the product vector below shows (s4*t5, s5*t6, s6*t4).
+  See "A Strassen-like Matrix Multiplication suited for squaring and
+  higher power computation" by M. Bodrato, in Proceedings of ISSAC
+  2010.
+
+    / r0 \   / 1 0  0  0  0  1  0 \ / s0*t0 \
+    | r1 | = | 0 0 -1  1 -1  1  0 | | s1*t1 |
+    | r2 |   | 0 1  0 -1  0 -1 -1 | | s2*t2 |
+    \ r3 /   \ 0 1  1 -1  0 -1  0 / | s3*t3 |
+				    | s4*t5 |
+				    | s5*t6 |
+				    \ s6*t4 /
+
+  The scheduling uses two temporaries U0 and U1 to store products, and
+  two, S0 and T0, to store combinations of entries of the two
+  operands.
+*/
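+
+/* For reference: the schedule above performs only 7 big multiplications
+   (s0*t0, s1*t1, s2*t2, s3*t3, s4*t5, s5*t6, s6*t4) instead of the 8 of
+   the schoolbook 2x2 product, which is where the Strassen-like saving
+   comes from.  */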
+
+/* Computes R = R * M, where R = (r0, r1; r2, r3) and M = (m0, m1; m2, m3)
+ * are 2x2 matrices whose elements are numbers of rn and mn limbs,
+ * respectively.
+ *
+ * Resulting elements are of size up to rn + mn + 1 limbs.
+ *
+ * Temporary storage: 3 rn + 3 mn + 5 limbs. */
+static void
+mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+			   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+			   mp_ptr tp)
+{
+  mp_ptr s0, t0, u0, u1;
+  int r1s, r3s, s0s, t0s, u1s;
+  s0 = tp; tp += rn + 1;
+  t0 = tp; tp += mn + 1;
+  u0 = tp; tp += rn + mn + 1;
+  u1 = tp; /* rn + mn + 2 */
+
+  MUL (u0, r1, rn, m2, mn);		/* u5 = s5 * t6 */
+  r3s = abs_sub_n (r3, r3, r2, rn);	/* r3 - r2 */
+  if (r3s)
+    {
+      r1s = abs_sub_n (r1, r1, r3, rn);
+      r1[rn] = 0;
+    }
+  else
+    {
+      r1[rn] = mpn_add_n (r1, r1, r3, rn);
+      r1s = 0;				/* r1 - r2 + r3  */
+    }
+  if (r1s)
+    {
+      s0[rn] = mpn_add_n (s0, r1, r0, rn);
+      s0s = 0;
+    }
+  else if (r1[rn] != 0)
+    {
+      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
+      s0s = 1;				/* s4 = -r0 + r1 - r2 + r3 */
+					/* Reverse sign! */
+    }
+  else
+    {
+      s0s = abs_sub_n (s0, r0, r1, rn);
+      s0[rn] = 0;
+    }
+  MUL (u1, r0, rn, m0, mn);		/* u0 = s0 * t0 */
+  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
+  ASSERT (r0[rn+mn] < 2);		/* u0 + u5 */
+
+  t0s = abs_sub_n (t0, m3, m2, mn);
+  u1s = r3s^t0s^1;			/* Reverse sign! */
+  MUL (u1, r3, rn, t0, mn);		/* u2 = s2 * t2 */
+  u1[rn+mn] = 0;
+  if (t0s)
+    {
+      t0s = abs_sub_n (t0, m1, t0, mn);
+      t0[mn] = 0;
+    }
+  else
+    {
+      t0[mn] = mpn_add_n (t0, t0, m1, mn);
+    }
+
+  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
+     at r3. I'd expect that for matrices of random size, the high
+     words t0[mn] and r1[rn] are non-zero with a pretty small
+     probability. If that can be confirmed this should be done as an
+     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
+     add_n. */
+  if (t0[mn] != 0)
+    {
+      MUL (r3, r1, rn, t0, mn + 1);	/* u3 = s3 * t3 */
+      ASSERT (r1[rn] < 2);
+      if (r1[rn] != 0)
+	mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
+    }
+  else
+    {
+      MUL (r3, r1, rn + 1, t0, mn);
+    }
+
+  ASSERT (r3[rn+mn] < 4);
+
+  u0[rn+mn] = 0;
+  if (r1s^t0s)
+    {
+      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
+      r3s = 0;				/* u3 + u5 */
+    }
+
+  if (t0s)
+    {
+      t0[mn] = mpn_add_n (t0, t0, m0, mn);
+    }
+  else if (t0[mn] != 0)
+    {
+      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
+    }
+  else
+    {
+      t0s = abs_sub_n (t0, t0, m0, mn);
+    }
+  MUL (u0, r2, rn, t0, mn + 1);		/* u6 = s6 * t4 */
+  ASSERT (u0[rn+mn] < 2);
+  if (r1s)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
+    }
+  else
+    {
+      r1[rn] += mpn_add_n (r1, r1, r2, rn);
+    }
+  rn++;
+  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
+					/* u3 + u5 + u6 */
+  ASSERT (r2[rn+mn-1] < 4);
+  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
+					/* -u2 + u3 + u5  */
+  ASSERT (r3[rn+mn-1] < 3);
+  MUL (u0, s0, rn, m1, mn);		/* u4 = s4 * t5 */
+  ASSERT (u0[rn+mn-1] < 2);
+  t0[mn] = mpn_add_n (t0, m3, m1, mn);
+  MUL (u1, r1, rn, t0, mn + 1);		/* u1 = s1 * t1 */
+  mn += rn;
+  ASSERT (u1[mn-1] < 4);
+  ASSERT (u1[mn] == 0);
+  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
+					/* -u2 + u3 - u4 + u5  */
+  ASSERT (r1[mn-1] < 2);
+  if (r3s)
+    {
+      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
+					/* u1 + u2 - u3 - u5  */
+    }
+  ASSERT (r3[mn-1] < 2);
+  if (t0s)
+    {
+      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
+					/* u1 - u3 - u5 - u6  */
+    }
+  ASSERT (r2[mn-1] < 2);
+}
+
+void
+mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+		  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+		  mp_ptr tp)
+{
+  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+    {
+      mp_ptr p0, p1;
+      unsigned i;
+
+      /* Temporary storage: 3 rn + 2 mn */
+      p0 = tp + rn;
+      p1 = p0 + rn + mn;
+
+      for (i = 0; i < 2; i++)
+	{
+	  MPN_COPY (tp, r0, rn);
+
+	  if (rn >= mn)
+	    {
+	      mpn_mul (p0, r0, rn, m0, mn);
+	      mpn_mul (p1, r1, rn, m3, mn);
+	      mpn_mul (r0, r1, rn, m2, mn);
+	      mpn_mul (r1, tp, rn, m1, mn);
+	    }
+	  else
+	    {
+	      mpn_mul (p0, m0, mn, r0, rn);
+	      mpn_mul (p1, m3, mn, r1, rn);
+	      mpn_mul (r0, m2, mn, r1, rn);
+	      mpn_mul (r1, m1, mn, tp, rn);
+	    }
+	  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
+	  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);
+
+	  r0 = r2; r1 = r3;
+	}
+    }
+  else
+    mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
+			       m0, m1, m2, m3, mn, tp);
+}
diff --git a/third_party/gmp/mpn/generic/matrix22_mul1_inverse_vector.c b/third_party/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
new file mode 100644
index 0000000..68d50b7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
@@ -0,0 +1,64 @@
+/* matrix22_mul1_inverse_vector.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+   the left. Uses three buffers, to avoid a copy. */
+mp_size_t
+mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
+				  mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t h0, h1;
+
+  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+     r  = u11 * a
+     r -= u01 * b
+     b *= u00
+     b -= u10 * a
+  */
+
+  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
+  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+  ASSERT (h0 == h1);
+
+  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
+  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+  ASSERT (h0 == h1);
+
+  n -= (rp[n-1] | bp[n-1]) == 0;
+  return n;
+}
diff --git a/third_party/gmp/mpn/generic/mod_1.c b/third_party/gmp/mpn/generic/mod_1.c
new file mode 100644
index 0000000..8e415df
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_1.c
@@ -0,0 +1,280 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always; CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD  0
+#endif
+
+#ifndef MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD  0
+#endif
+
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
+#endif
+
+#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
+/* Duplicates declarations in tune/speed.h */
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p(ap, n, b, pre)			     \
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
+      : __gmpn_mod_1_1p (ap, n, b, pre)))
+
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps(pre, b)				\
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
+      : __gmpn_mod_1_1p_cps (pre, b)))
+#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
+
+
+/* The comments in mpn/generic/divrem_1.c apply here too.
+
+   As noted in the algorithms section of the manual, the shifts in the loop
+   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
+   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
+   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
+   of divide steps, though how often that happens depends on the assumed
+   distributions of dividend and divisor.  In any case this idea is left to
+   CPU specific implementations to consider.  */
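+
+/* Spelling out the identity behind that remark: a = q*d + (a mod d)
+   gives a*2^n = q*(d*2^n) + (a mod d)*2^n with (a mod d)*2^n < d*2^n,
+   so (a*2^n) % (d*2^n) = (a mod d) * 2^n.  Thus an unshifted pass
+   computing r = a % (d*2^n), followed by one step for (r*2^n) % (d*2^n),
+   yields a mod d shifted left by n.  */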
+
+static mp_limb_t
+mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  i;
+  mp_limb_t  n1, n0, r;
+  mp_limb_t  dummy;
+  int cnt;
+
+  ASSERT (un > 0);
+  ASSERT (d != 0);
+
+  d <<= GMP_NAIL_BITS;
+
+  /* Skip a division if high < divisor.  Having the test here before
+     normalizing will still skip as often as possible.  */
+  r = up[un - 1] << GMP_NAIL_BITS;
+  if (r < d)
+    {
+      r >>= GMP_NAIL_BITS;
+      un--;
+      if (un == 0)
+	return r;
+    }
+  else
+    r = 0;
+
+  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
+     code above. */
+  if (! UDIV_NEEDS_NORMALIZATION
+      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+    {
+      for (i = un - 1; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  udiv_qrnnd (dummy, r, r, n0, d);
+	  r >>= GMP_NAIL_BITS;
+	}
+      return r;
+    }
+
+  count_leading_zeros (cnt, d);
+  d <<= cnt;
+
+  n1 = up[un - 1] << GMP_NAIL_BITS;
+  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
+
+  if (UDIV_NEEDS_NORMALIZATION
+      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+    {
+      mp_limb_t nshift;
+      for (i = un - 2; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+	  udiv_qrnnd (dummy, r, r, nshift, d);
+	  r >>= GMP_NAIL_BITS;
+	  n1 = n0;
+	}
+      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
+      r >>= GMP_NAIL_BITS;
+      return r >> cnt;
+    }
+  else
+    {
+      mp_limb_t inv, nshift;
+      invert_limb (inv, d);
+
+      for (i = un - 2; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+	  udiv_rnnd_preinv (r, r, nshift, d, inv);
+	  r >>= GMP_NAIL_BITS;
+	  n1 = n0;
+	}
+      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
+      r >>= GMP_NAIL_BITS;
+      return r >> cnt;
+    }
+}
+
+static mp_limb_t
+mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  i;
+  mp_limb_t  n0, r;
+  mp_limb_t  dummy;
+
+  ASSERT (un > 0);
+
+  d <<= GMP_NAIL_BITS;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  /* High limb is initial remainder, possibly with one subtract of
+     d to get r<d.  */
+  r = up[un - 1] << GMP_NAIL_BITS;
+  if (r >= d)
+    r -= d;
+  r >>= GMP_NAIL_BITS;
+  un--;
+  if (un == 0)
+    return r;
+
+  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
+    {
+      for (i = un - 1; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  udiv_qrnnd (dummy, r, r, n0, d);
+	  r >>= GMP_NAIL_BITS;
+	}
+      return r;
+    }
+  else
+    {
+      mp_limb_t  inv;
+      invert_limb (inv, d);
+      for (i = un - 1; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  udiv_rnnd_preinv (r, r, n0, d, inv);
+	  r >>= GMP_NAIL_BITS;
+	}
+      return r;
+    }
+}
+
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  ASSERT (n >= 0);
+  ASSERT (b != 0);
+
+  /* Should this be handled at all?  Rely on callers?  Note n==0 is currently
+     required by mpz/fdiv_r_ui.c and possibly other places.  */
+  if (n == 0)
+    return 0;
+
+  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
+    {
+      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+	{
+	  return mpn_mod_1_norm (ap, n, b);
+	}
+      else
+	{
+	  mp_limb_t pre[4];
+	  mpn_mod_1_1p_cps (pre, b);
+	  return mpn_mod_1_1p (ap, n, b, pre);
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+	{
+	  return mpn_mod_1_unnorm (ap, n, b);
+	}
+      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+	{
+	  mp_limb_t pre[4];
+	  mpn_mod_1_1p_cps (pre, b);
+	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+	}
+      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+	{
+	  mp_limb_t pre[5];
+	  mpn_mod_1s_2p_cps (pre, b);
+	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+	}
+      else
+	{
+	  mp_limb_t pre[7];
+	  mpn_mod_1s_4p_cps (pre, b);
+	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+	}
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mod_1_1.c b/third_party/gmp/mpn/generic/mod_1_1.c
new file mode 100644
index 0000000..f6342d6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_1_1.c
@@ -0,0 +1,332 @@
+/* mpn_mod_1_1p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2011, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1    /* need to make sure this is 2 for asm testing */
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %k2\n\t"					\
+	     "adc	%4, %k1\n\t"					\
+	     "sbb	%k0, %k0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %q2\n\t"					\
+	     "adc	%4, %q1\n\t"					\
+	     "sbb	%q0, %q0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxcc	%r3, %4, %1\n\t"				\
+	     "subx	%%g0, %%g0, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addccc	%r7, %8, %%g0\n\t"				\
+	     "addccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
+	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
+	 __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
+	     "adde	%1, %3, %4\n\t"					\
+	     "subfe	%0, %0, %0\n\t"					\
+	     "nor	%0, %0, %0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)			\
+	     __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "algr	%2, %6\n\t"					\
+	     "alcgr	%1, %4\n\t"					\
+	     "lghi	%0, 0\n\t"					\
+	     "alcgr	%0, %0\n\t"					\
+	     "lcgr	%0, %0"						\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
+	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %5, %6\n\t"					\
+	     "adcs	%1, %3, %4\n\t"					\
+	     "movcc	%0, #0\n\t"					\
+	     "movcs	%0, #-1"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (m) = - (__c1 + (__s1 < __c0));					\
+  } while (0)
+#endif
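+
+/* In the generic fallback above, the two-limb sum can carry out either
+   via __c1 (overflow of a1+b1) or via __s1 < __c0 (adding the low-limb
+   carry overflowed); at most one of the two is 1, so their sum is the
+   0-or-1 carry out, and its negation is the all-zeros/all-ones mask m.  */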
+
+#if MOD_1_1P_METHOD == 1
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  B1modb = -b;
+  if (LIKELY (cnt != 0))
+    B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
+  /* In the normalized case, this can be simplified to
+   *
+   *   B2modb = - b * bi;
+   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+   */
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;
+}
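+
+/* To summarize the table computed above (matching the loop comments in
+   mpn_mod_1_1p below): cps[0] is the reciprocal of the normalized b,
+   cps[1] the normalization count, and cps[2] and cps[3] are congruent
+   to B and B^2 modulo the original b, pre-shifted right by cnt so the
+   main loop can apply them to unshifted limbs.  */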
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+  mp_limb_t rh, rl, bi, ph, pl, r;
+  mp_limb_t B1modb, B2modb;
+  mp_size_t i;
+  int cnt;
+  mp_limb_t mask;
+
+  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
+
+  B1modb = bmodb[2];
+  B2modb = bmodb[3];
+
+  rl = ap[n - 1];
+  umul_ppmm (ph, pl, rl, B1modb);
+  add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
+
+  for (i = n - 3; i >= 0; i -= 1)
+    {
+      /* rr = ap[i]				< B
+	    + LO(rr)  * (B mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^2 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
+
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  cnt = bmodb[1];
+  bi = bmodb[0];
+
+  if (LIKELY (cnt != 0))
+    rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+
+  mask = -(mp_limb_t) (rh >= b);
+  rh -= mask & b;
+
+  udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t B1modb = -b;
+      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+      ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+      cps[2] = B1modb >> cnt;
+    }
+  B2modb = - b * bi;
+  ASSERT (B2modb <= b);    /* NB: equality iff b = B/2 */
+  cps[3] = B2modb;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+  int cnt;
+  mp_limb_t bi, B1modb;
+  mp_limb_t r0, r1;
+  mp_limb_t r;
+
+  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
+
+  r0 = ap[n-2];
+  r1 = ap[n-1];
+
+  if (n > 2)
+    {
+      mp_limb_t B2modb, B2mb;
+      mp_limb_t p0, p1;
+      mp_limb_t r2;
+      mp_size_t j;
+
+      B2modb = bmodb[3];
+      B2mb = B2modb - b;
+
+      umul_ppmm (p1, p0, r1, B2modb);
+      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
+
+      for (j = n-4; j >= 0; j--)
+	{
+	  mp_limb_t cy;
+	  /* mp_limb_t t = r0 + B2mb; */
+	  umul_ppmm (p1, p0, r1, B2modb);
+
+	  ADDC_LIMB (cy, r0, r0, r2 & B2modb);
+	  /* Alternative, for cmov: if (cy) r0 = t; */
+	  r0 -= (-cy) & b;
+	  add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+	}
+
+      r1 -= (r2 & b);
+    }
+
+  cnt = bmodb[1];
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t t;
+      mp_limb_t B1modb = bmodb[2];
+
+      umul_ppmm (r1, t, r1, B1modb);
+      r0 += t;
+      r1 += (r0 < t);
+
+      /* Normalize */
+      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+      r0 <<= cnt;
+
+      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
+    }
+  else
+    {
+      mp_limb_t mask = -(mp_limb_t) (r1 >= b);
+      r1 -= mask & b;
+    }
+
+  bi = bmodb[0];
+
+  udiv_rnnd_preinv (r, r1, r0, b, bi);
+  return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 2 */
diff --git a/third_party/gmp/mpn/generic/mod_1_2.c b/third_party/gmp/mpn/generic/mod_1_2.c
new file mode 100644
index 0000000..b00d19e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_1_2.c
@@ -0,0 +1,148 @@
+/* mpn_mod_1s_2p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 2.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 2);
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];
+    for (i = 3; i <= 4; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+
+  if ((n & 1) != 0)
+    {
+      if (n == 1)
+	{
+	  rl = ap[n - 1];
+	  bi = cps[0];
+	  cnt = cps[1];
+	  udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
+			     rl << cnt, b, bi);
+	  return r >> cnt;
+	}
+
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n--;
+    }
+  else
+    {
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+    }
+
+  for (i = n - 4; i >= 0; i -= 2)
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^2 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^3 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, rl, B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
diff --git a/third_party/gmp/mpn/generic/mod_1_3.c b/third_party/gmp/mpn/generic/mod_1_3.c
new file mode 100644
index 0000000..4d4be5d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_1_3.c
@@ -0,0 +1,156 @@
+/* mpn_mod_1s_3p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 3.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 3);
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;
+
+  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+  cps[5] = B4modb >> cnt;
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];
+    for (i = 3; i <= 5; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+
+  /* We compute n mod 3 by multiplying by the modular inverse of 3; this
+     works for all n except values so close to the maximum mp_size_t that
+     they need not be supported.  The final cast to int is a workaround
+     for HP cc.  */
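+  /* For example, with 64-bit limbs and no nails, MODLIMB_INVERSE_3 is
+     0xAAAAAAAAAAAAAAAB, and n = 4 gives 4 * MODLIMB_INVERSE_3 mod 2^64
+     = 0xAAAAAAAAAAAAAAAC, whose top two bits are 2, selecting the
+     "n mod 3 = 1" case below.  */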
+  switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
+    {
+    case 0:
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    case 2:	/* n mod 3 = 1 */
+      rh = 0;
+      rl = ap[n - 1];
+      n -= 1;
+      break;
+    case 1:	/* n mod 3 = 2 */
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+      n -= 2;
+      break;
+    }
+
+  for (i = n - 3; i >= 0; i -= 3)
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^3 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^4 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B4modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
diff --git a/third_party/gmp/mpn/generic/mod_1_4.c b/third_party/gmp/mpn/generic/mod_1_4.c
new file mode 100644
index 0000000..80b42ba
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_1_4.c
@@ -0,0 +1,170 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 4.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 4);
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;
+
+  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+  cps[5] = B4modb >> cnt;
+
+  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
+  cps[6] = B5modb >> cnt;
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];
+    for (i = 3; i <= 6; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+  B5modb = cps[6];
+
+  switch (n & 3)
+    {
+    case 0:
+      umul_ppmm (ph, pl, ap[n - 3], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
+      umul_ppmm (ch, cl, ap[n - 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+      umul_ppmm (rh, rl, ap[n - 1], B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 4;
+      break;
+    case 1:
+      rh = 0;
+      rl = ap[n - 1];
+      n -= 1;
+      break;
+    case 2:
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+      n -= 2;
+      break;
+    case 3:
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    }
+
+  for (i = n - 4; i >= 0; i -= 4)
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
+	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, ap[i + 3], B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B4modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B5modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
diff --git a/third_party/gmp/mpn/generic/mod_34lsub1.c b/third_party/gmp/mpn/generic/mod_34lsub1.c
new file mode 100644
index 0000000..af9c6c6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mod_34lsub1.c
@@ -0,0 +1,128 @@
+/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
+   The remainder is not fully reduced; it is merely some limb value
+   congruent to {p,n} modulo that divisor.
+
+   This implementation is only correct when GMP_NUMB_BITS is a multiple of
+   4.
+
+   FIXME: If GMP_NAIL_BITS is some silly big value during development then
+   it's possible the carry accumulators c0,c1,c2 could overflow.
+
+   General notes:
+
+   The basic idea is to use a set of N accumulators (N=3 in this case) to
+   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
+   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
+   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
+   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
+
+   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
+   a few more primes than a single accumulator N=1 does, and for no extra
+   cost (assuming the processor has a decent number of registers).
+
+   For strange nailified values of GMP_NUMB_BITS the idea would be to look
+   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
+   the choices for M may be opened up a bit.  But such things are probably
+   best done in separate code, not grafted on here.  */
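+
+/* As a concrete illustration: with GMP_NUMB_BITS == 64 the divisor is
+   2^48 - 1 = (2^24 - 1)(2^24 + 1), which contains the small prime
+   factors 3, 5, 7, 13 and 17 among others, exactly the kind of factor
+   set the notes above aim for.  */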
+
+#if GMP_NUMB_BITS % 4 == 0
+
+#define B1  (GMP_NUMB_BITS / 4)
+#define B2  (B1 * 2)
+#define B3  (B1 * 3)
+
+#define M1  ((CNST_LIMB(1) << B1) - 1)
+#define M2  ((CNST_LIMB(1) << B2) - 1)
+#define M3  ((CNST_LIMB(1) << B3) - 1)
+
+#define LOW0(n)      ((n) & M3)
+#define HIGH0(n)     ((n) >> B3)
+
+#define LOW1(n)      (((n) & M2) << B1)
+#define HIGH1(n)     ((n) >> B2)
+
+#define LOW2(n)      (((n) & M1) << B2)
+#define HIGH2(n)     ((n) >> B1)
+
+#define PARTS0(n)    (LOW0(n) + HIGH0(n))
+#define PARTS1(n)    (LOW1(n) + HIGH1(n))
+#define PARTS2(n)    (LOW2(n) + HIGH2(n))
+
+#define ADD(c,a,val)                    \
+  do {                                  \
+    mp_limb_t  new_c;                   \
+    ADDC_LIMB (new_c, a, a, val);       \
+    (c) += new_c;                       \
+  } while (0)
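+
+/* The PARTS macros fold modulo 2^B3 - 1: since 2^B3 == 1 (mod 2^B3 - 1),
+   PARTS0(n) == n, while PARTS1 and PARTS2 simultaneously fold and
+   multiply by 2^B1 and 2^B2, matching the limb weight B == 2^(4*B1)
+   == 2^B1 (mod 2^B3 - 1) and its square.  */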
+
+mp_limb_t
+mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+  mp_limb_t  c0, c1, c2;
+  mp_limb_t  a0, a1, a2;
+
+  ASSERT (n >= 1);
+  ASSERT (n/3 < GMP_NUMB_MAX);
+
+  a0 = a1 = a2 = 0;
+  c0 = c1 = c2 = 0;
+
+  while ((n -= 3) >= 0)
+    {
+      ADD (c0, a0, p[0]);
+      ADD (c1, a1, p[1]);
+      ADD (c2, a2, p[2]);
+      p += 3;
+    }
+
+  if (n != -3)
+    {
+      ADD (c0, a0, p[0]);
+      if (n != -2)
+	ADD (c1, a1, p[1]);
+    }
+
+  return
+    PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
+    + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/mode1o.c b/third_party/gmp/mpn/generic/mode1o.c
new file mode 100644
index 0000000..9ba0ae1
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mode1o.c
@@ -0,0 +1,235 @@
+/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Calculate an r satisfying
+
+           r*B^k + a - c == q*d
+
+   where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+   (the caller won't know which), and q is the quotient (discarded).  d must
+   be odd, c can be any limb value.
+
+   If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
+
+   This slightly strange function suits the initial Nx1 reduction for GCDs
+   or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
+   -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
+   ignored, or for the Jacobi symbol it can be accounted for.  The function
+   also suits divisibility and congruence testing since if r=0 (or r=d) is
+   obtained then a==c mod d.
+
+
+   r is a bit like the remainder returned by mpn_divexact_by3c, and is the
+   sort of remainder mpn_divexact_1 might return.  Like mpn_divexact_by3c, r
+   represents a borrow, since effectively quotient limbs are chosen so that
+   subtracting that multiple of d from src at each step will produce a zero
+   limb.
+
+   A long calculation can be done piece by piece from low to high by passing
+   the return value from one part as the carry parameter to the next part.
+   The effective final k becomes anything between size and size-n, if n
+   pieces are used.
+
+
+   A similar sort of routine could be constructed based on adding multiples
+   of d at each limb, much like redc in mpz_powm does.  Subtracting however
+   has a small advantage that when subtracting to cancel out l there's never
+   a borrow into h, whereas using an addition would put a carry into h
+   depending whether l==0 or l!=0.
+
+
+   In terms of efficiency, this function is similar to a mul-by-inverse
+   mpn_mod_1.  Both are essentially two multiplies and are best suited to
+   CPUs with low latency multipliers (in comparison to a divide instruction
+   at least.)  But modexact has slightly fewer supplementary operations, only
+   needs low part and high part multiplies, and has fewer working quantities
+   (helping CPUs with few registers).
+
+
+   In the main loop it will be noted that the new carry (call it r) is the
+   sum of the high product h and any borrow from l=s-c.  If c<d then we will
+   have r<d too, for the following reasons.  Let q=l*inverse be the quotient
+   limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS.  Now if h=d-1 then
+
+       l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
+
+   But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
+   never have h=d-1 and so r=h+borrow <= d-1.
+
+   When c>=d, on the other hand, h=d-1 can certainly occur together with a
+   borrow, thereby giving only r<=d, as per the function definition above.
+
+   As a design decision it's left to the caller to check for r=d if it might
+   be passing c>=d.  Several applications have c<d initially so the extra
+   test is often unnecessary, for example the GCDs or a plain divisibility
+   d|a test will pass c=0.
+
+
+   The special case for size==1 is so that it can be assumed c<=d in the
+   high<=divisor test at the end.  c<=d is only guaranteed after at least
+   one iteration of the main loop.  There's also a decent chance one % is
+   faster than a binvert_limb, though that will depend on the processor.
+
+   A CPU specific implementation might want to omit the size==1 code or the
+   high<divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither
+   useful.  */
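+
+/* A small worked instance (illustrative values only): with 32-bit limbs,
+   src = {21, 0}, size = 2, d = 7 and c = 0, we need r*B^2 + 21 == q*7;
+   since 7 divides 21 and B^2 is invertible mod 7, the result is r = 0,
+   the divisibility-test usage described above.  */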
+
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
+                     mp_limb_t orig_c)
+{
+  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
+  mp_limb_t  c = orig_c;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (d);
+  ASSERT_LIMB (c);
+
+  if (size == 1)
+    {
+      s = src[0];
+      if (s > c)
+	{
+	  l = s-c;
+	  h = l % d;
+	  if (h != 0)
+	    h = d - h;
+	}
+      else
+	{
+	  l = c-s;
+	  h = l % d;
+	}
+      return h;
+    }
+
+
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  i = 0;
+  do
+    {
+      s = src[i];
+      SUBC_LIMB (c, l, s, c);
+      l = (l * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, l, dmul);
+      c += h;
+    }
+  while (++i < size-1);
+
+
+  s = src[i];
+  if (s <= d)
+    {
+      /* With high<=d the final step can be a subtract and addback.  If c==0
+	 then the addback will restore to l>=0.  If c==d then we will get l==d
+	 if s==0, but that's ok per the function definition.  */
+
+      l = c - s;
+      if (c < s)
+	l += d;
+
+      ret = l;
+    }
+  else
+    {
+      /* Can't skip a divide, just do the loop code once more. */
+
+      SUBC_LIMB (c, l, s, c);
+      l = (l * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, l, dmul);
+      c += h;
+      ret = c;
+    }
+
+  ASSERT (orig_c < d ? ret < d : ret <= d);
+  return ret;
+}
+
+
+
+#if 0
+
+/* The following is an alternate form that might shave one cycle on a
+   superscalar processor since it takes c+=h off the dependent chain,
+   leaving just a low product, high product, and a subtract.
+
+   This is for CPU specific implementations to consider.  A special case for
+   high<divisor and/or size==1 can be added if desired.
+
+   Notice that c is only ever 0 or 1, since if s-c produces a borrow then
+   x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
+   c=(x==0xFF..FF) too, if that helped.  */
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
+{
+  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
+  mp_limb_t  c = 0;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  for (i = 0; i < size; i++)
+    {
+      ASSERT (c==0 || c==1);
+
+      s = src[i];
+      SUBC_LIMB (c1, x, s, c);
+
+      SUBC_LIMB (c2, y, x, h);
+      c = c1 + c2;
+
+      y = (y * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, y, dmul);
+    }
+
+  h += c;
+  return h;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/mu_bdiv_q.c b/third_party/gmp/mpn/generic/mu_bdiv_q.c
new file mode 100644
index 0000000..0ef3bd8
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,281 @@
+/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn,
+   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
+   overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+		 D >= 1
+		 D odd
+		 dn >= 2
+		 nn >= 2
+		 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
+	  particular, when dn==in, tp and rp could use the same space.
+   FIXME: Trim final quotient calculation to qn limbs of precision.
+*/
+static void
+mpn_mu_bdiv_q_old (mp_ptr qp,
+	       mp_srcptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t in;
+  int cy, c0;
+  mp_size_t tn, wn;
+
+  qn = nn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+			|________|   divisor  */
+
+#define ip           scratch			/* in */
+#define rp           (scratch + in)		/* dn or rest >= binvert_itch(in) */
+#define tp           (scratch + in + dn)	/* dn+in or next_size(dn) */
+#define scratch_out  (scratch + in + dn + tn)	/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
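+      /* E.g. (illustrative sizes) qn = 10, dn = 4 give b = 3 and in = 4,
+	 so the quotient is produced in chunks of 4, 4 and 2 limbs by the
+	 loop and the final step below.  */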
+
+      /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and let the upper part of T expand into R. These changes
+	 should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, rp);
+
+      cy = 0;
+
+      MPN_COPY (rp, np, dn);
+      np += dn;
+      mpn_mullo_n (qp, rp, ip, in);
+      qn -= in;
+
+      while (qn > in)
+	{
+	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
+	  else
+	    {
+	      tn = mpn_mulmod_bnm1_next_size (dn);
+	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	      wn = dn + in - tn;		/* number of wrapped limbs */
+	      if (wn > 0)
+		{
+		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+		  mpn_decr_u (tp + wn, c0);
+		}
+	    }
+
+	  qp += in;
+	  if (dn != in)
+	    {
+	      /* Subtract tp[dn-1...in] from partial remainder.  */
+	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	      if (cy == 2)
+		{
+		  mpn_incr_u (tp + dn, 1);
+		  cy = 1;
+		}
+	    }
+	  /* Subtract tp[dn+in-1...dn] from dividend.  */
+	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+	  np += in;
+	  mpn_mullo_n (qp, rp, ip, in);
+	  qn -= in;
+	}
+
+      /* Generate last qn limbs.
+	 FIXME: It should be possible to limit precision here, since qn is
+	 typically somewhat smaller than dn.  No big gains expected.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* mulhi, need tp[qn+in-1...in] */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      qp += in;
+      if (dn != in)
+	{
+	  cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	  if (cy == 2)
+	    {
+	      mpn_incr_u (tp + dn, 1);
+	      cy = 1;
+	    }
+	}
+
+      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
+      mpn_mullo_n (qp, rp, ip, qn);
+
+#undef ip
+#undef rp
+#undef tp
+#undef scratch_out
+   }
+  else
+    {
+      /* |_______________________|   dividend
+		|________________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* qn+in or next_size(qn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
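+      /* in = ceil(qn/2); e.g. a hypothetical qn = 7 gives in = 4.  */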
+
+      mpn_binvert (ip, dp, in, tp);
+
+      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, qn, qp, in);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (qn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
+	  wn = qn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_cmp (tp, np, wn) < 0;
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      mpn_sub_n (tp, np + in, tp + in, qn - in);
+      mpn_mullo_n (qp + in, tp, ip, qn - in);	/* high qn-in quotient limbs */
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+void
+mpn_mu_bdiv_q (mp_ptr qp,
+	       mp_srcptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_ptr scratch)
+{
+  mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
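+  /* mpn_mu_bdiv_q_old computes Q' with N - Q' D divisible by B^nn; negating
+     gives Q = -Q' mod B^nn, i.e. N + Q D divisible by B^nn, cf. the
+     identities in mpn_mu_bdiv_qr (mu_bdiv_qr.c).  */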
+  mpn_neg (qp, qp, nn);
+}
+
+mp_size_t
+mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+  mp_size_t b;
+
+  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+  qn = nn;
+
+  if (qn > dn)
+    {
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	{
+	  tn = dn + in;
+	  itch_out = 0;
+	}
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+	}
+      itches = dn + tn + itch_out;
+    }
+  else
+    {
+      in = qn - (qn >> 1);
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	{
+	  tn = qn + in;
+	  itch_out = 0;
+	}
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (qn);
+	  itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
+	}
+      itches = tn + itch_out;
+    }
+
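+  /* The in limbs of the inverse stay live through both the inversion and
+     the main loop, so they sit on top of whichever scratch need is
+     larger.  */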
+  itch_binvert = mpn_binvert_itch (in);
+  return in + MAX (itches, itch_binvert);
+}
diff --git a/third_party/gmp/mpn/generic/mu_bdiv_qr.c b/third_party/gmp/mpn/generic/mu_bdiv_qr.c
new file mode 100644
index 0000000..540ad73
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mu_bdiv_qr.c
@@ -0,0 +1,312 @@
+/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
+   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
+   and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+		 D >= 1
+		 D odd
+		 dn >= 2
+		 nn >= 2
+		 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn-dn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for the (qn > dn) case; 3*dn might be possible.  In
+	  particular, when dn==in, tp and rp could use the same space.
+*/
+static mp_limb_t
+mpn_mu_bdiv_qr_old (mp_ptr qp,
+		    mp_ptr rp,
+		    mp_srcptr np, mp_size_t nn,
+		    mp_srcptr dp, mp_size_t dn,
+		    mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t in;
+  mp_limb_t cy, c0;
+  mp_size_t tn, wn;
+
+  qn = nn - dn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+			|________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+      /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and let the upper part of T expand into R. These changes
+	 should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, tp);
+
+      MPN_COPY (rp, np, dn);
+      np += dn;
+      cy = 0;
+
+      while (qn > in)
+	{
+	  mpn_mullo_n (qp, rp, ip, in);
+
+	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
+	  else
+	    {
+	      tn = mpn_mulmod_bnm1_next_size (dn);
+	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	      wn = dn + in - tn;		/* number of wrapped limbs */
+	      if (wn > 0)
+		{
+		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+		  mpn_decr_u (tp + wn, c0);
+		}
+	    }
+
+	  qp += in;
+	  qn -= in;
+
+	  if (dn != in)
+	    {
+	      /* Subtract tp[dn-1...in] from partial remainder.  */
+	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	      if (cy == 2)
+		{
+		  mpn_incr_u (tp + dn, 1);
+		  cy = 1;
+		}
+	    }
+	  /* Subtract tp[dn+in-1...dn] from dividend.  */
+	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+	  np += in;
+	}
+
+      /* Generate last qn limbs.  */
+      mpn_mullo_n (qp, rp, ip, qn);
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, qn);		/* mulhi, need tp[qn+in-1...in] */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+	  wn = dn + qn - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      if (dn != qn)
+	{
+	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+	  if (cy == 2)
+	    {
+	      mpn_incr_u (tp + dn, 1);
+	      cy = 1;
+	    }
+	}
+      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+  else
+    {
+      /* |_______________________|   dividend
+		|________________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
+
+      mpn_binvert (ip, dp, in, tp);
+
+      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      qp += in;
+      qn -= in;
+
+      cy = mpn_sub_n (rp, np + in, tp + in, dn);
+      mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, qn);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+	  wn = dn + qn - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+      if (cy == 2)
+	{
+	  mpn_incr_u (tp + dn, 1);
+	  cy = 1;
+	}
+      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+mp_limb_t
+mpn_mu_bdiv_qr (mp_ptr qp,
+		mp_ptr rp,
+		mp_srcptr np, mp_size_t nn,
+		mp_srcptr dp, mp_size_t dn,
+		mp_ptr scratch)
+{
+  mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
+
+  /* R' B^{qn} = U - Q' D
+   *
+   * Q = B^{qn} - Q' (assuming Q' != 0)
+   *
+   * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
+   *          = B^{qn} D + R'
+   */
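+  /* A toy check with decimal digits standing in for limbs (illustrative
+     only): U = 47, D = 3, qn = 1.  Since 1/3 = 7 (mod 10), Q' = 47*7 mod 10
+     = 9 and R' = (47 - 9*3)/10 = 2; then Q = 10 - 9 = 1 and R = R' + D = 5,
+     matching U + Q D = 50 = R B^qn.  */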
+
+  if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
+    {
+      /* Zero quotient. */
+      ASSERT (cy == 0);
+      return 0;
+    }
+  else
+    {
+      mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
+      ASSERT (cy2 >= cy);
+
+      return cy2 - cy;
+    }
+}
+
+
+mp_size_t
+mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+  mp_size_t b;
+
+  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+  qn = nn - dn;
+
+  if (qn > dn)
+    {
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+    }
+  else
+    {
+      in = qn - (qn >> 1);
+    }
+
+  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+    {
+      tn = dn + in;
+      itch_out = 0;
+    }
+  else
+    {
+      tn = mpn_mulmod_bnm1_next_size (dn);
+      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+    }
+
+  itch_binvert = mpn_binvert_itch (in);
+  itches = tn + itch_out;
+  return in + MAX (itches, itch_binvert);
+}
diff --git a/third_party/gmp/mpn/generic/mu_div_q.c b/third_party/gmp/mpn/generic/mu_div_q.c
new file mode 100644
index 0000000..44cfb40
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mu_div_q.c
@@ -0,0 +1,184 @@
+/* mpn_mu_div_q.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/*
+  Things to work on:
+
+  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
+     probably close to optimal, except when mpn_mu_divappr_q fails.
+
+  2. We used to fall back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem, now we multiply and compare.
+     Unfortunately, since mpn_mu_divappr_q does not return the partial
+     remainder, this still isn't optimal.  An mpn_mu_divappr_qr could
+     solve that.
+
+  3. The allocations done here should be made from the scratch area, which
+     then would need to be amended.
+*/
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_mu_div_q (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_ptr scratch)
+{
+  mp_ptr tp, rp;
+  mp_size_t qn;
+  mp_limb_t cy, qh;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn;
+
+  tp = TMP_BALLOC_LIMBS (qn + 1);
+
+  if (qn >= dn)			/* nn >= 2*dn + 1 */
+    {
+       /* |_______________________|   dividend
+			 |________|   divisor  */
+
+      rp = TMP_BALLOC_LIMBS (nn + 1);
+      MPN_COPY (rp + 1, np, nn);
+      rp[0] = 0;
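+      /* Appending a low zero limb to N (presumably) makes mpn_mu_divappr_q
+	 produce qn+1 quotient limbs, so that the extra low limb tp[0] can
+	 absorb the approximation error; see the tp[0] > 4 test below.  */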
+
+      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
+      if (qh != 0)
+	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+
+      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+
+      if (UNLIKELY (cy != 0))
+	{
+	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
+	     canonically reduced, replace the returned value of B^(qn-dn)+eps
+	     by the largest possible value.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn + 1; i++)
+	    tp[i] = GMP_NUMB_MAX;
+	}
+
+      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
+	 smaller than the max error, we cannot trust the quotient.  */
+      if (tp[0] > 4)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+	  mp_ptr pp;
+
+	  pp = rp;
+	  mpn_mul (pp, tp + 1, qn, dp, dn);
+
+	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (pp, np, nn) > 0) /* Off by at most one; no loop needed. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Same as above */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+  else
+    {
+       /* |_______________________|   dividend
+		 |________________|   divisor  */
+
+      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
+	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
+	 the most significant dn-1 limbs will actually be read, but it is not
+	 pretty.  */
+
+      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
+			     dp + dn - (qn + 1), qn + 1, scratch);
+
+      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
+         error from the divisor truncation.  */
+      if (tp[0] > 6)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+
+	  /* FIXME: a shorter product should be enough; we may use already
+	     allocated space... */
+	  rp = TMP_BALLOC_LIMBS (nn);
+	  mpn_mul (rp, dp, dn, tp + 1, qn);
+
+	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (rp, np, nn) > 0) /* Off by at most one; no loop needed. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Same as above */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_size_t
+mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn;
+
+  qn = nn - dn;
+  if (qn >= dn)
+    {
+      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
+    }
+  else
+    {
+      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mu_div_qr.c b/third_party/gmp/mpn/generic/mu_div_qr.c
new file mode 100644
index 0000000..8b9c702
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mu_div_qr.c
@@ -0,0 +1,417 @@
+/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.
+
+   Compute Q = floor(N / D) and R = N-QD.  N is nn limbs and D is dn limbs and
+   must be normalized, and Q must be nn-dn limbs.  The requirement that Q is
+   nn-dn limbs (and not nn-dn+1 limbs) was put in place to allow N to remain
+   unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+ Things to work on:
+
+  * This isn't optimal when the quotient isn't needed, as it might take a lot
+    of space.  The computation is always needed, though, so there is no time to
+    save with special code.
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+
+/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly.  It gives a
+   speedup according to old measurements, but does the decision mechanism
+   really make sense?  It seems like the ratio between dn and qn might be
+   what we really should be checking.  */
+#ifndef MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 100
+#endif
+
+#ifdef CHECK				/* FIXME: Enable in minithres */
+#undef  MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 1
+#endif
+
+
+static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
+
+
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+	       mp_ptr rp,
+	       mp_srcptr np,
+	       mp_size_t nn,
+	       mp_srcptr dp,
+	       mp_size_t dn,
+	       mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, qh;
+
+  qn = nn - dn;
+  if (qn + MU_DIV_QR_SKEW_THRESHOLD < dn)
+    {
+      /* |______________|_ign_first__|   dividend			  nn
+		|_______|_ign_first__|   divisor			  dn
+
+		|______|	     quotient (prel)			  qn
+
+		 |___________________|   quotient * ignored-divisor-part  dn-1
+      */
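+      /* For illustration (hypothetical sizes): nn = 1000, dn = 900 gives
+	 qn = 100, so we divide the top 2*qn+1 = 201 limbs of N by the top
+	 qn+1 = 101 limbs of D, then correct for the ignored low part of D.  */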
+
+      /* Compute a preliminary quotient and a partial remainder by dividing the
+	 most significant limbs of each operand.  */
+      qh = mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
+			   np + nn - (2 * qn + 1), 2 * qn + 1,
+			   dp + dn - (qn + 1), qn + 1,
+			   scratch);
+
+      /* Multiply the quotient by the divisor limbs ignored above.  */
+      if (dn - (qn + 1) > qn)
+	mpn_mul (scratch, dp, dn - (qn + 1), qp, qn);  /* prod is dn-1 limbs */
+      else
+	mpn_mul (scratch, qp, qn, dp, dn - (qn + 1));  /* prod is dn-1 limbs */
+
+      if (qh)
+	cy = mpn_add_n (scratch + qn, scratch + qn, dp, dn - (qn + 1));
+      else
+	cy = 0;
+      scratch[dn - 1] = cy;
+
+      cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
+      cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
+		       rp + nn - (2 * qn + 1),
+		       scratch + nn - (2 * qn + 1),
+		       qn + 1, cy);
+      if (cy)
+	{
+	  qh -= mpn_sub_1 (qp, qp, qn, 1);
+	  mpn_add_n (rp, rp, dp, dn);
+	}
+    }
+  else
+    {
+      qh = mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+    }
+
+  return qh;
+}
+
+static mp_limb_t
+mpn_mu_div_qr2 (mp_ptr qp,
+		mp_ptr rp,
+		mp_srcptr np,
+		mp_size_t nn,
+		mp_srcptr dp,
+		mp_size_t dn,
+		mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_div_qr_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) temp allocation needs are not analysed, (2) the itch
+     function is not adapted, (3) mpn_invertappr scratch needs are not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+	MPN_ZERO (ip, in);
+      else
+	{
+	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+mp_limb_t
+mpn_preinv_mu_div_qr (mp_ptr qp,
+		      mp_ptr rp,
+		      mp_srcptr np,
+		      mp_size_t nn,
+		      mp_srcptr dp,
+		      mp_size_t dn,
+		      mp_srcptr ip,
+		      mp_size_t in,
+		      mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+#define tp           scratch
+#define scratch_out  (scratch + tn)
+
+  qn = nn - dn;
+
+  np += qn;
+  qp += qn;
+
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY_INCR (rp, np, dn);
+
+  /* if (qn == 0) */			/* The while below handles this case */
+  /*   return qh; */			/* Degenerate use.  Should we allow this? */
+
+  while (qn > 0)
+    {
+      if (qn < in)
+	{
+	  ip += in - qn;
+	  in = qn;
+	}
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+	 by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
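+      /* The approximate inverse's most significant limb is an implicit 1
+	 (the "I's msb implicit" note above); its contribution to the product
+	 is just R's high in limbs, which the mpn_add_n adds back.  */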
+
+      qn -= in;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+	 subtracted from the partial remainder combined with new limbs from the
+	 dividend N.  We only really need the low dn+1 limbs.  */
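+      /* In the wrapped branch below, the product is computed mod B^tn - 1;
+	 the wn limbs that wrapped around are removed again by exploiting
+	 that the product's top limbs (approximately) agree with R.  */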
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+	      ASSERT_ALWAYS (cx >= cy);
+	      mpn_incr_u (tp, cx - cy);
+	    }
+	}
+
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+	 limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+	{
+	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
+	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
+	}
+      else
+	{
+	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
+	}
+
+      STAT (int i; int err = 0;
+	    static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+	{
+	  /* We loop 0 times with about 69% probability, 1 time with about 31%
+	     probability, 2 times with about 0.6% probability, if inverse is
+	     computed as recommended.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  r -= cy;
+	  STAT (err++);
+	}
+      if (mpn_cmp (rp, dp, dn) >= 0)
+	{
+	  /* This is executed with about 76% probability.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  STAT (err++);
+	}
+
+      STAT (
+	    tot++;
+	    errarr[err]++;
+	    if (err > err_rec)
+	      err_rec = err;
+	    if (tot % 0x10000 == 0)
+	      {
+		for (i = 0; i <= err_rec; i++)
+		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+		printf ("\n");
+	      }
+	    );
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn < dn/3:       in = qn
+   In all cases we have in <= dn.
+ */
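+/* E.g. (hypothetical sizes): qn = 10, dn = 4 is case (a), giving
+   in = ceil(10/ceil(10/4)) = ceil(10/3) = 4; qn = 3, dn = 4 is case (b),
+   giving in = ceil(3/2) = 2.  */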
+static mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
+mp_size_t
+mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+  mp_size_t itch_preinv = mpn_preinv_mu_div_qr_itch (nn, dn, in);
+  mp_size_t itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+  ASSERT (itch_preinv >= itch_invapp);
+  return in + MAX (itch_invapp, itch_preinv);
+}
+
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+
+  return itch_local + itch_out;
+}
diff --git a/third_party/gmp/mpn/generic/mu_divappr_q.c b/third_party/gmp/mpn/generic/mu_divappr_q.c
new file mode 100644
index 0000000..c022b4f
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mu_divappr_q.c
@@ -0,0 +1,368 @@
+/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.
+
+   Compute Q = floor(N / D) + e.  N is nn limbs, D is dn limbs and must be
+   normalized, and Q must be nn-dn limbs, 0 <= e <= 4.  The requirement that Q
+   is nn-dn limbs (and not nn-dn+1 limbs) was put in place to allow N to
+   remain unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+ Things to work on:
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t,
+			 mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
+
+mp_limb_t
+mpn_mu_divappr_q (mp_ptr qp,
+		  mp_srcptr np,
+		  mp_size_t nn,
+		  mp_srcptr dp,
+		  mp_size_t dn,
+		  mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* If Q is smaller than D, truncate operands. */
+  if (qn + 1 < dn)
+    {
+      np += dn - (qn + 1);
+      nn -= dn - (qn + 1);
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
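+  /* E.g. (hypothetical sizes): nn = 10, dn = 8 gives qn = 2; the low
+     dn - (qn+1) = 5 limbs of N and D are dropped, leaving nn = 5 and
+     dn = 3.  The dropped limbs can only influence the quotient within the
+     error bound e.  */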
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) temp allocation needs are not analysed, (2) the itch
+     function is not adapted, (3) mpn_invertappr scratch needs are not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+	MPN_ZERO (ip, in);
+      else
+	{
+	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+static mp_limb_t
+mpn_preinv_mu_divappr_q (mp_ptr qp,
+			 mp_srcptr np,
+			 mp_size_t nn,
+			 mp_srcptr dp,
+			 mp_size_t dn,
+			 mp_srcptr ip,
+			 mp_size_t in,
+			 mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+#define rp           scratch
+#define tp           (scratch + dn)
+#define scratch_out  (scratch + dn + tn)
+
+  qn = nn - dn;
+
+  np += qn;
+  qp += qn;
+
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY (rp, np, dn);
+
+  if (qn == 0)
+    return qh;			/* Degenerate use.  Should we allow this? */
+
+  while (qn > 0)
+    {
+      if (qn < in)
+	{
+	  ip += in - qn;
+	  in = qn;
+	}
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+	 by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+      if (qn == 0)
+	break;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+	 subtracted from the partial remainder combined with new limbs from the
+	 dividend N.  We only really need the low dn limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+	      ASSERT_ALWAYS (cx >= cy);
+	      mpn_incr_u (tp, cx - cy);
+	    }
+	}
+
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+	 limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+	{
+	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
+	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
+	}
+      else
+	{
+	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
+	}
+
+      STAT (int i; int err = 0;
+	    static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+	{
+	  /* We loop 0 times with about 69% probability, 1 time with about 31%
+	     probability, 2 times with about 0.6% probability, if inverse is
+	     computed as recommended.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  r -= cy;
+	  STAT (err++);
+	}
+      if (mpn_cmp (rp, dp, dn) >= 0)
+	{
+	  /* This is executed with about 76% probability.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  STAT (err++);
+	}
+
+      STAT (
+	    tot++;
+	    errarr[err]++;
+	    if (err > err_rec)
+	      err_rec = err;
+	    if (tot % 0x10000 == 0)
+	      {
+		for (i = 0; i <= err_rec; i++)
+		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+		printf ("\n");
+	      }
+	    );
+    }
+
+  /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
+     quotient.  For now, just make sure the returned quotient is >= the real
+     quotient; add 3 with saturating arithmetic.  */
+  qn = nn - dn;
+  cy += mpn_add_1 (qp, qp, qn, 3);
+  if (cy != 0)
+    {
+      if (qh != 0)
+	{
+	  /* Return a quotient of just 1-bits, with qh set.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn; i++)
+	    qp[i] = GMP_NUMB_MAX;
+	}
+      else
+	{
+	  /* Propagate carry into qh.  */
+	  qh = 1;
+	}
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn < dn/3:       in = qn
+   In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
+mp_size_t
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn, in, itch_local, itch_out, itch_invapp;
+
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dn = qn + 1;
+    }
+  in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
+
+  itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+  itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+  ASSERT (dn + itch_local + itch_out >= itch_invapp);
+  return in + MAX (dn + itch_local + itch_out, itch_invapp);
+}
diff --git a/third_party/gmp/mpn/generic/mul.c b/third_party/gmp/mpn/generic/mul.c
new file mode 100644
index 0000000..37444e9
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mul.c
@@ -0,0 +1,441 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012,
+2014, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#ifndef MUL_BASECASE_MAX_UN
+#define MUL_BASECASE_MAX_UN 500
+#endif
+
+/* Areas where the different toom algorithms can be called (extracted
+   from the t-toom*.c files, and ignoring small constant offsets):
+
+   1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un
+                                        4/7              6/7
+				       6/11
+                                       |--------------------| toom22 (small)
+                                                           || toom22 (large)
+                                                       |xxxx| toom22 called
+                      |-------------------------------------| toom32
+                                         |xxxxxxxxxxxxxxxx| | toom32 called
+                                               |------------| toom33
+                                                          |x| toom33 called
+             |---------------------------------|            | toom42
+	              |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called
+                                       |--------------------| toom43
+                                               |xxxxxxxxxx|   toom43 called
+         |-----------------------------|                      toom52 (unused)
+                                                   |--------| toom44
+						   |xxxxxxxx| toom44 called
+                              |--------------------|        | toom53
+                                        |xxxxxx|              toom53 called
+    |-------------------------|                               toom62 (unused)
+                                           |----------------| toom54 (unused)
+                      |--------------------|                  toom63
+	                      |xxxxxxxxx|                   | toom63 called
+                          |---------------------------------| toom6h
+						   |xxxxxxxx| toom6h called
+                                  |-------------------------| toom8h (32 bit)
+                 |------------------------------------------| toom8h (64 bit)
+						   |xxxxxxxx| toom8h called
+*/
+
+#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
+#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
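+/* Roughly bn > 2an/3 resp. bn > 3an/4: the higher Toom variants demand
+   increasingly balanced operands.  */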
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+   (pointed to by VP, with VN limbs), and store the result at PRODP.  The
+   result is UN + VN limbs.  Return the most significant limb of the result.
+
+   NOTE: The space pointed to by PRODP is overwritten before U and V are
+   fully read, so overlap is an error.
+
+   Argument constraints:
+   1. UN >= VN.
+   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+      the multiplier and the multiplicand.  */
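+/* A minimal usage sketch (values are hypothetical, not part of this file):
+
+     mp_limb_t up[4] = {1, 2, 3, 4}, vp[2] = {5, 6};
+     mp_limb_t prodp[4 + 2];
+     mp_limb_t msl = mpn_mul (prodp, up, 4, vp, 2);
+
+   All 6 product limbs are stored at prodp, and msl == prodp[5].  */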
+
+/*
+  * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
+    ideal lines of the surrounding algorithms.  Is that optimal?
+
+  * The toomX3 code now uses a structure similar to the one of toomX2, except
+    that it loops longer in the unbalanced case.  The result is that the
+    remaining area might have un < vn.  Should we fix the toomX2 code in a
+    similar way?
+
+  * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
+    therefore calls mpn_mul recursively for certain cases.
+
+  * Allocate static temp space using THRESHOLD variables (except for toom44
+    when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
+
+  * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
+    have the same vn threshold.  This is not true; we should actually use
+    mul_basecase for slightly larger operands for toom32 than for toom22, and
+    even larger for toom42.
+
+  * That problem is even more prevalent for toomX3.  We therefore use special
+    THRESHOLD variables there.
+*/
+
+mp_limb_t
+mpn_mul (mp_ptr prodp,
+	 mp_srcptr up, mp_size_t un,
+	 mp_srcptr vp, mp_size_t vn)
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
+
+  if (BELOW_THRESHOLD (un, MUL_TOOM22_THRESHOLD))
+    {
+      /* When un (and thus vn) is below the toom22 range, do mul_basecase.
+	 Test un and not vn here so as not to thwart the un >> vn code below.
+	 This special case is not necessary, but cuts the overhead for the
+	 smallest operands. */
+      mpn_mul_basecase (prodp, up, un, vp, vn);
+    }
+  else if (un == vn)
+    {
+      mpn_mul_n (prodp, up, vp, un);
+    }
+  else if (vn < MUL_TOOM22_THRESHOLD)
+    { /* plain schoolbook multiplication */
+
+      /* Unless un is very large, or else if we have an applicable mpn_mul_N,
+	 perform basecase multiply directly.  */
+      if (un <= MUL_BASECASE_MAX_UN
+#if HAVE_NATIVE_mpn_mul_2
+	  || vn <= 2
+#else
+	  || vn == 1
+#endif
+	  )
+	mpn_mul_basecase (prodp, up, un, vp, vn);
+      else
+	{
+	  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
+	     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
+	     these pieces with the vp[] operand.  After each such partial
+	     multiplication (but the last) we copy the most significant vn
+	     limbs into a temporary buffer since that part would otherwise be
+	     overwritten by the next multiplication.  After the next
+	     multiplication, we add it back.  This illustrates the situation:
+
+                                                    -->vn<--
+                                                      |  |<------- un ------->|
+                                                         _____________________|
+                                                        X                    /|
+                                                      /XX__________________/  |
+                                    _____________________                     |
+                                   X                    /                     |
+                                 /XX__________________/                       |
+               _____________________                                          |
+              /                    /                                          |
+            /____________________/                                            |
+	    ==================================================================
+
+	    The parts marked with X are the parts whose sums are copied into
+	    the temporary buffer.  */
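+	  /* For example (hypothetical sizes): un = 1200, vn = 100 and
+	     MUL_BASECASE_MAX_UN = 500 give partial products of 500x100,
+	     500x100 and 200x100 limbs; the top vn limbs of each partial
+	     product (but the last) are saved in tp and added back into the
+	     next one.  */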
+
+	  mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
+	  mp_limb_t cy;
+	  ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+
+	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+	  prodp += MUL_BASECASE_MAX_UN;
+	  MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
+	  up += MUL_BASECASE_MAX_UN;
+	  un -= MUL_BASECASE_MAX_UN;
+	  while (un > MUL_BASECASE_MAX_UN)
+	    {
+	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += MUL_BASECASE_MAX_UN;
+	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
+	      up += MUL_BASECASE_MAX_UN;
+	      un -= MUL_BASECASE_MAX_UN;
+	    }
+	  if (un > vn)
+	    {
+	      mpn_mul_basecase (prodp, up, un, vp, vn);
+	    }
+	  else
+	    {
+	      ASSERT (un > 0);
+	      mpn_mul_basecase (prodp, vp, vn, up, un);
+	    }
+	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+	  mpn_incr_u (prodp + vn, cy);
+	}
+    }
+  else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
+    {
+      /* Use ToomX2 variants */
+      mp_ptr scratch;
+      TMP_SDECL; TMP_SMARK;
+
+#define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)
+      scratch = TMP_SALLOC_LIMBS (ITCH_TOOMX2);
+      ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */
+      ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */
+      ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */
+#undef ITCH_TOOMX2
+
+      /* FIXME: This condition (repeated in the loop below) leaves anything
+	 from a vn*vn square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
+	 wise; we would get better balance by slightly moving the bound.  We
+	 will sometimes end up with un < vn, like in the X3 arm below.  */
+      if (un >= 3 * vn)
+	{
+	  mp_limb_t cy;
+	  mp_ptr ws;
+
+	  /* The maximum ws usage is for the mpn_mul result.  */
+	  ws = TMP_SALLOC_LIMBS (4 * vn);
+
+	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	  un -= 2 * vn;
+	  up += 2 * vn;
+	  prodp += 2 * vn;
+
+	  while (un >= 3 * vn)
+	    {
+	      mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+	      un -= 2 * vn;
+	      up += 2 * vn;
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += 2 * vn;
+	    }
+
+	  /* vn <= un < 3vn */
+
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+	  else
+	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
+	}
+      else
+	{
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+	  else
+	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+	}
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
+	   BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+    {
+      /* Handle the largest operands that are not in the FFT range.  The 2nd
+	 condition makes very unbalanced operands avoid the FFT code (except
+	 perhaps as coefficient products of the Toom code).  */
+
+      if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
+	{
+	  /* Use ToomX3 variants */
+	  mp_ptr scratch;
+	  TMP_DECL; TMP_MARK;
+
+#define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)
+	  scratch = TMP_ALLOC_LIMBS (ITCH_TOOMX3);
+	  ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */
+	  ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */
+	  ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */
+	  ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */
+	  ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+	  ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+#undef ITCH_TOOMX3
+
+	  if (2 * un >= 5 * vn)
+	    {
+	      mp_limb_t cy;
+	      mp_ptr ws;
+
+	      /* The maximum ws usage is for the mpn_mul result.  */
+	      ws = TMP_ALLOC_LIMBS (7 * vn >> 1);
+
+	      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	      else
+		mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	      un -= 2 * vn;
+	      up += 2 * vn;
+	      prodp += 2 * vn;
+
+	      while (2 * un >= 5 * vn)	/* un >= 2.5vn */
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+		  else
+		    mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
+		  un -= 2 * vn;
+		  up += 2 * vn;
+		  cy = mpn_add_n (prodp, prodp, ws, vn);
+		  MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+		  mpn_incr_u (prodp + vn, cy);
+		  prodp += 2 * vn;
+		}
+
+	      /* vn / 2 <= un < 2.5vn */
+
+	      if (un < vn)
+		mpn_mul (ws, vp, vn, up, un);
+	      else
+		mpn_mul (ws, up, un, vp, vn);
+
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, un);
+	      mpn_incr_u (prodp + vn, cy);
+	    }
+	  else
+	    {
+	      if (6 * un < 7 * vn)
+		mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
+	      else if (2 * un < 3 * vn)
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
+		    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+		  else
+		    mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
+		}
+	      else if (6 * un < 11 * vn)
+		{
+		  if (4 * un < 7 * vn)
+		    {
+		      if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
+			mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+		      else
+			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+		    }
+		  else
+		    {
+		      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
+			mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+		      else
+			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+		    }
+		}
+	      else
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+		  else
+		    mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
+		}
+	    }
+	  TMP_FREE;
+	}
+      else
+	{
+	  mp_ptr scratch;
+	  TMP_DECL; TMP_MARK;
+
+	  if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
+	    {
+	      scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
+	      mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
+	    {
+	      scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
+	      mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  else
+	    {
+	      scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
+	      mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  TMP_FREE;
+	}
+    }
+  else
+    {
+      if (un >= 8 * vn)
+	{
+	  mp_limb_t cy;
+	  mp_ptr ws;
+	  TMP_DECL; TMP_MARK;
+
+	  /* The maximum ws usage is for the mpn_mul result.  */
+	  ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
+
+	  mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
+	  un -= 3 * vn;
+	  up += 3 * vn;
+	  prodp += 3 * vn;
+
+	  while (2 * un >= 7 * vn)	/* un >= 3.5vn  */
+	    {
+	      mpn_fft_mul (ws, up, 3 * vn, vp, vn);
+	      un -= 3 * vn;
+	      up += 3 * vn;
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, 3 * vn);
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += 3 * vn;
+	    }
+
+	  /* vn / 2 <= un < 3.5vn */
+
+	  if (un < vn)
+	    mpn_mul (ws, vp, vn, up, un);
+	  else
+	    mpn_mul (ws, up, un, vp, vn);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
+
+	  TMP_FREE;
+	}
+      else
+	mpn_fft_mul (prodp, up, un, vp, vn);
+    }
+
+  return prodp[un + vn - 1];	/* historic */
+}
diff --git a/third_party/gmp/mpn/generic/mul_1.c b/third_party/gmp/mpn/generic/mul_1.c
new file mode 100644
index 0000000..52d46da
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mul_1.c
@@ -0,0 +1,96 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
+   product in a second limb vector.
+
+Copyright 1991-1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t shifted_vl, ul, lpl, hpl, prev_hpl, xw, cl, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  shifted_vl = vl << GMP_NAIL_BITS;
+  cl = 0;
+  prev_hpl = 0;
+  do
+    {
+      ul = *up++;
+
+      umul_ppmm (hpl, lpl, ul, shifted_vl);
+      lpl >>= GMP_NAIL_BITS;
+      xw = prev_hpl + lpl + cl;
+      cl = xw >> GMP_NUMB_BITS;
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_hpl = hpl;
+    }
+  while (--n != 0);
+
+  return prev_hpl + cl;
+}
+
+#endif
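
The carry recurrence in the nail-free loop above folds the previous carry into the low product limb and recovers the overflow with an unsigned compare: lpl += cl; cl = (lpl < cl) + hpl, which cannot itself overflow since hpl <= 2^64 - 2. A standalone 64-bit analogue, assuming unsigned __int128 in place of umul_ppmm (a sketch, not GMP's code):

#include <stdint.h>
#include <stddef.h>

uint64_t mul_1_sketch (uint64_t *rp, const uint64_t *up, size_t n, uint64_t vl)
{
  uint64_t cl = 0;                       /* running carry limb */
  do
    {
      unsigned __int128 t = (unsigned __int128) *up++ * vl;
      uint64_t hpl = (uint64_t) (t >> 64), lpl = (uint64_t) t;
      lpl += cl;
      cl = (lpl < cl) + hpl;             /* overflow bit + high limb */
      *rp++ = lpl;
    }
  while (--n != 0);
  return cl;                             /* carry out of the vector */
}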
diff --git a/third_party/gmp/mpn/generic/mul_basecase.c b/third_party/gmp/mpn/generic/mul_basecase.c
new file mode 100644
index 0000000..2487fba
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mul_basecase.c
@@ -0,0 +1,165 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+   of length m and n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1991-1994, 1996, 1997, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Multiply {up,usize} by {vp,vsize} and write the result to
+   {prodp,usize+vsize}.  Must have usize>=vsize.
+
+   Note that prodp gets usize+vsize limbs stored, even if the actual result
+   only needs usize+vsize-1.
+
+   There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+   Currently this is allowed, but it might not be in the future.
+
+   This is the most critical code for multiplication.  All multiplies rely
+   on this, both small and huge.  Small ones arrive here immediately, huge
+   ones arrive here as this is the base case for Karatsuba's recursive
+   algorithm.  */
+
+void
+mpn_mul_basecase (mp_ptr rp,
+		  mp_srcptr up, mp_size_t un,
+		  mp_srcptr vp, mp_size_t vn)
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
+
+  /* We first multiply by the low order limb (or, depending on optional
+     function availability, limbs).  This result can be stored rather than
+     added into rp, and we also avoid a zeroing loop this way.  */
+
+#if HAVE_NATIVE_mpn_mul_2
+  if (vn >= 2)
+    {
+      rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
+      rp += 2, vp += 2, vn -= 2;
+    }
+  else
+    {
+      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+      return;
+    }
+#else
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+  rp += 1, vp += 1, vn -= 1;
+#endif
+
+  /* Now accumulate the product of up[] and the next higher limb (or depending
+     on optional function availability, limbs) from vp[].  */
+
+#define MAX_LEFT MP_SIZE_T_MAX	/* Used to simplify loops into if statements */
+
+
+#if HAVE_NATIVE_mpn_addmul_6
+  while (vn >= 6)
+    {
+      rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
+      if (MAX_LEFT == 6)
+	return;
+      rp += 6, vp += 6, vn -= 6;
+      if (MAX_LEFT < 2 * 6)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (6 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_5
+  while (vn >= 5)
+    {
+      rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
+      if (MAX_LEFT == 5)
+	return;
+      rp += 5, vp += 5, vn -= 5;
+      if (MAX_LEFT < 2 * 5)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (5 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_4
+  while (vn >= 4)
+    {
+      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
+      if (MAX_LEFT == 4)
+	return;
+      rp += 4, vp += 4, vn -= 4;
+      if (MAX_LEFT < 2 * 4)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (4 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_3
+  while (vn >= 3)
+    {
+      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
+      if (MAX_LEFT == 3)
+	return;
+      rp += 3, vp += 3, vn -= 3;
+      if (MAX_LEFT < 2 * 3)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (3 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+  while (vn >= 2)
+    {
+      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
+      if (MAX_LEFT == 2)
+	return;
+      rp += 2, vp += 2, vn -= 2;
+      if (MAX_LEFT < 2 * 2)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (2 - 1)
+#endif
+
+  while (vn >= 1)
+    {
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+      if (MAX_LEFT == 1)
+	return;
+      rp += 1, vp += 1, vn -= 1;
+    }
+}
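
Stripped of the optional addmul_k ladders, the structure above is one mul_1 that stores the first partial row (saving a zeroing pass) followed by one addmul_1 per remaining limb of v. A minimal sketch of that no-native-addmul path, assuming 64-bit limbs and unsigned __int128 (illustrative names, not GMP's):

#include <stdint.h>
#include <stddef.h>

static uint64_t mul_1x (uint64_t *rp, const uint64_t *up, size_t n,
                        uint64_t vl)
{
  uint64_t cl = 0;
  for (size_t i = 0; i < n; i++)
    {
      unsigned __int128 t = (unsigned __int128) up[i] * vl + cl;
      rp[i] = (uint64_t) t;
      cl = (uint64_t) (t >> 64);
    }
  return cl;
}

static uint64_t addmul_1x (uint64_t *rp, const uint64_t *up, size_t n,
                           uint64_t vl)
{
  uint64_t cl = 0;
  for (size_t i = 0; i < n; i++)
    {
      unsigned __int128 t = (unsigned __int128) up[i] * vl + rp[i] + cl;
      rp[i] = (uint64_t) t;
      cl = (uint64_t) (t >> 64);
    }
  return cl;
}

/* {rp, un+vn} = {up,un} * {vp,vn}: first row stored, later rows added */
void mul_basecase_sketch (uint64_t *rp, const uint64_t *up, size_t un,
                          const uint64_t *vp, size_t vn)
{
  rp[un] = mul_1x (rp, up, un, vp[0]);   /* store: no zeroing loop */
  for (size_t j = 1; j < vn; j++)
    rp[un + j] = addmul_1x (rp + j, up, un, vp[j]);
}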
diff --git a/third_party/gmp/mpn/generic/mul_fft.c b/third_party/gmp/mpn/generic/mul_fft.c
new file mode 100644
index 0000000..df8ee63
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mul_fft.c
@@ -0,0 +1,1041 @@
+/* Schoenhage's fast multiplication modulo 2^N+1.
+
+   Contributed by Paul Zimmermann.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1998-2010, 2012, 2013, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* References:
+
+   Schnelle Multiplikation grosser Zahlen, by Arnold Schoenhage and Volker
+   Strassen, Computing 7, p. 281-292, 1971.
+
+   Asymptotically fast algorithms for the numerical multiplication and division
+   of polynomials with complex coefficients, by Arnold Schoenhage, Computer
+   Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+   Tapes versus Pointers, a study in implementing fast algorithms, by Arnold
+   Schoenhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+   TODO:
+
+   Implement some of the tricks published at ISSAC'2007 by Gaudry, Kruppa, and
+   Zimmermann.
+
+   It might be possible to avoid a small number of MPN_COPYs by using a
+   rotating temporary or two.
+
+   Cleanup and simplify the code!
+*/
+
+#ifdef TRACE
+#undef TRACE
+#define TRACE(x) x
+#include <stdio.h>
+#else
+#define TRACE(x)
+#endif
+
+#include "gmp-impl.h"
+
+#ifdef WANT_ADDSUB
+#include "generic/add_n_sub_n.c"
+#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#endif
+
+static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
+				       mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
+				       mp_size_t, mp_size_t, int **, mp_ptr, int);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, mp_size_t, mp_size_t, mp_srcptr,
+				   mp_size_t, mp_size_t, mp_size_t, mp_ptr);
+
+
+/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
+   We have sqr=0 for a multiply, sqr=1 for a square.
+   There are three generations of this code; we keep the old ones as long as
+   some gmp-mparam.h files have not been updated.  */
+
+
+/*****************************************************************************/
+
+#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+
+#ifndef FFT_TABLE3_SIZE		/* When tuning this is defined in gmp-impl.h */
+#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
+#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#else
+#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
+#endif
+#endif
+#endif
+
+#ifndef FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE 200
+#endif
+
+FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+{
+  MUL_FFT_TABLE3,
+  SQR_FFT_TABLE3
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  const struct fft_table_nk *fft_tab, *tab;
+  mp_size_t tab_n, thres;
+  int last_k;
+
+  fft_tab = mpn_fft_table3[sqr];
+  last_k = fft_tab->k;
+  for (tab = fft_tab + 1; ; tab++)
+    {
+      tab_n = tab->n;
+      thres = tab_n << last_k;
+      if (n <= thres)
+	break;
+      last_k = tab->k;
+    }
+  return last_k;
+}
+
+#define MPN_FFT_BEST_READY 1
+#endif
+
+/*****************************************************************************/
+
+#if ! defined (MPN_FFT_BEST_READY)
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
+{
+  MUL_FFT_TABLE,
+  SQR_FFT_TABLE
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  int i;
+
+  for (i = 0; mpn_fft_table[sqr][i] != 0; i++)
+    if (n < mpn_fft_table[sqr][i])
+      return i + FFT_FIRST_K;
+
+  /* treat 4*last as one further entry */
+  if (i == 0 || n < 4 * mpn_fft_table[sqr][i - 1])
+    return i + FFT_FIRST_K;
+  else
+    return i + FFT_FIRST_K + 1;
+}
+#endif
+
+/*****************************************************************************/
+
+
+/* Returns smallest possible number of limbs >= pl for an fft of size 2^k,
+   i.e. smallest multiple of 2^k >= pl.
+
+   Don't declare static: needed by tuneup.
+*/
+
+mp_size_t
+mpn_fft_next_size (mp_size_t pl, int k)
+{
+  pl = 1 + ((pl - 1) >> k); /* ceil (pl/2^k) */
+  return pl << k;
+}
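
A quick check of the rounding above, and of the fixed-point property that mpn_mul_fft relies on (pl == mpn_fft_next_size (pl, k)); a sketch for checking, not GMP code:

#include <assert.h>

long fft_next_size (long pl, int k)
{
  pl = 1 + ((pl - 1) >> k);  /* ceil (pl/2^k) */
  return pl << k;
}

int main (void)
{
  assert (fft_next_size (100, 4) == 112);  /* ceil(100/16)*16 */
  assert (fft_next_size (112, 4) == 112);  /* already a fixed point */
  return 0;
}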
+
+
+/* Initialize l[i][j] with bitrev(j) */
+static void
+mpn_fft_initl (int **l, int k)
+{
+  int i, j, K;
+  int *li;
+
+  l[0][0] = 0;
+  for (i = 1, K = 1; i <= k; i++, K *= 2)
+    {
+      li = l[i];
+      for (j = 0; j < K; j++)
+	{
+	  li[j] = 2 * l[i - 1][j];
+	  li[K + j] = 1 + li[j];
+	}
+    }
+}
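
The tables built above satisfy l[i][j] = bitrev(j, i), the i-bit reversal of j, computed incrementally from l[i-1]. A direct recomputation for comparison (illustrative):

#include <assert.h>

static int bitrev (int j, int bits)
{
  int r = 0;
  for (int b = 0; b < bits; b++)
    {
      r = (r << 1) | (j & 1);   /* pull bits off the bottom of j */
      j >>= 1;                  /* and push them onto r */
    }
  return r;
}

int main (void)
{
  /* matches l[3][*] as built by mpn_fft_initl: 0 4 2 6 1 5 3 7 */
  assert (bitrev (1, 3) == 4 && bitrev (6, 3) == 3);
  return 0;
}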
+
+
+/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+   Assumes a is semi-normalized, i.e. a[n] <= 1.
+   r and a must have n+1 limbs, and not overlap.
+*/
+static void
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
+{
+  unsigned int sh;
+  mp_size_t m;
+  mp_limb_t cc, rd;
+
+  sh = d % GMP_NUMB_BITS;
+  m = d / GMP_NUMB_BITS;
+
+  if (m >= n)			/* negate */
+    {
+      /* r[0..m-1]  <-- lshift(a[n-m]..a[n-1], sh)
+	 r[m..n-1]  <-- -lshift(a[0]..a[n-m-1],  sh) */
+
+      m -= n;
+      if (sh != 0)
+	{
+	  /* no out shift below since a[n] <= 1 */
+	  mpn_lshift (r, a + n - m, m + 1, sh);
+	  rd = r[m];
+	  cc = mpn_lshiftc (r + m, a, n - m, sh);
+	}
+      else
+	{
+	  MPN_COPY (r, a + n - m, m);
+	  rd = a[n];
+	  mpn_com (r + m, a, n - m);
+	  cc = 0;
+	}
+
+      /* add cc to r[0], and add rd to r[m] */
+
+      /* now add 1 in r[m], subtract 1 in r[n], i.e. add 1 in r[0] */
+
+      r[n] = 0;
+      /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
+      cc++;
+      mpn_incr_u (r, cc);
+
+      rd++;
+      /* rd might overflow when sh=GMP_NUMB_BITS-1 */
+      cc = (rd == 0) ? 1 : rd;
+      r = r + m + (rd == 0);
+      mpn_incr_u (r, cc);
+    }
+  else
+    {
+      /* r[0..m-1]  <-- -lshift(a[n-m]..a[n-1], sh)
+	 r[m..n-1]  <-- lshift(a[0]..a[n-m-1],  sh)  */
+      if (sh != 0)
+	{
+	  /* no out bits below since a[n] <= 1 */
+	  mpn_lshiftc (r, a + n - m, m + 1, sh);
+	  rd = ~r[m];
+	  /* {r, m+1} = {a+n-m, m+1} << sh */
+	  cc = mpn_lshift (r + m, a, n - m, sh); /* {r+m, n-m} = {a, n-m}<<sh */
+	}
+      else
+	{
+	  /* r[m] is not used below, but we save a test for m=0 */
+	  mpn_com (r, a + n - m, m + 1);
+	  rd = a[n];
+	  MPN_COPY (r + m, a, n - m);
+	  cc = 0;
+	}
+
+      /* now complement {r, m}, subtract cc from r[0], subtract rd from r[m] */
+
+      /* if m=0 we just have r[0]=a[n] << sh */
+      if (m != 0)
+	{
+	  /* now add 1 in r[0], subtract 1 in r[m] */
+	  if (cc-- == 0) /* then add 1 to r[0] */
+	    cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+	  cc = mpn_sub_1 (r, r, m, cc) + 1;
+	  /* add 1 to cc instead of rd since rd might overflow */
+	}
+
+      /* now subtract cc and rd from r[m..n] */
+
+      r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
+      r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
+      if (r[n] & GMP_LIMB_HIGHBIT)
+	r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
+    }
+}
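
The reductions above all exploit that 2^N is congruent to -1 mod 2^N+1: limbs shifted past position n come back negated. A toy check with an 8-bit "limb" and F = 2^8+1 = 257 (illustrative): writing x*2^d = H*2^8 + L, the result is L - H mod F.

#include <assert.h>
#include <stdint.h>

int main (void)
{
  const uint32_t F = 257;              /* toy modulus 2^8 + 1 */
  for (uint32_t x = 0; x < F; x++)
    for (unsigned d = 0; d < 8; d++)
      {
        uint32_t t = x << d;
        uint32_t L = t & 0xff, H = t >> 8;
        /* high part wraps around with a sign flip */
        assert ((L + F - H) % F == (uint64_t) x * (1u << d) % F);
      }
  return 0;
}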
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+static inline void
+mpn_fft_add_sub_modF (mp_ptr A0, mp_ptr Ai, mp_srcptr tp, mp_size_t n)
+{
+  mp_limb_t cyas, c, x;
+
+  cyas = mpn_add_n_sub_n (A0, Ai, A0, tp, n);
+
+  c = A0[n] - tp[n] - (cyas & 1);
+  x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+  Ai[n] = x + c;
+  MPN_INCR_U (Ai, n + 1, x);
+
+  c = A0[n] + tp[n] + (cyas >> 1);
+  x = (c - 1) & -(c != 0);
+  A0[n] = c - x;
+  MPN_DECR_U (A0, n + 1, x);
+}
+
+#else /* ! HAVE_NATIVE_mpn_add_n_sub_n  */
+
+/* r <- a+b mod 2^(n*GMP_NUMB_BITS)+1.
+   Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  c = a[n] + b[n] + mpn_add_n (r, a, b, n);
+  /* 0 <= c <= 3 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  x = (c - 1) & -(c != 0);
+  r[n] = c - x;
+  MPN_DECR_U (r, n + 1, x);
+#endif
+#if 0
+  if (c > 1)
+    {
+      r[n] = 1;                       /* r[n] - c = 1 */
+      MPN_DECR_U (r, n + 1, c - 1);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+
+/* r <- a-b mod 2^(n*GMP_NUMB_BITS)+1.
+   Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  c = a[n] - b[n] - mpn_sub_n (r, a, b, n);
+  /* -2 <= c <= 1 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+  r[n] = x + c;
+  MPN_INCR_U (r, n + 1, x);
+#endif
+#if 0
+  if ((c & GMP_LIMB_HIGHBIT) != 0)
+    {
+      r[n] = 0;
+      MPN_INCR_U (r, n + 1, -c);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+#endif /* HAVE_NATIVE_mpn_add_n_sub_n */
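
Both helpers above normalize the carry limb without a branch, via expressions like x = (c - 1) & -(c != 0). A quick standalone check that this selects 0 when c == 0 and c - 1 otherwise, over the carries 0..3 the addition can produce (illustrative):

#include <assert.h>
#include <stdint.h>

int main (void)
{
  for (uint64_t c = 0; c <= 3; c++)
    {
      /* -(c != 0) is all-ones exactly when c is nonzero */
      uint64_t x = (c - 1) & -(uint64_t) (c != 0);
      assert (x == (c == 0 ? 0 : c - 1));
    }
  return 0;
}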
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+	  N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
+	     mp_size_t omega, mp_size_t n, mp_size_t inc, mp_ptr tp)
+{
+  if (K == 2)
+    {
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);
+      mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
+      cy = mpn_sub_n (Ap[inc], tp, Ap[inc], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+	Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1);
+      if (cy) /* Ap[inc][n] can be -1 or -2 */
+	Ap[inc][n] = mpn_add_1 (Ap[inc], Ap[inc], n, ~Ap[inc][n] + 1);
+    }
+  else
+    {
+      mp_size_t j, K2 = K >> 1;
+      int *lk = *ll;
+
+      mpn_fft_fft (Ap,     K2, ll-1, 2 * omega, n, inc * 2, tp);
+      mpn_fft_fft (Ap+inc, K2, ll-1, 2 * omega, n, inc * 2, tp);
+      /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+	 A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+      for (j = 0; j < K2; j++, lk += 2, Ap += 2 * inc)
+	{
+	  /* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
+	     Ap[0]   <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
+	  mpn_fft_mul_2exp_modF (tp, Ap[inc], lk[0] * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+	  mpn_fft_add_sub_modF (Ap[0], Ap[inc], tp, n);
+#else
+	  mpn_fft_sub_modF (Ap[inc], Ap[0], tp, n);
+	  mpn_fft_add_modF (Ap[0],   Ap[0], tp, n);
+#endif
+	}
+    }
+}
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+	  N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+   tp must have space for 2*(n+1) limbs.
+*/
+
+
+/* Given ap[0..n] with ap[n]<=1, reduce it modulo 2^(n*GMP_NUMB_BITS)+1,
+   by subtracting that modulus if necessary.
+
+   If ap[0..n] is exactly 2^(n*GMP_NUMB_BITS) then mpn_sub_1 produces a
+   borrow and the limbs must be zeroed out again.  This will occur very
+   infrequently.  */
+
+static inline void
+mpn_fft_normalize (mp_ptr ap, mp_size_t n)
+{
+  if (ap[n] != 0)
+    {
+      MPN_DECR_U (ap, n + 1, CNST_LIMB(1));
+      if (ap[n] == 0)
+	{
+	  /* This happens with very low probability; we have yet to trigger it,
+	     and thereby make sure this code is correct.  */
+	  MPN_ZERO (ap, n);
+	  ap[n] = 1;
+	}
+      else
+	ap[n] = 0;
+    }
+}
+
+/* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
+static void
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
+{
+  int i;
+  int sqr = (ap == bp);
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      mp_size_t K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+      int k;
+      int **fft_l, *tmp;
+      mp_ptr *Ap, *Bp, A, B, T;
+
+      k = mpn_fft_best_k (n, sqr);
+      K2 = (mp_size_t) 1 << k;
+      ASSERT_ALWAYS((n & (K2 - 1)) == 0);
+      maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
+      M2 = n * GMP_NUMB_BITS >> k;
+      l = n >> k;
+      Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK;
+      /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/
+      nprime2 = Nprime2 / GMP_NUMB_BITS;
+
+      /* we should ensure that nprime2 is a multiple of the next K */
+      if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+	{
+	  mp_size_t K3;
+	  for (;;)
+	    {
+	      K3 = (mp_size_t) 1 << mpn_fft_best_k (nprime2, sqr);
+	      if ((nprime2 & (K3 - 1)) == 0)
+		break;
+	      nprime2 = (nprime2 + K3 - 1) & -K3;
+	      Nprime2 = nprime2 * GMP_LIMB_BITS;
+	      /* warning: since nprime2 changed, K3 may change too! */
+	    }
+	}
+      ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */
+
+      Mp2 = Nprime2 >> k;
+
+      Ap = TMP_BALLOC_MP_PTRS (K2);
+      Bp = TMP_BALLOC_MP_PTRS (K2);
+      A = TMP_BALLOC_LIMBS (2 * (nprime2 + 1) << k);
+      T = TMP_BALLOC_LIMBS (2 * (nprime2 + 1));
+      B = A + ((nprime2 + 1) << k);
+      fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+      tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+      for (i = 0; i <= k; i++)
+	{
+	  fft_l[i] = tmp;
+	  tmp += (mp_size_t) 1 << i;
+	}
+
+      mpn_fft_initl (fft_l, k);
+
+      TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
+		    n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+      for (i = 0; i < K; i++, ap++, bp++)
+	{
+	  mp_limb_t cy;
+	  mpn_fft_normalize (*ap, n);
+	  if (!sqr)
+	    mpn_fft_normalize (*bp, n);
+
+	  mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
+	  if (!sqr)
+	    mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
+
+	  cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
+				     l, Mp2, fft_l, T, sqr);
+	  (*ap)[n] = cy;
+	}
+    }
+  else
+    {
+      mp_ptr a, b, tp, tpn;
+      mp_limb_t cc;
+      mp_size_t n2 = 2 * n;
+      tp = TMP_BALLOC_LIMBS (n2);
+      tpn = tp + n;
+      TRACE (printf ("  mpn_mul_n %ld of %ld limbs\n", K, n));
+      for (i = 0; i < K; i++)
+	{
+	  a = *ap++;
+	  b = *bp++;
+	  if (sqr)
+	    mpn_sqr (tp, a, n);
+	  else
+	    mpn_mul_n (tp, b, a, n);
+	  if (a[n] != 0)
+	    cc = mpn_add_n (tpn, tpn, b, n);
+	  else
+	    cc = 0;
+	  if (b[n] != 0)
+	    cc += mpn_add_n (tpn, tpn, a, n) + a[n];
+	  if (cc != 0)
+	    {
+	      /* FIXME: use MPN_INCR_U here, since carry is not expected.  */
+	      cc = mpn_add_1 (tp, tp, n2, cc);
+	      ASSERT (cc == 0);
+	    }
+	  a[n] = mpn_sub_n (a, tp, tpn, n) && mpn_add_1 (a, a, n, CNST_LIMB(1));
+	}
+    }
+  TMP_FREE;
+}
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+   output: K*A[0] K*A[K-1] ... K*A[1].
+   Assumes the Ap[] are pseudo-normalized, i.e. 0 <= Ap[][n] <= 1.
+   This condition is also fulfilled at exit.
+*/
+static void
+mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+{
+  if (K == 2)
+    {
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);
+      mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
+      cy = mpn_sub_n (Ap[1], tp, Ap[1], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+	Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1);
+      if (cy) /* Ap[1][n] can be -1 or -2 */
+	Ap[1][n] = mpn_add_1 (Ap[1], Ap[1], n, ~Ap[1][n] + 1);
+    }
+  else
+    {
+      mp_size_t j, K2 = K >> 1;
+
+      mpn_fft_fftinv (Ap,      K2, 2 * omega, n, tp);
+      mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);
+      /* A[j]     <- A[j] + omega^j A[j+K/2]
+	 A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+      for (j = 0; j < K2; j++, Ap++)
+	{
+	  /* Ap[K2] <- Ap[0] + Ap[K2] * 2^((j + K2) * omega)
+	     Ap[0]  <- Ap[0] + Ap[K2] * 2^(j * omega) */
+	  mpn_fft_mul_2exp_modF (tp, Ap[K2], j * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+	  mpn_fft_add_sub_modF (Ap[0], Ap[K2], tp, n);
+#else
+	  mpn_fft_sub_modF (Ap[K2], Ap[0], tp, n);
+	  mpn_fft_add_modF (Ap[0],  Ap[0], tp, n);
+#endif
+	}
+    }
+}
+
+
+/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+static void
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
+{
+  mp_bitcnt_t i;
+
+  ASSERT (r != a);
+  i = (mp_bitcnt_t) 2 * n * GMP_NUMB_BITS - k;
+  mpn_fft_mul_2exp_modF (r, a, i, n);
+  /* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */
+  /* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */
+  mpn_fft_normalize (r, n);
+}
+
+
+/* {rp,n} <- {ap,an} mod 2^(n*GMP_NUMB_BITS)+1, n <= an <= 3*n.
+   Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
+   then {rp,n}=0.
+*/
+static mp_size_t
+mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
+{
+  mp_size_t l, m, rpn;
+  mp_limb_t cc;
+
+  ASSERT ((n <= an) && (an <= 3 * n));
+  m = an - 2 * n;
+  if (m > 0)
+    {
+      l = n;
+      /* add {ap, m} and {ap+2n, m} in {rp, m} */
+      cc = mpn_add_n (rp, ap, ap + 2 * n, m);
+      /* copy {ap+m, n-m} to {rp+m, n-m} */
+      rpn = mpn_add_1 (rp + m, ap + m, n - m, cc);
+    }
+  else
+    {
+      l = an - n; /* l <= n */
+      MPN_COPY (rp, ap, n);
+      rpn = 0;
+    }
+
+  /* remains to subtract {ap+n, l} from {rp, n+1} */
+  cc = mpn_sub_n (rp, rp, ap + n, l);
+  rpn -= mpn_sub_1 (rp + l, rp + l, n - l, cc);
+  if (rpn < 0) /* necessarily rpn = -1 */
+    rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
+  return rpn;
+}
+
+/* store in A[0..nprime] the first M bits from {n, nl},
+   in A[nprime+1..] the following M bits, ...
+   Assumes M is a multiple of GMP_NUMB_BITS (M = l * GMP_NUMB_BITS).
+   T must have space for at least (nprime + 1) limbs.
+   We must have nl <= 2*K*l.
+*/
+static void
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
+		       mp_srcptr n, mp_size_t nl, mp_size_t l, mp_size_t Mp,
+		       mp_ptr T)
+{
+  mp_size_t i, j;
+  mp_ptr tmp;
+  mp_size_t Kl = K * l;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
+    {
+      mp_size_t dif = nl - Kl;
+      mp_limb_signed_t cy;
+
+      tmp = TMP_BALLOC_LIMBS(Kl + 1);
+
+      if (dif > Kl)
+	{
+	  int subp = 0;
+
+	  cy = mpn_sub_n (tmp, n, n + Kl, Kl);
+	  n += 2 * Kl;
+	  dif -= Kl;
+
+	  /* now dif > 0 */
+	  while (dif > Kl)
+	    {
+	      if (subp)
+		cy += mpn_sub_n (tmp, tmp, n, Kl);
+	      else
+		cy -= mpn_add_n (tmp, tmp, n, Kl);
+	      subp ^= 1;
+	      n += Kl;
+	      dif -= Kl;
+	    }
+	  /* now dif <= Kl */
+	  if (subp)
+	    cy += mpn_sub (tmp, tmp, Kl, n, dif);
+	  else
+	    cy -= mpn_add (tmp, tmp, Kl, n, dif);
+	  if (cy >= 0)
+	    cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	  else
+	    cy = mpn_sub_1 (tmp, tmp, Kl, -cy);
+	}
+      else /* dif <= Kl, i.e. nl <= 2 * Kl */
+	{
+	  cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
+	  cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	}
+      tmp[Kl] = cy;
+      nl = Kl + 1;
+      n = tmp;
+    }
+  for (i = 0; i < K; i++)
+    {
+      Ap[i] = A;
+      /* store the next M bits of n into A[0..nprime] */
+      if (nl > 0) /* nl is the number of remaining limbs */
+	{
+	  j = (l <= nl && i < K - 1) ? l : nl; /* store j next limbs */
+	  nl -= j;
+	  MPN_COPY (T, n, j);
+	  MPN_ZERO (T + j, nprime + 1 - j);
+	  n += l;
+	  mpn_fft_mul_2exp_modF (A, T, i * Mp, nprime);
+	}
+      else
+	MPN_ZERO (A, nprime + 1);
+      A += nprime + 1;
+    }
+  ASSERT_ALWAYS (nl == 0);
+  TMP_FREE;
+}
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
+   op is pl limbs, its high bit is returned.
+   One must have pl = mpn_fft_next_size (pl, k).
+   T must have space for 2 * (nprime + 1) limbs.
+*/
+
+static mp_limb_t
+mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
+		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+		      int **fft_l, mp_ptr T, int sqr)
+{
+  mp_size_t K, i, pla, lo, sh, j;
+  mp_ptr p;
+  mp_limb_t cc;
+
+  K = (mp_size_t) 1 << k;
+
+  /* direct fft's */
+  mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+  if (!sqr)
+    mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+
+  /* term to term multiplications */
+  mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+
+  /* inverse fft's */
+  mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
+
+  /* division of terms after inverse fft */
+  Bp[0] = T + nprime + 1;
+  mpn_fft_div_2exp_modF (Bp[0], Ap[0], k, nprime);
+  for (i = 1; i < K; i++)
+    {
+      Bp[i] = Ap[i - 1];
+      mpn_fft_div_2exp_modF (Bp[i], Ap[i], k + (K - i) * Mp, nprime);
+    }
+
+  /* addition of terms in result p */
+  MPN_ZERO (T, nprime + 1);
+  pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+  p = B; /* B has K*(n' + 1) limbs, which is >= pla, i.e. enough */
+  MPN_ZERO (p, pla);
+  cc = 0; /* will accumulate the (signed) carry at p[pla] */
+  for (i = K - 1, lo = l * i + nprime, sh = l * i; i >= 0; i--, lo -= l, sh -= l)
+    {
+      mp_ptr n = p + sh;
+
+      j = (K - i) & (K - 1);
+
+      if (mpn_add_n (n, n, Bp[j], nprime + 1))
+	cc += mpn_add_1 (n + nprime + 1, n + nprime + 1,
+			  pla - sh - nprime - 1, CNST_LIMB(1));
+      T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
+      if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
+	{ /* subtract 2^N'+1 */
+	  cc -= mpn_sub_1 (n, n, pla - sh, CNST_LIMB(1));
+	  cc -= mpn_sub_1 (p + lo, p + lo, pla - lo, CNST_LIMB(1));
+	}
+    }
+  if (cc == -CNST_LIMB(1))
+    {
+      if ((cc = mpn_add_1 (p + pla - pl, p + pla - pl, pl, CNST_LIMB(1))))
+	{
+	  /* p[pla-pl]...p[pla-1] are all zero */
+	  mpn_sub_1 (p + pla - pl - 1, p + pla - pl - 1, pl + 1, CNST_LIMB(1));
+	  mpn_sub_1 (p + pla - 1, p + pla - 1, 1, CNST_LIMB(1));
+	}
+    }
+  else if (cc == 1)
+    {
+      if (pla >= 2 * pl)
+	{
+	  while ((cc = mpn_add_1 (p + pla - 2 * pl, p + pla - 2 * pl, 2 * pl, cc)))
+	    ;
+	}
+      else
+	{
+	  cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc);
+	  ASSERT (cc == 0);
+	}
+    }
+  else
+    ASSERT (cc == 0);
+
+  /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+     < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+     < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+  return mpn_fft_norm_modF (op, pl, p, pla);
+}
+
+/* return the lcm of a and 2^k */
+static mp_bitcnt_t
+mpn_mul_fft_lcm (mp_bitcnt_t a, int k)
+{
+  mp_bitcnt_t l = k;
+
+  while (a % 2 == 0 && k > 0)
+    {
+      a >>= 1;
+      k --;
+    }
+  return a << l;
+}
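
mpn_mul_fft_lcm strips the common factors of two from a (at most k of them) before shifting by the original k. A cross-check against the textbook lcm(a,2^k) = a*2^k/gcd(a,2^k), sketched under small bounds (illustrative, for checking only):

#include <assert.h>

typedef unsigned long bitcnt;

static bitcnt gcd (bitcnt a, bitcnt b)
{ while (b) { bitcnt t = a % b; a = b; b = t; } return a; }

int main (void)
{
  for (bitcnt a = 1; a <= 64; a++)
    for (int k = 0; k <= 6; k++)
      {
        bitcnt l = k, x = a, kk = k;
        while (x % 2 == 0 && kk > 0)   /* same loop as above */
          { x >>= 1; kk--; }
        assert ((x << l) == a * (1ul << k) / gcd (a, 1ul << k));
      }
  return 0;
}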
+
+
+mp_limb_t
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+	     mp_srcptr n, mp_size_t nl,
+	     mp_srcptr m, mp_size_t ml,
+	     int k)
+{
+  int i;
+  mp_size_t K, maxLK;
+  mp_size_t N, Nprime, nprime, M, Mp, l;
+  mp_ptr *Ap, *Bp, A, T, B;
+  int **fft_l, *tmp;
+  int sqr = (n == m && nl == ml);
+  mp_limb_t h;
+  TMP_DECL;
+
+  TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n", pl, nl, ml, k));
+  ASSERT_ALWAYS (mpn_fft_next_size (pl, k) == pl);
+
+  TMP_MARK;
+  N = pl * GMP_NUMB_BITS;
+  fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+  tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+  for (i = 0; i <= k; i++)
+    {
+      fft_l[i] = tmp;
+      tmp += (mp_size_t) 1 << i;
+    }
+
+  mpn_fft_initl (fft_l, k);
+  K = (mp_size_t) 1 << k;
+  M = N >> k;	/* N = 2^k M */
+  l = 1 + (M - 1) / GMP_NUMB_BITS;
+  maxLK = mpn_mul_fft_lcm (GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+
+  Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
+  /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
+  nprime = Nprime / GMP_NUMB_BITS;
+  TRACE (printf ("N=%ld K=%ld, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
+		 N, K, M, l, maxLK, Nprime, nprime));
+  /* we should ensure that recursively, nprime is a multiple of the next K */
+  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      mp_size_t K2;
+      for (;;)
+	{
+	  K2 = (mp_size_t) 1 << mpn_fft_best_k (nprime, sqr);
+	  if ((nprime & (K2 - 1)) == 0)
+	    break;
+	  nprime = (nprime + K2 - 1) & -K2;
+	  Nprime = nprime * GMP_LIMB_BITS;
+	  /* warning: since nprime changed, K2 may change too! */
+	}
+      TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+    }
+  ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
+
+  T = TMP_BALLOC_LIMBS (2 * (nprime + 1));
+  Mp = Nprime >> k;
+
+  TRACE (printf ("%ldx%ld limbs -> %ld times %ldx%ld limbs (%1.2f)\n",
+		pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
+	 printf ("   temp space %ld\n", 2 * K * (nprime + 1)));
+
+  A = TMP_BALLOC_LIMBS (K * (nprime + 1));
+  Ap = TMP_BALLOC_MP_PTRS (K);
+  mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
+  if (sqr)
+    {
+      mp_size_t pla;
+      pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+      B = TMP_BALLOC_LIMBS (pla);
+      Bp = TMP_BALLOC_MP_PTRS (K);
+    }
+  else
+    {
+      B = TMP_BALLOC_LIMBS (K * (nprime + 1));
+      Bp = TMP_BALLOC_MP_PTRS (K);
+      mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+    }
+  h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+
+  TMP_FREE;
+  return h;
+}
+
+#if WANT_OLD_FFT_FULL
+/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
+void
+mpn_mul_fft_full (mp_ptr op,
+		  mp_srcptr n, mp_size_t nl,
+		  mp_srcptr m, mp_size_t ml)
+{
+  mp_ptr pad_op;
+  mp_size_t pl, pl2, pl3, l;
+  mp_size_t cc, c2, oldcc;
+  int k2, k3;
+  int sqr = (n == m && nl == ml);
+
+  pl = nl + ml; /* total number of limbs of the result */
+
+  /* perform an fft mod 2^(2N)+1 and one mod 2^(3N)+1.
+     We must have pl3 = 3/2 * pl2, with pl2 a multiple of 2^k2, and
+     pl3 a multiple of 2^k3. Since k3 >= k2, both are multiples of 2^k2,
+     and pl2 must be an even multiple of 2^k2. Thus (pl2,pl3) =
+     (2*j*2^k2,3*j*2^k2), which works for 3*j <= pl/2^k2 <= 5*j.
+     We need that consecutive intervals overlap, i.e. 5*j >= 3*(j+1),
+     which requires j>=2. Thus this scheme requires pl >= 6 * 2^FFT_FIRST_K. */
+
+  /*  ASSERT_ALWAYS(pl >= 6 * (1 << FFT_FIRST_K)); */
+
+  pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
+  do
+    {
+      pl2++;
+      k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
+      pl2 = mpn_fft_next_size (pl2, k2);
+      pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
+			    thus pl2 / 2 is exact */
+      k3 = mpn_fft_best_k (pl3, sqr);
+    }
+  while (mpn_fft_next_size (pl3, k3) != pl3);
+
+  TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl2=%ld pl3=%ld k=%d\n",
+		 nl, ml, pl2, pl3, k2));
+
+  ASSERT_ALWAYS(pl3 <= pl);
+  cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3);     /* mu */
+  ASSERT(cc == 0);
+  pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);
+  cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
+  cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2);    /* lambda - low(mu) */
+  /* 0 <= cc <= 1 */
+  ASSERT(0 <= cc && cc <= 1);
+  l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
+  c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
+  cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
+  ASSERT(-1 <= cc && cc <= 1);
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  ASSERT(0 <= cc && cc <= 1);
+  /* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
+  oldcc = cc;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+  cc += c2 >> 1; /* carry out from high <- low + high */
+  c2 = c2 & 1; /* borrow out from low <- low - high */
+#else
+  {
+    mp_ptr tmp;
+    TMP_DECL;
+
+    TMP_MARK;
+    tmp = TMP_BALLOC_LIMBS (l);
+    MPN_COPY (tmp, pad_op, l);
+    c2 = mpn_sub_n (pad_op,      pad_op, pad_op + l, l);
+    cc += mpn_add_n (pad_op + l, tmp,    pad_op + l, l);
+    TMP_FREE;
+  }
+#endif
+  c2 += oldcc;
+  /* first normalize {pad_op, pl2} before dividing by 2: c2 is the borrow
+     at pad_op + l, cc is the carry at pad_op + pl2 */
+  /* 0 <= cc <= 2 */
+  cc -= mpn_sub_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2);
+  /* -1 <= cc <= 2 */
+  if (cc > 0)
+    cc = -mpn_sub_1 (pad_op, pad_op, pl2, (mp_limb_t) cc);
+  /* now -1 <= cc <= 0 */
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  /* now {pad_op, pl2} is normalized, with 0 <= cc <= 1 */
+  if (pad_op[0] & 1) /* if odd, add 2^(pl2*GMP_NUMB_BITS)+1 */
+    cc += 1 + mpn_add_1 (pad_op, pad_op, pl2, CNST_LIMB(1));
+  /* now 0 <= cc <= 2, but cc=2 cannot occur since it would give a carry
+     out below */
+  mpn_rshift (pad_op, pad_op, pl2, 1); /* divide by two */
+  if (cc) /* then cc=1 */
+    pad_op [pl2 - 1] |= (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
+  /* now {pad_op,pl2}-cc = (lambda-mu)/(1-2^(l*GMP_NUMB_BITS))
+     mod 2^(pl2*GMP_NUMB_BITS) + 1 */
+  c2 = mpn_add_n (op, op, pad_op, pl2); /* no need to add cc (is 0) */
+  /* since pl2+pl3 >= pl, necessarily the extra limbs (including cc) are zero */
+  MPN_COPY (op + pl3, pad_op, pl - pl3);
+  ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
+  __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
+  /* since the final result has at most pl limbs, no carry out below */
+  mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
+}
+#endif
diff --git a/third_party/gmp/mpn/generic/mul_n.c b/third_party/gmp/mpn/generic/mul_n.c
new file mode 100644
index 0000000..36bd923
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mul_n.c
@@ -0,0 +1,96 @@
+/* mpn_mul_n -- multiply natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
+
+  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+    {
+      mpn_mul_basecase (p, a, n, b, n);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
+					MUL_TOOM33_THRESHOLD_LIMIT-1)];
+      ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
+      mpn_toom22_mul (p, a, n, b, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
+      mpn_toom33_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+      mpn_toom44_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
+      mpn_toom6h_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
+      mpn_toom8h_mul (p, a, n, b, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+	 change.  */
+      mpn_fft_mul (p, a, n, b, n);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mullo_basecase.c b/third_party/gmp/mpn/generic/mullo_basecase.c
new file mode 100644
index 0000000..9a4cd3d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mullo_basecase.c
@@ -0,0 +1,90 @@
+/* mpn_mullo_basecase -- Internal routine to multiply two natural
+   numbers of length n and return the low part.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* FIXME: Should optionally use mpn_mul_2/mpn_addmul_2.  */
+
+#ifndef MULLO_VARIANT
+#define MULLO_VARIANT 2
+#endif
+
+
+#if MULLO_VARIANT == 1
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_size_t i;
+
+  mpn_mul_1 (rp, up, n, vp[0]);
+
+  for (i = n - 1; i > 0; i--)
+    {
+      vp++;
+      rp++;
+      mpn_addmul_1 (rp, up, i, vp[0]);
+    }
+}
+#endif
+
+
+#if MULLO_VARIANT == 2
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t h;
+
+  h = up[0] * vp[n - 1];
+
+  if (n != 1)
+    {
+      mp_size_t i;
+      mp_limb_t v0;
+
+      v0 = *vp++;
+      h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
+      rp++;
+
+      for (i = n - 2; i > 0; i--)
+	{
+	  v0 = *vp++;
+	  h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
+	  rp++;
+	}
+    }
+
+  rp[0] = h;
+}
+#endif
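
Both variants compute {rp,n} = {up,n}*{vp,n} mod B^n: every partial product at or above column n, and every carry out of column n-1, is simply dropped. A naive reference with the same semantics, assuming 64-bit limbs and unsigned __int128 (illustrative, for checking only):

#include <stdint.h>
#include <string.h>
#include <stddef.h>

void mullo_ref (uint64_t *rp, const uint64_t *up, const uint64_t *vp,
                size_t n)
{
  memset (rp, 0, n * sizeof (uint64_t));
  for (size_t i = 0; i < n; i++)
    {
      uint64_t cl = 0;
      for (size_t j = 0; j + i < n; j++)   /* only columns below B^n */
        {
          unsigned __int128 t = (unsigned __int128) up[i] * vp[j]
                                + rp[i + j] + cl;
          rp[i + j] = (uint64_t) t;
          cl = (uint64_t) (t >> 64);
        }
      /* carries out of column n-1 are discarded: we work mod B^n */
    }
}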
diff --git a/third_party/gmp/mpn/generic/mullo_n.c b/third_party/gmp/mpn/generic/mullo_n.c
new file mode 100644
index 0000000..6f4e7ae
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mullo_n.c
@@ -0,0 +1,243 @@
+/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
+   of their product.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22   1
+#else
+#define MAYBE_range_basecase                                           \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22                                             \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
+#endif
+
+/*  THINK: The DC strategy uses different constants in different Toom's
+	 ranges. Something smoother?
+*/
+
+/*
+  Compute the least significant half of the product {xp,n}*{yp,n}, or
+  formally {rp,n} = {xp,n}*{yp,n} mod (B^n).
+
+  Above the given threshold, the Divide and Conquer strategy is used.
+  The operands are split in two, and a full product plus two mullo
+  are used to obtain the final result. The more natural strategy is to
+  split in two halves, but this is far from optimal when a
+  sub-quadratic multiplication is used.
+
+  Mulders suggests an unbalanced split in favour of the full product,
+  split n = n1 + n2, where n1 = a*n <= n2 = (1-a)*n; i.e. 0 < a <= 1/2.
+
+  To compute the value of a, we assume that the cost of mullo for a
+  given size ML(n) is a fraction of the cost of a full product with
+  the same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+  then we can write:
+
+  ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, we want to minimise the value of k, i.e. the
+  function k=(1-a)^e/(1-2*a^e).
+
+  With e=2, the exponent for schoolbook multiplication, the minimum is
+  given by the values a=1-a=1/2.
+
+  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+  Other possible approximations follow:
+  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above were obtained with the following trivial commands
+  in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+  ,
+  |\
+  | \
+  +----,
+  |    |
+  |    |
+  |    |\
+  |    | \
+  +----+--`
+  ^ n2 ^n1^
+
+  For an actual implementation the assumption that M(n)=n^e is
+  incorrect, and as a consequence the assumption that ML(n)=k*M(n)
+  with a constant k is wrong as well.
+
+  But theory suggests two things:
+  - the better the multiplication algorithm is (lower e), the more k
+    approaches 1, and a approaches 0.
+
+  - A value for a smaller than optimal is probably less bad than a
+    bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+    value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
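
The quoted constants are easy to reproduce numerically. A small sketch that scans k(a) = (1-a)^e/(1-2*a^e) for the Karatsuba exponent e = log(3)/log(2) (illustrative; the gp-pari script above does the same more carefully):

#include <math.h>
#include <stdio.h>

int main (void)
{
  double e = log (3) / log (2), best_a = 0, best_k = 1e9;
  for (double a = 0.01; a <= 0.5; a += 0.0001)
    {
      double k = pow (1 - a, e) / (1 - 2 * pow (a, e));
      if (k < best_k)
        { best_k = k; best_a = a; }
    }
  /* prints roughly a = 0.3058, 1-a = 0.6942, k = 0.808, matching the
     values quoted in the comment above */
  printf ("a = %.4f  (1-a) = %.4f  k = %.4f\n", best_a, 1 - best_a, best_k);
  return 0;
}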
+
+static mp_size_t
+mpn_mullo_n_itch (mp_size_t n)
+{
+  return 2*n;
+}
+
+/*
+    mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
+    It accepts tp == rp.
+*/
+static void
+mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t n2, n1;
+  ASSERT (n >= 2);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+  /* Divide-and-conquer */
+
+  /* We need fractional approximation of the value 0 < a <= 1/2
+     giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+  */
+  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
+    n1 = n >> 1;
+  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
+    n1 = n * 11 / (size_t) 36;	/* n1 ~= n*(1-.694...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
+    n1 = n * 9 / (size_t) 40;	/* n1 ~= n*(1-.775...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
+    n1 = n * 7 / (size_t) 39;	/* n1 ~= n*(1-.821...) */
+  /* n1 = n * 4 / (size_t) 31;	// n1 ~= n*(1-.871...) [TOOM66] */
+  else
+    n1 = n / (size_t) 10;		/* n1 ~= n*(1-.899...) [TOOM88] */
+
+  n2 = n - n1;
+
+  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
+	      y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+  /* x0 * y0 */
+  mpn_mul_n (tp, xp, yp, n2);
+  MPN_COPY (rp, tp, n2);
+
+  /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);
+  mpn_add_n (rp + n2, tp + n2, tp + n, n1);
+
+  /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
+  mpn_add_n (rp + n2, rp + n2, tp + n, n1);
+}
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0.  */
+#define MUL_BASECASE_ALLOC \
+ (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area and handle the
+   case tp == rp, as mpn_dc_mullo_n already does.
+   Maybe recombine the two functions.
+   THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
+	  (typically thanks to mpn_addmul_2) should we unconditionally use
+	  mpn_mul_n?
+*/
+
+void
+mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+
+  if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t tp[MUL_BASECASE_ALLOC];
+      mpn_mul_basecase (tp, xp, n, yp, n);
+      MPN_COPY (rp, tp, n);
+    }
+  else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
+    {
+      mpn_mullo_basecase (rp, xp, yp, n);
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
+      if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
+	{
+	  mpn_dc_mullo_n (rp, xp, yp, n, tp);
+	}
+      else
+	{
+	  /* For really large operands, use plain mpn_mul_n but throw away upper n
+	     limbs of result.  */
+#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
+	  mpn_fft_mul (tp, xp, n, yp, n);
+#else
+	  mpn_mul_n (tp, xp, yp, n);
+#endif
+	  MPN_COPY (rp, tp, n);
+	}
+      TMP_FREE;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mulmid.c b/third_party/gmp/mpn/generic/mulmid.c
new file mode 100644
index 0000000..f35c5fb
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mulmid.c
@@ -0,0 +1,255 @@
+/* mpn_mulmid -- middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
+
+
+void
+mpn_mulmid (mp_ptr rp,
+            mp_srcptr ap, mp_size_t an,
+            mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t rn, k;
+  mp_ptr scratch, temp;
+
+  ASSERT (an >= bn);
+  ASSERT (bn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
+
+  if (bn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not tall enough to make toom42 worthwhile for any portion */
+
+      if (an < CHUNK)
+	{
+	  /* region not too wide either, just call basecase directly */
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  return;
+	}
+
+      /* Region quite wide. For better locality, use basecase on chunks:
+
+	 AAABBBCC..
+	 .AAABBBCC.
+	 ..AAABBBCC
+      */
+
+      k = CHUNK - bn + 1;    /* number of diagonals per chunk */
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+
+      /* remaining chunks (B, C, etc) */
+      an -= k;
+
+      while (an >= CHUNK)
+	{
+	  mp_limb_t t0, t1, cy;
+	  ap += k, rp += k;
+	  t0 = rp[0], t1 = rp[1];
+	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
+	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
+	  an -= k;
+	}
+
+      if (an >= bn)
+	{
+	  /* last remaining chunk */
+	  mp_limb_t t0, t1, cy;
+	  ap += k, rp += k;
+	  t0 = rp[0], t1 = rp[1];
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);
+	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
+	}
+
+      return;
+    }
+
+  /* region is tall enough for toom42 */
+
+  rn = an - bn + 1;
+
+  if (rn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not wide enough to make toom42 worthwhile for any portion */
+
+      TMP_DECL;
+
+      if (bn < CHUNK)
+	{
+	  /* region not too tall either, just call basecase directly */
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  return;
+	}
+
+      /* Region quite tall. For better locality, use basecase on chunks:
+
+	 AAAAA....
+	 .AAAAA...
+	 ..BBBBB..
+	 ...BBBBB.
+	 ....CCCCC
+      */
+
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2);
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - CHUNK, an -= bn - CHUNK;
+      mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= CHUNK;
+
+      while (bn >= CHUNK)
+	{
+	  ap += CHUNK, bp -= CHUNK;
+	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
+	  mpn_add_n (rp, rp, temp, rn + 2);
+	  bn -= CHUNK;
+	}
+
+      if (bn)
+	{
+	  /* last remaining chunk */
+	  ap += CHUNK, bp -= bn;
+	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
+	  mpn_add_n (rp, rp, temp, rn + 2);
+	}
+
+      TMP_FREE;
+      return;
+    }
+
+  /* we're definitely going to use toom42 somewhere */
+
+  if (bn > rn)
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+	 the last:
+
+         AA....
+         .AA...
+         ..BB..
+         ...BB.
+         ....CC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
+      scratch = temp + rn + 2;
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - rn;
+      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= rn;
+
+      while (bn >= rn)
+        {
+          ap += rn, bp -= rn;
+	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
+          mpn_add_n (rp, rp, temp, rn + 2);
+          bn -= rn;
+        }
+
+      if (bn)
+        {
+          /* last remaining chunk */
+          ap += rn, bp -= bn;
+	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
+          mpn_add_n (rp, rp, temp, rn + 2);
+        }
+
+      TMP_FREE;
+    }
+  else
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+	 the last:
+
+         AAABBBCC..
+         .AAABBBCC.
+         ..AAABBBCC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      rn -= bn;
+
+      while (rn >= bn)
+        {
+	  mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
+	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
+	  rn -= bn;
+        }
+
+      TMP_FREE;
+
+      if (rn)
+        {
+          /* last remaining chunk */
+	  mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);
+	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
+        }
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mulmid_basecase.c b/third_party/gmp/mpn/generic/mulmid_basecase.c
new file mode 100644
index 0000000..d5434ea
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mulmid_basecase.c
@@ -0,0 +1,82 @@
+/* mpn_mulmid_basecase -- classical middle product algorithm
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
+   Must have un >= vn >= 1.
+
+   Neither input buffer may overlap with the output buffer. */
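+
+/* Illustrative sketch (not in the upstream GMP sources): with un = 3 and
+   vn = 2, writing u = u0 + u1*B + u2*B^2 and v = v0 + v1*B, the loop below
+   accumulates
+
+       (u1 + u2*B) * v0
+     + (u0 + u1*B) * v1
+
+   i.e. the middle diagonals c1 + c2*B of the full product
+   c0 + c1*B + c2*B^2 + c3*B^3, with the two extra output limbs collecting
+   the carry that overflows past c2 in hi:lo.  */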
+
+void
+mpn_mulmid_basecase (mp_ptr rp,
+                     mp_srcptr up, mp_size_t un,
+                     mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t lo, hi;  /* last two limbs of output */
+  mp_limb_t cy;
+
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
+
+  up += vn - 1;
+  un -= vn - 1;
+
+  /* multiply by first limb, store result */
+  lo = mpn_mul_1 (rp, up, un, vp[0]);
+  hi = 0;
+
+  /* accumulate remaining rows */
+  for (vn--; vn; vn--)
+    {
+      up--, vp++;
+      cy = mpn_addmul_1 (rp, up, un, vp[0]);
+      add_ssaaaa (hi, lo, hi, lo, CNST_LIMB(0), cy);
+    }
+
+  /* store final limbs */
+#if GMP_NAIL_BITS != 0
+  hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
+  lo &= GMP_NUMB_MASK;
+#endif
+
+  rp[un] = lo;
+  rp[un + 1] = hi;
+}
diff --git a/third_party/gmp/mpn/generic/mulmid_n.c b/third_party/gmp/mpn/generic/mulmid_n.c
new file mode 100644
index 0000000..ac7e8f1
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mulmid_n.c
@@ -0,0 +1,61 @@
+/* mpn_mulmid_n -- balanced middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+void
+mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  if (n < MULMID_TOOM42_THRESHOLD)
+    {
+      mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
+    }
+  else
+    {
+      mp_ptr scratch;
+      TMP_DECL;
+      TMP_MARK;
+      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
+      mpn_toom42_mulmid (rp, ap, bp, n, scratch);
+      TMP_FREE;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/mulmod_bnm1.c b/third_party/gmp/mpn/generic/mulmod_bnm1.c
new file mode 100644
index 0000000..769bdbc
--- /dev/null
+++ b/third_party/gmp/mpn/generic/mulmod_bnm1.c
@@ -0,0 +1,354 @@
+/* mulmod_bnm1.c -- multiplication mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+void
+mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_mul_n (tp, ap, bp, rn);
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
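+
+/* Illustrative note (not in the upstream GMP sources): the wraparound above
+   works because B^rn == 1 (mod B^rn - 1), so splitting the 2rn-limb product
+   as x = xl + B^rn * xh gives x == xl + xh.  In the decimal analogue with
+   B = 10 and rn = 3, 123456 mod 999 = 456 + 123 = 579.  */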
+
+
+/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
+   semi-normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_mul_n (tp, ap, bp, rn + 1);
+  ASSERT (tp[2*rn+1] == 0);
+  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn+1, cy);
+}
+
+
+/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if one of the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+ * combine results and obtain a natural number when one knows in
+ * advance that the final value is less than (B^rn-1).
+ * Moreover it should not be a problem if mulmod_bnm1 is used to
+ * compute the full product with an+bn <= rn, because this condition
+ * implies (B^an-1)(B^bn-1) < (B^rn-1) .
+ *
+ * Requires 0 < bn <= an <= rn and an + bn > rn/2
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
+ *
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
+ */
+void
+mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
+{
+  ASSERT (0 < bn);
+  ASSERT (bn <= an);
+  ASSERT (an <= rn);
+
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (bn < rn))
+	{
+	  if (UNLIKELY (an + bn <= rn))
+	    {
+	      mpn_mul (rp, ap, an, bp, bn);
+	    }
+	  else
+	    {
+	      mp_limb_t cy;
+	      mpn_mul (tp, ap, an, bp, bn);
+	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
+	      MPN_INCR_U (rp, rn, cy);
+	    }
+	}
+      else
+	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;
+
+      /* We need at least an + bn >= n, to be able to fit one of the
+	 recursive products at rp. Requiring strict inequality makes
+	 the code slightly simpler. If desired, we could avoid this
+	 restriction by initially halving rn as long as rn is even and
+	 an + bn <= rn/2. */
+
+      ASSERT (an + bn > n);
+
+      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
+	 and crt together as
+
+	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
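+
+      /* Sanity check of the identity above (illustrative note, not in the
+	 upstream GMP sources): let t = (xp + xm)/2 mod (B^n-1).  Reducing
+	 x = -xp * B^n + (B^n + 1) * t mod (B^n - 1) uses B^n == 1, giving
+	 x == -xp + 2*t == xm; reducing mod (B^n + 1) uses B^n == -1, so the
+	 (B^n + 1) * t term vanishes and x == xp.  Both residues match, as
+	 CRT requires.  */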
+
+#define a0 ap
+#define a1 (ap + n)
+#define b0 bp
+#define b1 (bp + n)
+
+#define xp  tp	/* 2n + 2 */
+      /* am1  may be in {xp, n} */
+      /* bm1  may be in {xp + n, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  may be in {sp1, n + 1} */
+      /* bp1  may be in {sp1 + n + 1, n + 1} */
+
+      {
+	mp_srcptr am1, bm1;
+	mp_size_t anm, bnm;
+	mp_ptr so;
+
+	bm1 = b0;
+	bnm = bn;
+	if (LIKELY (an > n))
+	  {
+	    am1 = xp;
+	    cy = mpn_add (xp, a0, n, a1, an - n);
+	    MPN_INCR_U (xp, n, cy);
+	    anm = n;
+	    so = xp + n;
+	    if (LIKELY (bn > n))
+	      {
+		bm1 = so;
+		cy = mpn_add (so, b0, n, b1, bn - n);
+		MPN_INCR_U (so, n, cy);
+		bnm = n;
+		so += n;
+	      }
+	  }
+	else
+	  {
+	    so = xp;
+	    am1 = a0;
+	    anm = an;
+	  }
+
+	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
+      }
+
+      {
+	int       k;
+	mp_srcptr ap1, bp1;
+	mp_size_t anp, bnp;
+
+	bp1 = b0;
+	bnp = bn;
+	if (LIKELY (an > n)) {
+	  ap1 = sp1;
+	  cy = mpn_sub (sp1, a0, n, a1, an - n);
+	  sp1[n] = 0;
+	  MPN_INCR_U (sp1, n + 1, cy);
+	  anp = n + ap1[n];
+	  if (LIKELY (bn > n)) {
+	    bp1 = sp1 + n + 1;
+	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
+	    sp1[2*n+1] = 0;
+	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
+	    bnp = n + bp1[n];
+	  }
+	} else {
+	  ap1 = a0;
+	  anp = an;
+	}
+
+	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+	  k=0;
+	else
+	  {
+	    int mask;
+	    k = mpn_fft_best_k (n, 0);
+	    mask = (1<<k) - 1;
+	    while (n & mask) { k--; mask >>= 1; }
+	  }
+	if (k >= FFT_FIRST_K)
+	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
+	else if (UNLIKELY (bp1 == b0))
+	  {
+	    ASSERT (anp + bnp <= 2*n+1);
+	    ASSERT (anp + bnp > n);
+	    ASSERT (anp >= bnp);
+	    mpn_mul (xp, ap1, anp, bp1, bnp);
+	    anp = anp + bnp - n;
+	    ASSERT (anp <= n || xp[2*n]==0);
+	    anp -= anp > n;
+	    cy = mpn_sub (xp, xp, n, xp + n, anp);
+	    xp[n] = 0;
+	    MPN_INCR_U (xp, n+1, cy);
+	  }
+	else
+	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1]; except when
+	 both inputs are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1, i.e. {xp,n} = ZERO; this implies that
+	 the rsh1add was a simple rshift: the top bit is 0.  cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy cannot overflow, so the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* The next increment cannot overflow; see the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (an + bn < rn))
+	{
+	  /* Note that in this case, the only way the result can equal
+	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
+	     then the output of both the recursive calls and this CRT
+	     reconstruction is zero, not B^{rn} - 1. Which is good,
+	     since the latter representation doesn't fit in the output
+	     area.*/
+	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
+
+	  /* FIXME: This subtraction of the high parts is not really
+	     necessary, we do it to get the carry out, and for sanity
+	     checking. */
+	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
+				   xp + an + bn - n, rn - (an + bn), cy);
+	  ASSERT (an + bn == rn - 1 ||
+		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
+	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
+	  ASSERT (cy == (xp + an + bn - n)[0]);
+	}
+      else
+	{
+	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+	     DECR will affect _at most_ the lowest n limbs. */
+	  MPN_DECR_U (rp, 2*n, cy);
+	}
+#undef a0
+#undef a1
+#undef b0
+#undef b1
+#undef xp
+#undef sp1
+    }
+}
+
+mp_size_t
+mpn_mulmod_bnm1_next_size (mp_size_t n)
+{
+  mp_size_t nh;
+
+  if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
+    return n;
+  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (2-1)) & (-2);
+  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (4-1)) & (-4);
+
+  nh = (n + 1) >> 1;
+
+  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
+    return (n + (8-1)) & (-8);
+
+  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
+}
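+
+/* Usage note (illustrative, not in the upstream GMP sources): callers round
+   the desired product size up with this function so that the recursion and
+   FFT code see a supported size, e.g.
+
+       rn = mpn_mulmod_bnm1_next_size (an + bn);
+       tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
+       mpn_mulmod_bnm1 (pp, rn, ap, an, bp, bn, tp);
+
+   which, since an + bn <= rn, recovers the full product; this is exactly
+   the pattern used by mpn_nussbaumer_mul in this change.  */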
diff --git a/third_party/gmp/mpn/generic/neg.c b/third_party/gmp/mpn/generic/neg.c
new file mode 100644
index 0000000..bec2a32
--- /dev/null
+++ b/third_party/gmp/mpn/generic/neg.c
@@ -0,0 +1,33 @@
+/* mpn_neg - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_neg 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/nussbaumer_mul.c b/third_party/gmp/mpn/generic/nussbaumer_mul.c
new file mode 100644
index 0000000..3e0cf27
--- /dev/null
+++ b/third_party/gmp/mpn/generic/nussbaumer_mul.c
@@ -0,0 +1,70 @@
+/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
+   Nussbaumer's negacyclic convolution.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Multiply {ap,an} by {bp,bn}, and put the result in {pp, an+bn} */
+void
+mpn_nussbaumer_mul (mp_ptr pp,
+		    mp_srcptr ap, mp_size_t an,
+		    mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t rn;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (an >= bn);
+  ASSERT (bn > 0);
+
+  TMP_MARK;
+
+  if ((ap == bp) && (an == bn))
+    {
+      rn = mpn_sqrmod_bnm1_next_size (2*an);
+      tp = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (rn, an));
+      mpn_sqrmod_bnm1 (pp, rn, ap, an, tp);
+    }
+  else
+    {
+      rn = mpn_mulmod_bnm1_next_size (an + bn);
+      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
+      mpn_mulmod_bnm1 (pp, rn, ap, an, bp, bn, tp);
+    }
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/perfpow.c b/third_party/gmp/mpn/generic/perfpow.c
new file mode 100644
index 0000000..9d46477
--- /dev/null
+++ b/third_party/gmp/mpn/generic/perfpow.c
@@ -0,0 +1,342 @@
+/* mpn_perfect_power_p -- mpn perfect power detection.
+
+   Contributed to the GNU project by Martin Boij.
+
+Copyright 2009, 2010, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define SMALL 20
+#define MEDIUM 100
+
+/* Return non-zero if {np,nn} == {xp,xn} ^ k.
+   Algorithm:
+       For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
+       {xp,xn}^k. Stop if they don't match the s least significant limbs of
+       {np,nn}.
+
+   FIXME: Low xn limbs can be expected to always match, if computed as a mod
+   B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
+   most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
+   compare to {np, nn}. Or use an even cruder approximation based on fix-point
+   base 2 logarithm.  */
+static int
+pow_equals (mp_srcptr np, mp_size_t n,
+	    mp_srcptr xp, mp_size_t xn,
+	    mp_limb_t k, mp_bitcnt_t f,
+	    mp_ptr tp)
+{
+  mp_bitcnt_t y, z;
+  mp_size_t bn;
+  mp_limb_t h, l;
+
+  ASSERT (n > 1 || (n == 1 && np[0] > 1));
+  ASSERT (np[n - 1] > 0);
+  ASSERT (xn > 0);
+
+  if (xn == 1 && xp[0] == 1)
+    return 0;
+
+  z = 1 + (n >> 1);
+  for (bn = 1; bn < z; bn <<= 1)
+    {
+      mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
+      if (mpn_cmp (tp, np, bn) != 0)
+	return 0;
+    }
+
+  /* Final check. Estimate the size of {xp,xn}^k before computing the power
+     with full precision.  Optimization: It might pay off to make a more
+     accurate estimation of the logarithm of {xp,xn}, rather than using the
+     index of the MSB.  */
+
+  MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
+  y -= 1;  /* msb_index (xp, xn) */
+
+  umul_ppmm (h, l, k, y);
+  h -= l == 0;  --l;	/* two-limb decrement */
+
+  z = f - 1; /* msb_index (np, n) */
+  if (h == 0 && l <= z)
+    {
+      mp_limb_t *tp2;
+      mp_size_t i;
+      int ans;
+      mp_limb_t size;
+      TMP_DECL;
+
+      size = l + k;
+      ASSERT_ALWAYS (size >= k);
+
+      TMP_MARK;
+      y = 2 + size / GMP_LIMB_BITS;
+      tp2 = TMP_ALLOC_LIMBS (y);
+
+      i = mpn_pow_1 (tp, xp, xn, k, tp2);
+      if (i == n && mpn_cmp (tp, np, n) == 0)
+	ans = 1;
+      else
+	ans = 0;
+      TMP_FREE;
+      return ans;
+    }
+
+  return 0;
+}
+
+
+/* Return non-zero if N = {np,n} is a kth power.
+   I = {ip,n} = N^(-1) mod B^n.  */
+static int
+is_kth_power (mp_ptr rp, mp_srcptr np,
+	      mp_limb_t k, mp_srcptr ip,
+	      mp_size_t n, mp_bitcnt_t f,
+	      mp_ptr tp)
+{
+  mp_bitcnt_t b;
+  mp_size_t rn, xn;
+
+  ASSERT (n > 0);
+  ASSERT ((k & 1) != 0 || k == 2);
+  ASSERT ((np[0] & 1) != 0);
+
+  if (k == 2)
+    {
+      b = (f + 1) >> 1;
+      rn = 1 + b / GMP_LIMB_BITS;
+      if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
+	{
+	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+	  xn = rn;
+	  MPN_NORMALIZE (rp, xn);
+	  if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
+	    return 1;
+
+	  /* Check if (2^b - r)^2 == n */
+	  mpn_neg (rp, rp, rn);
+	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+	  MPN_NORMALIZE (rp, rn);
+	  if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+	    return 1;
+	}
+    }
+  else
+    {
+      b = 1 + (f - 1) / k;
+      rn = 1 + (b - 1) / GMP_LIMB_BITS;
+      mpn_brootinv (rp, ip, rn, k, tp);
+      if ((b % GMP_LIMB_BITS) != 0)
+	rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+      MPN_NORMALIZE (rp, rn);
+      if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+	return 1;
+    }
+  MPN_ZERO (rp, rn); /* Untrash rp */
+  return 0;
+}
+
+static int
+perfpow (mp_srcptr np, mp_size_t n,
+	 mp_limb_t ub, mp_limb_t g,
+	 mp_bitcnt_t f, int neg)
+{
+  mp_ptr ip, tp, rp;
+  mp_limb_t k;
+  int ans;
+  mp_bitcnt_t b;
+  gmp_primesieve_t ps;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ((np[0] & 1) != 0);
+  ASSERT (ub > 0);
+
+  TMP_MARK;
+  gmp_init_primesieve (&ps);
+  b = (f + 3) >> 1;
+
+  TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n);
+
+  MPN_ZERO (rp, n);
+
+  /* FIXME: It seems the inverse in ip is needed only to get non-inverted
+     roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
+     similarly for nth roots. It should be more efficient to compute n^{1/2} as
+     n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
+     similar for kth roots if we switch to an iteration converging to n^{1/k -
+     1}, and we can then eliminate this binvert call. */
+  mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+  if (b % GMP_LIMB_BITS)
+    ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+
+  if (neg)
+    gmp_nextprime (&ps);
+
+  ans = 0;
+  if (g > 0)
+    {
+      ub = MIN (ub, g + 1);
+      while ((k = gmp_nextprime (&ps)) < ub)
+	{
+	  if ((g % k) == 0)
+	    {
+	      if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+		{
+		  ans = 1;
+		  goto ret;
+		}
+	    }
+	}
+    }
+  else
+    {
+      while ((k = gmp_nextprime (&ps)) < ub)
+	{
+	  if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+	    {
+	      ans = 1;
+	      goto ret;
+	    }
+	}
+    }
+ ret:
+  TMP_FREE;
+  return ans;
+}
+
+static const unsigned short nrtrial[] = { 100, 500, 1000 };
+
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+   number.  */
+static const double logs[] =
+  { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
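+
+/* Illustrative note (not in the upstream GMP sources): once trial division
+   by the first nrtrial[trial] primes finds no further factor, any remaining
+   prime factor of N is at least p = the (nrtrial[trial] + 1)'th prime, so
+   N = x^k forces k <= log_p (N) <= bits(N) * log_p (2); this is the
+   exponent bound d computed from logs[] in mpn_perfect_power_p below.  */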
+
+int
+mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
+{
+  mp_limb_t *nc, factor, g;
+  mp_limb_t exp, d;
+  mp_bitcnt_t twos, count;
+  int ans, where, neg, trial;
+  TMP_DECL;
+
+  neg = n < 0;
+  if (neg)
+    {
+      n = -n;
+    }
+
+  if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like
+					   (n <= (np[0] == 1)) */
+    return 1;
+
+  TMP_MARK;
+
+  count = 0;
+
+  twos = mpn_scan1 (np, 0);
+  if (twos != 0)
+    {
+      mp_size_t s;
+      if (twos == 1)
+	{
+	  return 0;
+	}
+      s = twos / GMP_LIMB_BITS;
+      if (s + 1 == n && POW2_P (np[s]))
+	{
+	  return ! (neg && POW2_P (twos));
+	}
+      count = twos % GMP_LIMB_BITS;
+      n -= s;
+      np += s;
+      if (count > 0)
+	{
+	  nc = TMP_ALLOC_LIMBS (n);
+	  mpn_rshift (nc, np, n, count);
+	  n -= (nc[n - 1] == 0);
+	  np = nc;
+	}
+    }
+  g = twos;
+
+  trial = (n > SMALL) + (n > MEDIUM);
+
+  where = 0;
+  factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+
+  if (factor != 0)
+    {
+      if (count == 0) /* We did not allocate nc yet. */
+	{
+	  nc = TMP_ALLOC_LIMBS (n);
+	}
+
+      /* Remove factors found by trialdiv.  Optimization: if mpn_remove
+	 defined an _itch function, we could allocate its scratch just once */
+
+      do
+	{
+	  binvert_limb (d, factor);
+
+	  /* After the first round we always have nc == np */
+	  exp = mpn_remove (nc, &n, np, n, &d, 1, ~(mp_bitcnt_t)0);
+
+	  if (g == 0)
+	    g = exp;
+	  else
+	    g = mpn_gcd_1 (&g, 1, exp);
+
+	  if (g == 1)
+	    {
+	      ans = 0;
+	      goto ret;
+	    }
+
+	  if ((n == 1) & (nc[0] == 1))
+	    {
+	      ans = ! (neg && POW2_P (g));
+	      goto ret;
+	    }
+
+	  np = nc;
+	  factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+	}
+      while (factor != 0);
+    }
+
+  MPN_SIZEINBASE_2EXP(count, np, n, 1);   /* log (np) + 1 */
+  d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
+  ans = perfpow (np, n, d, g, count, neg);
+
+ ret:
+  TMP_FREE;
+  return ans;
+}
diff --git a/third_party/gmp/mpn/generic/perfsqr.c b/third_party/gmp/mpn/generic/perfsqr.c
new file mode 100644
index 0000000..38a1a91
--- /dev/null
+++ b/third_party/gmp/mpn/generic/perfsqr.c
@@ -0,0 +1,239 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "perfsqr.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+
+/* PERFSQR_MOD_* detects non-squares using residue tests.
+
+   A macro PERFSQR_MOD_TEST is set up by gen-psqr.c in perfsqr.h.  It takes
+   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
+   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
+   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
+   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
+   case too.
+
+   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
+   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
+   data indicating residues and non-residues modulo those divisors.  The
+   table data is in 1 or 2 limbs worth of bits respectively, per the size of
+   each d.
+
+   A "modexact" style remainder is taken to reduce r modulo d.
+   PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
+   the table data.  Notice there's just one multiplication by a constant
+   "inv", for each d.
+
+   The modexact doesn't produce a true r%d remainder, instead idx satisfies
+   "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd, this factor
+   -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
+   accounted for by having the table data suitably permuted.
+
+   The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
+   In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
+   each divisor d meaning the modexact multiply can take place entirely
+   within one limb, giving the compiler the chance to optimize it, in a way
+   that say umul_ppmm would not give.
+
+   There's no need for the divisors d to be prime, in fact gen-psqr.c makes
+   a deliberate effort to combine factors so as to reduce the number of
+   separate tests done on r.  But such combining is limited to d <=
+   2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
+
+   Alternatives:
+
+   It'd be possible to use bigger divisors d, and more than 2 limbs of table
+   data, but this doesn't look like it would be of much help to the prime
+   factors in the usual moduli 2^24-1 or 2^48-1.
+
+   The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
+   just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
+   factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have
+   fewer prime factors.
+
+   The nails case usually ends up using mpn_mod_1, which is a lot slower
+   than mpn_mod_34lsub1.  Perhaps other such special moduli could be found
+   for the nails case.  Two-term things like 2^30-2^15-1 might be
+   candidates.  Or at worst some on-the-fly de-nailing would allow the plain
+   2^24-1 to be used.  Currently nails are too preliminary to be worried
+   about.
+
+*/
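+
+/* Worked example (illustrative, not in the upstream GMP sources): squares
+   modulo 9 can only be 0, 1, 4 or 7, so a bitmask with bits 0, 1, 4 and 7
+   set (0x93) rejects the other five of the nine remainders outright; the
+   tables generated in perfsqr.h encode the same idea for each divisor d,
+   permuted as described above.  */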
+
+#define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
+
+#define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)
+#define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1)
+
+#define PERFSQR_MOD_34(r, up, usize)				\
+  do {								\
+    (r) = mpn_mod_34lsub1 (up, usize);				\
+    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);		\
+  } while (0)
+
+/* FIXME: The %= here isn't good, and might destroy any savings from keeping
+   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
+   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
+   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
+   normal case, so let's not worry too much about mod_1.  */
+#define PERFSQR_MOD_PP(r, up, usize)					\
+  do {									\
+    if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))	\
+      {									\
+	(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,		\
+				PERFSQR_PP_INVERTED);			\
+	(r) %= PERFSQR_PP;						\
+      }									\
+    else								\
+      {									\
+	(r) = mpn_mod_1 (up, usize, PERFSQR_PP);			\
+      }									\
+  } while (0)
+
+#define PERFSQR_MOD_IDX(idx, r, d, inv)				\
+  do {								\
+    mp_limb_t  q;						\
+    ASSERT ((r) <= PERFSQR_MOD_MASK);				\
+    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);		\
+    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);		\
+								\
+    q = ((r) * (inv)) & PERFSQR_MOD_MASK;			\
+    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));		\
+    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;			\
+  } while (0)
+
+#define PERFSQR_MOD_1(r, d, inv, mask)				\
+  do {								\
+    unsigned   idx;						\
+    ASSERT ((d) <= GMP_LIMB_BITS);				\
+    PERFSQR_MOD_IDX(idx, r, d, inv);				\
+    TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n",	\
+		   d, r%d, idx));				\
+    if ((((mask) >> idx) & 1) == 0)				\
+      {								\
+	TRACE (printf ("  non-square\n"));			\
+	return 0;						\
+      }								\
+  } while (0)
+
+/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)			\
+  do {								\
+    mp_limb_t  m;						\
+    unsigned   idx;						\
+    ASSERT ((d) <= 2*GMP_LIMB_BITS);				\
+								\
+    PERFSQR_MOD_IDX (idx, r, d, inv);				\
+    TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n",	\
+		   d, r%d, idx));				\
+    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));	\
+    idx %= GMP_LIMB_BITS;					\
+    if (((m >> idx) & 1) == 0)					\
+      {								\
+	TRACE (printf ("  non-square\n"));			\
+	return 0;						\
+      }								\
+  } while (0)
+
+
+int
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+{
+  ASSERT (usize >= 1);
+
+  TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
+
+  /* The first test excludes 212/256 (82.8%) of the perfect square candidates
+     in O(1) time.  */
+  {
+    unsigned  idx = up[0] % 0x100;
+    if (((sq_res_0x100[idx / GMP_LIMB_BITS]
+	  >> (idx % GMP_LIMB_BITS)) & 1) == 0)
+      return 0;
+  }
+
+#if 0
+  /* Check that we have even multiplicity of 2, and then check that the rest is
+     a possible perfect square.  Leave disabled until we can determine this
+     really is an improvement.  It it is, it could completely replace the
+     simple probe above, since this should throw out more non-squares, but at
+     the expense of somewhat more cycles.  */
+  {
+    mp_limb_t lo;
+    int cnt;
+    lo = up[0];
+    while (lo == 0)
+      up++, lo = up[0], usize--;
+    count_trailing_zeros (cnt, lo);
+    if ((cnt & 1) != 0)
+      return 0;			/* return if not even multiplicity of 2 */
+    lo >>= cnt;			/* shift down to align lowest non-zero bit */
+    lo >>= 1;			/* shift away lowest non-zero bit */
+    if ((lo & 3) != 0)
+      return 0;
+  }
+#endif
+
+
+  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
+     according to their residues modulo small primes (or powers of
+     primes).  See perfsqr.h.  */
+  PERFSQR_MOD_TEST (up, usize);
+
+
+  /* For the third and last test, we finally compute the square root,
+     to make sure we've really got a perfect square.  */
+  {
+    mp_ptr root_ptr;
+    int res;
+    TMP_DECL;
+
+    TMP_MARK;
+    root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
+
+    /* Iff mpn_sqrtrem returns zero, the number is a perfect square.  */
+    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+    TMP_FREE;
+
+    return res;
+  }
+}
diff --git a/third_party/gmp/mpn/generic/popham.c b/third_party/gmp/mpn/generic/popham.c
new file mode 100644
index 0000000..87974d7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/popham.c
@@ -0,0 +1,125 @@
+/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
+
+Copyright 1994, 1996, 2000-2002, 2005, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if OPERATION_popcount
+#define FNAME mpn_popcount
+#define POPHAM(u,v) u
+#endif
+
+#if OPERATION_hamdist
+#define FNAME mpn_hamdist
+#define POPHAM(u,v) u ^ v
+#endif
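+
+/* Illustrative note (not in the upstream GMP sources): the loop below is a
+   classic SWAR bit count.  On 64-bit limbs the magic constants are
+   MP_LIMB_T_MAX/3 = 0x5555...55, MP_LIMB_T_MAX/5 = 0x3333...33 and
+   MP_LIMB_T_MAX/17 = 0x0f0f...0f, so each step sums adjacent 1-, 2- and
+   4-bit fields of the limb in parallel.  */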
+
+mp_bitcnt_t
+FNAME (mp_srcptr up,
+#if OPERATION_hamdist
+       mp_srcptr vp,
+#endif
+       mp_size_t n) __GMP_NOTHROW
+{
+  mp_bitcnt_t result = 0;
+  mp_limb_t p0, p1, p2, p3, x, p01, p23;
+  mp_size_t i;
+
+  ASSERT (n >= 1);		/* Actually, this code handles any n, but some
+				   assembly implementations do not.  */
+
+  for (i = n >> 2; i != 0; i--)
+    {
+      p0 = POPHAM (up[0], vp[0]);
+      p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p1 = POPHAM (up[1], vp[1]);
+      p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p01 = p0 + p1;							/* 8 0-8 */
+      p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
+
+      p2 = POPHAM (up[2], vp[2]);
+      p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p3 = POPHAM (up[3], vp[3]);
+      p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p23 = p2 + p3;							/* 8 0-8 */
+      p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
+
+      x = p01 + p23;							/* 8 0-32 */
+      x = (x >> 8) + x;							/* 8 0-64 */
+      x = (x >> 16) + x;						/* 8 0-128 */
+#if GMP_LIMB_BITS > 32
+      x = ((x >> 32) & 0xff) + (x & 0xff);				/* 8 0-256 */
+      result += x;
+#else
+      result += x & 0xff;
+#endif
+      up += 4;
+#if OPERATION_hamdist
+      vp += 4;
+#endif
+    }
+
+  n &= 3;
+  if (n != 0)
+    {
+      x = 0;
+      do
+	{
+	  p0 = POPHAM (up[0], vp[0]);
+	  p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+	  p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+	  p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;			/* 8 0-8 */
+
+	  x += p0;
+	  up += 1;
+#if OPERATION_hamdist
+	  vp += 1;
+#endif
+	}
+      while (--n);
+
+      x = (x >> 8) + x;
+      x = (x >> 16) + x;
+#if GMP_LIMB_BITS > 32
+      x = (x >> 32) + x;
+#endif
+      result += x & 0xff;
+    }
+
+  return result;
+}
diff --git a/third_party/gmp/mpn/generic/pow_1.c b/third_party/gmp/mpn/generic/pow_1.c
new file mode 100644
index 0000000..de11cd2
--- /dev/null
+++ b/third_party/gmp/mpn/generic/pow_1.c
@@ -0,0 +1,135 @@
+/* mpn_pow_1 -- Compute powers R = U^exp.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_size_t
+mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
+{
+  mp_limb_t x;
+  int cnt, i;
+  mp_size_t rn;
+  int par;
+
+  ASSERT (bn >= 1);
+  /* FIXME: Add operand overlap criteria */
+
+  if (exp <= 1)
+    {
+      if (exp == 0)
+	{
+	  rp[0] = 1;
+	  return 1;
+	}
+      else
+	{
+	  MPN_COPY (rp, bp, bn);
+	  return bn;
+	}
+    }
+
+  /* Count number of bits in exp, and compute where to put initial square in
+     order to magically get results in the entry rp.  Use simple code,
+     optimized for small exp.  For large exp, the bignum operations will take
+     so much time that the slowness of this code will be negligible.  */
+  par = 0;
+  cnt = GMP_LIMB_BITS;
+  x = exp;
+  do
+    {
+      par ^= x;
+      cnt--;
+      x >>= 1;
+    } while (x != 0);
+  exp <<= cnt;
+
+  if (bn == 1)
+    {
+      mp_limb_t rl, rh, bl = bp[0];
+
+      if ((cnt & 1) != 0)
+	MP_PTR_SWAP (rp, tp);
+
+      umul_ppmm (rh, rl, bl, bl << GMP_NAIL_BITS);
+      rp[0] = rl >> GMP_NAIL_BITS;
+      rp[1] = rh;
+      rn = 1 + (rh != 0);
+
+      for (i = GMP_LIMB_BITS - cnt - 1;;)
+	{
+	  exp <<= 1;
+	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
+	    {
+	      rp[rn] = rh = mpn_mul_1 (rp, rp, rn, bl);
+	      rn += rh != 0;
+	    }
+
+	  if (--i == 0)
+	    break;
+
+	  mpn_sqr (tp, rp, rn);
+	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
+	  MP_PTR_SWAP (rp, tp);
+	}
+    }
+  else
+    {
+      if (((par ^ cnt) & 1) == 0)
+	MP_PTR_SWAP (rp, tp);
+
+      mpn_sqr (rp, bp, bn);
+      rn = 2 * bn; rn -= rp[rn - 1] == 0;
+
+      for (i = GMP_LIMB_BITS - cnt - 1;;)
+	{
+	  exp <<= 1;
+	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
+	    {
+	      rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);
+	      MP_PTR_SWAP (rp, tp);
+	    }
+
+	  if (--i == 0)
+	    break;
+
+	  mpn_sqr (tp, rp, rn);
+	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
+	  MP_PTR_SWAP (rp, tp);
+	}
+    }
+
+  return rn;
+}
diff --git a/third_party/gmp/mpn/generic/powlo.c b/third_party/gmp/mpn/generic/powlo.c
new file mode 100644
index 0000000..3b26bca
--- /dev/null
+++ b/third_party/gmp/mpn/generic/powlo.c
@@ -0,0 +1,188 @@
+/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+
+Copyright 2007-2009, 2012, 2015, 2016, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits)
+{
+  unsigned nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else
+    {
+      bi -= nbits;			/* bit index of low bit to extract */
+      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      r = p[i] >> bi;			/* extract (low) bits */
+      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      if (nbits_in_r < nbits)		/* did we get enough bits? */
+	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
+      return r & (((mp_limb_t) 1 << nbits) - 1);
+    }
+}
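+
+/* Example (illustrative, not in the upstream GMP sources): with
+   p[0] = 0x2d (binary 101101), getbits (p, 5, 3) extracts the three bits
+   below bit index 5, i.e. bits 4..2 = binary 011 = 3.  */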
+
+static inline unsigned
+win_size (mp_bitcnt_t eb)
+{
+  unsigned k;
+  static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  ASSERT (eb > 1);
+  for (k = 0; eb > x[k++];)
+    ;
+  return k;
+}
+
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+   Requires that ep[en-1] is non-zero.
+   Uses scratch space tp[3n-1..0], i.e., 3n words.  */
+/* We use only n words of the scratch space here; we should pass tp + n to
+   mullo/sqrlo as a temporary area, since one is needed. */
+void
+mpn_powlo (mp_ptr rp, mp_srcptr bp,
+	   mp_srcptr ep, mp_size_t en,
+	   mp_size_t n, mp_ptr tp)
+{
+  unsigned cnt;
+  mp_bitcnt_t ebi;
+  unsigned windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_limb_t *pp;
+  long i;
+  int flipflop;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+
+  TMP_MARK;
+
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+  windowsize = win_size (ebi);
+  if (windowsize > 1)
+    {
+      mp_limb_t *this_pp, *last_pp;
+      ASSERT (windowsize < ebi);
+
+      pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
+
+      this_pp = pp;
+
+      MPN_COPY (this_pp, bp, n);
+
+      /* Store b^2 in tp.  */
+      mpn_sqrlo (tp, bp, n);
+
+      /* Precompute odd powers of b and put them in the temporary area at pp.  */
+      i = (1 << (windowsize - 1)) - 1;
+      do
+	{
+	  last_pp = this_pp;
+	  this_pp += n;
+	  mpn_mullo_n (this_pp, last_pp, tp, n);
+	} while (--i != 0);
+
+      expbits = getbits (ep, ebi, windowsize);
+
+      /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */
+      count_trailing_zeros (cnt, expbits);
+      ebi -= windowsize;
+      ebi += cnt;
+      expbits >>= cnt;
+
+      MPN_COPY (rp, pp + n * (expbits >> 1), n);
+    }
+  else
+    {
+      pp = tp + n;
+      MPN_COPY (pp, bp, n);
+      MPN_COPY (rp, bp, n);
+      --ebi;
+    }
+
+  flipflop = 0;
+
+  do
+    {
+      while (getbit (ep, ebi) == 0)
+	{
+	  mpn_sqrlo (tp, rp, n);
+	  MP_PTR_SWAP (rp, tp);
+	  flipflop = ! flipflop;
+	  if (--ebi == 0)
+	    goto done;
+	}
+
+      /* The next bit of the exponent is 1.  Now extract the largest block of
+	 bits <= windowsize, and such that the least significant bit is 1.  */
+
+      expbits = getbits (ep, ebi, windowsize);
+      this_windowsize = MIN (windowsize, ebi);
+      ebi -= this_windowsize;
+
+      count_trailing_zeros (cnt, expbits);
+      this_windowsize -= cnt;
+      ebi += cnt;
+      expbits >>= cnt;
+
+      while (this_windowsize > 1)
+	{
+	  mpn_sqrlo (tp, rp, n);
+	  mpn_sqrlo (rp, tp, n);
+	  this_windowsize -= 2;
+	}
+
+      if (this_windowsize != 0)
+	mpn_sqrlo (tp, rp, n);
+      else
+	{
+	  MP_PTR_SWAP (rp, tp);
+	  flipflop = ! flipflop;
+	}
+
+      mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n);
+    } while (ebi != 0);
+
+ done:
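+  /* Illustrative note (not in the upstream GMP sources): after an odd
+     number of MP_PTR_SWAPs the local rp and tp point at each other's
+     original buffers, so the result is copied back into the caller's rp,
+     which is now named tp.  */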
+  if (flipflop)
+    MPN_COPY (tp, rp, n);
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/powm.c b/third_party/gmp/mpn/generic/powm.c
new file mode 100644
index 0000000..2828103
--- /dev/null
+++ b/third_party/gmp/mpn/generic/powm.c
@@ -0,0 +1,635 @@
+/* mpn_powm -- Compute R = U^E mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2007-2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. W <- U
+
+  2. T <- (B^n * U) mod M                Convert to REDC form
+
+  3. Compute table U^1, U^3, U^5... of E-dependent size
+
+  4. While there are more bits in E
+       W <- power left-to-right base-k
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps then make getbits'
+     sibling getbit have a similar form, for symmetry.)
+
+   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
+     pp area is allocated locally anyway?
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * Handle small bases with initial, reduction-free exponentiation.
+
+   * Call new division functions, not mpn_tdiv_qr.
+
+   * Consider special code for one-limb M.
+
+   * How should we handle the redc1/redc2/redc_n choice?
+     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
+     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
+     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+     This disregards the addmul_N constant term, but we could think of
+     that as part of the respective mullo.
+
+   * When U (the base) is small, we should start the exponentiation with plain
+     operations, then convert that partial result to REDC form.
+
+   * When U is just one limb, should it be handled without the k-ary tricks?
+     We could keep a factor of B^n in W, but use U' = BU as base.  After
+     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
+     mod M.
+*/
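+
+/* Illustrative note (not in the upstream GMP sources): in REDC form
+   U_r = B^n * U mod M, and each redc pass divides by B^n mod M, so
+   redc (U_r * V_r) = U*V*B^n mod M = (U*V)_r.  Products of operands in
+   REDC form thus stay in REDC form, and a final redc converts the result
+   back to the plain representation.  */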
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#undef MPN_REDC_0
+#define MPN_REDC_0(rp, up, mp, invm)					\
+  do {									\
+    mp_limb_t p1, r0, u0, _dummy;					\
+    u0 = *(up);								\
+    umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);	\
+    ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);			\
+    p1 += (u0 != 0);							\
+    r0 = (up)[1] + p1;							\
+    if (p1 > r0)							\
+      r0 -= *(mp);							\
+    *(rp) = r0;								\
+  } while (0)
+
+#undef MPN_REDC_1
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1(rp, up, mp, n, invm)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
+    if (cy != 0)							\
+      mpn_sub_n (rp, up + n, mp, n);					\
+    else								\
+      MPN_COPY (rp, up + n, n);						\
+  } while (0)
+#else
+#define MPN_REDC_1(rp, up, mp, n, invm)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
+    if (cy != 0)							\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+#endif
+
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
+    if (cy != 0)							\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else
+    {
+      bi -= nbits;			/* bit index of low bit to extract */
+      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      r = p[i] >> bi;			/* extract (low) bits */
+      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      if (nbits_in_r < nbits)		/* did we get enough bits? */
+	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
+      return r & (((mp_limb_t) 1 << nbits) - 1);
+    }
+}
+
+static inline int
+win_size (mp_bitcnt_t eb)
+{
+  int k;
+  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  for (k = 1; eb > x[k]; k++)
+    ;
+  return k;
+}
+
+/* Convert U to REDC form, U_r = B^n * U mod M */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+  mp_ptr tp, qp;
+  TMP_DECL;
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
+
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+  TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+void
+mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+	  mp_srcptr ep, mp_size_t en,
+	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int cnt;
+  mp_bitcnt_t ebi;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp;
+  long i;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+  TMP_MARK;
+
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+#if 0
+  if (bn < n)
+    {
+      /* Do the first few exponent bits without mod reductions,
+	 until the result is greater than the mod argument.  */
+      for (;;)
+	{
+	  mpn_sqr (tp, this_pp, tn);
+	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
+	  if (getbit (ep, ebi) != 0)
+	    mpn_mul (..., tp, tn, bp, bn);
+	  ebi--;
+	}
+    }
+#endif
+
+  windowsize = win_size (ebi);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+#endif
+  else
+    {
+      mip = TMP_ALLOC_LIMBS (n);
+      mpn_binvert (mip, mp, n, tp);
+    }
+
+  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
+
+  this_pp = pp;
+  redcify (this_pp, bp, bn, mp, n);
+
+  /* Store b^2 at rp.  */
+  mpn_sqr (tp, this_pp, n);
+#if 0
+  if (n == 1) {
+    MPN_REDC_0 (rp, tp, mp, mip[0]);
+  } else
+#endif
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  /* Precompute odd powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+#if 1
+    if (n == 1) {
+      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
+      ++this_pp ;
+      MPN_REDC_0 (this_pp, tp, mp, mip[0]);
+    } else
+#endif
+    {
+      mpn_mul_n (tp, this_pp, rp, n);
+      this_pp += n;
+#if WANT_REDC_2
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	MPN_REDC_2 (this_pp, tp, mp, n, mip);
+#else
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+#endif
+      else
+	mpn_redc_n (this_pp, tp, mp, n, mip);
+    }
+
+  expbits = getbits (ep, ebi, windowsize);
+  if (ebi < windowsize)
+    ebi = 0;
+  else
+    ebi -= windowsize;
+
+  count_trailing_zeros (cnt, expbits);
+  ebi += cnt;
+  expbits >>= cnt;
+
+  MPN_COPY (rp, pp + n * (expbits >> 1), n);
+
+#define INNERLOOP							\
+  while (ebi != 0)							\
+    {									\
+      while (getbit (ep, ebi) == 0)					\
+	{								\
+	  MPN_SQR (tp, rp, n);						\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	  if (--ebi == 0)						\
+	    goto done;							\
+	}								\
+									\
+      /* The next bit of the exponent is 1.  Now extract the largest	\
+	 block of bits <= windowsize, and such that the least		\
+	 significant bit is 1.  */					\
+									\
+      expbits = getbits (ep, ebi, windowsize);				\
+      this_windowsize = windowsize;					\
+      if (ebi < windowsize)						\
+	{								\
+	  this_windowsize -= windowsize - ebi;				\
+	  ebi = 0;							\
+	}								\
+      else								\
+        ebi -= windowsize;						\
+									\
+      count_trailing_zeros (cnt, expbits);				\
+      this_windowsize -= cnt;						\
+      ebi += cnt;							\
+      expbits >>= cnt;							\
+									\
+      do								\
+	{								\
+	  MPN_SQR (tp, rp, n);						\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	}								\
+      while (--this_windowsize != 0);					\
+									\
+      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+    }
+
+
+  if (n == 1)
+    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		umul_ppmm((r)[1], *(r), *(a), *(b))
+#define MPN_SQR(r,a,n)			umul_ppmm((r)[1], *(r), *(a), *(a))
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_0(rp, tp, mp, mip[0])
+      INNERLOOP;
+    }
+  else
+#if WANT_REDC_2
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+
+#else  /* WANT_REDC_2 */
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+#endif  /* WANT_REDC_2 */
+
+ done:
+
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  if (mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
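+
+#if 0
+/* Usage sketch (annotation added in this import; not upstream code, and
+   deliberately left uncompiled).  Computes r = b^e mod m for a one-limb
+   odd modulus; tp must provide MAX (mpn_binvert_itch (n), 2*n) limbs of
+   scratch, which two limbs are assumed to cover for n == 1.  */
+static void
+powm_usage_sketch (void)
+{
+  mp_limb_t b[1] = { 3 }, e[1] = { 10 }, m[1] = { 1000003 };
+  mp_limb_t r[1], tp[2];
+  mpn_powm (r, b, 1, e, 1, m, 1, tp);	/* r[0] == 59049 == 3^10 */
+}
+#endif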
diff --git a/third_party/gmp/mpn/generic/pre_divrem_1.c b/third_party/gmp/mpn/generic/pre_divrem_1.c
new file mode 100644
index 0000000..3b29d77
--- /dev/null
+++ b/third_party/gmp/mpn/generic/pre_divrem_1.c
@@ -0,0 +1,145 @@
+/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Don't bloat a shared library with unused code. */
+#if USE_PREINV_DIVREM_1
+
+/* Same test here for skipping one divide step as in mpn_divrem_1.
+
+   The main reason for a separate shift==0 case is that not all CPUs give
+   zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
+   code used on shift==0.  shift==0 is also reasonably common in mp_bases
+   big_base, for instance base==10 on a 64-bit limb.
+
+   Under shift!=0 it would be possible to call mpn_lshift to adjust the
+   dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.
+
+   If size==0 then the result is simply xsize limbs of zeros, but nothing
+   special is done for that, since it wouldn't be a usual call, and
+   certainly never arises from mpn_get_str which is our main caller.  */
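+
+/* Worked illustration (annotation added in this import, not upstream):
+   for shift != 0 the loop divides by the normalized d = d_unnorm << shift,
+   re-aligning dividend limbs on the fly as
+     (n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift)),
+   and the final remainder is un-normalized again with r >> shift.  */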
+
+mp_limb_t
+mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
+		     mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,
+		     mp_limb_t dinv, int shift)
+{
+  mp_limb_t  ahigh, qhigh, r;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  d;
+
+  ASSERT (xsize >= 0);
+  ASSERT (size >= 1);
+  ASSERT (d_unnorm != 0);
+#if WANT_ASSERT
+  {
+    int        want_shift;
+    mp_limb_t  want_dinv;
+    count_leading_zeros (want_shift, d_unnorm);
+    ASSERT (shift == want_shift);
+    invert_limb (want_dinv, d_unnorm << shift);
+    ASSERT (dinv == want_dinv);
+  }
+#endif
+  /* FIXME: What's the correct overlap rule when xsize!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));
+
+  ahigh = ap[size-1];
+  d = d_unnorm << shift;
+  qp += (size + xsize - 1);   /* dest high limb */
+
+  if (shift == 0)
+    {
+      /* The high quotient limb is 0 or 1, so we can skip a divide step. */
+      r = ahigh;
+      qhigh = (r >= d);
+      r = (qhigh ? r-d : r);
+      *qp-- = qhigh;
+      size--;
+
+      for (i = size-1; i >= 0; i--)
+	{
+	  n0 = ap[i];
+	  udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+	  qp--;
+	}
+    }
+  else
+    {
+      r = 0;
+      if (ahigh < d_unnorm)
+	{
+	  r = ahigh << shift;
+	  *qp-- = 0;
+	  size--;
+	  if (size == 0)
+	    goto done_integer;
+	}
+
+      n1 = ap[size-1];
+      r |= n1 >> (GMP_LIMB_BITS - shift);
+
+      for (i = size-2; i >= 0; i--)
+	{
+	  ASSERT (r < d);
+	  n0 = ap[i];
+	  udiv_qrnnd_preinv (*qp, r, r,
+			     ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+			     d, dinv);
+	  qp--;
+	  n1 = n0;
+	}
+      udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);
+      qp--;
+    }
+
+ done_integer:
+  for (i = 0; i < xsize; i++)
+    {
+      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+      qp--;
+    }
+
+  return r >> shift;
+}
+
+#endif /* USE_PREINV_DIVREM_1 */
diff --git a/third_party/gmp/mpn/generic/pre_mod_1.c b/third_party/gmp/mpn/generic/pre_mod_1.c
new file mode 100644
index 0000000..78ae308
--- /dev/null
+++ b/third_party/gmp/mpn/generic/pre_mod_1.c
@@ -0,0 +1,61 @@
+/* mpn_preinv_mod_1 (up, un, d, dinv) -- Divide {UP,UN} by the normalized D.
+   DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
+   Return the single-limb remainder.
+
+Copyright 1991, 1993, 1994, 2000-2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function used to be documented, but is now considered obsolete.  It
+   continues to exist for binary compatibility, even when not required
+   internally.  */
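+
+/* Example (annotation added in this import, not upstream): dinv is the
+   value invert_limb computes for the normalized divisor, e.g. on a
+   64-bit limb
+
+     mp_limb_t d = GMP_LIMB_HIGHBIT | 1;
+     mp_limb_t dinv;
+     invert_limb (dinv, d);    -- dinv = floor(2^128 / d) - 2^64
+     r = mpn_preinv_mod_1 (up, un, d, dinv);
+*/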
+
+mp_limb_t
+mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t n0, r;
+
+  ASSERT (un >= 1);
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  r = up[un - 1];
+  if (r >= d)
+    r -= d;
+
+  for (i = un - 2; i >= 0; i--)
+    {
+      n0 = up[i];
+      udiv_rnnd_preinv (r, r, n0, d, dinv);
+    }
+  return r;
+}
diff --git a/third_party/gmp/mpn/generic/random.c b/third_party/gmp/mpn/generic/random.c
new file mode 100644
index 0000000..485f9eb
--- /dev/null
+++ b/third_party/gmp/mpn/generic/random.c
@@ -0,0 +1,50 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_random (mp_ptr ptr, mp_size_t size)
+{
+  gmp_randstate_ptr  rands;
+
+  /* FIXME: Is size==0 supposed to be allowed? */
+  ASSERT (size >= 0);
+
+  if (size == 0)
+    return;
+
+  rands = RANDS;
+  _gmp_rand (ptr, rands, size * GMP_NUMB_BITS);
+
+  /* Make sure the most significant limb is non-zero.  */
+  while (ptr[size-1] == 0)
+    _gmp_rand (&ptr[size-1], rands, GMP_NUMB_BITS);
+}
diff --git a/third_party/gmp/mpn/generic/random2.c b/third_party/gmp/mpn/generic/random2.c
new file mode 100644
index 0000000..1eede67
--- /dev/null
+++ b/third_party/gmp/mpn/generic/random2.c
@@ -0,0 +1,105 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+   of ones and zeroes.  Suitable for border testing.
+
+Copyright 1992-1994, 1996, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+   Thus, we get the same random number sequence in the common cases.
+   FIXME: We should always generate the same random number sequence!  */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+void
+mpn_random2 (mp_ptr rp, mp_size_t n)
+{
+  gmp_randstate_ptr rstate = RANDS;
+  int bit_pos;			/* bit number of the least significant bit
+				   where the next bit field is to be inserted */
+  mp_limb_t ran, ranm;		/* buffer for random bits */
+
+  /* FIXME: Is n==0 supposed to be allowed? */
+  ASSERT (n >= 0);
+
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  ran = ranm;
+
+  /* Start off at a random bit position in the most significant limb.  */
+  bit_pos = ran % GMP_NUMB_BITS;
+
+  gmp_rrandomb (rp, rstate, n * GMP_NUMB_BITS - bit_pos);
+}
+
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_bitcnt_t bi;
+  mp_limb_t ranm;		/* buffer for random bits */
+  unsigned cap_chunksize, chunksize;
+  mp_size_t i;
+
+  /* Set entire result to 111..1  */
+  i = BITS_TO_LIMBS (nbits) - 1;
+  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
+  for (i = i - 1; i >= 0; i--)
+    rp[i] = GMP_NUMB_MAX;
+
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  cap_chunksize = nbits / (ranm % 4 + 1);
+  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+  bi = nbits;
+
+  for (;;)
+    {
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      if (bi == 0)
+	break;			/* low chunk is ...1 */
+
+      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;
+
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+      if (bi == 0)
+	break;			/* low chunk is ...0 */
+    }
+}
diff --git a/third_party/gmp/mpn/generic/redc_1.c b/third_party/gmp/mpn/generic/redc_1.c
new file mode 100644
index 0000000..eab128f
--- /dev/null
+++ b/third_party/gmp/mpn/generic/redc_1.c
@@ -0,0 +1,56 @@
+/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t j;
+  mp_limb_t cy;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  for (j = n - 1; j >= 0; j--)
+    {
+      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+      ASSERT (up[0] == 0);
+      up[0] = cy;
+      up++;
+    }
+
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
+}
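+
+/* Annotation (added in this import, not upstream): each loop pass above
+   picks the multiplier (up[0] * invm) mod B, where invm = -1/mp[0] mod B,
+   so adding that multiple of mp zeroes the current low limb; the pass's
+   carry-out is parked in the limb just cleared.  The final mpn_add_n
+   folds those n parked carries into the high half {up+n, n}, yielding
+   U/B^n mod M up to the returned carry, which the caller resolves with
+   one conditional subtraction.  */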
diff --git a/third_party/gmp/mpn/generic/redc_2.c b/third_party/gmp/mpn/generic/redc_2.c
new file mode 100644
index 0000000..8d15589
--- /dev/null
+++ b/third_party/gmp/mpn/generic/redc_2.c
@@ -0,0 +1,110 @@
+/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS != 0
+you lose
+#endif
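+/* Note (added in this import): "you lose" above is a deliberate syntax
+   error, making the build fail if nails are enabled; this file does not
+   support a non-zero GMP_NAIL_BITS.  */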
+
+/* For testing purposes, define our own mpn_addmul_2 if there is none already
+   available.  */
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#undef mpn_addmul_2
+static mp_limb_t
+mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
+{
+  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
+  return mpn_addmul_1 (rp + 1, up, n, vp[1]);
+}
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+  && defined (__ia64) && W_TYPE_SIZE == 64
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {									\
+    mp_limb_t _ph, _pl;							\
+    __asm__ ("xma.hu %0 = %3, %5, f0\n\t"				\
+	     "xma.l %1 = %3, %5, f0\n\t"				\
+	     ";;\n\t"							\
+	     "xma.l %0 = %3, %4, %0\n\t"				\
+	     ";;\n\t"							\
+	     "xma.l %0 = %2, %5, %0"					\
+	     : "=&f" (ph), "=&f" (pl)					\
+	     : "f" (uh), "f" (ul), "f" (vh), "f" (vl));			\
+  } while (0)
+#endif
+
+#ifndef umul2low
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {									\
+    mp_limb_t _ph, _pl;							\
+    umul_ppmm (_ph, _pl, ul, vl);					\
+    (ph) = _ph + (ul) * (vh) + (uh) * (vl);				\
+    (pl) = _pl;								\
+  } while (0)
+#endif
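+
+/* Annotation (added in this import, not upstream): umul2low computes the
+   low two limbs of the double-limb product, i.e.
+     ph:pl = (uh*B + ul) * (vh*B + vl)  mod B^2
+           = ul*vl + B*(ul*vh + uh*vl)  mod B^2,
+   which is exactly what is needed below to form the 2-limb quotient q.  */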
+
+mp_limb_t
+mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
+{
+  mp_limb_t q[2];
+  mp_size_t j;
+  mp_limb_t upn;
+  mp_limb_t cy;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  if ((n & 1) != 0)
+    {
+      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
+      up++;
+    }
+
+  for (j = n - 2; j >= 0; j -= 2)
+    {
+      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
+      upn = up[n];		/* mpn_addmul_2 overwrites this */
+      up[1] = mpn_addmul_2 (up, mp, n, q);
+      up[0] = up[n];
+      up[n] = upn;
+      up += 2;
+    }
+
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/redc_n.c b/third_party/gmp/mpn/generic/redc_n.c
new file mode 100644
index 0000000..0c94b7c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/redc_n.c
@@ -0,0 +1,80 @@
+/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  TODO
+
+  * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
+    future mpn_mulhi) for the range in which we will be called.  Follow up
+    on that assumption.
+
+  * Decrease scratch usage.
+
+  * Consider removing the residue canonicalisation.
+*/
+
+void
+mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
+{
+  mp_ptr xp, yp, scratch;
+  mp_limb_t cy;
+  mp_size_t rn;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (n > 8);
+
+  rn = mpn_mulmod_bnm1_next_size (n);
+
+  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
+
+  xp = scratch;
+  mpn_mullo_n (xp, up, ip, n);
+
+  yp = scratch + n;
+  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
+
+  ASSERT_ALWAYS (2 * n > rn);				/* could handle this */
+
+  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn);		/* undo wrap around */
+  MPN_DECR_U (yp + 2*n - rn, rn, cy);
+
+  cy = mpn_sub_n (rp, up + n, yp + n, n);
+  if (cy != 0)
+    mpn_add_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/remove.c b/third_party/gmp/mpn/generic/remove.c
new file mode 100644
index 0000000..cbb0742
--- /dev/null
+++ b/third_party/gmp/mpn/generic/remove.c
@@ -0,0 +1,182 @@
+/* mpn_remove -- divide out the largest power of an odd mpn number that
+   exactly divides another mpn number.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2012-2014, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if GMP_LIMB_BITS > 50
+#define LOG 50
+#else
+#define LOG GMP_LIMB_BITS
+#endif
+
+
+/* Input: U = {up,un}; V = {vp,vn}, which must be odd; cap.
+   Output: W = {wp,*wn}; the allocation needed is exactly *wn.
+
+   Set W = U / V^k, where k is the largest integer <= cap such that the
+   division yields an integer.
+
+   FIXME: We currently allow any operand overlap.  This is quite non-mpn-ish
+   and might be changed, since it costs significant temporary space.
+   * If we require W to have space for un + 1 limbs, we could save qp or qp2
+     (but we will still need to copy things into wp 50% of the time).
+   * If we allow ourselves to clobber U, we could save the other of qp and qp2,
+     and the initial COPY (but also here we would need un + 1 limbs).
+*/
+
+/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface.  This need
+   indicates a flaw in the current itch mechanism: which operand sizes, each
+   not greater than un, will incur the worst itch?  We need a parallel
+   foo_maxitch set of functions.  */
+static void
+mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
+		  mp_srcptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn)
+{
+  mp_ptr scratch_out;
+  TMP_DECL;
+
+  TMP_MARK;
+  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
+  mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
+
+  TMP_FREE;
+}
+
+mp_bitcnt_t
+mpn_remove (mp_ptr wp, mp_size_t *wn,
+	    mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn,
+	    mp_bitcnt_t cap)
+{
+  mp_srcptr pwpsp[LOG];
+  mp_size_t pwpsn[LOG];
+  mp_size_t npowers;
+  mp_ptr tp, qp, np, qp2;
+  mp_srcptr pp;
+  mp_size_t pn, nn, qn, i;
+  mp_bitcnt_t pwr;
+  TMP_DECL;
+
+  ASSERT (un > 0);
+  ASSERT (vn > 0);
+  ASSERT (vp[0] % 2 != 0);	/* 2-adic division wants odd numbers */
+  ASSERT (vn > 1 || vp[0] > 1);	/* else we would loop indefinitely */
+
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_3 (qp, un + 1,	/* quotient, alternating */
+		     qp2, un + 1,	/* quotient, alternating */
+		     tp, (un + 1 + vn) / 2); /* remainder */
+  pp = vp;
+  pn = vn;
+
+  MPN_COPY (qp, up, un);
+  qn = un;
+
+  npowers = 0;
+  while (qn >= pn)
+    {
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
+      if (!mpn_zero_p (tp, pn))
+	{
+	  if (mpn_cmp (tp, pp, pn) != 0)
+	    break;		/* could not divide by V^npowers */
+	}
+
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      mpn_neg (qp, qp, qn+1);
+
+      qn += qp[qn] != 0;
+
+      pwpsp[npowers] = pp;
+      pwpsn[npowers] = pn;
+      ++npowers;
+
+      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
+	break;
+
+      nn = 2 * pn - 1;		/* next power will be at least this large */
+      if (nn > qn)
+	break;			/* next power would be overlarge */
+
+      if (npowers == 1)		/* Alloc once, but only if it's needed */
+	np = TMP_ALLOC_LIMBS (qn + LOG);	/* powers of V */
+      else
+	np += pn;
+
+      mpn_sqr (np, pp, pn);
+      pn = nn + (np[nn] != 0);
+      pp = np;
+    }
+
+  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
+
+  for (i = npowers; --i >= 0;)
+    {
+      pn = pwpsn[i];
+      if (qn < pn)
+	continue;
+
+      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
+	continue;		/* V^i would bring us past cap */
+
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
+      if (!mpn_zero_p (tp, pn))
+	{
+	  if (mpn_cmp (tp, pwpsp[i], pn) != 0)
+	    continue;		/* could not divide by V^i */
+	}
+
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      mpn_neg (qp, qp, qn+1);
+
+      qn += qp[qn] != 0;
+
+      pwr += (mp_bitcnt_t) 1 << i;
+    }
+
+  MPN_COPY (wp, qp, qn);
+  *wn = qn;
+
+  TMP_FREE;
+
+  return pwr;
+}
diff --git a/third_party/gmp/mpn/generic/rootrem.c b/third_party/gmp/mpn/generic/rootrem.c
new file mode 100644
index 0000000..a79099e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/rootrem.c
@@ -0,0 +1,515 @@
+/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
+   store the truncated integer part at rootp and the remainder at remp.
+
+   Contributed by Paul Zimmermann (algorithm) and
+   Paul Zimmermann and Torbjorn Granlund (implementation).
+   Marco Bodrato wrote logbased_root to seed the loop.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* FIXME:
+     This implementation is not optimal when remp == NULL, since the complexity
+     is M(n), whereas it should be M(n/k) on average.
+*/
+
+#include <stdio.h>		/* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
+				       mp_limb_t, int);
+
+#define MPN_RSHIFT(rp,up,un,cnt) \
+  do {									\
+    if ((cnt) != 0)							\
+      mpn_rshift (rp, up, un, cnt);					\
+    else								\
+      {									\
+	MPN_COPY_INCR (rp, up, un);					\
+      }									\
+  } while (0)
+
+#define MPN_LSHIFT(cy,rp,up,un,cnt) \
+  do {									\
+    if ((cnt) != 0)							\
+      cy = mpn_lshift (rp, up, un, cnt);				\
+    else								\
+      {									\
+	MPN_COPY_DECR (rp, up, un);					\
+	cy = 0;								\
+      }									\
+  } while (0)
+
+
+/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
+   If remp <> NULL, put in {remp, un} the remainder.
+   Return the size (in limbs) of the remainder if remp <> NULL,
+	  or a non-zero value iff the remainder is non-zero when remp = NULL.
+   Assumes:
+   (a) up[un-1] is not zero
+   (b) rootp has at least space for ceil(un/k) limbs
+   (c) remp has at least space for un limbs (in case remp <> NULL)
+   (d) the operands do not overlap.
+
+   The auxiliary memory usage is 3*un+2 if remp = NULL,
+   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
+*/
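+
+/* Usage sketch (annotation added in this import, not upstream code):
+
+     mp_limb_t u[1] = { 1000 };
+     mp_limb_t root[1], rem[1];
+     mp_size_t rn = mpn_rootrem (root, rem, u, 1, 3);
+     -- root[0] == 10 and rn == 0, since 10^3 == 1000 exactly
+*/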
+mp_size_t
+mpn_rootrem (mp_ptr rootp, mp_ptr remp,
+	     mp_srcptr up, mp_size_t un, mp_limb_t k)
+{
+  ASSERT (un > 0);
+  ASSERT (up[un - 1] != 0);
+  ASSERT (k > 1);
+
+  if (UNLIKELY (k == 2))
+    return mpn_sqrtrem (rootp, remp, up, un);
+  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
+  if (remp == NULL && (un + 2) / 3 > k)
+    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
+       with one more limb, allowing us to compute the exact integral result. */
+    {
+      mp_ptr sp, wp;
+      mp_size_t rn, sn, wn;
+      TMP_DECL;
+      TMP_MARK;
+      wn = un + k;
+      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
+			 sp, sn); /* approximate root of padded input */
+      MPN_COPY (wp + k, up, un);
+      MPN_FILL (wp, k, 0);
+      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
+      /* The approximate root S = {sp,sn} is either the correct root of
+	 {sp,sn}, or 1 too large.  Thus unless the least significant limb of
+	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
+	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
+	 whether it is exact or not.) */
+      MPN_COPY (rootp, sp + 1, sn - 1);
+      TMP_FREE;
+      return rn;
+    }
+  else
+    {
+      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
+    }
+}
+
+#define LOGROOT_USED_BITS 8
+#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
+#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
+/* Puts in *rootp some bits of the kth root of the number
+   2^bitn * 1.op, where op represents the "fractional" bits.
+
+   The returned value is the number of bits of the root minus one;
+   i.e. an approximation of the root will be
+   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
+
+   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
+   one is not counted).
+ */
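+/* Annotation (added in this import, not upstream): the two tables inside
+   this function form a fixed-point log2/exp2 pair.  vlog maps the top
+   operand bits to a fractional log2, the division by k then takes the
+   root in the log domain, and vexp maps the fractional part back, so a
+   single table-driven step yields roughly LOGROOT_USED_BITS correct bits
+   of the root.  */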
+static unsigned
+logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
+{
+  /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
+  static const
+  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
+			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
+			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
+			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
+			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
+			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
+			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
+			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
+			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
+			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
+			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
+			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
+			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
+			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
+
+  /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
+  static const
+  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
+			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
+			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
+			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
+			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
+			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
+			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
+			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
+			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
+			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
+			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
+			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
+			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
+			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
+			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
+			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
+  mp_bitcnt_t retval;
+
+  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
+    {
+      /* In the unlikely case, we use two divisions and a modulo. */
+      retval = bitn / k;
+      bitn %= k;
+      bitn = (bitn << LOGROOT_USED_BITS |
+	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+    }
+  else
+    {
+      bitn = (bitn << LOGROOT_USED_BITS |
+	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+      retval = bitn >> LOGROOT_USED_BITS;
+      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
+    }
+  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
+  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
+    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
+  return retval;
+}
+
+/* if approx is non-zero, does not compute the final remainder */
+static mp_size_t
+mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
+		      mp_limb_t k, int approx)
+{
+  mp_ptr qp, rp, sp, wp, scratch;
+  mp_size_t qn, rn, sn, wn, nl, bn;
+  mp_limb_t save, save2, cy, uh;
+  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
+  mp_bitcnt_t xnb; /* number of significant bits of the result */
+  mp_bitcnt_t b, kk;
+  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
+  int ni;
+  int perf_pow;
+  unsigned ulz, snb, c, logk;
+  TMP_DECL;
+
+  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
+  uh = up[un - 1];
+  count_leading_zeros (ulz, uh);
+  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
+  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
+  /* unb is the (truncated) logarithm of the input U in base 2 */
+
+  if (unb < k) /* root is 1 */
+    {
+      rootp[0] = 1;
+      if (remp == NULL)
+	un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
+      else
+	{
+	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
+	  un -= (remp [un - 1] == 0);	/* There is at most one zero limb,
+				   since u is required to be normalized  */
+	}
+      return un;
+    }
+  /* if (unb - k < k/2 + k/16) // root is 2 */
+
+  if (ulz == GMP_NUMB_BITS)
+    uh = up[un - 2];
+  else
+    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
+  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
+
+  xnb = logbased_root (rootp, uh, unb, k);
+  snb = LOGROOT_RETURNED_BITS - 1;
+  /* xnb+1 is the number of bits of the root R */
+  /* snb+1 is the number of bits of the current approximation S */
+
+  kk = k * xnb;		/* number of truncated bits in the input */
+
+  /* FIXME: Should we skip the next two loops when xnb <= snb? */
+  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
+    ;
+  /* logk = ceil(log(k)/log(2)) + 1 */
+
+  /* xnb is the number of remaining bits to determine in the kth root */
+  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
+    {
+      /* invariant: here we want xnb+1 total bits for the kth root */
+
+      /* if c is the new value of xnb, this means that we'll go from a
+	 root of c+1 bits (say s') to a root of xnb+1 bits.
+	 It is proved in the book "Modern Computer Arithmetic" by Brent
+	 and Zimmermann, Chapter 1, that
+	 if s' >= k*beta, then at most one correction is necessary.
+	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
+	 c >= ceil((xnb + log2(k))/2). */
+      if (xnb > logk)
+	xnb = (xnb + logk) / 2;
+      else
+	--xnb;	/* add just one bit at a time */
+    }
+
+  *rootp >>= snb - xnb;
+  kk -= xnb;
+
+  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
+  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
+     sizes[i] <= 2 * sizes[i+1].
+     Newton iteration will first compute sizes[ni-1] extra bits,
+     then sizes[ni-2], ..., then sizes[0] = b. */
+
+  TMP_MARK;
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+  /* THINK: with the use of logbased_root, maybe the constant is
+     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
+		     qp, un + EXTRA,  /* will contain quotient and remainder
+					 of R/(k*S^(k-1)), and S^k */
+		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+					 and temporary for mpn_pow_1 */
+
+  if (remp == NULL)
+    rp = scratch;	/* will contain the remainder */
+  else
+    rp = remp;
+  sp = rootp;
+
+  sn = 1;		/* Initial approximation has one limb */
+
+  for (b = xnb; ni != 0; --ni)
+    {
+      /* 1: loop invariant:
+	 {sp, sn} is the current approximation of the root, which has
+		  exactly 1 + sizes[ni] bits.
+	 {rp, rn} is the current remainder
+	 {wp, wn} = {sp, sn}^(k-1)
+	 kk = number of truncated bits of the input
+      */
+
+      /* Since each iteration treats b bits from the root and thus k*b bits
+	 from the input, and we already considered b bits from the input,
+	 we now have to take another (k-1)*b bits from the input. */
+      kk -= (k - 1) * b; /* remaining input bits */
+      /* {rp, rn} = floor({up, un} / 2^kk) */
+      rn = un - kk / GMP_NUMB_BITS;
+      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
+      rn -= rp[rn - 1] == 0;
+
+      /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+      for (c = 0;; c++)
+	{
+	  /* Compute S^k in {qp,qn}. */
+	  /* W <- S^(k-1) for the next iteration,
+	     and S^k = W * S. */
+	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+	  mpn_mul (qp, wp, wn, sp, sn);
+	  qn = wn + sn;
+	  qn -= qp[qn - 1] == 0;
+
+	  perf_pow = 1;
+	  /* if S^k > floor(U/2^kk), the root approximation was too large */
+	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
+	    MPN_DECR_U (sp, sn, 1);
+	  else
+	    break;
+	}
+
+      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+      /* Sometimes two corrections are needed with logbased_root.  */
+      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+      ASSERT_ALWAYS (rn >= qn);
+
+      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
+				      next iteration */
+      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
+
+      kk = kk - b;
+      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
+      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
+      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
+		 - floor(kk / GMP_NUMB_BITS)
+	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
+		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
+	     = 2 + (b - 2) / GMP_NUMB_BITS
+	 thus since nl is an integer:
+	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
+
+      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* R = R - Q = floor(U/2^kk) - S^k */
+      if (perf_pow != 0)
+	{
+	  mpn_sub (rp, rp, rn, qp, qn);
+	  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+	  /* first multiply the remainder by 2^b */
+	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
+	  rn = rn + bn;
+	  if (cy != 0)
+	    {
+	      rp[rn] = cy;
+	      rn++;
+	    }
+
+	  save = rp[bn];
+	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
+	  if (nl - 1 > bn)
+	    save2 = rp[bn + 1];
+	}
+      else
+	{
+	  rn = bn;
+	  save2 = save = 0;
+	}
+      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* Now insert bits [kk,kk+b-1] from the input U */
+      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
+      /* set to zero high bits of rp[bn] */
+      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
+      /* restore corresponding bits */
+      rp[bn] |= save;
+      if (nl - 1 > bn)
+	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
+			       they start by bit 0 in rp[0], so they use
+			       at most ceil(b/GMP_NUMB_BITS) limbs */
+      /* FIXME: Should we normalise {rp,rn} here? */
+
+      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
+      cy = mpn_mul_1 (wp, wp, wn, k);
+      wp[wn] = cy;
+      wn += cy != 0;
+
+      /* 6: current buffers: {sp,sn}, {qp,qn} */
+
+      /* multiply the root approximation by 2^b */
+      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
+      sn = sn + b / GMP_NUMB_BITS;
+      if (cy != 0)
+	{
+	  sp[sn] = cy;
+	  sn++;
+	}
+
+      save = sp[b / GMP_NUMB_BITS];
+
+      /* Number of limbs used by b bits, when least significant bit is
+	 aligned to least limb */
+      bn = (b - 1) / GMP_NUMB_BITS + 1;
+
+      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+      if (UNLIKELY (rn < wn))
+	{
+	  MPN_FILL (sp, bn, 0);
+	}
+      else
+	{
+	  qn = rn - wn; /* expected quotient size */
+	  if (qn <= bn) { /* Divide only if result is not too big. */
+	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
+	    qn += qp[qn] != 0;
+	  }
+
+      /* 5: current buffers: {sp,sn}, {qp,qn}.
+	 Note: {rp,rn} is not needed any more since we'll compute it from
+	 scratch at the end of the loop.
+       */
+
+      /* the quotient should be smaller than 2^b, since the previous
+	 approximation was correctly rounded toward zero */
+	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
+	    {
+	      for (qn = 1; qn < bn; ++qn)
+		sp[qn - 1] = GMP_NUMB_MAX;
+	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
+	    }
+	  else
+	    {
+      /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+      /* Combine sB and q to form sB + q.  */
+	      MPN_COPY (sp, qp, qn);
+	      MPN_ZERO (sp + qn, bn - qn);
+	    }
+	}
+      sp[b / GMP_NUMB_BITS] |= save;
+
+      /* 8: current buffer: {sp,sn} */
+
+    }
+
+  /* otherwise we have rn > 0, thus the return value is ok */
+  if (!approx || sp[0] <= CNST_LIMB (1))
+    {
+      for (c = 0;; c++)
+	{
+	  /* Compute S^k in {qp,qn}. */
+	  /* Last iteration: we don't need W anymore. */
+	  /* mpn_pow_1 requires that both qp and wp have enough
+	     space to store the result {sp,sn}^k + 1 limb */
+	  qn = mpn_pow_1 (qp, sp, sn, k, wp);
+
+	  perf_pow = 1;
+	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
+	    MPN_DECR_U (sp, sn, 1);
+	  else
+	    break;
+	};
+
+      /* Sometimes two corrections are needed with logbased_root.  */
+      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+
+      rn = perf_pow != 0;
+      if (rn != 0 && remp != NULL)
+	{
+	  mpn_sub (remp, up, un, qp, qn);
+	  rn = un;
+	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
+	}
+    }
+
+  TMP_FREE;
+  return rn;
+}
diff --git a/third_party/gmp/mpn/generic/rshift.c b/third_party/gmp/mpn/generic/rshift.c
new file mode 100644
index 0000000..15d427d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/rshift.c
@@ -0,0 +1,69 @@
+/* mpn_rshift -- Shift right low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and N limbs long) cnt bits to the right
+   and store the n least significant limbs of the result at rp.
+   The bits shifted out to the right are returned.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be <= up.
+*/
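+
+/* Annotation (added in this import, not upstream): the shifted-out bits
+   are returned in the most significant positions of the result, e.g.
+   with cnt == 1 the dropped low bit of U becomes the top bit of the
+   returned limb.  */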
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  tnc = GMP_NUMB_BITS - cnt;
+  high_limb = *up++;
+  retval = (high_limb << tnc) & GMP_NUMB_MASK;
+  low_limb = high_limb >> cnt;
+
+  for (i = n - 1; i != 0; i--)
+    {
+      high_limb = *up++;
+      *rp++ = low_limb | ((high_limb << tnc) & GMP_NUMB_MASK);
+      low_limb = high_limb >> cnt;
+    }
+  *rp = low_limb;
+
+  return retval;
+}
diff --git a/third_party/gmp/mpn/generic/sbpi1_bdiv_q.c b/third_party/gmp/mpn/generic/sbpi1_bdiv_q.c
new file mode 100644
index 0000000..850e593
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_bdiv_q.c
@@ -0,0 +1,96 @@
+/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+   returning quotient only.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes Q = - U / D mod B^un, destroys U.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+void
+mpn_sbpi1_bdiv_q (mp_ptr qp,
+		  mp_ptr up, mp_size_t un,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t q;
+
+  ASSERT (dn > 0);
+  ASSERT (un >= dn);
+  ASSERT ((dp[0] & 1) != 0);
+  ASSERT (-(dp[0] * dinv) == 1);
+  ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+  if (un > dn)
+    {
+      mp_limb_t cy, hi;
+      for (i = un - dn - 1, cy = 0; i > 0; i--)
+	{
+	  q = dinv * up[0];
+	  hi = mpn_addmul_1 (up, dp, dn, q);
+
+	  ASSERT (up[0] == 0);
+	  *qp++ = q;
+	  hi += cy;
+	  cy = hi < cy;
+	  hi += up[dn];
+	  cy += hi < up[dn];
+	  up[dn] = hi;
+	  up++;
+	}
+      q = dinv * up[0];
+      hi = cy + mpn_addmul_1 (up, dp, dn, q);
+      ASSERT (up[0] == 0);
+      *qp++ = q;
+      up[dn] += hi;
+      up++;
+    }
+  for (i = dn; i > 1; i--)
+    {
+      mp_limb_t q = dinv * up[0];
+      mpn_addmul_1 (up, dp, i, q);
+      ASSERT (up[0] == 0);
+      *qp++ = q;
+      up++;
+    }
+
+  /* Final limb */
+  *qp = dinv * up[0];
+}
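
The ASSERT (up[0] == 0) above captures the Hensel-division invariant: with
dinv = (-D)^-1 mod B, the multiplier q = dinv * up[0] makes the low limb of
U + q*D vanish.  A standalone single-word sketch of that invariant, including
the usual Newton iteration for the inverse (illustrative values only, not GMP
code):

    #include <stdint.h>
    #include <assert.h>

    static uint64_t neg_inverse (uint64_t d)    /* d must be odd */
    {
      uint64_t inv = d;                 /* d*d == 1 mod 8: 3 bits correct */
      for (int i = 0; i < 5; i++)       /* correct bits: 3,6,12,24,48,96 */
        inv *= 2 - d * inv;
      return -inv;                      /* (-d)^-1 mod 2^64 */
    }

    int main (void)
    {
      uint64_t d = 0x123456789abcdef1;  /* odd divisor */
      uint64_t u = 0xfedcba9876543210;  /* low limb of the dividend */
      uint64_t q = neg_inverse (d) * u;
      assert (u + q * d == 0);          /* low limb of U + Q*D vanishes */
      return 0;
    }

The same per-limb step drives the _qr and _r variants that follow.
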
diff --git a/third_party/gmp/mpn/generic/sbpi1_bdiv_qr.c b/third_party/gmp/mpn/generic/sbpi1_bdiv_qr.c
new file mode 100644
index 0000000..6146c45
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_bdiv_qr.c
@@ -0,0 +1,82 @@
+/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+   returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes a binary quotient of size qn = un - dn.
+   Output:
+
+      Q = -U * D^{-1} mod B^qn,
+
+      R = (U + Q * D) * B^(-qn)
+
+   Stores the dn least significant limbs of R at {up + un - dn, dn},
+   and returns the carry from the addition U + Q*D.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_qr (mp_ptr qp,
+		   mp_ptr up, mp_size_t un,
+		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  ASSERT (dn > 0);
+  ASSERT (un > dn);
+  ASSERT ((dp[0] & 1) != 0);
+  ASSERT (-(dp[0] * dinv) == 1);
+  ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+  for (i = un - dn, cy = 0; i != 0; i--)
+    {
+      mp_limb_t q = dinv * up[0];
+      mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+      *qp++ = q;
+
+      hi += cy;
+      cy = hi < cy;
+      hi += up[dn];
+      cy += hi < up[dn];
+      up[dn] = hi;
+      up++;
+    }
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/sbpi1_bdiv_r.c b/third_party/gmp/mpn/generic/sbpi1_bdiv_r.c
new file mode 100644
index 0000000..a609951
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_bdiv_r.c
@@ -0,0 +1,79 @@
+/* mpn_sbpi1_bdiv_r -- schoolbook Hensel division with precomputed inverse,
+   returning remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes a binary quotient of size qn = un - dn.
+   Output:
+
+      Q = -U * D^{-1} mod B^qn,
+
+      R = (U + Q * D) * B^(-qn)
+
+   Stores the dn least significant limbs of R at {up + un - dn, dn},
+   and returns the carry from the addition U + Q*D.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_r (mp_ptr up, mp_size_t un,
+		  mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  ASSERT (dn > 0);
+  ASSERT (un > dn);
+  ASSERT ((dp[0] & 1) != 0);
+  ASSERT (-(dp[0] * dinv) == 1);
+
+  for (i = un - dn, cy = 0; i != 0; i--)
+    {
+      mp_limb_t q = dinv * up[0];
+      mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+
+      hi += cy;
+      cy = hi < cy;
+      hi += up[dn];
+      cy += hi < up[dn];
+      up[dn] = hi;
+      up++;
+    }
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/sbpi1_div_q.c b/third_party/gmp/mpn/generic/sbpi1_div_q.c
new file mode 100644
index 0000000..a9975eb
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_div_q.c
@@ -0,0 +1,302 @@
+/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_q (mp_ptr qp,
+		 mp_ptr np, mp_size_t nn,
+		 mp_srcptr dp, mp_size_t dn,
+		 mp_limb_t dinv)
+{
+  mp_limb_t qh;
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;
+
+  mp_size_t dn_orig = dn;
+  mp_srcptr dp_orig = dp;
+  mp_ptr np_orig = np;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;
+
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  for (i = qn - (dn + 2); i >= 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 -= cy1;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);
+
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--)
+	{
+	  np--;
+	  if (UNLIKELY (n1 >= (d1 & flag)))
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+	      if (UNLIKELY (n1 != cy))
+		{
+		  if (n1 < (cy & flag))
+		    {
+		      q--;
+		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
+		    }
+		  else
+		    flag = 0;
+		}
+	      n1 = np[1];
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	      cy1 = n0 < cy;
+	      n0 = (n0 - cy) & GMP_NUMB_MASK;
+	      cy = n1 < cy1;
+	      n1 -= cy1;
+	      np[0] = n0;
+
+	      if (UNLIKELY (cy != 0))
+		{
+		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+		  q--;
+		}
+	    }
+
+	  *--qp = q;
+
+	  /* Truncate operands.  */
+	  dn--;
+	  dp++;
+	}
+
+      np--;
+      if (UNLIKELY (n1 >= (d1 & flag)))
+	{
+	  q = GMP_NUMB_MASK;
+	  cy = mpn_submul_1 (np, dp, 2, q);
+
+	  if (UNLIKELY (n1 != cy))
+	    {
+	      if (n1 < (cy & flag))
+		{
+		  q--;
+		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+		}
+	      else
+		flag = 0;
+	    }
+	  n1 = np[1];
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  np[0] = n0;
+	  np[1] = n1;
+	}
+
+      *--qp = q;
+    }
+  ASSERT_ALWAYS (np[1] == n1);
+  np += 2;
+
+
+  dn = dn_orig;
+  if (UNLIKELY (n1 < (dn & flag)))
+    {
+      mp_limb_t q, x;
+
+      /* The quotient may be too large if the remainder is small.  Recompute
+	 for above ignored operand parts, until the remainder spills.
+
+	 FIXME: The quality of this code isn't the same as the code above.
+	 1. We don't compute things in an optimal order, high-to-low, in order
+	    to terminate as quickly as possible.
+	 2. We mess with pointers and sizes, adding and subtracting and
+	    adjusting to get things right.  It surely could be streamlined.
+	 3. The only termination criteria are that we determine that the
+	    quotient needs to be adjusted, or that we have recomputed
+	    everything.  We should stop when the remainder is so large
+	    that no additional subtracting could make it spill.
+	 4. If nothing else, we should not do two loops of submul_1 over the
+	    data, instead handle both the triangularization and chopping at
+	    once.  */
+
+      x = n1;
+
+      if (dn > 2)
+	{
+	  /* Compensate for triangularization.  */
+	  mp_limb_t y;
+
+	  dp = dp_orig;
+	  if (qn + 1 < dn)
+	    {
+	      dp += dn - (qn + 1);
+	      dn = qn + 1;
+	    }
+
+	  y = np[-2];
+
+	  for (i = dn - 3; i >= 0; i--)
+	    {
+	      q = qp[i];
+	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+	      if (y < cy)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      ASSERT_ALWAYS (cy == 0);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	      y -= cy;
+	    }
+	  np[-2] = y;
+	}
+
+      dn = dn_orig;
+      if (qn + 1 < dn)
+	{
+	  /* Compensate for ignored dividend and divisor tails.  */
+
+	  dp = dp_orig;
+	  np = np_orig;
+
+	  if (qh != 0)
+	    {
+	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      if (qn != 0)
+			cy = mpn_sub_1 (qp, qp, qn, 1);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	    }
+
+	  if (qn == 0)
+	    return qh;
+
+	  for (i = dn - qn - 2; i >= 0; i--)
+	    {
+	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      return qh;
+		    }
+		  x--;
+		}
+	    }
+	}
+    }
+
+  return qh;
+}
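
udiv_qr_3by2 and its dinv argument are the 3/2 flavor of Granlund and
Möller's division with a precomputed reciprocal.  The simpler 2/1 flavor
below conveys the same idea and can be checked standalone (a sketch assuming
GCC/Clang unsigned __int128; not GMP code): one reciprocal per divisor, then
one wide multiply and at most two single-branch corrections per quotient
limb.

    #include <stdint.h>
    #include <assert.h>

    typedef unsigned __int128 u128;

    static uint64_t reciprocal (uint64_t d)   /* d normalized: high bit set */
    {
      return (uint64_t) (~(u128) 0 / d);      /* floor((B^2-1)/d) - B */
    }

    static uint64_t div_2by1 (uint64_t *r, uint64_t u1, uint64_t u0,
                              uint64_t d, uint64_t v)   /* needs u1 < d */
    {
      u128 q = (u128) v * u1 + (((u128) u1 << 64) | u0);
      uint64_t q1 = (uint64_t) (q >> 64) + 1;  /* candidate quotient */
      uint64_t q0 = (uint64_t) q;
      uint64_t rem = u0 - q1 * d;              /* mod 2^64 */
      if (rem > q0)  { q1--; rem += d; }       /* at most one step back */
      if (rem >= d)  { q1++; rem -= d; }       /* at most one step forward */
      *r = rem;
      return q1;
    }

    int main (void)
    {
      uint64_t d = 0x8000000000000001;         /* high bit set */
      uint64_t u1 = 0x1234, u0 = 0x5678, r;
      uint64_t q = div_2by1 (&r, u1, u0, d, reciprocal (d));
      assert ((u128) q * d + r == (((u128) u1 << 64) | u0) && r < d);
      return 0;
    }
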
diff --git a/third_party/gmp/mpn/generic/sbpi1_div_qr.c b/third_party/gmp/mpn/generic/sbpi1_div_qr.c
new file mode 100644
index 0000000..7330a77
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_div_qr.c
@@ -0,0 +1,109 @@
+/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_qr (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += nn - dn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  for (i = nn - (dn + 2); i > 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+  np[1] = n1;
+
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/sbpi1_divappr_q.c b/third_party/gmp/mpn/generic/sbpi1_divappr_q.c
new file mode 100644
index 0000000..ef7ca26
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sbpi1_divappr_q.c
@@ -0,0 +1,198 @@
+/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm, returning approximate quotient.  The quotient returned
+   is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_divappr_q (mp_ptr qp,
+		     mp_ptr np, mp_size_t nn,
+		     mp_srcptr dp, mp_size_t dn,
+		     mp_limb_t dinv)
+{
+  mp_limb_t qh;
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;
+
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  for (i = qn - (dn + 2); i >= 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 -= cy1;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);
+
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--)
+	{
+	  np--;
+	  if (UNLIKELY (n1 >= (d1 & flag)))
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+	      if (UNLIKELY (n1 != cy))
+		{
+		  if (n1 < (cy & flag))
+		    {
+		      q--;
+		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
+		    }
+		  else
+		    flag = 0;
+		}
+	      n1 = np[1];
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	      cy1 = n0 < cy;
+	      n0 = (n0 - cy) & GMP_NUMB_MASK;
+	      cy = n1 < cy1;
+	      n1 -= cy1;
+	      np[0] = n0;
+
+	      if (UNLIKELY (cy != 0))
+		{
+		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+		  q--;
+		}
+	    }
+
+	  *--qp = q;
+
+	  /* Truncate operands.  */
+	  dn--;
+	  dp++;
+	}
+
+      np--;
+      if (UNLIKELY (n1 >= (d1 & flag)))
+	{
+	  q = GMP_NUMB_MASK;
+	  cy = mpn_submul_1 (np, dp, 2, q);
+
+	  if (UNLIKELY (n1 != cy))
+	    {
+	      if (n1 < (cy & flag))
+		{
+		  q--;
+		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+		}
+	      else
+		flag = 0;
+	    }
+	  n1 = np[1];
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  np[1] = n1;
+	  np[0] = n0;
+	}
+
+      *--qp = q;
+    }
+
+  ASSERT_ALWAYS (np[1] == n1);
+
+  return qh;
+}
diff --git a/third_party/gmp/mpn/generic/scan0.c b/third_party/gmp/mpn/generic/scan0.c
new file mode 100644
index 0000000..d71832e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/scan0.c
@@ -0,0 +1,59 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+   1. U must sooner or later have a limb with a clear bit.
+ */
+
+mp_bitcnt_t
+mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / GMP_NUMB_BITS;
+  p = up + starting_word;
+  alimb = *p++ ^ GMP_NUMB_MASK;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  while (alimb == 0)
+    alimb = *p++ ^ GMP_NUMB_MASK;
+
+  count_trailing_zeros (cnt, alimb);
+  return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
diff --git a/third_party/gmp/mpn/generic/scan1.c b/third_party/gmp/mpn/generic/scan1.c
new file mode 100644
index 0000000..09e8060
--- /dev/null
+++ b/third_party/gmp/mpn/generic/scan1.c
@@ -0,0 +1,59 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+   1. U must sooner or later have a limb != 0.
+ */
+
+mp_bitcnt_t
+mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / GMP_NUMB_BITS;
+  p = up + starting_word;
+  alimb = *p++;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  while (alimb == 0)
+    alimb = *p++;
+
+  count_trailing_zeros (cnt, alimb);
+  return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
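
mpn_scan0 and mpn_scan1 are public entry points; the caller must guarantee
that a suitable bit exists at or above starting_bit.  A small usage sketch
(hypothetical driver):

    #include <gmp.h>
    #include <stdio.h>

    int main (void)
    {
      mp_limb_t x[2] = { 0x100, 1 };  /* bits 8 and GMP_NUMB_BITS set */
      printf ("%lu\n", (unsigned long) mpn_scan1 (x, 0));  /* 8 */
      printf ("%lu\n", (unsigned long) mpn_scan1 (x, 9));  /* GMP_NUMB_BITS */
      printf ("%lu\n", (unsigned long) mpn_scan0 (x, 8));  /* 9 */
      return 0;
    }
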
diff --git a/third_party/gmp/mpn/generic/sec_aors_1.c b/third_party/gmp/mpn/generic/sec_aors_1.c
new file mode 100644
index 0000000..6480fa1
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_aors_1.c
@@ -0,0 +1,59 @@
+/* mpn_sec_add_1, mpn_sec_sub_1
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if OPERATION_sec_add_1
+#define FNAME mpn_sec_add_1
+#define FNAME_itch mpn_sec_add_1_itch
+#define OP_N mpn_add_n
+#endif
+#if OPERATION_sec_sub_1
+#define FNAME mpn_sec_sub_1
+#define FNAME_itch mpn_sec_sub_1_itch
+#define OP_N mpn_sub_n
+#endif
+
+/* It's annoying that we need scratch space.  */
+mp_size_t
+FNAME_itch (mp_size_t n)
+{
+  return n;
+}
+
+mp_limb_t
+FNAME (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_ptr scratch)
+{
+  scratch[0] = b;
+  MPN_ZERO (scratch + 1, n-1);
+  return OP_N (rp, ap, scratch, n);
+}
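
The _itch companion function is the calling convention throughout the sec_
family: it reports the scratch requirement so callers can allocate it up
front, outside secret-dependent control flow.  A usage sketch for the add
flavor (hypothetical driver):

    #include <gmp.h>
    #include <stdlib.h>
    #include <assert.h>

    int main (void)
    {
      mp_size_t n = 4;
      mp_limb_t a[4] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0, 0, 0 };
      mp_limb_t r[4];
      mp_ptr tp = malloc (mpn_sec_add_1_itch (n) * sizeof (mp_limb_t));
      mp_limb_t cy = mpn_sec_add_1 (r, a, n, 1, tp);  /* ripples two limbs */
      assert (cy == 0 && r[0] == 0 && r[1] == 0 && r[2] == 1);
      free (tp);
      return 0;
    }
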
diff --git a/third_party/gmp/mpn/generic/sec_div.c b/third_party/gmp/mpn/generic/sec_div.c
new file mode 100644
index 0000000..1f08649
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_div.c
@@ -0,0 +1,131 @@
+/* mpn_sec_div_qr, mpn_sec_div_r -- Compute Q = floor(U / V), U = U mod V.
+   Side-channel silent under the assumption that the used instructions are
+   side-channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2011-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if OPERATION_sec_div_qr
+#define FNAME mpn_sec_div_qr
+#define FNAME_itch mpn_sec_div_qr_itch
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_div_r
+#define FNAME mpn_sec_div_r
+#define FNAME_itch mpn_sec_div_r_itch
+#define Q(q)
+#define RETTYPE void
+#endif
+
+mp_size_t
+FNAME_itch (mp_size_t nn, mp_size_t dn)
+{
+#if OPERATION_sec_div_qr
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_qr's needs of (2nn' - dn + 1) for a
+   total of 3nn + 4 limbs at tp.  Note that mpn_sec_pi1_div_qr's nn is one
+   greater than ours, therefore +4 and not just +2.  */
+  return 3 * nn + 4;
+#endif
+#if OPERATION_sec_div_r
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_r's needs of (dn + 1) for a total of
+   nn + 2dn + 2 limbs at tp.  */
+  return nn + 2 * dn + 2;
+#endif
+}
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_ptr tp)
+{
+  mp_limb_t d1, d0;
+  unsigned int cnt;
+  mp_limb_t inv32;
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn - 1] != 0);
+
+  d1 = dp[dn - 1];
+  count_leading_zeros (cnt, d1);
+
+  if (cnt != 0)
+    {
+      mp_limb_t qh, cy;
+      mp_ptr np2, dp2;
+      dp2 = tp;					/* dn limbs */
+      mpn_lshift (dp2, dp, dn, cnt);
+
+      np2 = tp + dn;				/* (nn + 1) limbs */
+      cy = mpn_lshift (np2, np, nn, cnt);
+      np2[nn++] = cy;
+
+      d0 = dp2[dn - 1];
+      d0 += (~d0 != 0);
+      invert_limb (inv32, d0);
+
+      /* We add nn + dn to tp here, not nn + 1 + dn, as expected.  This is
+	 because nn here will have been incremented.  */
+#if OPERATION_sec_div_qr
+      qh = mpn_sec_pi1_div_qr (np2 + dn, np2, nn, dp2, dn, inv32, tp + nn + dn);
+      ASSERT (qh == 0);		/* FIXME: this indicates inefficiency! */
+      MPN_COPY (qp, np2 + dn, nn - dn - 1);
+      qh = np2[nn - 1];
+#else
+      mpn_sec_pi1_div_r (np2, nn, dp2, dn, inv32, tp + nn + dn);
+#endif
+
+      mpn_rshift (np, np2, dn, cnt);
+
+#if OPERATION_sec_div_qr
+      return qh;
+#endif
+    }
+  else
+    {
+      /* FIXME: Consider copying np => np2 here, adding a 0-limb at the top.
+	 That would simplify the underlying pi1 function, since then it could
+	 assume nn > dn.  */
+      d0 = dp[dn - 1];
+      d0 += (~d0 != 0);
+      invert_limb (inv32, d0);
+
+#if OPERATION_sec_div_qr
+      return mpn_sec_pi1_div_qr (qp, np, nn, dp, dn, inv32, tp);
+#else
+      mpn_sec_pi1_div_r (np, nn, dp, dn, inv32, tp);
+#endif
+    }
+}
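
For the remainder-only flavor, the low dn limbs of {np,nn} are overwritten
with the remainder.  A minimal usage sketch (hypothetical values):

    #include <gmp.h>
    #include <stdlib.h>
    #include <assert.h>

    int main (void)
    {
      mp_limb_t n[2] = { 100, 0 };  /* dividend, nn = 2 */
      mp_limb_t d[1] = { 7 };       /* divisor,  dn = 1 */
      mp_ptr tp = malloc (mpn_sec_div_r_itch (2, 1) * sizeof (mp_limb_t));
      mpn_sec_div_r (n, 2, d, 1, tp);
      assert (n[0] == 100 % 7);     /* remainder 2 in the low dn limbs */
      free (tp);
      return 0;
    }
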
diff --git a/third_party/gmp/mpn/generic/sec_invert.c b/third_party/gmp/mpn/generic/sec_invert.c
new file mode 100644
index 0000000..07665d1
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_invert.c
@@ -0,0 +1,177 @@
+/* mpn_sec_invert
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if 0
+/* Currently unused. Should be resurrected once mpn_cnd_neg is
+   advertised. */
+static mp_size_t
+mpn_cnd_neg_itch (mp_size_t n)
+{
+  return n;
+}
+#endif
+
+/* FIXME: Ought to return carry */
+static void
+mpn_cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n,
+	     mp_ptr scratch)
+{
+  mpn_lshift (scratch, ap, n, 1);
+  mpn_cnd_sub_n (cnd, rp, ap, scratch, n);
+}
+
+static int
+mpn_sec_eq_ui (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_limb_t d;
+  ASSERT (n > 0);
+
+  d = ap[0] ^ b;
+
+  while (--n > 0)
+    d |= ap[n];
+
+  return d == 0;
+}
+
+mp_size_t
+mpn_sec_invert_itch (mp_size_t n)
+{
+  return 4*n;
+}
+
+/* Compute V <-- A^{-1} (mod M), in data-independent time. M must be
+   odd. Returns 1 on success, and 0 on failure (i.e., if gcd (A, m) !=
+   1). Inputs and outputs of size n, and no overlap allowed. The {ap,
+   n} area is destroyed. For arbitrary inputs, bit_size should be
+   2*n*GMP_NUMB_BITS, but if A or M are known to be smaller, e.g., if
+   M = 2^521 - 1 and A < M, bit_size can be any bound on the sum of
+   the bit sizes of A and M. */
+int
+mpn_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp,
+		mp_size_t n, mp_bitcnt_t bit_size,
+		mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (bit_size > 0);
+  ASSERT (mp[0] & 1);
+  ASSERT (! MPN_OVERLAP_P (ap, n, vp, n));
+#define bp (scratch + n)
+#define up (scratch + 2*n)
+#define m1hp (scratch + 3*n)
+
+  /* Maintain
+
+       a = u * orig_a (mod m)
+       b = v * orig_a (mod m)
+
+     and b odd at all times. Initially,
+
+       a = a_orig, u = 1
+       b = m,      v = 0
+     */
+
+
+  up[0] = 1;
+  mpn_zero (up+1, n - 1);
+  mpn_copyi (bp, mp, n);
+  mpn_zero (vp, n);
+
+  ASSERT_CARRY (mpn_rshift (m1hp, mp, n, 1));
+  ASSERT_NOCARRY (mpn_sec_add_1 (m1hp, m1hp, n, 1, scratch));
+
+  while (bit_size-- > 0)
+    {
+      mp_limb_t odd, swap, cy;
+
+      /* Always maintain b odd. The logic of the iteration is as
+	 follows. For a, b:
+
+	   odd = a & 1
+	   a -= odd * b
+	   if (underflow from a-b)
+	     {
+	       b += a, assigns old a
+	       a = B^n-a
+	     }
+
+	   a /= 2
+
+	 For u, v:
+
+	   if (underflow from a - b)
+	     swap u, v
+	   u -= odd * v
+	   if (underflow from u - v)
+	     u += m
+
+	   u /= 2
+	   if (a one bit was shifted out)
+	     u += (m+1)/2
+
+	 As long as a > 0, the quantity
+
+	   (bitsize of a) + (bitsize of b)
+
+	 is reduced by at least one bit per iteration, hence after (bit_size of
+	 orig_a) + (bit_size of m) - 1 iterations we surely have a = 0. Then b
+	 = gcd(orig_a, m) and if b = 1 then also v = orig_a^{-1} (mod m).
+      */
+
+      ASSERT (bp[0] & 1);
+      odd = ap[0] & 1;
+
+      swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
+      mpn_cnd_add_n (swap, bp, bp, ap, n);
+      mpn_cnd_neg (swap, ap, ap, n, scratch);
+
+      mpn_cnd_swap (swap, up, vp, n);
+      cy = mpn_cnd_sub_n (odd, up, up, vp, n);
+      cy -= mpn_cnd_add_n (cy, up, up, mp, n);
+      ASSERT (cy == 0);
+
+      cy = mpn_rshift (ap, ap, n, 1);
+      ASSERT (cy == 0);
+      cy = mpn_rshift (up, up, n, 1);
+      cy = mpn_cnd_add_n (cy, up, up, m1hp, n);
+      ASSERT (cy == 0);
+    }
+  /* Should be all zeros, but check only extreme limbs */
+  ASSERT ( (ap[0] | ap[n-1]) == 0);
+  /* Check if indeed gcd == 1. */
+  return mpn_sec_eq_ui (bp, n, 1);
+#undef bp
+#undef up
+#undef m1hp
+}
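
A usage sketch (hypothetical single-limb values; as the comment above notes,
{ap,n} is destroyed and 2*n*GMP_NUMB_BITS is the safe bit_size bound for
arbitrary inputs):

    #include <gmp.h>
    #include <stdlib.h>
    #include <assert.h>

    int main (void)
    {
      mp_limb_t a[1] = { 3 }, m[1] = { 7 }, v[1];
      mp_ptr tp = malloc (mpn_sec_invert_itch (1) * sizeof (mp_limb_t));
      int ok = mpn_sec_invert (v, a, m, 1, 2 * GMP_NUMB_BITS, tp);
      assert (ok == 1 && v[0] == 5);  /* 3 * 5 == 15 == 1 (mod 7) */
      free (tp);
      return 0;
    }
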
diff --git a/third_party/gmp/mpn/generic/sec_mul.c b/third_party/gmp/mpn/generic/sec_mul.c
new file mode 100644
index 0000000..4bbfa61
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_mul.c
@@ -0,0 +1,48 @@
+/* mpn_sec_mul.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_sec_mul (mp_ptr rp,
+	     mp_srcptr ap, mp_size_t an,
+	     mp_srcptr bp, mp_size_t bn,
+	     mp_ptr tp)
+{
+  mpn_mul_basecase (rp, ap, an, bp, bn);
+}
+
+mp_size_t
+mpn_sec_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  return 0;
+}
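
The underlying mpn_mul_basecase requires an >= bn and a non-overlapping
result area of an + bn limbs.  A minimal sketch (hypothetical values; the
itch is currently zero, so the scratch argument goes unused here, though
portable callers should still size it via mpn_sec_mul_itch):

    #include <gmp.h>
    #include <assert.h>

    int main (void)
    {
      mp_limb_t a[2] = { 3, 1 };   /* B + 3, with B = 2^GMP_NUMB_BITS */
      mp_limb_t b[1] = { 5 };
      mp_limb_t r[3];              /* an + bn limbs */
      mpn_sec_mul (r, a, 2, b, 1, NULL);
      assert (r[0] == 15 && r[1] == 5 && r[2] == 0);  /* 5*B + 15 */
      return 0;
    }
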
diff --git a/third_party/gmp/mpn/generic/sec_pi1_div.c b/third_party/gmp/mpn/generic/sec_pi1_div.c
new file mode 100644
index 0000000..29d01e7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_pi1_div.c
@@ -0,0 +1,172 @@
+/* mpn_sec_pi1_div_qr, mpn_sec_pi1_div_r -- Compute Q = floor(U / V), U = U
+   mod V.  Side-channel silent under the assumption that the used instructions
+   are side-channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011-2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This side-channel silent division algorithm reduces the partial remainder by
+   GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+   division algorithm.  We actually do not insist on reducing by exactly
+   GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+   too large (B is the limb base, D is the divisor, and i is the induction
+   variable); the subsequent step will handle the extra partial remainder bits.
+
+   With that partial remainder reduction, each step generates a quotient "half
+   limb".  The outer loop generates two quotient half limbs, an upper (q1h) and
+   a lower (q0h) which are stored sparsely in separate limb arrays.  These
+   arrays are added at the end; using separate arrays avoids data-dependent
+   carry propagation which could otherwise pose a side-channel leakage problem.
+
+   The quotient half limbs may be between -3 and 0 from the accurate value
+   ("accurate" being the one which corresponds to a reduction to a principal
+   partial remainder).  Too small quotient half limbs correspond to too large
+   remainders, which we reduce later, as described above.
+
+   In order to keep quotients from getting too big, corresponding to a negative
+   partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
+#if OPERATION_sec_pi1_div_qr
+/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+#define FNAME mpn_sec_pi1_div_qr
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_pi1_div_r
+/* Needs (dn + 1) limbs at tp.  */
+#define FNAME mpn_sec_pi1_div_r
+#define Q(q)
+#define RETTYPE void
+#endif
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_limb_t dinv,
+       mp_ptr tp)
+{
+  mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+  mp_size_t i;
+  mp_ptr hp;
+#if OPERATION_sec_pi1_div_qr
+  mp_limb_t qh;
+  mp_ptr qlp, qhp;
+#endif
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+  if (nn == dn)
+    {
+      cy = mpn_sub_n (np, np, dp, dn);
+      mpn_cnd_add_n (cy, np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+      return 1 - cy;
+#else
+      return;
+#endif
+    }
+
+  /* Create a divisor copy shifted half a limb.  */
+  hp = tp;					/* (dn + 1) limbs */
+  hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+
+#if OPERATION_sec_pi1_div_qr
+  qlp = tp + (dn + 1);				/* (nn - dn) limbs */
+  qhp = tp + (nn + 1);				/* (nn - dn) limbs */
+#endif
+
+  np += nn - dn;
+  nh = 0;
+
+  for (i = nn - dn - 1; i >= 0; i--)
+    {
+      np--;
+
+      nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+      umul_ppmm (q1h, dummy, nh, dinv);
+      q1h += nh;
+#if OPERATION_sec_pi1_div_qr
+      qhp[i] = q1h;
+#endif
+      mpn_submul_1 (np, hp, dn + 1, q1h);
+
+      nh = np[dn];
+      umul_ppmm (q0h, dummy, nh, dinv);
+      q0h += nh;
+#if OPERATION_sec_pi1_div_qr
+      qlp[i] = q0h;
+#endif
+      nh -= mpn_submul_1 (np, dp, dn, q0h);
+    }
+
+  /* 1st adjustment depends on extra high remainder limb.  */
+  cnd = nh != 0;				/* FIXME: cmp-to-int */
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += cnd;
+#endif
+  nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
+
+  /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+     extra remainder limb was nullified by previous subtract.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+  cy = cy - nh;
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += 1 - cy;
+#endif
+  mpn_cnd_add_n (cy, np, np, dp, dn);
+
+  /* 3rd adjustment depends on remainder/divisor comparison.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += 1 - cy;
+#endif
+  mpn_cnd_add_n (cy, np, np, dp, dn);
+
+#if OPERATION_sec_pi1_div_qr
+  /* Combine quotient halves into final quotient.  */
+  qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+  qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+
+  return qh;
+#else
+  return;
+#endif
+}
diff --git a/third_party/gmp/mpn/generic/sec_powm.c b/third_party/gmp/mpn/generic/sec_powm.c
new file mode 100644
index 0000000..3a78c66
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_powm.c
@@ -0,0 +1,383 @@
+/* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
+   under the assumption that the multiply instruction is side channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2007-2009, 2011-2014, 2018-2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. T <- (B^n * U) mod M; convert to REDC form
+
+  2. Compute table U^0, U^1, U^2... of floor(log(E))-dependent size
+
+  3. While there are more bits in E
+       W <- power left-to-right base-k
+
+  The article "Defeating modexp side-channel attacks with data-independent
+  execution traces", https://gmplib.org/~tege/modexp-silent.pdf, has details.
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+     redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
+     slowdown.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#undef MPN_REDC_1_SEC
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
+    mpn_cnd_sub_n (cy, rp, up + n, mp, n);				\
+  } while (0)
+#else
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
+    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
+  } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
+    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
+  } while (0)
+#else
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) /* empty */
+#undef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD MP_SIZE_T_MAX
+#endif
+
+/* Define our own mpn squaring function.  We do this since we cannot use a
+   native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
+   SQR_TOOM2_THRESHOLD.  This is so because of fixed-size stack allocations
+   made inside mpn_sqr_basecase.  */
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
+   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+   file.  An assembly mpn_sqr_basecase that does not define it should allow
+   any size.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
+   __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allows any size, and we
+   limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
+   size.  */
+#define SQR_BASECASE_LIM  MP_SIZE_T_MAX
+#endif
+
+#define mpn_local_sqr(rp,up,n)						\
+  do {									\
+    if (ABOVE_THRESHOLD (n, SQR_BASECASE_THRESHOLD)			\
+	&& BELOW_THRESHOLD (n, SQR_BASECASE_LIM))			\
+      mpn_sqr_basecase (rp, up, n);					\
+    else								\
+      mpn_mul_basecase(rp, up, n, up, n);				\
+  } while (0)
+
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
+
+/* FIXME: Maybe some things would get simpler if all callers ensure
+   that bi >= nbits. As far as I understand, with the current code bi
+   < nbits can happen only for the final iteration. */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else
+    {
+      bi -= nbits;			/* bit index of low bit to extract */
+      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      r = p[i] >> bi;			/* extract (low) bits */
+      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      if (nbits_in_r < nbits)		/* did we get enough bits? */
+	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
+      return r & (((mp_limb_t) 1 << nbits) - 1);
+    }
+}
+
+#ifndef POWM_SEC_TABLE
+#if GMP_NUMB_BITS < 50
+#define POWM_SEC_TABLE  2,33,96,780,2741
+#else
+#define POWM_SEC_TABLE  2,130,524,2578
+#endif
+#endif
+
+#if TUNE_PROGRAM_BUILD
+extern int win_size (mp_bitcnt_t);
+#else
+static inline int
+win_size (mp_bitcnt_t enb)
+{
+  int k;
+  /* Find k, such that x[k-1] < enb <= x[k].
+
+     We require that x[k] >= k, then it follows that enb > x[k-1] >=
+     k-1, which implies k <= enb.
+  */
+  static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
+  for (k = 1; enb > x[k]; k++)
+    ;
+  ASSERT (k <= enb);
+  return k;
+}
+#endif
+
+/* Convert U to REDC form, U_r = B^n * U mod M.
+   Uses scratch space at tp of size 2un + n + 1.  */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+
+  mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
+  MPN_COPY (rp, tp, n);
+}
+
+/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
+   where en = ceil (enb / GMP_NUMB_BITS)
+   Requires that {mp, n} is odd (and hence also mp[0] odd).
+   Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
+void
+mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+	      mp_srcptr ep, mp_bitcnt_t enb,
+	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp, ps;
+  long i;
+  int cnd;
+
+  ASSERT (enb > 0);
+  ASSERT (n > 0);
+  /* The code works for bn = 0, but the defined scratch space is 2 limbs
+     greater than we supply, when converting 1 to redc form.  */
+  ASSERT (bn > 0);
+  ASSERT ((mp[0] & 1) != 0);
+
+  windowsize = win_size (enb);
+
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+
+  pp = tp;
+  tp += (n << windowsize);	/* put tp after power table */
+
+  /* Compute pp[0] table entry */
+  /* scratch: |   n   | 1 |   n+2    |  */
+  /*          | pp[0] | 1 | redcify  |  */
+  this_pp = pp;
+  this_pp[n] = 1;
+  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
+  this_pp += n;
+
+  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
+     degenerate situation where B >> M, we let redcify use scratch space which
+     will later be used by the pp table (element 2 and up).  */
+  /* scratch: |   n   |   n   |  bn + n + 1  |  */
+  /*          | pp[0] | pp[1] |   redcify    |  */
+  redcify (this_pp, bp, bn, mp, n, this_pp + n);
+
+  /* Precompute powers of b and put them in the temporary area at pp.  */
+  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
+  ps = pp + n;		/* initially B^1 */
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+	{
+	  mpn_local_sqr (tp, ps, n);
+	  ps += n;
+	  this_pp += n;
+	  MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+
+	  mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+	  this_pp += n;
+	  MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+	}
+    }
+  else
+    {
+      for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+	{
+	  mpn_local_sqr (tp, ps, n);
+	  ps += n;
+	  this_pp += n;
+	  MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+
+	  mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+	  this_pp += n;
+	  MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+	}
+    }
+
+  expbits = getbits (ep, enb, windowsize);
+  ASSERT_ALWAYS (enb >= windowsize);
+  enb -= windowsize;
+
+  mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
+
+  /* Main exponentiation loop.  */
+  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
+
+#define INNERLOOP							\
+  while (enb != 0)							\
+    {									\
+      expbits = getbits (ep, enb, windowsize);				\
+      this_windowsize = windowsize;					\
+      if (enb < windowsize)						\
+	{								\
+	  this_windowsize -= windowsize - enb;				\
+	  enb = 0;							\
+	}								\
+      else								\
+	enb -= windowsize;						\
+									\
+      do								\
+	{								\
+	  mpn_local_sqr (tp, rp, n);					\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	  this_windowsize--;						\
+	}								\
+      while (this_windowsize != 0);					\
+									\
+      mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
+      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
+									\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+    }
+
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+      INNERLOOP;
+    }
+  else
+    {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+      INNERLOOP;
+    }
+
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+  else
+    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+
+  cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
+  mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
+}
+
+mp_size_t
+mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
+{
+  int windowsize;
+  mp_size_t redcify_itch, itch;
+
+  /* FIXME: no more _local/_basecase difference. */
+  /* The top scratch usage will either be when reducing B in the 2nd redcify
+     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
+     is 3n or 4n depending on whether we use mpn_local_sqr or a native
+     mpn_sqr_basecase.  We assume 4n always for now.) */
+
+  windowsize = win_size (enb);
+
+  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+     the (bn + n) term is due to redcify's own usage, and the rest is due to
+     mpn_sec_div_r's usage when called from redcify.  */
+  redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
+
+  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+     scratch needs of squaring/multiplication in the exponentiation loop.  */
+  itch = (n << windowsize) + (4 * n);
+
+  return MAX (itch, redcify_itch);
+}
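
A minimal end-to-end sketch (hypothetical single-limb values): enb bounds the
exponent's bit count, the modulus must be odd, and scratch is sized by the
itch function above.

    #include <gmp.h>
    #include <stdlib.h>
    #include <assert.h>

    int main (void)
    {
      mp_limb_t b[1] = { 2 }, e[1] = { 10 }, m[1] = { 1001 }, r[1];
      mp_bitcnt_t enb = 4;            /* exponent 10 fits in 4 bits */
      mp_ptr tp = malloc (mpn_sec_powm_itch (1, enb, 1) * sizeof (mp_limb_t));
      mpn_sec_powm (r, b, 1, e, enb, m, 1, tp);
      assert (r[0] == 23);            /* 2^10 = 1024 = 23 (mod 1001) */
      free (tp);
      return 0;
    }
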
diff --git a/third_party/gmp/mpn/generic/sec_sqr.c b/third_party/gmp/mpn/generic/sec_sqr.c
new file mode 100644
index 0000000..83fc7d9
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_sqr.c
@@ -0,0 +1,76 @@
+/* mpn_sec_sqr.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
+   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+   file.  An assembly mpn_sqr_basecase that does not define it should allow
+   any size.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD, which will expand to a read from
+   __gmpn_cpuvec.  Perhaps every possible sqr_basecase.asm allows any size, and
+   we limit its use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+void
+mpn_sec_sqr (mp_ptr rp,
+	     mp_srcptr ap, mp_size_t an,
+	     mp_ptr tp)
+{
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is still undefined at this point, mpn_sqr_basecase is
+   usable for any operand size.  */
+  mpn_sqr_basecase (rp, ap, an);
+#else
+/* Otherwise fall back to mpn_mul_basecase, which has no size limit.  */
+  mpn_mul_basecase (rp, ap, an, ap, an);
+#endif
+}
+
+mp_size_t
+mpn_sec_sqr_itch (mp_size_t an)
+{
+  return 0;
+}
diff --git a/third_party/gmp/mpn/generic/sec_tabselect.c b/third_party/gmp/mpn/generic/sec_tabselect.c
new file mode 100644
index 0000000..5767e27
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sec_tabselect.c
@@ -0,0 +1,54 @@
+/* mpn_sec_tabselect.
+
+Copyright 2007-2009, 2011, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+   limbs.  Store the selected entry at rp.  Reads the entire table to avoid
+   side-channel information leaks.  O(n*nents).  */
+void
+mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
+		   mp_size_t n, mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask;
+  volatile const mp_limb_t *tp;
+
+  for (k = 0; k < nents; k++)
+    {
+      mask = -(mp_limb_t) (which == k);
+      tp = tab + n * k;
+      for (i = 0; i < n; i++)
+	{
+	  rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
+	}
+    }
+}
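
The constant-time behaviour hinges on the mask -(mp_limb_t) (which == k): an all-ones limb exactly when k is the wanted index and zero otherwise, so every entry is read and combined with a memory access pattern independent of `which'.  A freestanding sketch of the same trick on plain 64-bit words (toy code, not part of the import):

  #include <stdint.h>
  #include <stdio.h>

  /* Branchless table select: no data-dependent branch or index. */
  static uint64_t
  ct_select (const uint64_t *tab, size_t nents, size_t which)
  {
    uint64_t r = 0;
    for (size_t k = 0; k < nents; k++)
      {
        uint64_t mask = -(uint64_t) (k == which);  /* ~0 or 0 */
        r = (r & ~mask) | (tab[k] & mask);
      }
    return r;
  }

  int
  main (void)
  {
    uint64_t tab[4] = { 11, 22, 33, 44 };
    printf ("%llu\n", (unsigned long long) ct_select (tab, 4, 2));  /* 33 */
    return 0;
  }
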
diff --git a/third_party/gmp/mpn/generic/set_str.c b/third_party/gmp/mpn/generic/set_str.c
new file mode 100644
index 0000000..2bd584c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/set_str.c
@@ -0,0 +1,290 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) --
+   Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
+   vector pointed to by RES_PTR.  Return the number of limbs in RES_PTR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* TODO:
+
+      Perhaps do not compute the highest power?
+      Instead, multiply twice by the 2nd highest power:
+
+	       _______
+	      |_______|  hp
+	      |_______|  pow
+       _______________
+      |_______________|  final result
+
+
+	       _______
+	      |_______|  hp
+		  |___|  pow[-1]
+	   ___________
+	  |___________|  intermediate result
+		  |___|  pow[-1]
+       _______________
+      |_______________|  final result
+
+      Generalizing that idea, perhaps we should make powtab contain successive
+      cubes, not squares.
+*/
+
+#include "gmp-impl.h"
+
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Read the input string from least to most
+	 significant character/digit.  */
+
+      const unsigned char *s;
+      int next_bitpos;
+      mp_limb_t res_digit;
+      mp_size_t size;
+      int bits_per_indigit = mp_bases[base].big_base;
+
+      size = 0;
+      res_digit = 0;
+      next_bitpos = 0;
+
+      for (s = str + str_len - 1; s >= str; s--)
+	{
+	  int inp_digit = *s;
+
+	  res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
+	  next_bitpos += bits_per_indigit;
+	  if (next_bitpos >= GMP_NUMB_BITS)
+	    {
+	      rp[size++] = res_digit;
+	      next_bitpos -= GMP_NUMB_BITS;
+	      res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+	    }
+	}
+
+      if (res_digit != 0)
+	rp[size++] = res_digit;
+      return size;
+    }
+
+  if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_set_str (rp, str, str_len, base);
+  else
+    {
+      mp_ptr powtab_mem, tp;
+      powers_t powtab[GMP_LIMB_BITS];
+      int chars_per_limb;
+      mp_size_t size;
+      mp_size_t un;
+      TMP_DECL;
+
+      TMP_MARK;
+
+      chars_per_limb = mp_bases[base].chars_per_limb;
+
+      un = str_len / chars_per_limb + 1; /* FIXME: scalar integer division */
+
+      /* Allocate one large block for the powers of big_base.  */
+      powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+      size_t n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base);
+      powers_t *pt = powtab + n_pows;
+
+      tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+      size = mpn_dc_set_str (rp, str, str_len, pt, tp);
+
+      TMP_FREE;
+      return size;
+    }
+}
+
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+		const powers_t *powtab, mp_ptr tp)
+{
+  size_t len_lo, len_hi;
+  mp_limb_t cy;
+  mp_size_t ln, hn, n, sn;
+
+  len_lo = powtab->digits_in_base;
+
+  if (str_len <= len_lo)
+    {
+      if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+	return mpn_bc_set_str (rp, str, str_len, powtab->base);
+      else
+	return mpn_dc_set_str (rp, str, str_len, powtab - 1, tp);
+    }
+
+  len_hi = str_len - len_lo;
+  ASSERT (len_lo >= len_hi);
+
+  if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+    hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+  else
+    hn = mpn_dc_set_str (tp, str, len_hi, powtab - 1, rp);
+
+  sn = powtab->shift;
+
+  if (hn == 0)
+    {
+      /* Zero one extra limb here, to avoid reading an allocated but
+	 uninitialised limb in mpn_incr_u below.  */
+      MPN_ZERO (rp, powtab->n + sn + 1);
+    }
+  else
+    {
+      if (powtab->n > hn)
+	mpn_mul (rp + sn, powtab->p, powtab->n, tp, hn);
+      else
+	mpn_mul (rp + sn, tp, hn, powtab->p, powtab->n);
+      MPN_ZERO (rp, sn);
+    }
+
+  str = str + str_len - len_lo;
+  if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+    ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+  else
+    ln = mpn_dc_set_str (tp, str, len_lo, powtab - 1, tp + powtab->n + sn + 1);
+
+  if (ln != 0)
+    {
+      cy = mpn_add_n (rp, rp, tp, ln);
+      mpn_incr_u (rp + ln, cy);
+    }
+  n = hn + powtab->n + sn;
+  return n - (rp[n - 1] == 0);
+}
+
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  mp_size_t size;
+  size_t i;
+  long j;
+  mp_limb_t cy_limb;
+
+  mp_limb_t big_base;
+  int chars_per_limb;
+  mp_limb_t res_digit;
+
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  ASSERT (str_len >= 1);
+
+  big_base = mp_bases[base].big_base;
+  chars_per_limb = mp_bases[base].chars_per_limb;
+
+  size = 0;
+  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
+    {
+      res_digit = *str++;
+      if (base == 10)
+	{ /* This is a common case.
+	     Help the compiler to avoid multiplication.  */
+	  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
+	    res_digit = res_digit * 10 + *str++;
+	}
+      else
+	{
+	  for (j = chars_per_limb - 1; j != 0; j--)
+	    res_digit = res_digit * base + *str++;
+	}
+
+      if (size == 0)
+	{
+	  if (res_digit != 0)
+	    {
+	      rp[0] = res_digit;
+	      size = 1;
+	    }
+	}
+      else
+	{
+#if HAVE_NATIVE_mpn_mul_1c
+	  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+	  cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+	  cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+	  if (cy_limb != 0)
+	    rp[size++] = cy_limb;
+	}
+    }
+
+  big_base = base;
+  res_digit = *str++;
+  if (base == 10)
+    { /* This is a common case.
+	 Help the compiler to avoid multiplication.  */
+      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
+	{
+	  res_digit = res_digit * 10 + *str++;
+	  big_base *= 10;
+	}
+    }
+  else
+    {
+      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
+	{
+	  res_digit = res_digit * base + *str++;
+	  big_base *= base;
+	}
+    }
+
+  if (size == 0)
+    {
+      if (res_digit != 0)
+	{
+	  rp[0] = res_digit;
+	  size = 1;
+	}
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_mul_1c
+      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+      if (cy_limb != 0)
+	rp[size++] = cy_limb;
+    }
+  return size;
+}
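
The basecase conversion is positional notation applied chars_per_limb digits at a time: accumulate one limb's worth of digits with scalar arithmetic, then fold the chunk in with one multiply by big_base and an add (the mpn_mul_1/mpn_add_1 pair above).  A toy single-word version, base 10 with 4 digits per chunk (digit values rather than ASCII, as mpn_bc_set_str expects):

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    const unsigned char str[] = { 1, 2, 3, 4, 5, 6, 7, 8 };  /* 12345678 */
    const uint64_t big_base = 10000;  /* 10^chars_per_limb, 4-digit chunks */
    uint64_t acc = 0;

    for (size_t i = 0; i < 8; i += 4)
      {
        uint64_t chunk = 0;
        for (size_t j = 0; j < 4; j++)  /* scalar digit accumulation */
          chunk = chunk * 10 + str[i + j];
        acc = acc * big_base + chunk;   /* the mpn_mul_1 + mpn_add_1 step */
      }
    printf ("%llu\n", (unsigned long long) acc);  /* prints 12345678 */
    return 0;
  }
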
diff --git a/third_party/gmp/mpn/generic/sizeinbase.c b/third_party/gmp/mpn/generic/sizeinbase.c
new file mode 100644
index 0000000..faee947
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sizeinbase.c
@@ -0,0 +1,49 @@
+/* mpn_sizeinbase -- approximation to chars required for an mpn.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Same as mpz_sizeinbase, meaning exact for power-of-2 bases, and either
+   exact or 1 too big for other bases.  */
+
+size_t
+mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
+{
+  size_t  result;
+  MPN_SIZEINBASE (result, xp, xsize, base);
+  return result;
+}
diff --git a/third_party/gmp/mpn/generic/sqr.c b/third_party/gmp/mpn/generic/sqr.c
new file mode 100644
index 0000000..74fbff0
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqr.c
@@ -0,0 +1,98 @@
+/* mpn_sqr -- square natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+
+  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+    { /* mul_basecase is sometimes faster than sqr_basecase on small sizes */
+      mpn_mul_basecase (p, a, n, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
+    {
+      mpn_sqr_basecase (p, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
+      mpn_toom2_sqr (p, a, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
+      mpn_toom3_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+      mpn_toom4_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+      mpn_toom6_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+      mpn_toom8_sqr (p, a, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+	 change.  */
+      mpn_fft_mul (p, a, n, a, n);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/sqr_basecase.c b/third_party/gmp/mpn/generic/sqr_basecase.c
new file mode 100644
index 0000000..2645bad
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqr_basecase.c
@@ -0,0 +1,361 @@
+/* mpn_sqr_basecase -- Internal routine to square a natural number
+   of length n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t _i;							\
+    for (_i = 0; _i < (n); _i++)					\
+      {									\
+	mp_limb_t ul, lpl;						\
+	ul = (up)[_i];							\
+	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
+	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
+      }									\
+  } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    mp_limb_t cy;							\
+    MPN_SQR_DIAGONAL (rp, up, n);					\
+    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
+    rp[2 * n - 1] += cy;						\
+  } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    mp_limb_t cy;							\
+    MPN_SQR_DIAGONAL (rp, up, n);					\
+    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
+    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
+    rp[2 * n - 1] += cy;						\
+  } while (0)
+#endif
+#endif
+
+
+#undef READY_WITH_mpn_sqr_basecase
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+  if ((n & 1) != 0)
+    {
+      if (n == 1)
+	{
+	  mp_limb_t ul, lpl;
+	  ul = up[0];
+	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lpl >> GMP_NAIL_BITS;
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 2; i += 2)
+	{
+	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+    }
+  else
+    {
+      if (n == 2)
+	{
+#if HAVE_NATIVE_mpn_mul_2
+	  rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+	  rp[0] = 0;
+	  rp[1] = 0;
+	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 4; i += 2)
+	{
+	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+      tp[2 * n - 3] = cy;
+    }
+
+  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
+
+/* mpn_sqr_basecase using plain mpn_addmul_2.
+
+   This is tricky, since we have to let mpn_addmul_2 make some undesirable
+   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
+   This forces us to conditionally add or subtract the mpn_sqr_diagonal
+   results.  Examples of the product we form:
+
+   n = 4              n = 5		n = 6
+   u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
+   u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
+					u4 * u5
+   add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
+   sub: u1	      sub: u1 u3	sub: u1 u3
+*/
+
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+  if ((n & 1) != 0)
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 1)
+	{
+	  mp_limb_t ul, lpl;
+	  ul = up[0];
+	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lpl >> GMP_NAIL_BITS;
+	  return;
+	}
+
+      /* The code below doesn't like unnormalized operands.  Since such
+	 operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+	{
+	  rp[2 * n - 2] = 0;
+	  rp[2 * n - 1] = 0;
+	  mpn_sqr_basecase (rp, up, n - 1);
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 2; i += 2)
+	{
+	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+
+      MPN_SQR_DIAGONAL (rp, up, n);
+
+      for (i = 2;; i += 4)
+	{
+	  x0 = rp[i + 0];
+	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+	  x1 = rp[i + 1];
+	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+	  if (i + 4 >= 2 * n)
+	    break;
+	  mpn_incr_u (rp + i + 4, cy);
+	}
+    }
+  else
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 2)
+	{
+#if HAVE_NATIVE_mpn_mul_2
+	  rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+	  rp[0] = 0;
+	  rp[1] = 0;
+	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+	  return;
+	}
+
+      /* The code below doesn't like unnormalized operands.  Since such
+	 operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+	{
+	  rp[2 * n - 2] = 0;
+	  rp[2 * n - 1] = 0;
+	  mpn_sqr_basecase (rp, up, n - 1);
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 4; i += 2)
+	{
+	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+      tp[2 * n - 3] = cy;
+
+      MPN_SQR_DIAGONAL (rp, up, n);
+
+      for (i = 2;; i += 4)
+	{
+	  x0 = rp[i + 0];
+	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+	  x1 = rp[i + 1];
+	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+	  if (i + 6 >= 2 * n)
+	    break;
+	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+	  mpn_incr_u (rp + i + 4, cy);
+	}
+      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
+    }
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
+
+/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
+   allocation.  */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  if (n == 1)
+    {
+      mp_limb_t ul, lpl;
+      ul = up[0];
+      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+      rp[0] = lpl >> GMP_NAIL_BITS;
+    }
+  else
+    {
+      mp_size_t i;
+      mp_ptr xp;
+
+      rp += 1;
+      rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
+      for (i = n - 2; i != 0; i--)
+	{
+	  up += 1;
+	  rp += 2;
+	  rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
+	}
+
+      xp = rp - 2 * n + 3;
+      mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
+    }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase)
+
+/* Default mpn_sqr_basecase using mpn_addmul_1.  */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
+
+  if (n == 1)
+    {
+      mp_limb_t ul, lpl;
+      ul = up[0];
+      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+      rp[0] = lpl >> GMP_NAIL_BITS;
+    }
+  else
+    {
+      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+      mp_ptr tp = tarr;
+      mp_limb_t cy;
+
+      /* must fit 2*n limbs in tarr */
+      ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+      tp[n - 1] = cy;
+      for (i = 2; i < n; i++)
+	{
+	  mp_limb_t cy;
+	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+	  tp[n + i - 2] = cy;
+	}
+
+      MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+    }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
diff --git a/third_party/gmp/mpn/generic/sqrlo.c b/third_party/gmp/mpn/generic/sqrlo.c
new file mode 100644
index 0000000..71530b6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqrlo.c
@@ -0,0 +1,239 @@
+/* mpn_sqrlo -- squares an n-limb number and returns the low n limbs
+   of the result.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22   1
+#else
+#define MAYBE_range_basecase                                           \
+  ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM2_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22                                             \
+  ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM3_THRESHOLD*36/(36-11) )
+#endif
+
+/*  THINK: The DC strategy uses different constants in the different
+    Toom ranges.  Something smoother?
+*/
+
+/*
+  Compute the least significant half of the square of {xp,n}, or
+  formally {rp,n} = {xp,n}^2 Mod (B^n).
+
+  Above the given threshold, the Divide and Conquer strategy is used.
+  The operand is split in two, and a full square plus a mullo
+  is used to obtain the final result.  The more natural strategy is to
+  split into two halves, but this is far from optimal when a
+  sub-quadratic multiplication is used.
+
+  Mulders suggests an unbalanced split in favour of the full product:
+  split n = n1 + n2, where a*n = n1 <= n2 = (1-a)*n; i.e. 0 < a <= 1/2.
+
+  To compute the value of a, we assume that the cost ML(n) of mullo for
+  a given size is a fraction of the cost M(n) of a full product of the
+  same size, and that M(n) = n^e for some exponent 1 < e <= 2; then we
+  can write:
+
+  ML(n) = 2*ML(a*n) + M((1-a)*n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, we want to minimise the value of k, i.e. the
+  function k = (1-a)^e/(1-2*a^e).
+
+  With e=2, the exponent for schoolbook multiplication, the minimum is
+  attained at a = 1-a = 1/2.
+
+  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+  Other possible approximations follow:
+  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above were obtained with the following trivial commands
+  in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+  ,
+  |\
+  | \
+  +----,
+  |    |
+  |    |
+  |    |\
+  |    | \
+  +----+--`
+  ^ n2 ^n1^
+
+  For an actual implementation, the assumption that M(n)=n^e is
+  incorrect; as a consequence, the assumption that ML(n)=k*M(n) with a
+  constant k is also wrong.
+
+  But theory suggests two things:
+  - the better the multiplication is (lower e), the closer k approaches
+    1 and a approaches 0.
+
+  - A value of a smaller than the optimum is probably less bad than a
+    bigger one: e.g. with e=log(3)/log(2) the optimal value is
+    a=0.3058_, giving the mul/mullo speed ratio k(a)=0.808_.  We get
+    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
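
To make the constants concrete, k(a) = (1-a)^e/(1-2*a^e) can be evaluated directly for the toom22 exponent; a small C stand-in for the gp-pari commands above, reproducing the 0.808_, 0.865_ and 0.929_ figures:

  #include <math.h>
  #include <stdio.h>

  static double
  k (double e, double a)
  {
    return pow (1 - a, e) / (1 - 2 * pow (a, e));
  }

  int
  main (void)
  {
    double e = log (3) / log (2);  /* toom22 exponent */
    printf ("k(11/36)       = %.3f\n", k (e, 11.0 / 36));            /* ~0.808 */
    printf ("k(11/36 - 1/6) = %.3f\n", k (e, 11.0 / 36 - 1.0 / 6));  /* ~0.865 */
    printf ("k(11/36 + 1/6) = %.3f\n", k (e, 11.0 / 36 + 1.0 / 6));  /* ~0.929 */
    return 0;
  }
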
+
+static mp_size_t
+mpn_sqrlo_itch (mp_size_t n)
+{
+  return 2*n;
+}
+
+/*
+    mpn_dc_sqrlo requires a scratch space of 2*n limbs at tp.
+    It accepts tp == rp.
+*/
+static void
+mpn_dc_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t n2, n1;
+  ASSERT (n >= 2);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+  /* Divide-and-conquer */
+
+  /* We need a fractional approximation of the value 0 < a <= 1/2
+     giving the minimum of the function k=(1-a)^e/(1-2*a^e).
+  */
+  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD*36/(36-11)))
+    n1 = n >> 1;
+  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD*36/(36-11)))
+    n1 = n * 11 / (size_t) 36;	/* n1 ~= n*(1-.694...) */
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD*40/(40-9)))
+    n1 = n * 9 / (size_t) 40;	/* n1 ~= n*(1-.775...) */
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD*10/9))
+    n1 = n * 7 / (size_t) 39;	/* n1 ~= n*(1-.821...) */
+  /* n1 = n * 4 / (size_t) 31;	// n1 ~= n*(1-.871...) [TOOM66] */
+  else
+    n1 = n / (size_t) 10;		/* n1 ~= n*(1-.899...) [TOOM88] */
+
+  n2 = n - n1;
+
+  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0 */
+
+  /* x0 ^ 2 */
+  mpn_sqr (tp, xp, n2);
+  MPN_COPY (rp, tp, n2);
+
+  /* x1 * x0 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp + n2, n1, xp, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp + n2, xp, n1);
+  else
+    mpn_mullo_n (tp + n, xp + n2, xp, n1);
+  /* mpn_dc_mullo_n (tp + n, xp + n2, xp, n1, tp + n); */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  mpn_addlsh1_n (rp + n2, tp + n2, tp + n, n1);
+#else
+  mpn_lshift (rp + n2, tp + n, n1, 1);
+  mpn_add_n (rp + n2, rp + n2, tp + n2, n1);
+#endif
+}
+
+/* Avoid a zero-size allocation when SQRLO_BASECASE_THRESHOLD_LIMIT is 0.  */
+#define SQR_BASECASE_ALLOC \
+ (SQRLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*SQRLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; dc_sqrlo
+   accepts a pointer tp and handles the case tp == rp; do the same here.
+*/
+
+void
+mpn_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+
+  if (BELOW_THRESHOLD (n, SQRLO_BASECASE_THRESHOLD))
+    {
+      /* FIXME: smarter criteria? */
+#if HAVE_NATIVE_mpn_mullo_basecase || ! HAVE_NATIVE_mpn_sqr_basecase
+      /* mullo computes as many products as sqr, but writes directly
+	 to the result area. */
+      mpn_mullo_basecase (rp, xp, xp, n);
+#else
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t tp[SQR_BASECASE_ALLOC];
+      mpn_sqr_basecase (tp, xp, n);
+      MPN_COPY (rp, tp, n);
+#endif
+    }
+  else if (BELOW_THRESHOLD (n, SQRLO_DC_THRESHOLD))
+    {
+      mpn_sqrlo_basecase (rp, xp, n);
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (mpn_sqrlo_itch (n));
+      if (BELOW_THRESHOLD (n, SQRLO_SQR_THRESHOLD))
+	{
+	  mpn_dc_sqrlo (rp, xp, n, tp);
+	}
+      else
+	{
+	  /* For really large operands, compute a full square and throw away
+	     the upper n limbs of the result.  */
+#if !TUNE_PROGRAM_BUILD && (SQRLO_SQR_THRESHOLD > SQR_FFT_THRESHOLD)
+	  mpn_fft_mul (tp, xp, n, xp, n);
+#else
+	  mpn_sqr (tp, xp, n);
+#endif
+	  MPN_COPY (rp, tp, n);
+	}
+      TMP_FREE;
+    }
+}
diff --git a/third_party/gmp/mpn/generic/sqrlo_basecase.c b/third_party/gmp/mpn/generic/sqrlo_basecase.c
new file mode 100644
index 0000000..3148609
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqrlo_basecase.c
@@ -0,0 +1,194 @@
+/* mpn_sqrlo_basecase -- Internal routine to square a natural number
+   of length n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
+2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
+#if HAVE_NATIVE_mpn_addmul_1
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
+#else
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t _i;							\
+    for (_i = 0; _i < (n); _i++)					\
+      {									\
+	mp_limb_t ul, lpl;						\
+	ul = (up)[_i];							\
+	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
+	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
+      }									\
+  } while (0)
+#endif
+
+#define MPN_SQRLO_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t nhalf;							\
+    nhalf = (n) >> 1;							\
+    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
+    if (((n) & 1) != 0)							\
+      {									\
+	mp_limb_t op;							\
+	op = (up)[nhalf];						\
+	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
+      }									\
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
+    mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
+  } while (0)
+#else
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
+    mpn_lshift ((tp), (tp), (n) - 1, 1);				\
+    mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
+  } while (0)
+#endif
+
+/* Avoid a zero-size allocation when SQRLO_DC_THRESHOLD_LIMIT < 2 (this code
+   is then not used). */
+#define SQRLO_BASECASE_ALLOC						\
+  (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
+
+/* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
+#ifndef SQRLO_SPECIAL_CASES
+#define SQRLO_SPECIAL_CASES 2
+#endif
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_special_cases 1
+#else
+#define MAYBE_special_cases \
+  ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
+#endif
+
+void
+mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_limb_t ul;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
+
+  ul = up[0];
+
+  if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
+    {
+#if SQRLO_SPECIAL_CASES == 1
+      rp[0] = (ul * ul) & GMP_NUMB_MASK;
+#else
+      if (n == 1)
+	rp[0] = (ul * ul) & GMP_NUMB_MASK;
+      else
+	{
+	  mp_limb_t hi, lo, ul1;
+	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lo >> GMP_NAIL_BITS;
+	  ul1 = up[1];
+#if SQRLO_SPECIAL_CASES == 2
+	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+#else
+	  if (n == 2)
+	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+	  else
+	    {
+	      mp_limb_t hi1;
+#if GMP_NAIL_BITS != 0
+	      ul <<= 1;
+#endif
+	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
+	      hi1 += ul * up[2];
+#if GMP_NAIL_BITS == 0
+	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
+	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
+#else
+	      hi += lo >> GMP_NAIL_BITS;
+	      rp[1] = hi & GMP_NUMB_MASK;
+	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+#endif
+	    }
+#endif
+	}
+#endif
+    }
+  else
+    {
+      mp_limb_t tp[SQRLO_BASECASE_ALLOC];
+      mp_size_t i;
+
+      /* must fit n-1 limbs in tp */
+      ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
+
+      --n;
+#if SQRLO_SHORTCUT_MULTIPLICATIONS
+      {
+	mp_limb_t cy;
+
+	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
+	for (i = 1; 2 * i + 1 < n; ++i)
+	  {
+	    ul = up[i];
+	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
+	  }
+	tp[n - 1] = (cy + ((n & 1) ? up[i] * up[i + 1] : 0)) & GMP_NUMB_MASK;
+      }
+#else
+      mpn_mul_1 (tp, up + 1, n, ul);
+      for (i = 1; 2 * i < n; ++i)
+	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
+#endif
+
+      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
+    }
+}
+#undef SQRLO_SPECIAL_CASES
+#undef MAYBE_special_cases
+#undef SQRLO_BASECASE_ALLOC
+#undef SQRLO_SHORTCUT_MULTIPLICATIONS
+#undef MPN_SQR_DIAGONAL
+#undef MPN_SQRLO_DIAGONAL
+#undef MPN_SQRLO_DIAG_ADDLSH1
diff --git a/third_party/gmp/mpn/generic/sqrmod_bnm1.c b/third_party/gmp/mpn/generic/sqrmod_bnm1.c
new file mode 100644
index 0000000..0a27d7b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqrmod_bnm1.c
@@ -0,0 +1,312 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^rn - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_sqr (tp, ap, rn);
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+   semi-normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_sqr (tp, ap, rn + 1);
+  ASSERT (tp[2*rn+1] == 0);
+  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn+1, cy);
+}
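
Both basecase helpers rest on one identity: writing the full square as hi*B^rn + lo, B^rn == 1 (mod B^rn - 1) makes the residue lo + hi, and B^rn == -1 (mod B^rn + 1) makes it lo - hi.  A toy check with an 8-bit "limb", i.e. B^rn = 256 (illustrative only):

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    uint32_t a = 200, sq = a * a;          /* 40000 = hi*256 + lo */
    uint32_t lo = sq & 0xff, hi = sq >> 8;

    /* mod 2^8 - 1: fold by addition, then absorb the carry once */
    uint32_t m1 = lo + hi;
    m1 = (m1 & 0xff) + (m1 >> 8);
    printf ("%u %u\n", m1, sq % 255);      /* 220 220 */

    /* mod 2^8 + 1: fold by subtraction, offset to stay non-negative */
    uint32_t p1 = (lo + 257 - hi) % 257;
    printf ("%u %u\n", p1, sq % 257);      /* 165 165 */
    return 0;
  }
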
+
+
+/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * It should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with 2*an <= rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1).
+ *
+ * Requires rn/4 < an <= rn
+ * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
+ *
+ * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+  ASSERT (0 < an);
+  ASSERT (an <= rn);
+
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (an < rn))
+	{
+	  if (UNLIKELY (2*an <= rn))
+	    {
+	      mpn_sqr (rp, ap, an);
+	    }
+	  else
+	    {
+	      mp_limb_t cy;
+	      mpn_sqr (tp, ap, an);
+	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
+	      MPN_INCR_U (rp, rn, cy);
+	    }
+	}
+      else
+	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;
+
+      ASSERT (2*an > n);
+
+      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+	 and CRT them together as
+
+	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp  tp	/* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+
+      {
+	mp_srcptr am1;
+	mp_size_t anm;
+	mp_ptr so;
+
+	if (LIKELY (an > n))
+	  {
+	    so = xp + n;
+	    am1 = xp;
+	    cy = mpn_add (xp, a0, n, a1, an - n);
+	    MPN_INCR_U (xp, n, cy);
+	    anm = n;
+	  }
+	else
+	  {
+	    so = xp;
+	    am1 = a0;
+	    anm = an;
+	  }
+
+	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
+      }
+
+      {
+	int       k;
+	mp_srcptr ap1;
+	mp_size_t anp;
+
+	if (LIKELY (an > n)) {
+	  ap1 = sp1;
+	  cy = mpn_sub (sp1, a0, n, a1, an - n);
+	  sp1[n] = 0;
+	  MPN_INCR_U (sp1, n + 1, cy);
+	  anp = n + ap1[n];
+	} else {
+	  ap1 = a0;
+	  anp = an;
+	}
+
+	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+	  k=0;
+	else
+	  {
+	    int mask;
+	    k = mpn_fft_best_k (n, 1);
+	    mask = (1<<k) -1;
+	    while (n & mask) {k--; mask >>=1;};
+	  }
+	if (k >= FFT_FIRST_K)
+	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+	else if (UNLIKELY (ap1 == a0))
+	  {
+	    ASSERT (anp <= n);
+	    ASSERT (2*anp > n);
+	    mpn_sqr (xp, a0, an);
+	    anp = 2*an - n;
+	    cy = mpn_sub (xp, xp, n, xp + n, anp);
+	    xp[n] = 0;
+	    MPN_INCR_U (xp, n+1, cy);
+	  }
+	else
+	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1], except when
+	 both inputs are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1, i.e. {xp,n} = ZERO; this implies that
+	 the rsh1add was a simple rshift: the top bit is 0.  cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy cannot overflow, so the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* The next increment cannot overflow; see the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (2*an < rn))
+	{
+	  /* Note that in this case, the only way the result can equal
+	     zero mod B^{rn} - 1 is if the input is zero, and
+	     then the output of both the recursive calls and this CRT
+	     reconstruction is zero, not B^{rn} - 1. */
+	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
+
+	  /* FIXME: This subtraction of the high parts is not really
+	     necessary; we do it to get the carry out and for sanity
+	     checking. */
+	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
+				   xp + 2*an - n, rn - 2*an, cy);
+	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
+	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
+	  ASSERT (cy == (xp + 2*an - n)[0]);
+	}
+      else
+	{
+	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+	     DECR will affect _at most_ the lowest n limbs. */
+	  MPN_DECR_U (rp, 2*n, cy);
+	}
+#undef a0
+#undef a1
+#undef xp
+#undef sp1
+    }
+}
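
The recomposition can be sanity-checked with tiny numbers: take B^n = 16, so the two moduli are 15 and 17 and the combined modulus is B^(2n) - 1 = 255.  The formula x = -xp*B^n + (B^n + 1)*[(xp + xm)/2 mod (B^n - 1)] must then agree with a^2 mod 255 for every residue.  A toy check of the identity (not of the limb-level carry handling):

  #include <stdio.h>

  int
  main (void)
  {
    for (unsigned a = 0; a < 256; a++)
      {
        unsigned sq = a * a;
        unsigned xm = sq % 15, xp = sq % 17;
        unsigned half = ((xm + xp) * 8) % 15;  /* /2 mod 15; 8 = 2^-1 mod 15 */
        int x = 17 * (int) half - (int) (16 * xp);
        unsigned r = (unsigned) ((x % 255 + 255) % 255);
        if (r != sq % 255)
          {
            printf ("mismatch at a = %u\n", a);
            return 1;
          }
      }
    printf ("CRT identity holds for all residues mod 255\n");
    return 0;
  }
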
+
+mp_size_t
+mpn_sqrmod_bnm1_next_size (mp_size_t n)
+{
+  mp_size_t nh;
+
+  if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
+    return n;
+  if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (2-1)) & (-2);
+  if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (4-1)) & (-4);
+
+  nh = (n + 1) >> 1;
+
+  if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
+    return (n + (8-1)) & (-8);
+
+  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
+}
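
The masked additions in mpn_sqrmod_bnm1_next_size use the standard round-up trick: for a power of 2 m, (n + (m-1)) & -m rounds n up to the next multiple of m, keeping the recursion's split sizes suitably even.  A quick illustration:

  #include <stdio.h>

  int
  main (void)
  {
    for (int n = 5; n <= 9; n++)
      printf ("%d -> %d\n", n, (n + (4 - 1)) & -4);  /* 5..8 -> 8, 9 -> 12 */
    return 0;
  }
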
diff --git a/third_party/gmp/mpn/generic/sqrtrem.c b/third_party/gmp/mpn/generic/sqrtrem.c
new file mode 100644
index 0000000..cc6dd9c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sqrtrem.c
@@ -0,0 +1,555 @@
+/* mpn_sqrtrem -- square root and remainder
+
+   Contributed to the GNU project by Paul Zimmermann (most code),
+   Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).
+
+   THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GMP RELEASE.
+
+Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015, 2017 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* See "Karatsuba Square Root", reference in gmp.texi.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#define USE_DIVAPPR_Q 1
+#define TRACE(x)
+
+static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
+{
+  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
+  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
+  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
+  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
+  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
+  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
+  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
+  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
+  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
+  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
+  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
+  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
+  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
+  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
+  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
+  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
+  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
+  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
+  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
+  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
+  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
+  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
+  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
+  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
+  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
+  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
+  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
+  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
+  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
+  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
+  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
+  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
+  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
+  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
+  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
+  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
+  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
+  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
+  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
+  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
+  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
+  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
+  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
+  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
+  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00  /* sqrt(1/1f8)..sqrt(1/1ff) */
+};
+
+/* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2.  */
+
+#if GMP_NUMB_BITS > 32
+#define MAGIC CNST_LIMB(0x10000000000)	/* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#else
+#define MAGIC CNST_LIMB(0x100000)		/* 0xfee6f < MAGIC < 0x29cbc8 */
+#endif
+
+static mp_limb_t
+mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
+{
+#if GMP_NUMB_BITS > 32
+  mp_limb_t a1;
+#endif
+  mp_limb_t x0, t2, t, x2;
+  unsigned abits;
+
+  ASSERT_ALWAYS (GMP_NAIL_BITS == 0);
+  ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
+  ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);
+
+  /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
+     since we can do the former without division.  As part of the last
+     iteration convert from 1/sqrt(a) to sqrt(a).  */
+
+  abits = a0 >> (GMP_LIMB_BITS - 1 - 8);	/* extract bits for table lookup */
+  x0 = 0x100 | invsqrttab[abits - 0x80];	/* initial 1/sqrt(a) */
+
+  /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
+
+#if GMP_NUMB_BITS > 32
+  a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
+  t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
+  x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
+
+  /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
+
+  t2 = x0 * (a0 >> (32-8));
+  t = t2 >> 25;
+  t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
+  x0 >>= 32;
+#else
+  t2 = x0 * (a0 >> (16-8));
+  t = t2 >> 13;
+  t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
+  x0 >>= 16;
+#endif
+
+  /* x0 is now a full limb approximation of sqrt(a0) */
+
+  x2 = x0 * x0;
+  if (x2 + 2*x0 <= a0 - 1)
+    {
+      x2 += 2*x0 + 1;
+      x0++;
+    }
+
+  *rp = a0 - x2;
+  return x0;
+}
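
The fixed-point code above is the classical Newton iteration for an inverse square root, x' = x*(3 - a*x*x)/2, with the scaling folded into shifts and a final multiply converting 1/sqrt(a) into sqrt(a).  A floating-point sketch of the same scheme, showing the quadratic convergence (illustrative, not the limb arithmetic):

  #include <stdio.h>

  int
  main (void)
  {
    double a = 2.0, x = 0.7;            /* seed, like the table lookup */
    for (int i = 0; i < 4; i++)
      {
        x = x * (3 - a * x * x) / 2;    /* Newton step towards 1/sqrt(a) */
        printf ("x = %.15f\n", x);      /* correct digits double each step */
      }
    printf ("a*x = %.15f\n", a * x);    /* ~ sqrt(2): the final conversion */
    return 0;
  }
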
+
+
+#define Prec (GMP_NUMB_BITS >> 1)
+#if ! defined(SQRTREM2_INPLACE)
+#define SQRTREM2_INPLACE 0
+#endif
+
+/* Same as mpn_sqrtrem, but for size=2 and {np, 2} normalized.
+   Returns cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0].  */
+#if SQRTREM2_INPLACE
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp)
+{
+  mp_srcptr np = rp;
+#else
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
+{
+#endif
+  mp_limb_t q, u, np0, sp0, rp0, q2;
+  int cc;
+
+  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
+
+  np0 = np[0];
+  sp0 = mpn_sqrtrem1 (rp, np[1]);
+  rp0 = rp[0];
+  /* rp0 <= 2*sp0 < 2^(Prec + 1) */
+  rp0 = (rp0 << (Prec - 1)) + (np0 >> (Prec + 1));
+  q = rp0 / sp0;
+  /* q <= 2^Prec; if q = 2^Prec, reduce the overestimate. */
+  q -= q >> Prec;
+  /* now we have q < 2^Prec */
+  u = rp0 - q * sp0;
+  /* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */
+  sp0 = (sp0 << Prec) | q;
+  cc = u >> (Prec - 1);
+  rp0 = ((u << (Prec + 1)) & GMP_NUMB_MASK) + (np0 & ((CNST_LIMB (1) << (Prec + 1)) - 1));
+  /* subtract q * q from rp */
+  q2 = q * q;
+  cc -= rp0 < q2;
+  rp0 -= q2;
+  if (cc < 0)
+    {
+      rp0 += sp0;
+      cc += rp0 < sp0;
+      --sp0;
+      rp0 += sp0;
+      cc += rp0 < sp0;
+    }
+
+  rp[0] = rp0;
+  sp[0] = sp0;
+  return cc;
+}
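+
+/* The contract above can be checked directly in double-limb arithmetic:
+   n1*B + n0 == s^2 + cc*B + r, with B = 2^GMP_NUMB_BITS.  A disabled,
+   illustrative check (assumes the unsigned __int128 compiler extension
+   and 64-bit limbs; not part of the library proper):  */
+#if 0
+static void
+check_sqrtrem2_contract (mp_limb_t s, mp_limb_t r, mp_limb_t cc,
+			 mp_limb_t n1, mp_limb_t n0)
+{
+  unsigned __int128 B = (unsigned __int128) 1 << GMP_NUMB_BITS;
+  ASSERT_ALWAYS ((unsigned __int128) n1 * B + n0
+		 == (unsigned __int128) s * s + (unsigned __int128) cc * B + r);
+}
+#endif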
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
+   and in {np, n} the low n limbs of the remainder, returns the high
+   limb of the remainder (which is 0 or 1).
+   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
+   where B=2^GMP_NUMB_BITS.
+   Needs a scratch of n/2+1 limbs. */
+static mp_limb_t
+mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n, mp_limb_t approx, mp_ptr scratch)
+{
+  mp_limb_t q;			/* carry out of {sp, n} */
+  int c, b;			/* carry out of remainder */
+  mp_size_t l, h;
+
+  ASSERT (n > 1);
+  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
+
+  l = n / 2;
+  h = n - l;
+  if (h == 1)
+    q = CALL_SQRTREM2_INPLACE (sp + l, np + 2 * l);
+  else
+    q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h, 0, scratch);
+  if (q != 0)
+    ASSERT_CARRY (mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h));
+  TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n, (unsigned) h, (unsigned) (n - h + 1)));
+  mpn_tdiv_qr (scratch, np + l, 0, np + l, n, sp + l, h);
+  q += scratch[l];
+  c = scratch[0] & 1;
+  mpn_rshift (sp, scratch, l, 1);
+  sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
+  if (UNLIKELY ((sp[0] & approx) != 0)) /* (sp[0] & mask) > 1 */
+    return 1; /* Remainder is non-zero */
+  q >>= 1;
+  if (c != 0)
+    c = mpn_add_n (np + l, np + l, sp + l, h);
+  TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+  mpn_sqr (np + n, sp, l);
+  b = q + mpn_sub_n (np, np, np + n, 2 * l);
+  c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
+
+  if (c < 0)
+    {
+      q = mpn_add_1 (sp + l, sp + l, h, q);
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+      c += mpn_addlsh1_n_ip1 (np, sp, n) + 2 * q;
+#else
+      c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+#endif
+      c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
+      q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
+    }
+
+  return c;
+}
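+
+/* The same divide-and-conquer step on a single 32-bit word with 8-bit
+   "limbs": take the square root of the high half, divide the remainder
+   (extended by the next limb) by twice that root, then apply at most
+   one correction.  Disabled, illustrative sketch; the helper names are
+   local to this example.  */
+#if 0
+#include <stdint.h>
+
+static uint32_t
+isqrt16 (uint32_t a)		/* floor(sqrt(a)) for a < 2^16, by search */
+{
+  uint32_t s = 0;
+  while ((s + 1) * (s + 1) <= a)
+    s++;
+  return s;
+}
+
+static uint32_t
+dc_sqrtrem32 (uint32_t a, uint32_t *rem)	/* requires a >= 2^30 */
+{
+  uint32_t ah = a >> 16;			/* high two 8-bit limbs */
+  uint32_t a1 = (a >> 8) & 0xff;
+  uint32_t a0 = a & 0xff;
+  uint32_t sp = isqrt16 (ah);			/* root of the high half */
+  uint32_t rp = ah - sp * sp;
+  uint32_t q = (rp * 256 + a1) / (2 * sp);
+  uint32_t u = (rp * 256 + a1) % (2 * sp);
+  int64_t s = (int64_t) sp * 256 + q;
+  int64_t r = (int64_t) u * 256 + a0 - (int64_t) q * q;
+  if (r < 0)					/* at most one correction */
+    {
+      r += 2 * s - 1;
+      s -= 1;
+    }
+  *rem = (uint32_t) r;
+  return (uint32_t) s;
+}
+#endif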
+
+#if USE_DIVAPPR_Q
+static void
+mpn_divappr_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+  gmp_pi1_t inv;
+  mp_limb_t qh;
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  MPN_COPY (scratch, np, nn);
+  invert_pi1 (inv, dp[dn-1], dp[dn-2]);
+  if (BELOW_THRESHOLD (dn, DC_DIVAPPR_Q_THRESHOLD))
+    qh = mpn_sbpi1_divappr_q (qp, scratch, nn, dp, dn, inv.inv32);
+  else if (BELOW_THRESHOLD (dn, MU_DIVAPPR_Q_THRESHOLD))
+    qh = mpn_dcpi1_divappr_q (qp, scratch, nn, dp, dn, &inv);
+  else
+    {
+      mp_size_t itch = mpn_mu_divappr_q_itch (nn, dn, 0);
+      TMP_DECL;
+      TMP_MARK;
+      /* Sadly, scratch is too small. */
+      qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, TMP_ALLOC_LIMBS (itch));
+      TMP_FREE;
+    }
+  qp [nn - dn] = qh;
+}
+#endif
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},
+   returns zero if the operand was a perfect square, one otherwise.
+   Assumes {np, 2n-odd}*4^nsh is normalized, i.e. B > np[2n-1-odd]*4^nsh >= B/4
+   where B=2^GMP_NUMB_BITS.
+   THINK: In the odd case, three more (dummy) limbs are taken into account;
+   when nsh is maximal, two limbs are discarded from the result of the
+   division.  Too much?  Is a single dummy limb enough? */
+static int
+mpn_dc_sqrt (mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)
+{
+  mp_limb_t q;			/* carry out of {sp, n} */
+  int c;			/* carry out of remainder */
+  mp_size_t l, h;
+  mp_ptr qp, tp, scratch;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (np[2 * n - 1 - odd] != 0);
+  ASSERT (n > 4);
+  ASSERT (nsh < GMP_NUMB_BITS / 2);
+
+  l = (n - 1) / 2;
+  h = n - l;
+  ASSERT (n >= l + 2 && l + 2 >= h && h > l && l >= 1 + odd);
+  scratch = TMP_ALLOC_LIMBS (l + 2 * n + 5 - USE_DIVAPPR_Q); /* n + 2-USE_DIVAPPR_Q */
+  tp = scratch + n + 2 - USE_DIVAPPR_Q; /* n + h + 1, but tp [-1] is writable */
+  if (nsh != 0)
+    {
+      /* o is used to set the lowest bits of the dividend exactly; is it needed? */
+      int o = l > (1 + odd);
+      ASSERT_NOCARRY (mpn_lshift (tp - o, np + l - 1 - o - odd, n + h + 1 + o, 2 * nsh));
+    }
+  else
+    MPN_COPY (tp, np + l - 1 - odd, n + h + 1);
+  q = mpn_dc_sqrtrem (sp + l, tp + l + 1, h, 0, scratch);
+  if (q != 0)
+    ASSERT_CARRY (mpn_sub_n (tp + l + 1, tp + l + 1, sp + l, h));
+  qp = tp + n + 1; /* l + 2 */
+  TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n+1, (unsigned) h, (unsigned) (n + 1 - h + 1)));
+#if USE_DIVAPPR_Q
+  mpn_divappr_q (qp, tp, n + 1, sp + l, h, scratch);
+#else
+  mpn_div_q (qp, tp, n + 1, sp + l, h, scratch);
+#endif
+  q += qp [l + 1];
+  c = 1;
+  if (q > 1)
+    {
+      /* FIXME: if s!=0 we will shift later, a noop on this area. */
+      MPN_FILL (sp, l, GMP_NUMB_MAX);
+    }
+  else
+    {
+      /* FIXME: if s!=0 we will shift again later, shift just once. */
+      mpn_rshift (sp, qp + 1, l, 1);
+      sp[l - 1] |= q << (GMP_NUMB_BITS - 1);
+      if (((qp[0] >> (2 + USE_DIVAPPR_Q)) | /* < 3 + 4*USE_DIVAPPR_Q */
+	   (qp[1] & (GMP_NUMB_MASK >> ((GMP_NUMB_BITS >> odd)- nsh - 1)))) == 0)
+	{
+	  mp_limb_t cy;
+	  /* Approximation is not good enough; the extra limb (+ nsh bits)
+	     is smaller than needed to absorb the possible error. */
+	  /* {qp + 1, l + 1} equals 2*{sp, l} */
+	  /* FIXME: use mullo or wrap-around, or directly evaluate
+	     remainder with a single sqrmod_bnm1. */
+	  TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h, (unsigned) (l+1)));
+	  ASSERT_NOCARRY (mpn_mul (scratch, sp + l, h, qp + 1, l + 1));
+	  /* Compute the remainder of the previous mpn_div(appr)_q. */
+	  cy = mpn_sub_n (tp + 1, tp + 1, scratch, h);
+#if USE_DIVAPPR_Q || WANT_ASSERT
+	  MPN_DECR_U (tp + 1 + h, l, cy);
+#if USE_DIVAPPR_Q
+	  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) <= 0);
+	  if (mpn_cmp (tp + 1 + h, scratch + h, l) < 0)
+	    {
+	      /* May happen only if div result was not exact. */
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+	      cy = mpn_addlsh1_n_ip1 (tp + 1, sp + l, h);
+#else
+	      cy = mpn_addmul_1 (tp + 1, sp + l, h, CNST_LIMB(2));
+#endif
+	      ASSERT_NOCARRY (mpn_add_1 (tp + 1 + h, tp + 1 + h, l, cy));
+	      MPN_DECR_U (sp, l, 1);
+	    }
+	  /* Can the root be exact when a correction was needed?  We
+	     did not find an example, but it depends on divappr
+	     internals, and we cannot assume it holds in general... */
+	  /* else */
+#else /* WANT_ASSERT */
+	  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) == 0);
+#endif
+#endif
+	  if (mpn_zero_p (tp + l + 1, h - l))
+	    {
+	      TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+	      mpn_sqr (scratch, sp, l);
+	      c = mpn_cmp (tp + 1, scratch + l, l);
+	      if (c == 0)
+		{
+		  if (nsh != 0)
+		    {
+		      mpn_lshift (tp, np, l, 2 * nsh);
+		      np = tp;
+		    }
+		  c = mpn_cmp (np, scratch + odd, l - odd);
+		}
+	      if (c < 0)
+		{
+		  MPN_DECR_U (sp, l, 1);
+		  c = 1;
+		}
+	    }
+	}
+    }
+  TMP_FREE;
+
+  if ((odd | nsh) != 0)
+    mpn_rshift (sp, sp, n, nsh + (odd ? GMP_NUMB_BITS / 2 : 0));
+  return c;
+}
+
+
+mp_size_t
+mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
+{
+  mp_limb_t cc, high, rl;
+  int c;
+  mp_size_t rn, tn;
+  TMP_DECL;
+
+  ASSERT (nn > 0);
+  ASSERT_MPN (np, nn);
+
+  ASSERT (np[nn - 1] != 0);
+  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
+  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
+  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
+
+  high = np[nn - 1];
+  if (high & (GMP_NUMB_HIGHBIT | (GMP_NUMB_HIGHBIT / 2)))
+    c = 0;
+  else
+    {
+      count_leading_zeros (c, high);
+      c -= GMP_NAIL_BITS;
+
+      c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
+    }
+  if (nn == 1) {
+    if (c == 0)
+      {
+	sp[0] = mpn_sqrtrem1 (&rl, high);
+	if (rp != NULL)
+	  rp[0] = rl;
+      }
+    else
+      {
+	cc = mpn_sqrtrem1 (&rl, high << (2*c)) >> c;
+	sp[0] = cc;
+	if (rp != NULL)
+	  rp[0] = rl = high - cc*cc;
+      }
+    return rl != 0;
+  }
+  if (nn == 2) {
+    mp_limb_t tp [2];
+    if (rp == NULL) rp = tp;
+    if (c == 0)
+      {
+#if SQRTREM2_INPLACE
+	rp[1] = high;
+	rp[0] = np[0];
+	cc = CALL_SQRTREM2_INPLACE (sp, rp);
+#else
+	cc = mpn_sqrtrem2 (sp, rp, np);
+#endif
+	rp[1] = cc;
+	return ((rp[0] | cc) != 0) + cc;
+      }
+    else
+      {
+	rl = np[0];
+	rp[1] = (high << (2*c)) | (rl >> (GMP_NUMB_BITS - 2*c));
+	rp[0] = rl << (2*c);
+	CALL_SQRTREM2_INPLACE (sp, rp);
+	cc = sp[0] >>= c;	/* c != 0, the highest bit of the root cc is 0. */
+	rp[0] = rl -= cc*cc;	/* Computed modulo 2^GMP_LIMB_BITS, because it's smaller. */
+	return rl != 0;
+      }
+  }
+  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
+
+  if ((rp == NULL) && (nn > 8))
+    return mpn_dc_sqrt (sp, np, tn, c, nn & 1);
+  TMP_MARK;
+  if (((nn & 1) | c) != 0)
+    {
+      mp_limb_t s0[1], mask;
+      mp_ptr tp, scratch;
+      TMP_ALLOC_LIMBS_2 (tp, 2 * tn, scratch, tn / 2 + 1);
+      tp[0] = 0;	     /* needed only when 2*tn > nn, but saves a test */
+      if (c != 0)
+	mpn_lshift (tp + (nn & 1), np, nn, 2 * c);
+      else
+	MPN_COPY (tp + (nn & 1), np, nn);
+      c += (nn & 1) ? GMP_NUMB_BITS / 2 : 0;		/* c now represents k */
+      mask = (CNST_LIMB (1) << c) - 1;
+      rl = mpn_dc_sqrtrem (sp, tp, tn, (rp == NULL) ? mask - 1 : 0, scratch);
+      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
+	 thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
+      s0[0] = sp[0] & mask;	/* S mod 2^k */
+      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);	/* R = R + 2*s0*S */
+      cc = mpn_submul_1 (tp, s0, 1, s0[0]);
+      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
+      mpn_rshift (sp, sp, tn, c);
+      tp[tn] = rl;
+      if (rp == NULL)
+	rp = tp;
+      c = c << 1;
+      if (c < GMP_NUMB_BITS)
+	tn++;
+      else
+	{
+	  tp++;
+	  c -= GMP_NUMB_BITS;
+	}
+      if (c != 0)
+	mpn_rshift (rp, tp, tn, c);
+      else
+	MPN_COPY_INCR (rp, tp, tn);
+      rn = tn;
+    }
+  else
+    {
+      if (rp != np)
+	{
+	  if (rp == NULL) /* nn <= 8 */
+	    rp = TMP_SALLOC_LIMBS (nn);
+	  MPN_COPY (rp, np, nn);
+	}
+      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn, 0, TMP_ALLOC_LIMBS(tn / 2 + 1)));
+    }
+
+  MPN_NORMALIZE (rp, rn);
+
+  TMP_FREE;
+  return rn;
+}
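+
+/* Typical use of the documented interface (illustrative sketch):
+
+     mp_size_t sn = (nn + 1) / 2;	-- size of the root
+     rn = mpn_sqrtrem (sp, rp, np, nn);
+
+   {sp, sn} receives floor(sqrt(N)).  The return value is zero iff N is
+   a perfect square; when rp is non-NULL, it is also the size of the
+   remainder N - S^2 stored at {rp, rn}.  */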
diff --git a/third_party/gmp/mpn/generic/strongfibo.c b/third_party/gmp/mpn/generic/strongfibo.c
new file mode 100644
index 0000000..ffd038a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/strongfibo.c
@@ -0,0 +1,216 @@
+/* mpn_fib2m -- calculate Fibonacci numbers, modulo m.
+
+Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+   returns the sign of {ap,n}-{bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t  x, y;
+  while (--n >= 0)
+    {
+      x = ap[n];
+      y = bp[n];
+      if (x != y)
+        {
+          ++n;
+          if (x > y)
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, n));
+              return 1;
+            }
+          else
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, n));
+              return -1;
+            }
+        }
+      rp[n] = 0;
+    }
+  return 0;
+}
+
+/* Computes at most count terms of the sequence needed by the
+   Lucas-Lehmer-Riesel test, indexing backward:
+   L_i = L_{i+1}^2 - 2
+
+   The sequence is computed modulo M = {mp, mn}.
+   The starting point is given in L_{count+1} = {lp, mn}.
+   The scratch area pointed to by sp needs space for at least 3 * mn + 1 limbs.
+
+   Returns the index i>0 if L_i = 0 (mod M) is found within the
+   computed count terms of the sequence.  Otherwise it returns zero.
+
+   Note: (+/-2)^2-2=2, (+/-1)^2-2=-1, 0^2-2=-2
+ */
+
+static mp_bitcnt_t
+mpn_llriter (mp_ptr lp, mp_srcptr mp, mp_size_t mn, mp_bitcnt_t count, mp_ptr sp)
+{
+  do
+    {
+      mpn_sqr (sp, lp, mn);
+      mpn_tdiv_qr (sp + 2 * mn, lp, 0, sp, 2 * mn, mp, mn);
+      if (lp[0] < 5)
+	{
+	  /* If L^2 % M < 5, |L^2 % M - 2| <= 2 */
+	  if (mn == 1 || mpn_zero_p (lp + 1, mn - 1))
+	    return (lp[0] == 2) ? count : 0;
+	  else
+	    MPN_DECR_U (lp, mn, 2);
+	}
+      else
+	lp[0] -= 2;
+    } while (--count != 0);
+  return 0;
+}
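+
+/* The same backward iteration on single words, for intuition.
+   Disabled, illustrative sketch; assumes the unsigned __int128
+   extension and an odd modulus m > 2.  */
+#if 0
+#include <stdint.h>
+static uint64_t
+llr_demo (uint64_t v, uint64_t m, uint64_t count)
+{
+  do
+    {
+      v = (uint64_t) (((unsigned __int128) v * v) % m);	/* v^2 mod m */
+      v = (v >= 2) ? v - 2 : v + (m - 2);	/* subtract 2 mod m */
+      if (v == 0)
+	return count;		/* found L_i == 0 (mod m) */
+    }
+  while (--count != 0);
+  return 0;
+}
+#endif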
+
+/* Store the Lucas number L[n] at lp (maybe), computed modulo m.  lp
+   and scratch should have room for mn*2+1 limbs.
+
+   Returns the size of L[n] normally.
+
+   If F[n] or L[n] is zero modulo m, returns 0, and the contents of lp
+   are undefined.
+*/
+
+static mp_size_t
+mpn_lucm (mp_ptr lp, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+  int		neg;
+  mp_limb_t	cy;
+
+  ASSERT (! MPN_OVERLAP_P (lp, MAX(2*mn+1,5), scratch, MAX(2*mn+1,5)));
+  ASSERT (nn > 0);
+
+  neg = mpn_fib2m (lp, scratch, np, nn, mp, mn);
+
+  /* F[n] = +/-{lp, mn}, F[n-1] = +/-{scratch, mn} */
+  if (mpn_zero_p (lp, mn))
+    return 0;
+
+  if (neg) /* One sign is opposite, use sub instead of add. */
+    {
+#if HAVE_NATIVE_mpn_rsblsh1_n || HAVE_NATIVE_mpn_sublsh1_n
+#if HAVE_NATIVE_mpn_rsblsh1_n
+      cy = mpn_rsblsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+#else
+      cy = mpn_sublsh1_n (lp, lp, scratch, mn); /* L[n] = -/+(F[n]-(-2F[n-1])) */
+      if (cy != 0)
+	cy = mpn_add_n (lp, lp, mp, mn) - cy;
+#endif
+      if (cy > 1)
+	cy += mpn_add_n (lp, lp, mp, mn);
+#else
+      cy = mpn_lshift (scratch, scratch, mn, 1); /* 2F[n-1] */
+      if (UNLIKELY (cy))
+	cy -= mpn_sub_n (lp, scratch, lp, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+      else
+	abs_sub_n (lp, lp, scratch, mn);
+#endif
+      ASSERT (cy <= 1);
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = mpn_addlsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]+F[n])) */
+#else
+      cy = mpn_lshift (scratch, scratch, mn, 1);
+      cy+= mpn_add_n (lp, lp, scratch, mn);
+#endif
+      ASSERT (cy <= 2);
+    }
+  while (cy || mpn_cmp (lp, mp, mn) >= 0)
+    cy -= mpn_sub_n (lp, lp, mp, mn);
+  MPN_NORMALIZE (lp, mn);
+  return mn;
+}
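+
+/* The identity used above is L[n] = 2*F[n-1] + F[n]; e.g. for n = 5,
+   L[5] = 11 = 2*3 + 5.  A small sanity check on plain integers
+   (disabled, illustrative only):  */
+#if 0
+static void
+lucas_identity_demo (void)
+{
+  unsigned long f[10] = {0, 1, 1, 2, 3, 5, 8, 13, 21, 34};	/* F[0..9] */
+  unsigned long l[10] = {2, 1, 3, 4, 7, 11, 18, 29, 47, 76};	/* L[0..9] */
+  int n;
+  for (n = 1; n < 10; n++)
+    ASSERT_ALWAYS (l[n] == 2 * f[n - 1] + f[n]);
+}
+#endif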
+
+int
+mpn_strongfibo (mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+  mp_ptr	lp, sp;
+  mp_size_t	en;
+  mp_bitcnt_t	b0;
+  TMP_DECL;
+
+#if GMP_NUMB_BITS % 4 == 0
+  b0 = mpn_scan0 (mp, 0);
+#else
+  {
+    mpz_t m = MPZ_ROINIT_N(mp, mn);
+    b0 = mpz_scan0 (m, 0);
+  }
+  if (UNLIKELY (b0 == mn * GMP_NUMB_BITS))
+    {
+      en = 1;
+      scratch [0] = 1;
+    }
+  else
+#endif
+    {
+      int cnt = b0 % GMP_NUMB_BITS;
+      en = b0 / GMP_NUMB_BITS;
+      if (LIKELY (cnt != 0))
+	mpn_rshift (scratch, mp + en, mn - en, cnt);
+      else
+	MPN_COPY (scratch, mp + en, mn - en);
+      en = mn - en;
+      scratch [0] |= 1;
+      en -= scratch [en - 1] == 0;
+    }
+  TMP_MARK;
+
+  lp = TMP_ALLOC_LIMBS (4 * mn + 6);
+  sp = lp + 2 * mn + 3;
+  en = mpn_lucm (sp, scratch, en, mp, mn, lp);
+  if (en != 0 && LIKELY (--b0 != 0))
+    {
+      mpn_sqr (lp, sp, en);
+      lp [0] |= 2; /* V^2 + 2 */
+      if (LIKELY (2 * en >= mn))
+	mpn_tdiv_qr (sp, lp, 0, lp, 2 * en, mp, mn);
+      else
+	MPN_ZERO (lp + 2 * en, mn - 2 * en);
+      if (! mpn_zero_p (lp, mn) && LIKELY (--b0 != 0))
+	b0 = mpn_llriter (lp, mp, mn, b0, lp + mn + 1);
+    }
+  TMP_FREE;
+  return (b0 != 0);
+}
diff --git a/third_party/gmp/mpn/generic/sub.c b/third_party/gmp/mpn/generic/sub.c
new file mode 100644
index 0000000..df0afd6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub.c
@@ -0,0 +1,33 @@
+/* mpn_sub - subtract mpn from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/sub_1.c b/third_party/gmp/mpn/generic/sub_1.c
new file mode 100644
index 0000000..a20f191
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub_1.c
@@ -0,0 +1,33 @@
+/* mpn_sub_1 - subtract limb from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub_1 1
+
+#include "gmp-impl.h"
diff --git a/third_party/gmp/mpn/generic/sub_err1_n.c b/third_party/gmp/mpn/generic/sub_err1_n.c
new file mode 100644
index 0000000..beca57e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub_err1_n.c
@@ -0,0 +1,100 @@
+/* mpn_sub_err1_n -- sub_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  yp += n - 1;
+  el = eh = 0;
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh:el) */
+      zl = (-cy) & yl;
+      el += zl;
+      eh += el < zl;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;
+  ep[1] = eh;
+
+  return cy;
+}
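+
+/* One-limb example of the error term (illustrative): with n = 1,
+   up[0] = 3, vp[0] = 5 and cy = 0, the subtraction borrows, so
+   c[1] = 1; then ep[0] = yp[0], ep[1] = 0, rp[0] wraps to
+   2^GMP_NUMB_BITS - 2, and the returned borrow is 1.  */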
diff --git a/third_party/gmp/mpn/generic/sub_err2_n.c b/third_party/gmp/mpn/generic/sub_err2_n.c
new file mode 100644
index 0000000..1edf8d6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_sub_err2_n -- sub_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  yp1 += n - 1;
+  yp2 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/sub_err3_n.c b/third_party/gmp/mpn/generic/sub_err3_n.c
new file mode 100644
index 0000000..2db3c63
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_sub_err3_n -- sub_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  yp1 += n - 1;
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+  el3 = eh3 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3) */
+      zl3 = (-cy) & yl3;
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+  ep[4] = el3;
+  ep[5] = eh3;
+
+  return cy;
+}
diff --git a/third_party/gmp/mpn/generic/sub_n.c b/third_party/gmp/mpn/generic/sub_n.c
new file mode 100644
index 0000000..b192c96
--- /dev/null
+++ b/third_party/gmp/mpn/generic/sub_n.c
@@ -0,0 +1,89 @@
+/* mpn_sub_n -- Subtract equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      sl = ul - vl;
+      cy1 = sl > ul;
+      rl = sl - cy;
+      cy2 = rl > sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+    }
+  while (--n != 0);
+
+  return cy;
+}
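+
+/* The borrow detection above relies on unsigned wraparound: ul - vl
+   wraps iff vl > ul, and exactly then sl > ul.  E.g. with 8-bit limbs,
+   3 - 5 = 254 and 254 > 3 flags the borrow.  (Illustrative note.)  */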
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, rl, cy;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      rl = ul - vl - cy;
+      cy = rl >> (GMP_LIMB_BITS - 1);
+      *rp++ = rl & GMP_NUMB_MASK;
+    }
+  while (--n != 0);
+
+  return cy;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/submul_1.c b/third_party/gmp/mpn/generic/submul_1.c
new file mode 100644
index 0000000..4744274
--- /dev/null
+++ b/third_party/gmp/mpn/generic/submul_1.c
@@ -0,0 +1,144 @@
+/* mpn_submul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   subtract the N least significant limbs of the product from the limb
+   vector pointed to by RP.  Return the most significant limb of the
+   product, adjusted for carry-out from the subtraction.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t u0, crec, c, p1, p0, r0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  crec = 0;
+  do
+    {
+      u0 = *up++;
+      umul_ppmm (p1, p0, u0, v0);
+
+      r0 = *rp;
+
+      p0 = r0 - p0;
+      c = r0 < p0;
+
+      p1 = p1 + c;
+
+      r0 = p0 - crec;		/* cycle 0, 3, ... */
+      c = p0 < r0;		/* cycle 1, 4, ... */
+
+      crec = p1 + c;		/* cycle 2, 5, ... */
+
+      *rp++ = r0;
+    }
+  while (--n != 0);
+
+  return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, cl, xl, c1, c2, c3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  cl = 0;
+  prev_p1 = 0;
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      SUBC_LIMB (c1, xl, r0, prev_p1);
+      SUBC_LIMB (c2, xl, xl, p0);
+      SUBC_LIMB (c3, xl, xl, cl);
+      cl = c1 + c2 + c3;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + cl;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, cl, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  cl = 0;
+  prev_p1 = 0;
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      xw = r0 - (prev_p1 + p0) + cl;
+      cl = (mp_limb_signed_t) xw >> GMP_NUMB_BITS; /* FIXME: non-portable */
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 - cl;
+}
+
+#endif
diff --git a/third_party/gmp/mpn/generic/tdiv_qr.c b/third_party/gmp/mpn/generic/tdiv_qr.c
new file mode 100644
index 0000000..92ff33c
--- /dev/null
+++ b/third_party/gmp/mpn/generic/tdiv_qr.c
@@ -0,0 +1,386 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
+   qxn is non-zero, generate that many fraction limbs and append them after the
+   other quotient limbs, and update the remainder accordingly.  The input
+   operands are unaffected.
+
+   Preconditions:
+   1. The most significant limb of the divisor must be non-zero.
+   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
+
+   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+   complexity of multiplication.
+
+Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  ASSERT_ALWAYS (qxn == 0);
+
+  ASSERT (nn >= 0);
+  ASSERT (dn >= 0);
+  ASSERT (dn == 0 || dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
+
+  switch (dn)
+    {
+    case 0:
+      DIVIDE_BY_ZERO;
+
+    case 1:
+      {
+	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+	return;
+      }
+
+    case 2:
+      {
+	mp_ptr n2p;
+	mp_limb_t qhl, cy;
+	TMP_DECL;
+	TMP_MARK;
+	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
+	  {
+	    int cnt;
+	    mp_limb_t d2p[2];
+	    count_leading_zeros (cnt, dp[1]);
+	    cnt -= GMP_NAIL_BITS;
+	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
+	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
+	    n2p = TMP_ALLOC_LIMBS (nn + 1);
+	    cy = mpn_lshift (n2p, np, nn, cnt);
+	    n2p[nn] = cy;
+	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+	    if (cy == 0)
+	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
+	    rp[0] = (n2p[0] >> cnt)
+	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
+	    rp[1] = (n2p[1] >> cnt);
+	  }
+	else
+	  {
+	    n2p = TMP_ALLOC_LIMBS (nn);
+	    MPN_COPY (n2p, np, nn);
+	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
+	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
+	    rp[0] = n2p[0];
+	    rp[1] = n2p[1];
+	  }
+	TMP_FREE;
+	return;
+      }
+
+    default:
+      {
+	int adjust;
+	gmp_pi1_t dinv;
+	TMP_DECL;
+	TMP_MARK;
+	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
+	if (nn + adjust >= 2 * dn)
+	  {
+	    mp_ptr n2p, d2p;
+	    mp_limb_t cy;
+	    int cnt;
+
+	    qp[nn - dn] = 0;			  /* zero high quotient limb */
+	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
+	      {
+		count_leading_zeros (cnt, dp[dn - 1]);
+		cnt -= GMP_NAIL_BITS;
+		d2p = TMP_ALLOC_LIMBS (dn);
+		mpn_lshift (d2p, dp, dn, cnt);
+		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		cy = mpn_lshift (n2p, np, nn, cnt);
+		n2p[nn] = cy;
+		nn += adjust;
+	      }
+	    else
+	      {
+		cnt = 0;
+		d2p = (mp_ptr) dp;
+		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		MPN_COPY (n2p, np, nn);
+		n2p[nn] = 0;
+		nn += adjust;
+	      }
+
+	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
+	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
+	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
+	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
+		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
+	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+	    else
+	      {
+		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
+		n2p = rp;
+	      }
+
+	    if (cnt != 0)
+	      mpn_rshift (rp, n2p, dn, cnt);
+	    else
+	      MPN_COPY (rp, n2p, dn);
+	    TMP_FREE;
+	    return;
+	  }
+
+	/* When we come here, the numerator/partial remainder is less
+	   than twice the size of the denominator.  */
+
+	  {
+	    /* Problem:
+
+	       Divide a numerator N with nn limbs by a denominator D with dn
+	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
+	       compared to dn, conventional division algorithms perform poorly.
+	       We want an algorithm that has an expected running time that is
+	       dependent only on qn.
+
+	       Algorithm (very informally stated):
+
+	       1) Divide the 2 x qn most significant limbs from the numerator
+		  by the qn most significant limbs from the denominator.  Call
+		  the result qest.  This is either the correct quotient, or
+		  1 or 2 too large.  Compute the remainder from the
+		  division.  (This step is implemented by an mpn_divrem call.)
+
+	       2) Is the most significant limb from the remainder < p, where p
+		  is the product of the most significant limb from the quotient
+		  and the next(d)?  (Next(d) denotes the next ignored limb from
+		  the denominator.)  If it is, decrement qest, and adjust the
+		  remainder accordingly.
+
+	       3) Is the remainder >= qest?  If it is, qest is the desired
+		  quotient.  The algorithm terminates.
+
+	       4) Subtract qest x next(d) from the remainder.  If there is
+		  borrow out, decrement qest, and adjust the remainder
+		  accordingly.
+
+	       5) Skip one word from the denominator (i.e., let next(d) denote
+		  the next less significant limb).  */
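+
+	    /* Example in base 10 (illustrative numbers, not from the
+	       code): N = 765432, D = 8765, qn = 2.  Step 1 divides 7654
+	       by 87, giving qest = 87 with remainder 85; steps 2-5 fold
+	       in the ignored digits 6 and 5 of D, confirming q = 87,
+	       since 765432 = 87 * 8765 + 2877.  */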
+
+	    mp_size_t qn;
+	    mp_ptr n2p, d2p;
+	    mp_ptr tp;
+	    mp_limb_t cy;
+	    mp_size_t in, rn;
+	    mp_limb_t quotient_too_large;
+	    unsigned int cnt;
+
+	    qn = nn - dn;
+	    qp[qn] = 0;				/* zero high quotient limb */
+	    qn += adjust;			/* qn cannot become bigger */
+
+	    if (qn == 0)
+	      {
+		MPN_COPY (rp, np, dn);
+		TMP_FREE;
+		return;
+	      }
+
+	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
+	    /* Normalize denominator by shifting it to the left such that its
+	       most significant bit is set.  Then shift the numerator the same
+	       amount, to mathematically preserve quotient.  */
+	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
+	      {
+		count_leading_zeros (cnt, dp[dn - 1]);
+		cnt -= GMP_NAIL_BITS;
+
+		d2p = TMP_ALLOC_LIMBS (qn);
+		mpn_lshift (d2p, dp + in, qn, cnt);
+		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
+
+		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+		if (adjust)
+		  {
+		    n2p[2 * qn] = cy;
+		    n2p++;
+		  }
+		else
+		  {
+		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
+		  }
+	      }
+	    else
+	      {
+		cnt = 0;
+		d2p = (mp_ptr) dp + in;
+
+		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+		if (adjust)
+		  {
+		    n2p[2 * qn] = 0;
+		    n2p++;
+		  }
+	      }
+
+	    /* Get an approximate quotient using the extracted operands.  */
+	    if (qn == 1)
+	      {
+		mp_limb_t q0, r0;
+		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
+		n2p[0] = r0 >> GMP_NAIL_BITS;
+		qp[0] = q0;
+	      }
+	    else if (qn == 2)
+	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+	    else
+	      {
+		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
+		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
+		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
+		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
+		else
+		  {
+		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
+		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+		    mp_ptr r2p = rp;
+		    if (np == r2p)	/* If N and R share space, put ... */
+		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
+		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
+		    MPN_COPY (n2p, r2p, qn);
+		  }
+	      }
+
+	    rn = qn;
+	    /* Multiply the first ignored divisor limb by the most significant
+	       quotient limb.  If that product is > the partial remainder's
+	       most significant limb, we know the quotient is too large.  This
+	       test quickly catches most cases where the quotient is too large;
+	       it catches all cases where the quotient is 2 too large.  */
+	    {
+	      mp_limb_t dl, x;
+	      mp_limb_t h, dummy;
+
+	      if (in - 2 < 0)
+		dl = 0;
+	      else
+		dl = dp[in - 2];
+
+#if GMP_NAIL_BITS == 0
+	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+#else
+	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
+	      if (cnt != 0)
+		x |= dl >> (GMP_NUMB_BITS - cnt);
+#endif
+	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
+
+	      if (n2p[qn - 1] < h)
+		{
+		  mp_limb_t cy;
+
+		  mpn_decr_u (qp, (mp_limb_t) 1);
+		  cy = mpn_add_n (n2p, n2p, d2p, qn);
+		  if (cy)
+		    {
+		      /* The partial remainder is safely large.  */
+		      n2p[qn] = cy;
+		      ++rn;
+		    }
+		}
+	    }
+
+	    quotient_too_large = 0;
+	    if (cnt != 0)
+	      {
+		mp_limb_t cy1, cy2;
+
+		/* Append partially used numerator limb to partial remainder.  */
+		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
+		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
+
+		/* Update partial remainder with partially used divisor limb.  */
+		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
+		if (qn != rn)
+		  {
+		    ASSERT_ALWAYS (n2p[qn] >= cy2);
+		    n2p[qn] -= cy2;
+		  }
+		else
+		  {
+		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
+
+		    quotient_too_large = (cy1 < cy2);
+		    ++rn;
+		  }
+		--in;
+	      }
+	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
+
+	    tp = TMP_ALLOC_LIMBS (dn);
+
+	    if (in < qn)
+	      {
+		if (in == 0)
+		  {
+		    MPN_COPY (rp, n2p, rn);
+		    ASSERT_ALWAYS (rn == dn);
+		    goto foo;
+		  }
+		mpn_mul (tp, qp, qn, dp, in);
+	      }
+	    else
+	      mpn_mul (tp, dp, in, qp, qn);
+
+	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+	    MPN_COPY (rp + in, n2p, dn - in);
+	    quotient_too_large |= cy;
+	    cy = mpn_sub_n (rp, np, tp, in);
+	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+	    quotient_too_large |= cy;
+	  foo:
+	    if (quotient_too_large)
+	      {
+		mpn_decr_u (qp, (mp_limb_t) 1);
+		mpn_add_n (rp, rp, dp, dn);
+	      }
+	  }
+	TMP_FREE;
+	return;
+      }
+    }
+}
diff --git a/third_party/gmp/mpn/generic/toom22_mul.c b/third_party/gmp/mpn/generic/toom22_mul.c
new file mode 100644
index 0000000..64f024a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom22_mul.c
@@ -0,0 +1,221 @@
+/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
+   accurately, bn <= an < 2bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+   |b1_|___b0_|
+   <-t-><--n-->
+
+  v0  =  a0     * b0       #   A(0)*B(0)
+  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
+  vinf=      a1 *     b1   # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_toom22   1
+#else
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else								\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+/* Normally, this calls mul_basecase or toom22_mul.  But when the fraction
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
+   relative unbalance will become a larger and larger relative unbalance with
+   each recursion (the difference s-t will be invariant over recursive calls).
+   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
+  do {									\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, an, b, bn);				\
+    else if (4 * an < 5 * bn)						\
+      mpn_toom22_mul (p, a, an, b, bn, ws);				\
+    else								\
+      mpn_toom32_mul (p, a, an, b, bn, ws);				\
+  } while (0)
+
+void
+mpn_toom22_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+  mp_ptr bsm1;
+
+#define a0  ap
+#define a1  (ap + n)
+#define b0  bp
+#define b1  (bp + n)
+
+  s = an >> 1;
+  n = an - s;
+  t = bn - n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n && s >= n - 1);
+  ASSERT (0 < t && t <= s);
+
+  asm1 = pp;
+  bsm1 = pp + n;
+
+  vm1_neg = 0;
+
+  /* Compute asm1.  */
+  if (s == n)
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, n);
+	  vm1_neg = 1;
+	}
+      else
+	{
+	  mpn_sub_n (asm1, a0, a1, n);
+	}
+    }
+  else /* n - s == 1 */
+    {
+      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, s);
+	  asm1[s] = 0;
+	  vm1_neg = 1;
+	}
+      else
+	{
+	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+	}
+    }
+
+  /* Compute bsm1.  */
+  if (t == n)
+    {
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+    }
+  else
+    {
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+#define v0	pp				/* 2n */
+#define vinf	(pp + 2 * n)			/* s+t */
+#define vm1	scratch				/* 2n */
+#define scratch_out	scratch + 2 * n
+
+  /* vm1, 2n limbs */
+  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+
+  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
+  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+
+  /* v0, 2n limbs */
+  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
+
+  /* H(v0) + L(vinf) */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + H(v0) */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* L(vinf) + H(vinf) */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
+
+  if (vm1_neg)
+    cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
+  else {
+    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+    if (UNLIKELY (cy + 1 == 0)) { /* cy is negative */
+      /* The total contribution of v0+vinf-vm1 cannot be negative. */
+#if WANT_ASSERT
+      /* The borrow in cy stops the propagation of the carry cy2. */
+      ASSERT (cy2 == 1);
+      cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+      ASSERT (cy == 0);
+#else
+      /* we simply fill the area with zeros. */
+      MPN_FILL (pp + 2 * n, n, 0);
+#endif
+      return;
+    }
+  }
+
+  ASSERT (cy  <= 2);
+  ASSERT (cy2 <= 2);
+
+  MPN_INCR_U (pp + 2 * n, s + t, cy2);
+  /* if s+t==n, cy is zero, but we should not access pp[3*n] at all. */
+  MPN_INCR_U (pp + 3 * n, s + t - n, cy);
+}
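+
+/* The interpolation above on a single word: with A = a1*W + a0 and
+   B = b1*W + b0,
+
+     A*B = vinf*W^2 + (vinf + v0 - vm1)*W + v0,
+
+   since a1*b0 + a0*b1 = a1*b1 + a0*b0 - (a1-a0)*(b1-b0).  A disabled,
+   illustrative check with W = 2^16 (assumes <stdint.h>; not part of
+   the library proper):  */
+#if 0
+#include <stdint.h>
+static void
+toom22_word_demo (uint32_t a, uint32_t b)
+{
+  uint32_t a1 = a >> 16, a0 = a & 0xffff;
+  uint32_t b1 = b >> 16, b0 = b & 0xffff;
+  int64_t vm1 = ((int64_t) a1 - a0) * ((int64_t) b1 - b0);	/* A(-1)*B(-1) */
+  uint64_t v0 = (uint64_t) a0 * b0;				/* A(0)*B(0) */
+  uint64_t vinf = (uint64_t) a1 * b1;				/* A(inf)*B(inf) */
+  uint64_t mid = vinf + v0 - (uint64_t) vm1;			/* a1*b0 + a0*b1 */
+  ASSERT_ALWAYS (((vinf << 32) + (mid << 16) + v0) == (uint64_t) a * b);
+}
+#endif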
diff --git a/third_party/gmp/mpn/generic/toom2_sqr.c b/third_party/gmp/mpn/generic/toom2_sqr.c
new file mode 100644
index 0000000..4eaa141
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom2_sqr.c
@@ -0,0 +1,155 @@
+/* mpn_toom2_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+
+  v0  =  a0     ^2  #   A(0)^2
+  vm1 = (a0- a1)^2  #  A(-1)^2
+  vinf=      a1 ^2  # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_toom2   1
+#else
+#define MAYBE_sqr_toom2							\
+  (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
+#endif
+
+#define TOOM2_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (! MAYBE_sqr_toom2						\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else								\
+      mpn_toom2_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom2_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+
+#define a0  ap
+#define a1  (ap + n)
+
+  s = an >> 1;
+  n = an - s;
+
+  ASSERT (0 < s && s <= n && s >= n - 1);
+
+  asm1 = pp;
+
+  /* Compute asm1.  */
+  if (s == n)
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, n);
+	}
+      else
+	{
+	  mpn_sub_n (asm1, a0, a1, n);
+	}
+    }
+  else /* n - s == 1 */
+    {
+      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, s);
+	  asm1[s] = 0;
+	}
+      else
+	{
+	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+	}
+    }
+
+#define v0	pp				/* 2n */
+#define vinf	(pp + 2 * n)			/* s+s */
+#define vm1	scratch				/* 2n */
+#define scratch_out	scratch + 2 * n
+
+  /* vm1, 2n limbs */
+  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+
+  /* vinf, s+s limbs */
+  TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+
+  /* v0, 2n limbs */
+  TOOM2_SQR_REC (v0, ap, n, scratch_out);
+
+  /* H(v0) + L(vinf) */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + H(v0) */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* L(vinf) + H(vinf) */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
+
+  cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+
+  ASSERT (cy + 1 <= 3);
+  ASSERT (cy2 <= 2);
+
+  if (LIKELY (cy <= 2)) {
+    MPN_INCR_U (pp + 2 * n, s + s, cy2);
+    MPN_INCR_U (pp + 3 * n, s + s - n, cy);
+  } else { /* cy is negative */
+    /* The total contribution of v0+vinf-vm1 can not be negative. */
+#if WANT_ASSERT
+    /* The borrow in cy stops the propagation of the carry cy2. */
+    ASSERT (cy2 == 1);
+    cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+    ASSERT (cy == 0);
+#else
+    /* we simply fill the area with zeros. */
+    MPN_FILL (pp + 2 * n, n, 0);
+#endif
+  }
+}
diff --git a/third_party/gmp/mpn/generic/toom32_mul.c b/third_party/gmp/mpn/generic/toom32_mul.c
new file mode 100644
index 0000000..f03ba56
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom32_mul.c
@@ -0,0 +1,322 @@
+/* mpn_toom32_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 1.5
+   times as large as bn.  Or more accurately, bn < an < 3bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Improvements by Marco Bodrato and Niels Möller.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +inf
+
+  <-s-><--n--><--n-->
+   ___ ______ ______
+  |a2_|___a1_|___a0_|
+	|_b1_|___b0_|
+	<-t--><--n-->
+
+  v0  =  a0         * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1) #   A(1)*B(1)      ah  <= 2  bh <= 1
+  vm1 = (a0- a1+ a2)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh = 0
+  vinf=          a2 *     b1  # A(inf)*B(inf)
+*/
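+
+/* With C(x) = A(x)*B(x) = x0 + x1*x + x2*x^2 + x3*x^3, these evaluations
+   give x0 = v0, x3 = vinf, x0 + x2 = (v1 + vm1)/2 and
+   x1 + x3 = (v1 - vm1)/2 (signs adjusted when vm1 is negative), which is
+   the split the recombination code below works from.  */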
+
+#define TOOM32_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    mpn_mul_n (p, a, b, n);						\
+  } while (0)
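+
+/* The pointwise products here are balanced n-by-n multiplications, so the
+   wrapper simply defers to mpn_mul_n, which does its own algorithm
+   selection; no threshold logic is needed at this level.  */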
+
+void
+mpn_toom32_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy;
+  mp_limb_signed_t hi;
+  mp_limb_t ap1_hi, bp1_hi;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  /* Required, to ensure that s + t >= n. */
+  ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
+
+  n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
+
+  s = an - 2 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s + t >= n);
+
+  /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
+#define ap1 (pp)		/* n, most significant limb in ap1_hi */
+#define bp1 (pp + n)		/* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n)		/* n, most significant bit in hi */
+#define bm1 (pp + 3*n)		/* n */
+#define v1 (scratch)		/* 2n + 1 */
+#define vm1 (pp)		/* 2n + 1 */
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+
+  /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+
+  /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+
+  /* Compute ap1 = a0 + a1 + a2, am1 = a0 - a1 + a2 */
+  ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+      hi = ap1_hi - (cy & 1);
+      ap1_hi += (cy >> 1);
+      vm1_neg = 0;
+    }
+#else
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+      vm1_neg = 0;
+    }
+  ap1_hi += mpn_add_n (ap1, ap1, a1, n);
+#endif
+
+  /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+	}
+      bp1_hi = cy >> 1;
+#else
+      bp1_hi = mpn_add_n (bp1, b0, b1, n);
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+	}
+#endif
+    }
+  else
+    {
+      /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+      bp1_hi = mpn_add (bp1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
+	  MPN_ZERO (bm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+	}
+    }
+
+  TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
+  if (ap1_hi == 1)
+    {
+      cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
+    }
+  else if (ap1_hi == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+#else
+      cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bp1_hi != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
+  v1[2 * n] = cy;
+
+  TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
+  if (hi)
+    hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+
+  vm1[2*n] = hi;
+
+  /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
+  if (vm1_neg)
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_sub_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_add_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+
+  /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
+
+     y = x1 + x3 + (x0 + x2) * B
+       = (x0 + x2) * B + (x0 + x2) - vm1.
+
+     y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
+     follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
+     (already in place, except for carry propagation).
+
+     We thus add
+
+   B^3  B^2   B    1
+    |    |    |    |
+   +-----+----+
+ + |  x0 + x2 |
+   +----+-----+----+
+ +      |  x0 + x2 |
+	+----------+
+ -      |  vm1     |
+ --+----++----+----+-
+   | y2  | y1 | y0 |
+   +-----+----+----+
+
+  Since we store y0 at the same location as the low half of x0 + x2, we
+  need to do the middle sum first. */
+
+  hi = vm1[2*n];
+  cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
+  MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
+
+  /* FIXME: Can we get rid of this second vm1_neg conditional by
+     swapping the location of +1 and -1 values? */
+  if (vm1_neg)
+    {
+      cy = mpn_add_n (v1, v1, vm1, n);
+      hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_INCR_U (v1 + n, n+1, hi);
+    }
+  else
+    {
+      cy = mpn_sub_n (v1, v1, vm1, n);
+      hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_DECR_U (v1 + n, n+1, hi);
+    }
+
+  TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
+  /* vinf, s+t limbs.  Use mpn_mul for now, to handle unbalanced operands */
+  if (s > t)  mpn_mul (pp+3*n, a2, s, b1, t);
+  else        mpn_mul (pp+3*n, b1, t, a2, s);
+
+  /* Remaining interpolation.
+
+     y * B + x0 + x3 B^3 - x0 B^2 - x3 B
+     = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
+     = y0 B + y1 B^2 + y2 B^3 + Lx0 + H x0 B
+       + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
+     = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
+       + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
+
+	  B^4       B^3       B^2        B         1
+ |         |         |         |         |         |
+   +-------+                   +---------+---------+
+   |  Hx3  |                   | Hx0-Lx3 |    Lx0  |
+   +------+----------+---------+---------+---------+
+	  |    y2    |  y1     |   y0    |
+	  ++---------+---------+---------+
+	  -| Hx0-Lx3 | - Lx0   |
+	   +---------+---------+
+		      | - Hx3  |
+		      +--------+
+
+    We must take into account the carry from Hx0 - Lx3.
+  */
+
+  cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
+  hi = scratch[2*n] + cy;
+
+  cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
+  hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
+
+  hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
+
+  /* FIXME: Is support for s + t == n needed? */
+  if (LIKELY (s + t > n))
+    {
+      hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+
+      if (hi < 0)
+	MPN_DECR_U (pp + 4*n, s+t-n, -hi);
+      else
+	MPN_INCR_U (pp + 4*n, s+t-n, hi);
+    }
+  else
+    ASSERT (hi == 0);
+}
diff --git a/third_party/gmp/mpn/generic/toom33_mul.c b/third_party/gmp/mpn/generic/toom33_mul.c
new file mode 100644
index 0000000..8f49f42
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom33_mul.c
@@ -0,0 +1,315 @@
+/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (3/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+   |b2_|___b1_|___b0_|
+   <-t-><--n--><--n-->
+
+  v0  =  a0         * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
+  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
+  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
+  vinf=          a2 *         b2  # A(inf)*B(inf)
+*/
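+
+/* The product C(x) = A(x)*B(x) has degree 4, so the five values v0, v1,
+   vm1, v2 and vinf determine its coefficients uniquely;
+   mpn_toom_interpolate_5pts performs that solve at the end.  */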
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom33   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
+#endif
+
+/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
+   multiplication at the infinity point. We may have
+   MAYBE_mul_basecase == 0, and still get s just below
+   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
+   s == 1 and mpn_toom22_mul will crash.
+*/
+
+#define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else if (! MAYBE_mul_toom33						\
+	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+    else								\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+void
+mpn_toom33_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2;
+  mp_ptr bs1, bsm1, bs2;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  n = (an + 2) / (size_t) 3;
+
+  s = an - 2 * n;
+  t = bn - 2 * n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  as1  = scratch + 4 * n + 4;
+  asm1 = scratch + 2 * n + 2;
+  as2 = pp + n + 1;
+
+  bs1 = pp;
+  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
+  bs2 = pp + 2 * n + 2;
+
+  gp = scratch;
+
+  vm1_neg = 0;
+
+  /* Compute as1 and asm1.  */
+  cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 and bsm1.  */
+  cy = mpn_add (gp, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
+      bs1[n] = cy >> 1;
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+      bs1[n] = cy + (cy2 >> 1);
+      bsm1[n] = cy - (cy2 & 1);
+    }
+#else
+  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, gp, n);
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (bsm1, gp, b1, n);
+      bsm1[n] = cy;
+    }
+#endif
+
+  /* Compute bs2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (bs2, b2, bs1, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (bs2, b1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+  cy  = mpn_add_n (bs2, bs1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+  cy -= mpn_sub_n (bs2, bs2, b0, n);
+#endif
+#endif
+  bs2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 6);
+  ASSERT (bs2[n] <= 6);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+t */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
+#endif
+
+  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
+  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1, 2n+1 limbs */
+  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1];
+  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
+  vinf[1] = cy;
+#endif
+
+  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+}
diff --git a/third_party/gmp/mpn/generic/toom3_sqr.c b/third_party/gmp/mpn/generic/toom3_sqr.c
new file mode 100644
index 0000000..7be15bf
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom3_sqr.c
@@ -0,0 +1,225 @@
+/* mpn_toom3_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+
+  v0  =  a0         ^2 #   A(0)^2
+  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
+  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
+  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
+  vinf=          a2 ^2 # A(inf)^2
+*/
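+
+/* Since vm1 = A(-1)^2 is a square, it is never negative here, so no sign
+   flag needs to be tracked; mpn_toom_interpolate_5pts is called with a
+   zero sign argument at the end of the function.  */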
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom3   1
+#else
+#define MAYBE_sqr_basecase						\
+  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom3							\
+  (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
+#endif
+
+#define TOOM3_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase						\
+	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (! MAYBE_sqr_toom3						\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom3_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom3_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+
+  n = (an + 2) / (size_t) 3;
+
+  s = an - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+
+  as1 = scratch + 4 * n + 4;
+  asm1 = scratch + 2 * n + 2;
+  as2 = pp + n + 1;
+
+  gp = scratch;
+
+  /* Compute as1 and asm1.  */
+  cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+  as2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+s */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
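+  /* Both operands of the square are asm1, so the top-limb cross terms are
+     added twice: (asm1 + h*B^n)^2 needs 2*h*asm1 plus h^2, and since
+     h = asm1[n] <= 1, h^2 == h is folded into the initial value of cy.  */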
+  TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  if (asm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else
+  TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
+#endif
+
+  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1, 2n+1 limbs */
+  TOOM3_SQR_REC (v1, as1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * as1[n] + mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (as1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1];
+  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+  vinf[1] = cy;
+#endif
+
+  TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
+}
diff --git a/third_party/gmp/mpn/generic/toom42_mul.c b/third_party/gmp/mpn/generic/toom42_mul.c
new file mode 100644
index 0000000..2dfba9b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom42_mul.c
@@ -0,0 +1,234 @@
+/* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
+   as large as bn.  Or more accurately, (3/2)bn < an < 4bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+	       |_b1_|___b0_|
+	       <-t--><--n-->
+
+  v0  =  a0             * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1) #   A(1)*B(1)      ah  <= 3  bh <= 1
+  vm1 = (a0- a1+ a2- a3)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh  = 0
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1) #   A(2)*B(2)      ah  <= 14 bh <= 2
+  vinf=              a3 *     b1  # A(inf)*B(inf)
+*/
+
+#define TOOM42_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    mpn_mul_n (p, a, b, n);						\
+  } while (0)
+
+void
+mpn_toom42_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, vinf0;
+  mp_ptr a0_a2;
+  mp_ptr as1, asm1, as2;
+  mp_ptr bs1, bsm1, bs2;
+  mp_ptr tmp;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
+
+  s = an - 3 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  tmp = TMP_ALLOC_LIMBS (6 * n + 5);
+  as1  = tmp; tmp += n + 1;
+  asm1 = tmp; tmp += n + 1;
+  as2  = tmp; tmp += n + 1;
+  bs1  = tmp; tmp += n + 1;
+  bsm1 = tmp; tmp += n;
+  bs2  = tmp; tmp += n + 1;
+
+  a0_a2 = pp;
+
+  /* Compute as1 and asm1.  */
+  vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a2, a3, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a3, s, 1);
+  cy += mpn_add_n (as2, a2, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	}
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+  /* Compute bs2, recycling bs1. bs2=bs1+b1  */
+  mpn_add (bs2, bs1, n + 1, b1, t);
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0);*/
+  ASSERT (as2[n] <= 14);
+  ASSERT (bs2[n] <= 2);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+t */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out	scratch + 4 * n + 4	/* Currently unused. */
+
+  /* vm1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  vm1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a3, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a3, s);
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+  /* v1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] == 3)
+    {
+      cy = 3 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+  v1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/toom42_mulmid.c b/third_party/gmp/mpn/generic/toom42_mulmid.c
new file mode 100644
index 0000000..f581b10
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom42_mulmid.c
@@ -0,0 +1,237 @@
+/* mpn_toom42_mulmid -- toom42 middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+
+/*
+  Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
+
+  Neither ap nor bp may overlap rp.
+
+  Must have n >= 4.
+
+  Amount of scratch space required is given by mpn_toom42_mulmid_itch().
+
+  FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
+  requirements should be clarified.
+*/
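+
+/* Illustrative call (sketch): for n = 4 the product area needs
+   n + 2 = 6 limbs and the scratch area mpn_toom42_mulmid_itch (4) limbs:
+
+     mp_limb_t rp[4 + 2];
+     mpn_toom42_mulmid (rp, ap, bp, 4, scratch);
+
+   with {ap,7} and {bp,4} holding the operands, neither overlapping rp,
+   and scratch sized by mpn_toom42_mulmid_itch.  */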
+void
+mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+                   mp_ptr scratch)
+{
+  mp_limb_t cy, e[12], zh, zl;
+  mp_size_t m;
+  int neg;
+
+  ASSERT (n >= 4);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  ap += n & 1;   /* handle odd row and diagonal later */
+  m = n / 2;
+
+  /* (e0h:e0l) etc are correction terms, in 2's complement */
+#define e0l (e[0])
+#define e0h (e[1])
+#define e1l (e[2])
+#define e1h (e[3])
+#define e2l (e[4])
+#define e2h (e[5])
+#define e3l (e[6])
+#define e3h (e[7])
+#define e4l (e[8])
+#define e4h (e[9])
+#define e5l (e[10])
+#define e5h (e[11])
+
+#define s (scratch + 2)
+#define t (rp + m + 2)
+#define p0 rp
+#define p1 scratch
+#define p2 (rp + m)
+#define next_scratch (scratch + 3*m + 1)
+
+  /*
+            rp                            scratch
+  |---------|-----------|    |---------|---------|----------|
+  0         m         2m+2   0         m         2m        3m+1
+            <----p2---->       <-------------s------------->
+  <----p0----><---t---->     <----p1---->
+  */
+
+  /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
+  cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
+  cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
+		       bp + m, bp, m, cy);
+  mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
+
+  /* compute t = (-1)^neg * ({b,m} - {b+m,m}) and error terms e4, e5 */
+  if (mpn_cmp (bp + m, bp, m) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
+				      ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 1;
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
+				      ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 0;
+    }
+
+  /* recursive middle products. The picture is:
+
+      b[2m-1]   A   A   A   B   B   B   -   -   -   -   -
+      ...       -   A   A   A   B   B   B   -   -   -   -
+      b[m]      -   -   A   A   A   B   B   B   -   -   -
+      b[m-1]    -   -   -   C   C   C   D   D   D   -   -
+      ...       -   -   -   -   C   C   C   D   D   D   -
+      b[0]      -   -   -   -   -   C   C   C   D   D   D
+               a[0]   ...  a[m]  ...  a[2m]    ...    a[4m-2]
+  */
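+
+  /* This is a Karatsuba-style split: from the three half-size middle
+     products p0 = A+B, p1 = +-(B-C) and p2 = C+D computed below, the two
+     halves A+C and B+D of the full middle product are recovered by adding
+     or subtracting p1, and the correction terms e0..e5 recorded by the
+     mpn_add_err*_n/mpn_sub_err2_n calls above undo the wrap-around
+     errors.  */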
+
+  if (m < MULMID_TOOM42_THRESHOLD)
+    {
+      /* A + B */
+      mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
+      /* accumulate high limbs of p0 into e1 */
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      /* (-1)^neg * (B - C)   (overwrites first m limbs of s) */
+      mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
+      /* C + D   (overwrites t) */
+      mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
+    }
+  else
+    {
+      /* as above, but use toom42 instead */
+      mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
+      mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
+    }
+
+  /* apply error terms */
+
+  /* -e0 at rp[0] */
+  SUBC_LIMB (cy, rp[0], rp[0], e0l);
+  SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
+  if (UNLIKELY (cy))
+    {
+      cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
+      SUBC_LIMB (cy, e1l, e1l, cy);
+      e1h -= cy;
+    }
+
+  /* z = e1 - e2 + high(p0) */
+  SUBC_LIMB (cy, zl, e1l, e2l);
+  zh = e1h - e2h - cy;
+
+  /* z at rp[m] */
+  ADDC_LIMB (cy, rp[m], rp[m], zl);
+  zh = (zh + cy) & GMP_NUMB_MASK;
+  ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
+  cy -= (zh >> (GMP_NUMB_BITS - 1));
+  if (UNLIKELY (cy))
+    {
+      if (cy == 1)
+	mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
+      else /* cy == -1 */
+	mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
+    }
+
+  /* e3 at rp[2*m] */
+  ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
+  rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
+
+  /* e4 at p1[0] */
+  ADDC_LIMB (cy, p1[0], p1[0], e4l);
+  ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
+  if (UNLIKELY (cy))
+    mpn_add_1 (p1 + 2, p1 + 2, m, 1);
+
+  /* -e5 at p1[m] */
+  SUBC_LIMB (cy, p1[m], p1[m], e5l);
+  p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
+
+  /* adjustment if p1 ends up negative */
+  cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
+
+  /* add (-1)^neg * (p1 - B^m * p1) to output */
+  if (neg)
+    {
+      mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
+      mpn_add (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_sub_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+  else
+    {
+      mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
+      mpn_sub (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_add_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+
+  /* odd row and diagonal */
+  if (n & 1)
+    {
+      /*
+        Products marked E are already done. We need to do products marked O.
+
+        OOOOO----
+        -EEEEO---
+        --EEEEO--
+        ---EEEEO-
+        ----EEEEO
+       */
+
+      /* first row of O's */
+      cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
+      ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
+
+      /* O's on diagonal */
+      /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
+         that can handle the sum below. Currently we're relying on
+         mulmid_basecase being pretty fast for a diagonal sum like this,
+	 which is true at least for the K8 asm version, but surely false
+	 for the generic version. */
+      mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
+      mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
+    }
+}
diff --git a/third_party/gmp/mpn/generic/toom43_mul.c b/third_party/gmp/mpn/generic/toom43_mul.c
new file mode 100644
index 0000000..0650138
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom43_mul.c
@@ -0,0 +1,233 @@
+/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
+   times as large as bn.  Or more accurately, bn < an < 2 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+	|_b2_|___b1_|___b0_|
+	<-t--><--n--><--n-->
+
+  v0  =  a0             * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
+  vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
+  vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
+  vinf=              a3 *         b2  # A(inf)*B(inf)
+*/
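+
+/* Here C(x) = A(x)*B(x) has degree 5, so six evaluations suffice; the
+   possible negations of vm1 and vm2 are recorded in the toom6_flags value
+   and resolved by mpn_toom_interpolate_6pts at the end.  */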
+
+void
+mpn_toom43_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2 * n)
+
+  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
+
+  s = an - 3 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* This is true whenever an >= 25 or bn >= 19, I think. It
+     guarantees that we can fit 5 values of size n+1 in the product
+     area. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp				/* 2n */
+#define vm1   (scratch)				/* 2n+1 */
+#define v1    (pp + 2*n)			/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 5 * n)			/* s+t */
+#define bs1    pp				/* n+1 */
+#define bsm1  (scratch + 2 * n + 2)		/* n+1 */
+#define asm1  (scratch + 3 * n + 3)		/* n+1 */
+#define asm2  (scratch + 4 * n + 4)		/* n+1 */
+#define bsm2  (pp + n + 1)			/* n+1 */
+#define bs2   (pp + 2 * n + 2)			/* n+1 */
+#define as2   (pp + 3 * n + 3)			/* n+1 */
+#define as1   (pp + 4 * n + 4)			/* n+1 */
+
+  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
+     limb, because products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define b0b2  scratch
+#define a1a3  asm1
+#define b1d   bsm1
+
+  /* Compute as2 and asm2.  */
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
+
+  /* Compute bs2 and bsm2.  */
+  b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*       2b1      */
+  cy  = mpn_lshift (b0b2, b2, t, 2);			/*  4b2           */
+  cy += mpn_add_n (b0b2, b0b2, b0, t);			/*  4b2      + b0 */
+  if (t != n)
+    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
+  b0b2[n] = cy;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
+    }
+#else
+  mpn_add_n (bs2, b0b2, b1d, n+1);
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)
+    {
+      mpn_sub_n (bsm2, b1d, b0b2, n+1);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      mpn_sub_n (bsm2, b0b2, b1d, n+1);
+    }
+#endif
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
+
+  /* Compute bs1 and bsm1.  */
+  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
+      bs1[n] = cy >> 1;
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
+      bs1[n] = bsm1[n] + (cy >> 1);
+      bsm1[n]-= cy & 1;
+    }
+#else
+  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, bsm1, n);
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+    }
+  else
+    {
+      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
+    }
+#endif
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <=14);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 9);
+  ASSERT (bsm2[n] <= 4);
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a3, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a3, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+/* #undef as1 */
+/* #undef as2 */
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef b1d
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+}
diff --git a/third_party/gmp/mpn/generic/toom44_mul.c b/third_party/gmp/mpn/generic/toom44_mul.c
new file mode 100644
index 0000000..77d5083
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom44_mul.c
@@ -0,0 +1,235 @@
+/* mpn_toom44_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (4/3)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+   |b3_|___b2_|___b1_|___b0_|
+   <-t-><--n--><--n--><--n-->
+
+  v0  =   a0             *  b0              #    A(0)*B(0)
+  v1  = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) #    A(1)*B(1)      ah  <= 3   bh  <= 3
+  vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) #   A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
+  v2  = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) #    A(2)*B(2)      ah  <= 14  bh  <= 14
+  vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) #   A(-2)*B(-2)   |ah| <= 9  |bh| <= 9
+  vh  = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) #  A(1/2)*B(1/2)    ah  <= 14  bh  <= 14
+  vinf=               a3 *          b3      #  A(inf)*B(inf)
+*/
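+
+/* C(x) = A(x)*B(x) has degree 6, hence the seven evaluation points.  The
+   point 1/2 is used in scaled form, vh = (8*A(1/2)) * (8*B(1/2)), so all
+   values stay integral; mpn_toom_interpolate_7pts expects this scaling.  */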
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom44   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom44						\
+  (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+#endif
+
+#define TOOM44_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else if (MAYBE_mul_toom22						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+    else if (! MAYBE_mul_toom44						\
+	     || BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+    else								\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+/* Use of scratch space. In the product area, we store
+
+      ___________________
+     |vinf|____|_v1_|_v0_|
+      s+t  2n-1 2n+1  2n
+
+   The other recursive products, vm1, v2, vm2, vh are stored in the
+   scratch area. When computing them, we use the product area for
+   intermediate values.
+
+   Next, we compute v1. We can store the intermediate factors at v0
+   and at vh + 2n + 2.
+
+   Finally, for v0 and vinf, factors are parts of the input operands,
+   and we need scratch space only for the recursive multiplication.
+
+   In all, if S(an) is the scratch need, the needed space is bounded by
+
+     S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
+
+   which should give S(n) = 8 n/3 + c log(n) for some constant c.
+*/
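+
+/* (The 8 n/3 bound follows by summing the geometric series: each
+   recursion level contributes about 2n limbs and the size shrinks by a
+   factor of 4, so 2n * (1 + 1/4 + 1/16 + ...) = 8n/3.)  */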
+
+void
+mpn_toom44_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  enum toom7_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+#define b3  (bp + 3*n)
+
+  ASSERT (an >= bn);
+
+  n = (an + 3) >> 2;
+
+  s = an - 3 * n;
+  t = bn - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s >= t);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrite the
+   * following limb, so these must be computed in order, and we need a
+   * one limb gap to tp. */
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* apx and bpx must not overlap with v1 */
+#define apx   pp				/* n+1 */
+#define amx   (pp + n + 1)			/* n+1 */
+#define bmx   (pp + 2*n + 2)			/* n+1 */
+#define bpx   (pp + 4*n + 2)			/* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
+
+  /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3.  */
+  flags = (enum toom7_flags) (flags ^ (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp)));
+
+  TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp);	/* v2,  2n+1 limbs */
+  TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp);	/* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = ((2*a0 + a1) * 2 + a2) * 2 + a3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+      MPN_INCR_U (apx + s, n+1-s, cy2);
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = ((2*b0 + b1) * 2 + b2) * 2 + b3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bpx, b1, b0, n);
+  cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);
+      bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
+      MPN_INCR_U (bpx + t, n+1-t, cy2);
+    }
+  else
+    bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+#else
+  cy = mpn_lshift (bpx, b0, n, 1);
+  cy += mpn_add_n (bpx, bpx, b1, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  cy += mpn_add_n (bpx, bpx, b2, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+#endif
+
+  ASSERT (apx[n] < 15);
+  ASSERT (bpx[n] < 15);
+
+  TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp);	/* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  flags = (enum toom7_flags) (flags | (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp)));
+
+  /* Compute bpx = b0 + b1 + b2 + b3 and bmx = b0 - b1 + b2 - b3.  */
+  flags = (enum toom7_flags) (flags ^ (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp)));
+
+  TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp);	/* vm1,  2n+1 limbs */
+  /* Clobbers amx, bmx. */
+  TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp);	/* v1,  2n+1 limbs */
+
+  TOOM44_MUL_N_REC (v0, a0, b0, n, tp);
+  if (s > t)
+    mpn_mul (vinf, a3, s, b3, t);
+  else
+    TOOM44_MUL_N_REC (vinf, a3, b3, s, tp);	/* vinf, s+t limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+}
diff --git a/third_party/gmp/mpn/generic/toom4_sqr.c b/third_party/gmp/mpn/generic/toom4_sqr.c
new file mode 100644
index 0000000..aec84c1
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom4_sqr.c
@@ -0,0 +1,163 @@
+/* mpn_toom4_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+
+  v0  =   a0             ^2 #    A(0)^2
+  v1  = ( a0+ a1+ a2+ a3)^2 #    A(1)^2   ah  <= 3
+  vm1 = ( a0- a1+ a2- a3)^2 #   A(-1)^2  |ah| <= 1
+  v2  = ( a0+2a1+4a2+8a3)^2 #    A(2)^2   ah  <= 14
+  vh  = (8a0+4a1+2a2+ a3)^2 #  A(1/2)^2   ah  <= 14
+  vmh = (8a0-4a1+2a2- a3)^2 # A(-1/2)^2  -4<=ah<=9
+  vinf=               a3 ^2 #  A(inf)^2
+*/
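+
+/* Editor's sketch (not from upstream GMP): the table above can be checked
+   with ordinary integers.  Split a small number into four digits in a toy
+   base B; its square, as a polynomial in B, has 7 coefficients, so the 7
+   sample values above determine it.  Compile standalone to try. */
+#if 0
+#include <assert.h>
+#include <stdint.h>
+int main (void)
+{
+  const uint64_t B = 100;                 /* toy limb base */
+  uint64_t a[4] = { 12, 34, 56, 7 };      /* A(x) = a3 x^3 + ... + a0 */
+  uint64_t c[7] = { 0 };                  /* C(x) = A(x)^2, degree 6 */
+  uint64_t A = 0, AA = 0, A2, C2, pw;
+  int i, j;
+  for (i = 0; i < 4; i++)                 /* coefficients by convolution */
+    for (j = 0; j < 4; j++)
+      c[i + j] += a[i] * a[j];
+  for (i = 0, pw = 1; i < 4; i++, pw *= B)   /* A = A(B) */
+    A += a[i] * pw;
+  for (i = 0, pw = 1; i < 7; i++, pw *= B)   /* A^2 = C(B) */
+    AA += c[i] * pw;
+  assert (AA == A * A);
+  /* Each sample is one value of C, e.g. v2: C(2) = A(2)^2. */
+  A2 = a[0] + 2*a[1] + 4*a[2] + 8*a[3];
+  for (i = 0, C2 = 0, pw = 1; i < 7; i++, pw *= 2)
+    C2 += c[i] * pw;
+  assert (C2 == A2 * A2);
+  return 0;
+}
+#endif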
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_toom4   1
+#else
+#define MAYBE_sqr_basecase						\
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2							\
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom4							\
+  (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+#endif
+
+#define TOOM4_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase						\
+	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (MAYBE_sqr_toom2						\
+	     && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else if (! MAYBE_sqr_toom4						\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom4_sqr (p, a, n, ws);					\
+  } while (0)
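+
+/* Editor's note: the MAYBE_sqr_* macros are compile-time constants derived
+   from the tuned thresholds, so unreachable arms of TOOM4_SQR_REC are
+   discarded by the compiler.  E.g. when SQR_TOOM4_THRESHOLD >= 4 *
+   SQR_TOOM2_THRESHOLD, the recursive operands (roughly a quarter of a size
+   already past SQR_TOOM4_THRESHOLD) cannot fall below SQR_TOOM2_THRESHOLD,
+   and the basecase arm is compiled out. */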
+
+void
+mpn_toom4_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  mp_size_t n, s;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+
+  n = (an + 3) >> 2;
+
+  s = an - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrite the
+   * following limb, so these must be computed in order, and we need a
+   * one-limb gap to tp. */
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* No overlap with v1 */
+#define apx   pp				/* n+1 */
+#define amx   (pp + 4*n + 2)			/* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v2, apx, n + 1, tp);	/* v2,  2n+1 limbs */
+  TOOM4_SQR_REC (vm2, amx, n + 1, tp);	/* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3) */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+      MPN_INCR_U (apx + s, n+1-s, cy2);
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
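+
+/* Editor's sketch (toy model, not upstream code): the recurrence
+   cy = 2*cy + mpn_lshift (...) above works because shifting the partial
+   sum left by one bit also doubles the part already carried out.  A
+   one-limb model with an 8-bit "limb": */
+#if 0
+#include <assert.h>
+#include <stdint.h>
+static void
+carry_model (void)
+{
+  uint32_t low = 0xB7, cy = 3, add = 0x95;  /* value = cy*2^8 + low */
+  uint32_t value = (cy << 8) | low;
+  uint32_t shift_out, add_out;
+  shift_out = low >> 7;                     /* bit pushed out by << 1 */
+  low = (low << 1) & 0xFF;
+  add_out = (low + add) >> 8;               /* carry out of the addition */
+  low = (low + add) & 0xFF;
+  cy = 2*cy + shift_out + add_out;          /* the pattern used above */
+  assert (((cy << 8) | low) == 2*value + add);
+}
+#endif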
+
+  ASSERT (apx[n] < 15);
+
+  TOOM4_SQR_REC (vh, apx, n + 1, tp);	/* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v1, apx, n + 1, tp);	/* v1,  2n+1 limbs */
+  TOOM4_SQR_REC (vm1, amx, n + 1, tp);	/* vm1,  2n+1 limbs */
+
+  TOOM4_SQR_REC (v0, a0, n, tp);
+  TOOM4_SQR_REC (vinf, a3, s, tp);	/* vinf, 2s limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
+}
diff --git a/third_party/gmp/mpn/generic/toom52_mul.c b/third_party/gmp/mpn/generic/toom52_mul.c
new file mode 100644
index 0000000..974059b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom52_mul.c
@@ -0,0 +1,256 @@
+/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/2
+   times as large as bn.  Or more accurately, 2 bn < an < 5 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+			|b1|___b0_|
+			<t-><--n-->
+
+  v0  =  a0                  * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
+  vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
+  vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
+  vinf=                   a4 *     b1  # A(inf)*B(inf)
+
+  Some slight optimization in evaluation are taken from the paper:
+  "Towards Optimal Toom-Cook Multiplication for Univariate and
+  Multivariate Polynomials in Characteristic 2 and 0."
+*/
+
+void
+mpn_toom52_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define a4  (ap + 4 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
+
+  s = an - 4 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* Ensures that 5 values of n+1 limbs each fit in the product area.
+     Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
+     n = 8. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp				/* 2n */
+#define vm1   (scratch)				/* 2n+1 */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 5 * n)			/* s+t */
+#define bs1    pp				/* n+1 */
+#define bsm1  (scratch + 2 * n + 2)		/* n   */
+#define asm1  (scratch + 3 * n + 3)		/* n+1 */
+#define asm2  (scratch + 4 * n + 4)		/* n+1 */
+#define bsm2  (pp + n + 1)			/* n+1 */
+#define bs2   (pp + 2 * n + 2)			/* n+1 */
+#define as2   (pp + 3 * n + 3)			/* n+1 */
+#define as1   (pp + 4 * n + 4)			/* n+1 */
+
+  /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
+     products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define a1a3  asm1
+
+  /* Compute as2 and asm2.  */
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      mp_limb_t cy;
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	}
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+  /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
+  mpn_add (bs2, bs1, n+1, b1, t);
+  if (flags & toom6_vm1_neg)
+    {
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      bsm2[n] = 0;
+      if (t == n)
+	{
+	  if (mpn_cmp (bsm1, b1, n) < 0)
+	    {
+	      mpn_sub_n (bsm2, b1, bsm1, n);
+	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+	    }
+	  else
+	    {
+	      mpn_sub_n (bsm2, bsm1, b1, n);
+	    }
+	}
+      else
+	{
+	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+	    {
+	      mpn_sub_n (bsm2, b1, bsm1, t);
+	      MPN_ZERO (bsm2 + t, n - t);
+	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+	    }
+	  else
+	    {
+	      mpn_sub (bsm2, bsm1, n, b1, t);
+	    }
+	}
+    }
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2)));
+
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+/*   ASSERT (bsm1[n] <= 1); */
+  ASSERT (as2[n] <= 30);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 1);
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a4, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a4, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+#undef as1
+#undef as2
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+
+}
diff --git a/third_party/gmp/mpn/generic/toom53_mul.c b/third_party/gmp/mpn/generic/toom53_mul.c
new file mode 100644
index 0000000..c934297
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom53_mul.c
@@ -0,0 +1,331 @@
+/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
+   times as large as bn.  Or more accurately, (4/3)bn < an < (5/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+	       |__b2|___b1_|___b0_|
+	       <-t--><--n--><--n-->
+
+  v0  =    a0                  *  b0          #    A(0)*B(0)
+  v1  = (  a0+ a1+ a2+ a3+  a4)*( b0+ b1+ b2) #    A(1)*B(1)      ah  <= 4   bh <= 2
+  vm1 = (  a0- a1+ a2- a3+  a4)*( b0- b1+ b2) #   A(-1)*B(-1)    |ah| <= 2   bh <= 1
+  v2  = (  a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) #    A(2)*B(2)      ah  <= 30  bh <= 6
+  vm2 = (  a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) #   A(-2)*B(-2)   -9<=ah<=20 -1<=bh<=4
+  vh  = (16a0+8a1+4a2+2a3+  a4)*(4b0+2b1+ b2) #  A(1/2)*B(1/2)    ah  <= 30  bh <= 6
+  vinf=                     a4 *          b2  #  A(inf)*B(inf)
+*/
+
+void
+mpn_toom53_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  mp_ptr tmp;
+  enum toom7_flags flags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
+
+  s = an - 4 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  tmp = TMP_ALLOC_LIMBS (10 * (n + 1));
+  as1  = tmp; tmp += n + 1;
+  asm1 = tmp; tmp += n + 1;
+  as2  = tmp; tmp += n + 1;
+  asm2 = tmp; tmp += n + 1;
+  ash  = tmp; tmp += n + 1;
+  bs1  = tmp; tmp += n + 1;
+  bsm1 = tmp; tmp += n + 1;
+  bs2  = tmp; tmp += n + 1;
+  bsm2 = tmp; tmp += n + 1;
+  bsh  = tmp; tmp += n + 1;
+
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
+
+  /* Compute as2 and asm2. */
+  flags = (enum toom7_flags) (flags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));
+
+  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
+     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+      bsm1[n] = 0;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+      bsm1[n] = bs1[n] - (cy & 1);
+      bs1[n] += (cy >> 1);
+    }
+#else
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, bs1, n);
+      bsm1[n] = 0;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+    }
+  else
+    {
+      bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
+    }
+  bs1[n] += mpn_add_n (bs1, bs1, b1, n);  /* b0+b1+b2 */
+#endif
+
+  /* Compute bs2 and bsm2. */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  cy = mpn_addlsh2_n (bs2, b0, b2, t);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
+#endif
+  if (t < n)
+    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
+  bs2[n] = cy;
+#else
+  cy = mpn_lshift (gp, b2, t, 2);
+  bs2[n] = mpn_add (bs2, b0, n, gp, t);
+  MPN_INCR_U (bs2 + t, n+1-t, cy);
+#endif
+
+  gp[n] = mpn_lshift (gp, b1, n, 1);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
+    }
+#else
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+    }
+  mpn_add_n (bs2, bs2, gp, n+1);
+#endif
+
+  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bsh, b1, b0, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
+      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
+      MPN_INCR_U (bsh + t, n+1-t, cy2);
+    }
+  else
+    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
+#else
+  cy = mpn_lshift (bsh, b0, n, 1);
+  cy += mpn_add_n (bsh, bsh, b1, n);
+  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
+  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
+#endif
+
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 30);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 4);
+  ASSERT (ash[n] <= 30);
+  ASSERT (bsh[n] <= 6);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  if (asm1[n] == 1)
+    {
+      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bsm1[n] + mpn_addlsh1_n_ip1 (vm1 + n, bsm1, n);
+#else
+      cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  vm1[2 * n] = 0;
+  mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+  /* v1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  v1[2 * n] = 0;
+  mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
+#endif /* SMALLER_RECURSION */
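+
+  /* Editor's note: with W = GMP_NUMB_BITS, A1 = {as1,n} + as1[n]*2^(n*W)
+     and B1 = {bs1,n} + bs1[n]*2^(n*W), the SMALLER_RECURSION branches
+     above compute the n-limb product first, then fold in
+     (as1[n]*{bs1,n} + bs1[n]*{as1,n})*2^(n*W) + as1[n]*bs1[n]*2^(2n*W)
+     via the addmul/addlsh fixups, the last term landing in the top limb. */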
+
+  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a4, s);
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
+			     scratch_out);
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/toom54_mul.c b/third_party/gmp/mpn/generic/toom54_mul.c
new file mode 100644
index 0000000..343b02e
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom54_mul.c
@@ -0,0 +1,142 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______
+  |_a4_|__a3__|__a2__|__a1__|__a0__|
+	  |b3_|__b2__|__b1__|__b0__|
+	  <-t-><--n--><--n--><--n-->
+
+*/
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws)		\
+  do {	mpn_mul_n (p, a, b, n);				\
+  } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws)		\
+  do {	mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+void
+mpn_toom54_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int sign;
+
+  /***************************** decomposition *******************************/
+#define a4  (ap + 4 * n)
+#define b3  (bp + 3 * n)
+
+  ASSERT (an >= bn);
+  n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+  s = an - 4 * n;
+  t = bn - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* Required by mpn_toom_interpolate_8pts. */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  ASSERT ( n > 2);
+
+#define   r8    pp				/* 2n   */
+#define   r7    scratch				/* 3n+1 */
+#define   r5    (pp + 3*n)			/* 3n+1 */
+#define   v0    (pp + 3*n)			/* n+1 */
+#define   v1    (pp + 4*n+1)			/* n+1 */
+#define   v2    (pp + 5*n+2)			/* n+1 */
+#define   v3    (pp + 6*n+3)			/* n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (pp + 7*n)			/* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)		/* ??? */
+
+  /* Also allocate 3n+1 limbs for ws; mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch.  */
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
+       ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s,    pp)
+       ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
+       ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity */
+  if (s > t) {
+    TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+  } else {
+    TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+  }
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/third_party/gmp/mpn/generic/toom62_mul.c b/third_party/gmp/mpn/generic/toom62_mul.c
new file mode 100644
index 0000000..d971cc0
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom62_mul.c
@@ -0,0 +1,310 @@
+/* mpn_toom62_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 3 times
+   as large as bn.  Or more accurately, (5/2)bn < an < 6bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in:
+   0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______ ______
+  |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
+			     |_b1_|___b0_|
+			     <-t--><--n-->
+
+  v0  =    a0                       *   b0      #    A(0)*B(0)
+  v1  = (  a0+  a1+ a2+ a3+  a4+  a5)*( b0+ b1) #    A(1)*B(1)      ah  <= 5   bh <= 1
+  vm1 = (  a0-  a1+ a2- a3+  a4-  a5)*( b0- b1) #   A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (  a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) #    A(2)*B(2)      ah  <= 62  bh <= 2
+  vm2 = (  a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) #   A(-2)*B(-2)    -41<=ah<=20 -1<=bh<=0
+  vh  = (32a0+16a1+8a2+4a3+ 2a4+  a5)*(2b0+ b1) #  A(1/2)*B(1/2)    ah  <= 62  bh <= 2
+  vinf=                           a5 *      b1  #  A(inf)*B(inf)
+*/
+
+void
+mpn_toom62_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  mp_ptr gp;
+  enum toom7_flags aflags, bflags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define a5  (ap + 5*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+
+  s = an - 5 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
+  asm2 = TMP_SALLOC_LIMBS (n + 1);
+  ash = TMP_SALLOC_LIMBS (n + 1);
+
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n);
+  bs2 = TMP_SALLOC_LIMBS (n + 1);
+  bsm2 = TMP_SALLOC_LIMBS (n + 1);
+  bsh = TMP_SALLOC_LIMBS (n + 1);
+
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
+
+  /* Compute as2 and asm2. */
+  aflags = (enum toom7_flags) (aflags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp)));
+
+  /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
+     = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5  */
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a5, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a4, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	  bflags = (enum toom7_flags) 0;
+	}
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	  bflags = (enum toom7_flags) 0;
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	  bflags = (enum toom7_flags) 0;
+	}
+    }
+
+  /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
+     bsm1 - b1 */
+  mpn_add (bs2, bs1, n + 1, b1, t);
+  if (bflags & toom7_w3_neg)
+    {
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+    }
+  else
+    {
+      /* FIXME: Simplify this logic? */
+      if (t < n)
+	{
+	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
+	      MPN_ZERO (bsm2 + t, n + 1 - t);
+	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+	    }
+	  else
+	    {
+	      ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
+	      bsm2[n] = 0;
+	    }
+	}
+      else
+	{
+	  if (mpn_cmp (bsm1, b1, n) < 0)
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
+	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+	    }
+	  else
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
+	    }
+	  bsm2[n] = 0;
+	}
+    }
+
+  /* Compute bsh, recycling bs1. bsh=bs1+b0;  */
+  bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
+
+  ASSERT (as1[n] <= 5);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (as2[n] <= 62);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 41);
+  ASSERT (bsm2[n] <= 1);
+  ASSERT (ash[n] <= 62);
+  ASSERT (bsh[n] <= 2);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  cy = 0;
+  if (asm1[n] == 1)
+    {
+      cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
+#else
+      cy = mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  vm1[2 * n] = cy;
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+  v1[2 * n] = cy;
+
+  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a5, s);
+
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
+			     vm2, vm1, v2, vh, s + t, scratch_out);
+
+  TMP_FREE;
+}
diff --git a/third_party/gmp/mpn/generic/toom63_mul.c b/third_party/gmp/mpn/generic/toom63_mul.c
new file mode 100644
index 0000000..181996d
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom63_mul.c
@@ -0,0 +1,231 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n}; returns 0 if {ap,n} >= {bp,n},
+   ~0 otherwise. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t  x, y;
+  while (--n >= 0)
+    {
+      x = ap[n];
+      y = bp[n];
+      if (x != y)
+	{
+	  n++;
+	  if (x > y)
+	    {
+	      mpn_sub_n (rp, ap, bp, n);
+	      return 0;
+	    }
+	  else
+	    {
+	      mpn_sub_n (rp, bp, ap, n);
+	      return ~0;
+	    }
+	}
+      rp[n] = 0;
+    }
+  return 0;
+}
+
+static int
+abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
+  int result;
+  result = abs_sub_n (rm, rp, rs, n);
+  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
+  return result;
+}
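+
+/* Editor's sketch (toy scalar model, not upstream code): abs_sub_add_n
+   leaves {rp,n}+{rs,n} in rp and |{rp,n}-{rs,n}| in rm, returning ~0 when
+   the difference was negative; callers xor that into a running sign, as
+   done for the evaluations below. */
+#if 0
+#include <assert.h>
+static void
+abs_sub_add_model (void)
+{
+  long u = 5, v = 9;                 /* stand-ins for {rp,n} and {rs,n} */
+  long sum = u + v;                  /* what ends up in rp */
+  int sign = u < v ? ~0 : 0;         /* the return value */
+  long mag = sign ? v - u : u - v;   /* what ends up in rm */
+  assert (sum == 14 && mag == 4 && sign == ~0);
+}
+#endif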
+
+
+/* Toom-4.5, the splitting 6x3 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______ ______
+  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
+			|b2_|__b1__|__b0__|
+			<-t-><--n--><--n-->
+
+*/
+#define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
+  do {	mpn_mul_n (p, a, b, n);				\
+  } while (0)
+
+#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
+  do {	mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+void
+mpn_toom63_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  int sign;
+
+  /***************************** decomposition *******************************/
+#define a5  (ap + 5 * n)
+#define b0  (bp + 0 * n)
+#define b1  (bp + 1 * n)
+#define b2  (bp + 2 * n)
+
+  ASSERT (an >= bn);
+  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
+
+  s = an - 5 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* WARNING! it assumes s+t>=n */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  /* WARNING! it assumes n > 2 */
+  ASSERT ( n > 2);
+
+#define   r8    pp				/* 2n   */
+#define   r7    scratch				/* 3n+1 */
+#define   r5    (pp + 3*n)			/* 3n+1 */
+#define   v0    (pp + 3*n)			/* n+1 */
+#define   v1    (pp + 4*n+1)			/* n+1 */
+#define   v2    (pp + 5*n+2)			/* n+1 */
+#define   v3    (pp + 6*n+3)			/* n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (pp + 7*n)			/* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)		/* ??? */
+
+  /* Also allocate 3n+1 limbs for ws; mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch.  */
+/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
+  /* FIXME: use addlsh */
+  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  /* Compute bs1 and bsm1. Code taken from toom33 */
+  cy = mpn_add (ws, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
+      v3[n] = cy >> 1;
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
+      v3[n] = cy + (cy2 >> 1);
+      v1[n] = cy - (cy2 & 1);
+    }
+#else
+  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      mpn_sub_n (v1, b1, ws, n);
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      cy -= mpn_sub_n (v1, ws, b1, n);
+      v1[n] = cy;
+    }
+#endif
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
+  /* FIXME: use addlsh or addlsh2 */
+  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity */
+  if (s > t) {
+    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
+  } else {
+    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
+  }
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a5
+#undef b0
+#undef b1
+#undef b2
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/third_party/gmp/mpn/generic/toom6_sqr.c b/third_party/gmp/mpn/generic/toom6_sqr.c
new file mode 100644
index 0000000..336eef9
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom6_sqr.c
@@ -0,0 +1,181 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_above_toom4   1
+#else
+#ifdef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
+#else
+#define SQR_TOOM6_MAX					\
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
+   ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
+   : MP_SIZE_T_MAX )
+#endif
+#define MAYBE_sqr_basecase					\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase				\
+  (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2					\
+  (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3					\
+  (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4					\
+  (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
+#endif
+
+#define TOOM6_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+    else if (! MAYBE_sqr_above_toom4					\
+	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
+      mpn_toom4_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom6_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= 18);
+
+  n = 1 + (an - 1) / (size_t) 6;
+
+  s = an - 5 * n;
+
+  ASSERT (0 < s && s <= n);
+
+#define   r4    (pp + 3 * n)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+#define   r5    (scratch)			/* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   v0    (pp + 7 * n)			/* n+1 */
+#define   v2    (pp + 9 * n+2)			/* n+1 */
+#define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
+
+  /* Also allocate 3n+1 limbs for ws; toom_interpolate_12pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch.  */
+/*   if (scratch== NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
+  TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
+  TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
+  TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
+  TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
+  TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
+
+#undef v0
+#undef v2
+
+  /* A(0)*B(0) */
+  TOOM6_SQR_REC(pp, ap, n, wse);
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+
+}
+#undef TOOM6_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/third_party/gmp/mpn/generic/toom6h_mul.c b/third_party/gmp/mpn/generic/toom6h_mul.c
new file mode 100644
index 0000000..637f2a5
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom6h_mul.c
@@ -0,0 +1,262 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom6h   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom6h						\
+  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
+#endif
+
+#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+      if (f)								\
+	mpn_mul_basecase (p2, a2, n, b2, n);				\
+    } else if (MAYBE_mul_toom22						\
+	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
+    } else if (MAYBE_mul_toom33						\
+	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
+    } else if (! MAYBE_mul_toom6h					\
+	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
+    } else {								\
+      mpn_toom6h_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
+    }									\
+  } while (0)
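+
+/* Editor's note: the f flag lets a single TOOM6H_MUL_N_REC invocation
+   produce both products of an evaluation pair (A(+x)*B(+x) and
+   A(-x)*B(-x)) through one threshold dispatch. */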
+
+#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
+  do { mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+/* Toom-6.5, compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 46, an*6 <  bn * 17.
+   It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
+
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
+
+void
+mpn_toom6h_mul   (mp_ptr pp,
+		  mp_srcptr ap, mp_size_t an,
+		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int p, q, half;
+  int sign;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= bn);
+  /* Cannot handle too much imbalance */
+  ASSERT (bn >= 42);
+  /* Cannot handle too much imbalance */
+  ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
+
+  /* Limit num/den is a rational number between
+     (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
+#define LIMIT_numerator (18)
+#define LIMIT_denominat (17)
+
+  if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* an/bn < 18/17: balanced 6x6 split */
+    {
+      n = 1 + (an - 1) / (size_t) 6;
+      p = q = 5;
+      half = 0;
+
+      s = an - 5 * n;
+      t = bn - 5 * n;
+    }
+  else {
+    if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
+      { p = 7; q = 6; }
+    else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
+      { p = 7; q = 5; }
+    else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* an/bn < 2*17/18: 8x5 split */
+      { p = 8; q = 5; }
+    else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* an/bn < 2*18/17: 8x4 split */
+      { p = 8; q = 4; }
+    else
+      { p = 9; q = 4; }
+
+    half = (p ^ q) & 1;
+    n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+    p--; q--;
+
+    s = an - p * n;
+    t = bn - q * n;
+
+    /* With LIMIT = 16/15, the following recovery is needed only if bn <= 73 */
+    if (half) { /* Recover from badly chosen splitting */
+      if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+      else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+    }
+  }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
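+
+/* Editor's check (toy sizes, not upstream code): for an = 240, bn = 120
+   the comparison chain above lands in the p = 8, q = 4 arm, since
+   an*17 < 18*2*bn while the earlier tests fail. */
+#if 0
+#include <assert.h>
+static void
+toom6h_split_example (void)
+{
+  long an = 240, bn = 120, n, s, t, p, q, half;
+  p = 8; q = 4;
+  half = (p ^ q) & 1;          /* 0: no separate "infinity" product */
+  n = 1 + (q * an >= p * bn ? (an - 1) / p : (bn - 1) / q);  /* 30 */
+  p--; q--;                    /* a gets p+1 = 8 pieces, b gets 4 */
+  s = an - p * n;              /* 240 - 7*30 = 30 */
+  t = bn - q * n;              /* 120 - 3*30 = 30 */
+  assert (n == 30 && s == 30 && t == 30 && half == 0);
+}
+#endif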
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r4    (pp + 3 * n)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+#define   r5    (scratch)			/* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   v0    (pp + 7 * n)			/* n+1 */
+#define   v1    (pp + 8 * n+1)			/* n+1 */
+#define   v2    (pp + 9 * n+2)			/* n+1 */
+#define   v3    (scratch + 9 * n + 3)		/* n+1 */
+#define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
+#define   wse   (scratch +10 * n + 4)		/* 2n+1 */
+
+  /* Also allocate 3n+1 limbs for wsi; toom_interpolate_12pts may
+     need all of them.  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
+  ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
+  ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (UNLIKELY (q == 3))
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-4)*B(-4) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+  /* Infinity */
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
+      TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    }
+  }
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef wsi
+}
+
+#undef TOOM6H_MUL_N_REC
+#undef TOOM6H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom6h
diff --git a/third_party/gmp/mpn/generic/toom8_sqr.c b/third_party/gmp/mpn/generic/toom8_sqr.c
new file mode 100644
index 0000000..03e5c64
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom8_sqr.c
@@ -0,0 +1,225 @@
+/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_toom4   1
+#define MAYBE_sqr_above_toom4   1
+#define MAYBE_sqr_above_toom6   1
+#else
+#define SQR_TOOM8_MAX					\
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
+   ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
+   : MP_SIZE_T_MAX )
+#define MAYBE_sqr_basecase					\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase				\
+  (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2					\
+  (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3					\
+  (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_toom4						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom4					\
+  (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom6					\
+  (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
+#endif
+
+#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
+  do {									\
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
+      mpn_sqr_basecase (p, a, n);					\
+      if (f) mpn_sqr_basecase (p2, a2, n);				\
+    } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+      if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
+    } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+      if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
+    } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
+      mpn_toom4_sqr (p, a, n, ws);					\
+      if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
+    } else if (! MAYBE_sqr_above_toom6					\
+	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
+      mpn_toom6_sqr (p, a, n, ws);					\
+      if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
+    } else {								\
+      mpn_toom8_sqr (p, a, n, ws);					\
+      if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
+    }									\
+  } while (0)
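+/* Note: the f argument batches the two halves of an evaluated couple:
+   when f is nonzero the selected routine is applied twice, to (p, a)
+   and to (p2, a2), so that A(+x)^2 and A(-x)^2 take the same code
+   path.  */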
+
+void
+mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT ( an >= 40 );
+
+  n = 1 + ((an - 1)>>3);
+
+  s = an - 7 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT ( s + s > 3 );
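+  /* Illustrative example (not part of the original sources): an = 100
+     gives n = 1 + ((100 - 1) >> 3) = 13 and s = 100 - 7*13 = 9, i.e.
+     the operand is seen as 7 full blocks of 13 limbs plus a final
+     block of 9 limbs.  */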
+
+#define   r6    (pp + 3 * n)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+#define   r7    (scratch)			/* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
+#define   v0    (pp +11 * n)			/* n+1 */
+#define   v2    (pp +13 * n+2)			/* n+1 */
+#define   wse   (scratch +12 * n + 4)		/* 3n+1 */
+
+  /* Also allocate 3n+1 limbs for ws... toom_interpolate_16pts may
+     need all of them when DO_mpn_sublsh_n uses a scratch area.  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/8$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
+
+  /* $\pm8$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
+
+  /* $\pm1/2$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
+
+#undef v0
+#undef v2
+
+  /* A(0)*B(0) */
+  TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wse
+
+}
+
+#undef TOOM8_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/third_party/gmp/mpn/generic/toom8h_mul.c b/third_party/gmp/mpn/generic/toom8h_mul.c
new file mode 100644
index 0000000..5ba259a
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom8h_mul.c
@@ -0,0 +1,305 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom44   1
+#define MAYBE_mul_toom8h   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom44						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
+#define MAYBE_mul_toom8h						\
+  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
+#endif
+
+#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+      if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
+    } else if (MAYBE_mul_toom22						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
+    } else if (MAYBE_mul_toom33						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
+    } else if (MAYBE_mul_toom44						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
+    } else if (! MAYBE_mul_toom8h					\
+	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
+      mpn_toom6h_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
+    } else {								\
+      mpn_toom8h_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
+    }									\
+  } while (0)
+
+#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
+  do { mpn_mul (p, a, na, b, nb); } while (0)
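+/* Note: the balanced n x n recursive products go through the threshold
+   ladder in TOOM8H_MUL_N_REC above; TOOM8H_MUL_REC handles the single,
+   possibly unbalanced, s x t product at the infinity point by falling
+   back to the generic mpn_mul.  */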
+
+/* Toom-8.5, compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 86, an*5 < bn*11.
+   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
+
+   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
+   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
+ */
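+/* Worked instance of the bound above (illustrative, assuming the MAX
+   term is dominated by 1+2*(n+7)\8 at the second level): for n = 800,
+   ((800+7)\8)*13+5 = 1305 and MAX(S(100), 201) = 201, since
+   S(100) <= 13*13+5+27 = 201; hence S(800) <= 1506, below the stated
+   800*15\8 + lg2(800)*6 ~= 1557.  */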
+
+void
+mpn_toom8h_mul   (mp_ptr pp,
+		  mp_srcptr ap, mp_size_t an,
+		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int p, q, half;
+  int sign;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= bn);
+  /* Cannot handle operands that are too small */
+  ASSERT (bn >= 86);
+  /* Cannot handle too much imbalance */
+  ASSERT (an <= bn*4);
+  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
+  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
+  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
+
+  /* Limit num/den is a rational number between
+     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
+#define LIMIT_numerator (21)
+#define LIMIT_denominat (20)
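+/* Illustrative check: (16/15)^(log(6)/log(11)) ~= 1.0494 and
+   (16/15)^(log(8)/log(15)) ~= 1.0508, so 21/20 = 1.05 does lie between
+   the two bounds.  */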
+
+  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
+    {
+      half = 0;
+      n = 1 + ((an - 1)>>3);
+      p = q = 7;
+      s = an - 7 * n;
+      t = bn - 7 * n;
+    }
+  else
+    {
+      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
+	{ p = 9; q = 8; }
+      else if (GMP_NUMB_BITS <= 9*3 ||
+	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
+	{ p = 9; q = 7; }
+      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
+	{ p =10; q = 7; }
+      else if (GMP_NUMB_BITS <= 10*3 ||
+	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
+	{ p =10; q = 6; }
+      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
+	{ p =11; q = 6; }
+      else if (GMP_NUMB_BITS <= 11*3 ||
+	       an * 4 < 9 * bn)
+	{ p =11; q = 5; }
+      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
+	{ p =12; q = 5; }
+      else if (GMP_NUMB_BITS <= 12*3 ||
+	       an * 9 < 28 * bn )  /* is 4*... <12*... */
+	{ p =12; q = 4; }
+      else
+	{ p =13; q = 4; }
+
+      half = (p+q)&1;
+      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+      p--; q--;
+
+      s = an - p * n;
+      t = bn - q * n;
+
+      if(half) { /* Recover from badly chosen splitting */
+	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+      }
+    }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
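+  /* Illustrative example (not from the original sources, assuming none
+     of the GMP_NUMB_BITS guards fire): an = 1000, bn = 500 selects
+     p = 11, q = 6, so half = 1 and n = 1 + 999/11 = 91; after the
+     decrements p = 10, q = 5, giving s = 1000 - 10*91 = 90 and
+     t = 500 - 5*91 = 45: A splits into 10 blocks of 91 limbs plus one
+     of 90, B into 5 blocks of 91 plus one of 45.  */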
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r6    (pp + 3 * n)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+#define   r7    (scratch)			/* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
+#define   v0    (pp +11 * n)			/* n+1 */
+#define   v1    (pp +12 * n+1)			/* n+1 */
+#define   v2    (pp +13 * n+2)			/* n+1 */
+#define   v3    (scratch +12 * n + 4)		/* n+1 */
+#define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
+#define   wse   (scratch +13 * n + 5)		/* 2n+1 */
+
+  /* Also allocate 3n+1 limbs for wsi... toom_interpolate_16pts may
+     need all of them.  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
+  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
+  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
+
+  /********************** evaluation and recursive calls *********************/
+
+  /* $\pm1/8$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
+
+  /* $\pm8$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
+
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+  /* Infinity */
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
+      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    };
+  };
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wsi
+}
+
+#undef TOOM8H_MUL_N_REC
+#undef TOOM8H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom44
+#undef MAYBE_mul_toom8h
diff --git a/third_party/gmp/mpn/generic/toom_couple_handling.c b/third_party/gmp/mpn/generic/toom_couple_handling.c
new file mode 100644
index 0000000..cd253f7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_couple_handling.c
@@ -0,0 +1,80 @@
+/* Helper function for high degree Toom-Cook algorithms.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Gets {pp,n} and (sign?-1:1)*{np,n}. Computes at once:
+     {pp,n} <- ({pp,n}+{np,n})/2^{ps+1}
+     {np,n} <- ({pp,n}-{np,n})/2^{ns+1}
+   Finally recomposes them, obtaining:
+     {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
+*/
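+/* For a couple f(x), +/-f(-x) produced by the evaluation phase, sum
+   and difference separate the even-index from the odd-index
+   coefficient contributions, each carrying a known power-of-two factor
+   that the shifts by ps and ns remove; see the callers in toom6h_mul.c
+   and toom8h_mul.c for the (ps, ns) pairs used at each point.  */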
+void
+mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
+			  int nsign, mp_size_t off, int ps, int ns)
+{
+  if (nsign) {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+    mpn_rsh1sub_n (np, pp, np, n);
+#else
+    mpn_sub_n (np, pp, np, n);
+    mpn_rshift (np, np, n, 1);
+#endif
+  } else {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+    mpn_rsh1add_n (np, pp, np, n);
+#else
+    mpn_add_n (np, pp, np, n);
+    mpn_rshift (np, np, n, 1);
+#endif
+  }
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  if (ps == 1)
+    mpn_rsh1sub_n (pp, pp, np, n);
+  else
+#endif
+  {
+    mpn_sub_n (pp, pp, np, n);
+    if (ps > 0)
+      mpn_rshift (pp, pp, n, ps);
+  }
+  if (ns > 0)
+    mpn_rshift (np, np, n, ns);
+  pp[n] = mpn_add_n (pp+off, pp+off, np, n-off);
+  ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) );
+}
diff --git a/third_party/gmp/mpn/generic/toom_eval_dgr3_pm1.c b/third_party/gmp/mpn/generic/toom_eval_dgr3_pm1.c
new file mode 100644
index 0000000..5f491b6
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_dgr3_pm1.c
@@ -0,0 +1,72 @@
+/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
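+/* With A(x) = x0 + x1*x + x2*x^2 + x3*x^3 split into n-limb blocks
+   (the last one x3n limbs long),
+     A(+1) = (x0 + x2) + (x1 + x3),
+     A(-1) = (x0 + x2) - (x1 + x3);
+   the code forms xp1 = x0 + x2 and tp = x1 + x3, returning in neg
+   whether A(-1) is negative (xm1 then holds its absolute value).  */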
+int
+mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
+			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  int neg;
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+  ASSERT (xp1[n] <= 3);
+  ASSERT (xm1[n] <= 1);
+
+  return neg;
+}
diff --git a/third_party/gmp/mpn/generic/toom_eval_dgr3_pm2.c b/third_party/gmp/mpn/generic/toom_eval_dgr3_pm2.c
new file mode 100644
index 0000000..55e6b89
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_dgr3_pm2.c
@@ -0,0 +1,97 @@
+/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Needs n+1 limbs of temporary storage. */
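+/* With A(x) = x0 + x1*x + x2*x^2 + x3*x^3, the values at +-2 are
+     A(+-2) = (x0 + 4*x2) +- 2*(x1 + 4*x3);
+   xp2 accumulates the even part x0 + 4*x2, while tp accumulates
+   x1 + 4*x3 and is then doubled by the final mpn_lshift.  */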
+int
+mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
+			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  mp_limb_t cy;
+  int neg;
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  /* (x0 + 4*x2) +/- (2*x1 + 8*x3) */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);
+
+  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);
+
+  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
+#endif
+  if (x3n < n)
+    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
+  tp[n] = cy;
+#else
+  cy = mpn_lshift (tp, xp + 2*n, n, 2);
+  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);
+
+  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);
+  if (x3n < n)
+    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
+  else
+    tp[n] += mpn_add_n (tp, xp + n, tp, n);
+#endif
+  mpn_lshift (tp, tp, n+1, 1);
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif
+
+  ASSERT (xp2[n] < 15);
+  ASSERT (xm2[n] < 10);
+
+  return neg;
+}
diff --git a/third_party/gmp/mpn/generic/toom_eval_pm1.c b/third_party/gmp/mpn/generic/toom_eval_pm1.c
new file mode 100644
index 0000000..a8cfa93
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_pm1.c
@@ -0,0 +1,89 @@
+/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
+int
+mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
+		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  unsigned i;
+  int neg;
+
+  ASSERT (k >= 4);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+  for (i = 4; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
+
+  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
+  for (i = 5; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
+
+  if (k & 1)
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
+  else
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+  ASSERT (xp1[n] <= k);
+  ASSERT (xm1[n] <= k/2 + 1);
+
+  return neg;
+}
diff --git a/third_party/gmp/mpn/generic/toom_eval_pm2.c b/third_party/gmp/mpn/generic/toom_eval_pm2.c
new file mode 100644
index 0000000..be682c7
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_pm2.c
@@ -0,0 +1,130 @@
+/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller and Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}); it
+   can be used as DO_addlsh2(d,a,d,n,d[n]) for accumulation on {d,n+1}. */
+#if HAVE_NATIVE_mpn_addlsh2_n
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_addlsh2_n(d, a, b, n);	\
+} while (0)
+#else
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_addlsh_n(d, a, b, n, 2);	\
+} while (0)
+#else
+/* The following is not a general substitute for addlsh2.
+   It is correct if d == b, but it is not if d == a.  */
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_lshift(d, b, n, 2);	\
+  (cy) += mpn_add_n(d, d, a, n);	\
+} while (0)
+#endif
+#endif
+
+/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
+   points +2 and -2. */
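+/* Illustration: for k = 4 the two accumulators are built by Horner
+   steps in x^2 = 4,
+     xp2 = x0 + 4*(x2 + 4*x4) = x0 + 4*x2 + 16*x4   (even part)
+     tp  = 2*(x1 + 4*x3)     (odd part, doubled by the mpn_lshift)
+   so that X(+-2) = xp2 +- tp.  */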
+int
+mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
+		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  int i;
+  int neg;
+  mp_limb_t cy;
+
+  ASSERT (k >= 3);
+  ASSERT (k < GMP_NUMB_BITS);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  cy = 0;
+  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
+  if (hn != n)
+    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
+  for (i = k - 4; i >= 0; i -= 2)
+    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
+  xp2[n] = cy;
+
+  k--;
+
+  cy = 0;
+  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
+  for (i = k - 4; i >= 0; i -= 2)
+    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
+  tp[n] = cy;
+
+  if (k & 1)
+    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
+  else
+    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  ASSERT (xp2[n] < (1<<(k+2))-1);
+  ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
+
+  neg ^= ((k & 1) - 1);
+
+  return neg;
+}
+
+#undef DO_addlsh2
diff --git a/third_party/gmp/mpn/generic/toom_eval_pm2exp.c b/third_party/gmp/mpn/generic/toom_eval_pm2exp.c
new file mode 100644
index 0000000..c3c4651
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_pm2exp.c
@@ -0,0 +1,127 @@
+/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
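+/* Concretely, the code accumulates the even-index and odd-index parts
+     xp2 = x0 + x2*2^(2*shift) + x4*2^(4*shift) + ...
+     tp  = x1*2^shift + x3*2^(3*shift) + ...
+   and then forms X(+2^shift) = xp2 + tp and |X(-2^shift)| =
+   |xp2 - tp|, with the sign of X(-2^shift) returned in neg.  */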
+int
+mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
+		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
+		      mp_ptr tp)
+{
+  unsigned i;
+  int neg;
+#if HAVE_NATIVE_mpn_addlsh_n
+  mp_limb_t cy;
+#endif
+
+  ASSERT (k >= 3);
+  ASSERT (shift*k < GMP_NUMB_BITS);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+#if HAVE_NATIVE_mpn_addlsh_n
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
+  for (i = 4; i < k; i += 2)
+    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);
+  for (i = 3; i < k; i+= 2)
+    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
+
+  if (k & 1)
+    {
+      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
+      MPN_INCR_U (tp + hn, n+1 - hn, cy);
+    }
+  else
+    {
+      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
+      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
+    }
+
+#else /* !HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
+  xp2[n] += mpn_add_n (xp2, xp, tp, n);
+  for (i = 4; i < k; i += 2)
+    {
+      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
+      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
+    }
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);
+  for (i = 3; i < k; i+= 2)
+    {
+      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);
+      tp[n] += mpn_add_n (tp, tp, xm2, n);
+    }
+
+  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);
+  if (k & 1)
+    mpn_add (tp, tp, n+1, xm2, hn+1);
+  else
+    mpn_add (xp2, xp2, n+1, xm2, hn+1);
+#endif /* !HAVE_NATIVE_mpn_addlsh_n */
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
+  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
+	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
+  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
+	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
+
+  return neg;
+}
diff --git a/third_party/gmp/mpn/generic/toom_eval_pm2rexp.c b/third_party/gmp/mpn/generic/toom_eval_pm2rexp.c
new file mode 100644
index 0000000..6cd62fb
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_eval_pm2rexp.c
@@ -0,0 +1,101 @@
+/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
+
+   Contributed to the GNU project by Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+/* Evaluates a polynomial of degree q >= 3 in the points +2^(-s) and
+   -2^(-s); both values are returned scaled by 2^(s*q), so that all
+   intermediate quantities are integers. */
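+/* Written out (illustrative), the accumulators below are
+     rp = x0*2^(s*q) + x2*2^(s*(q-2)) + ...      (even-index terms)
+     ws = x1*2^(s*(q-1)) + x3*2^(s*(q-3)) + ...  (odd-index terms)
+   so 2^(s*q)*A(+2^(-s)) = rp + ws and 2^(s*q)*A(-2^(-s)) is
+   +-(rp - ws), with the sign returned in neg.  */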
+int
+mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
+		      unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
+		      unsigned int s, mp_ptr ws)
+{
+  unsigned int i;
+  int neg;
+  /* {ap, q*n+t} -> {rp, n+1}, {rm, n+1}, using {ws, n+1} as scratch */
+  ASSERT (n >= t);
+  ASSERT (s != 0); /* or _eval_pm1 should be used */
+  ASSERT (q > 1);
+  ASSERT (s*q < GMP_NUMB_BITS);
+  rp[n] = mpn_lshift(rp, ap, n, s*q);
+  ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));
+  if( (q & 1) != 0) {
+    ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);
+  } else {
+    ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
+  }
+  for(i=2; i<q-1; i++)
+  {
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);
+    i++;
+    ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);
+  };
+
+  neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
+  else
+    mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (rm, ws, rp, n + 1);
+  else
+    mpn_sub_n (rm, rp, ws, n + 1);
+
+  ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  return neg;
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_12pts.c b/third_party/gmp/mpn/generic/toom_interpolate_12pts.c
new file mode 100644
index 0000000..347e341
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_12pts.c
@@ -0,0 +1,370 @@
+/* Interpolation for the algorithm Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented: Both sublsh_n(,,,20) should be corrected.
+#endif
+
+#if GMP_NUMB_BITS < 16
+#error Not implemented: divexact_by42525 needs splitting.
+#endif
+
+#if GMP_NUMB_BITS < 12
+#error Not implemented: Hard to adapt...
+#endif
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
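+/* These constants are binary (Hensel) inverses: 9*BINVERT_9 and
+   255*BINVERT_255 are congruent to 1 mod 2^GMP_NUMB_BITS, the form of
+   inverse mpn_pi1_bdiv_q_1 expects for its exact divisions.  */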
+
+  /* FIXME: find some more general expressions for 2835^-1, 42525^-1 */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
+#else
+#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
+#else
+#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
+#else
+#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
+#endif
+#endif
+
+/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
+   points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 11 (or 10), given the 12 (resp. 11) values:
+
+     r0 = limit at infinity of f(x) / x^11,
+     r1 = f(4),f(-4),
+     r2 = f(2),f(-2),
+     r3 = f(1),f(-1),
+     r4 = f(1/4),f(-1/4),
+     r5 = f(1/2),f(-1/2),
+     r6 = f(0).
+
+   All couples of the form f(n),f(-n) must already have been mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 7*n (or 6*n)}.
+   At entry, r6 is stored at {pp, 2n},
+   r4 is stored at {pp + 3n, 3n + 1}.
+   r2 is stored at {pp + 7n, 3n + 1}.
+   r0 is stored at {pp +11n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored in two's complement.
+   Inputs are destroyed.
+*/
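+/* In outline, the sequence below is an exact in-place elimination on
+   these 12 values: differences and shifted differences first remove
+   the contributions known from r0 and r6, small multipliers (257, 60,
+   100, 225) then combine rows, and the divisions by 2835*4, 255, 42525
+   and 9*4 are exact (see the mpn_divexact_by* macros above), with
+   negative intermediates kept in two's complement.  */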
+
+void
+mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
+			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+#define   r4    (pp + n3)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+
+  /******************************* interpolation *****************************/
+  if (half != 0) {
+    cy = mpn_sub_n (r3, r3, r0, spt);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
+    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
+  };
+
+  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
+  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
+  MP_PTR_SWAP(r1, wsi);
+#endif
+
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi);
+#endif
+
+  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
+#else
+  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
+  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
+#endif
+  /* A division by 2835x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x4(r4, r4, n3p1);
+  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
+#endif
+  mpn_divexact_by255(r5, r5, n3p1);
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));
+
+#if AORSMUL_FASTER_3AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
+#else
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
+#endif
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
+  mpn_divexact_by42525(r1, r1, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
+#endif
+  mpn_divexact_by9x4(r2, r2, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (r4, r2, r4, n3p1);
+  r4 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_sub_n (r4, r2, r4, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r5, r5, r1, n3p1);
+  r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r5, r5, r1, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+
+  /* last interpolation steps... */
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
+  /* ... could be mixed with recomposition
+	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+
+    summation scheme for remaining operations:
+    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r5, n);
+  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
+  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
+  if (half) {
+    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_16pts.c b/third_party/gmp/mpn/generic/toom_interpolate_16pts.c
new file mode 100644
index 0000000..5d76bba
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_16pts.c
@@ -0,0 +1,541 @@
+/* Interpolation for the algorithm Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
+#endif
+
+#if GMP_NUMB_BITS < 28
+#error Not implemented: divexact_by188513325 and _by182712915 will not work.
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
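+
+/* Editor's note -- illustrative sketch, not part of GMP: the BINVERT_*
+   values are inverses of odd divisors modulo 2^GMP_NUMB_BITS, so an
+   exact multiple of d can be divided by d with one multiplication per
+   limb.  Assuming 64-bit limbs and no nails, such an inverse can be
+   derived by Newton iteration, doubling the number of correct low bits
+   at every step:
+
+     uint64_t binvert (uint64_t d)  // d must be odd
+     {
+       uint64_t inv = d;            // d*d == 1 (mod 8): 3 bits correct
+       inv *= 2 - d * inv;          //  6 bits
+       inv *= 2 - d * inv;          // 12 bits
+       inv *= 2 - d * inv;          // 24 bits
+       inv *= 2 - d * inv;          // 48 bits
+       inv *= 2 - d * inv;          // 96 bits >= 64: done
+       return inv;                  // e.g. binvert (9) * 9 == 1 (mod 2^64)
+     }
+*/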
+
+  /* FIXME: find some more general expressions for inverses */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
+#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
+#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
+#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
+#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
+#if GMP_NAIL_BITS == 0
+#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
+#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
+#else /* GMP_NAIL_BITS != 0 */
+#define BINVERT_255x182712915H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
+#define BINVERT_255x188513325H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
+#endif
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
+#define BINVERT_255x182712915  (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
+#define BINVERT_255x188513325  (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
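+
+/* Editor's note, not from the GMP sources: the mpn_bdiv_dbm1 branch
+   above applies because 255 = 2^8 - 1 divides B - 1 = 2^GMP_NUMB_BITS - 1
+   whenever GMP_NUMB_BITS is a multiple of 8, and mpn_bdiv_dbm1 is
+   specialised for divisors of B - 1, needing one multiply per limb.  */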
+
+#ifndef mpn_divexact_by255x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
+#else
+#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
+#else
+#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
+#else
+#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x64
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
+#else
+#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
+#endif
+#endif
+
+#ifndef  mpn_divexact_by255x182712915
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
+/* FIXME: use mpn_bdiv_q_2_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
+#define mpn_divexact_by255x182712915(dst,src,size)				\
+  do {										\
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);	\
+    mpn_divexact_by255(dst,dst,size);						\
+  } while(0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size)	\
+  do {							\
+    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
+    mpn_divexact_by255(dst,dst,size);			\
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
+#define mpn_divexact_by255x182712915(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+#ifndef  mpn_divexact_by255x188513325
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
+/* FIXME: use mpn_bdiv_q_1_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
+#define mpn_divexact_by255x188513325(dst,src,size)			\
+  do {									\
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);	\
+    mpn_divexact_by255(dst,dst,size);					\
+  } while(0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size)	\
+  do {							\
+    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
+    mpn_divexact_by255(dst,dst,size);			\
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
+#define mpn_divexact_by255x188513325(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
+   points: infinity (8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
+   +-1/8, 0. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
+   14), given the 16 (resp. 15) values:
+
+     r0 = limit at infinity of f(x) / x^15,
+     r1 = f(8),f(-8),
+     r2 = f(4),f(-4),
+     r3 = f(2),f(-2),
+     r4 = f(1),f(-1),
+     r5 = f(1/4),f(-1/4),
+     r6 = f(1/2),f(-1/2),
+     r7 = f(1/8),f(-1/8),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must already be mixed with
+   toom_couple_handling(f(n),...,f(-n),...).
+
+   The result is stored in {pp, spt + 15*n (or 14*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r6 is stored at {pp + 3n, 3n + 1},
+   r4 is stored at {pp + 7n, 3n + 1},
+   r2 is stored at {pp +11n, 3n + 1},
+   r0 is stored at {pp +15n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored in two's complement.
+   Inputs are destroyed.
+*/
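+
+/* Editor's note -- sketch of the convention, not from the GMP sources:
+   "mixing" a pair with toom_couple_handling replaces f(k), f(-k) by
+   their half sum and half difference (up to normalizing shifts),
+
+     (f(k) + f(-k)) / 2 = a0 + a2*k^2 + a4*k^4 + ...   the even part,
+     (f(k) - f(-k)) / 2 = a1*k + a3*k^3 + a5*k^5 + ... the odd part,
+
+   which is why each r_i above can stand for a whole pair of
+   evaluation points.  */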
+
+void
+mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
+			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+#define   r6    (pp + n3)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+
+  ASSERT( spt <= 2 * n );
+  /******************************* interpolation *****************************/
+  if (half != 0) {
+    cy = mpn_sub_n (r4, r4, r0, spt);
+    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
+
+    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
+		    n3p1 - spt - BIT_CORRECTION, cy);
+    ASSERT (BIT_CORRECTION > 0 || cy == 0);
+    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
+    cy = r7[n3p1];
+    r7[n3p1] = 0x80;
+#else
+    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
+#endif
+    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
+#if BIT_CORRECTION
+    /* FIXME: assumes r7[n3p1] is writable. */
+    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
+    r7[n3p1] = cy;
+#endif
+  }
+
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi);
+#endif
+
+  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
+  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
+  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
+  MP_PTR_SWAP(r3, wsi);
+#endif
+
+  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
+  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
+  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
+  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
+#else
+  r7[n3] -= cy;
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
+#endif
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
+#else
+  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
+  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. */
+  MP_PTR_SWAP(r7, wsi);
+#endif
+
+  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
+#endif
+
+  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
+#if AORSMUL_FASTER_3AORSLSH
+  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
+#endif
+  mpn_divexact_by255x188513325(r7, r7, n3p1);
+
+  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
+  /* A division by 2835x64 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x64(r5, r5, n3p1);
+  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
+    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
+#else
+  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
+#endif
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
+#else
+  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
+#endif
+  /* A division by 255x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by255x4(r6, r6, n3p1);
+  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
+
+  /* If GMP_NUMB_BITS < 42 the next operations on r1 can give a carry! */
+  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
+  mpn_submul_1 (r1, r2, n3p1, 1428);
+  mpn_submul_1 (r1, r3, n3p1, 112896);
+  mpn_divexact_by255x182712915(r1, r1, n3p1);
+
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
+  mpn_divexact_by42525x16(r2, r2, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
+#endif
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
+  mpn_divexact_by9x16(r3, r3, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r6, r2, r6, n3p1);
+  r6 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r6, r2, r6, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (r5, r3, r5, n3p1);
+  r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_sub_n (r5, r3, r5, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r7, r1, r7, n3p1);
+  r7 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r7, r1, r7, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
+
+  /* last interpolation steps... */
+  /* ... could be mixed with recomposition
+	||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+
+    summation scheme for remaining operations:
+    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r7, n);
+  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
+  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
+  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
+  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
+
+  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
+  if ( half ) {
+    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+#undef   r6
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_5pts.c b/third_party/gmp/mpn/generic/toom_interpolate_5pts.c
new file mode 100644
index 0000000..466ab85
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_5pts.c
@@ -0,0 +1,198 @@
+/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
+
+   Contributed to the GNU project by Robert Harley.
+   Improvements by Paul Zimmermann and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
+			   mp_size_t k, mp_size_t twor, int sa,
+			   mp_limb_t vinf0)
+{
+  mp_limb_t cy, saved;
+  mp_size_t twok;
+  mp_size_t kk1;
+  mp_ptr c1, v1, c3, vinf;
+
+  twok = k + k;
+  kk1 = twok + 1;
+
+  c1 = c  + k;
+  v1 = c1 + k;
+  c3 = v1 + k;
+  vinf = c3 + k;
+
+#define v0 (c)
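+  /* Editor's note, not from the GMP sources: in the step comments
+     below, a 5-tuple such as (16 8 4 2 1) lists the multipliers of the
+     product coefficients (a4 a3 a2 a1 a0) making up the current value,
+     e.g. v2 = f(2) = 16*a4 + 8*a3 + 4*a2 + 2*a1 + a0.  */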
+  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
+     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
+  */
+  if (sa)
+    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+  else
+    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
+
+  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
+						      /* (5 3 1 1 0)*/
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
+
+  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+     tm1 >= 0                                         (0  1 0  1 0)
+     No carry comes out from {v1, kk1} +/- {vm1, kk1},
+     and the division by two is exact.
+     If (sa!=0) the sign of vm1 is negative */
+  if (sa)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
+     t1 >= 0
+  */
+  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
+     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
+  */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (v2, v2, v1, kk1);
+#else
+  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
+  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+#endif
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
+     result is v1 >= 0
+  */
+  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+
+  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
+  cy = mpn_add_n (c1, c1, vm1, kk1);
+  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
+  /* Memory allocated for vm1 is now free, it can be recycled ...*/
+
+  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
+     result is v2 >= 0 */
+  saved = vinf[0];       /* Remember v1's highest limb (will be overwritten). */
+  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
+#else
+  /* Overwrite unused vm1 */
+  cy = mpn_lshift (vm1, vinf, twor, 1);
+  cy += mpn_sub_n (v2, v2, vm1, twor);
+#endif
+  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
+
+  /* Current matrix is
+     [1 0 0 0 0; vinf
+      0 1 0 0 0; v2
+      1 0 1 0 0; v1
+      0 1 0 1 0; vm1
+      0 0 0 0 1] v0
+     Some values already are in-place (we added vm1 in the correct position)
+     | vinf|  v1 |  v0 |
+	      | vm1 |
+     One still is in a separated area
+	| +v2 |
+     We have to compute v1-=vinf; vm1 -= v2,
+	   |-vinf|
+	      | -v2 |
+     By carefully reordering the operations we can avoid computing the
+     sum of the high half of v2 and the low half of vinf twice.
+  */
+
+  /* Add the high half of t2 in {vinf} */
+  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
+    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
+    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+  } else { /* triggered only by very unbalanced cases like
+	      (k+k+(k-2))x(k+k+1), which should be handled by toom32 */
+    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
+  }
+  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
+     result is >= 0 */
+  /* Side effect: we also subtracted (high half) vm1 -= v2 */
+  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
+  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
+  vinf[0] = saved;
+  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last limbs.      */
+
+  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+     Operate only on the low half.
+  */
+  cy = mpn_sub_n (c1, c1, v2, k);
+  MPN_DECR_U (v1, kk1, cy);
+
+  /********************* Beginning the final phase **********************/
+
+  /* Most of the recomposition was done */
+
+  /* add t2 in {c+3k, ...}, but only the low half */
+  cy = mpn_add_n (c3, c3, v2, k);
+  vinf[0] += cy;
+  ASSERT(vinf[0] >= cy); /* No carry */
+  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+
+#undef v0
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_6pts.c b/third_party/gmp/mpn/generic/toom_interpolate_6pts.c
new file mode 100644
index 0000000..eb23661
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_6pts.c
@@ -0,0 +1,241 @@
+/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+/* Interpolation for Toom-3.5, using the evaluation points: infinity,
+   1, -1, 2, -2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
+   six values
+
+     w5 = f(0),
+     w4 = f(-1),
+     w3 = f(1),
+     w2 = f(-2),
+     w1 = f(2),
+     w0 = limit at infinity of f(x) / x^5.
+
+   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
+   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
+   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-2) may be negative, signs
+   determined by the flag bits. All intermediate results are positive.
+   Inputs are destroyed.
+
+   Interpolation sequence was taken from the paper: "Integer and
+   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
+   Some slight variations were introduced: adaptation to "gmp
+   instruction set", and a final saving of an operation by interlacing
+   interpolation and recomposition phases.
+*/
+
+void
+mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
+			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
+			   mp_size_t w0n)
+{
+  mp_limb_t cy;
+  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
+  mp_limb_t cy4, cy6, embankment;
+
+  ASSERT( n > 0 );
+  ASSERT( 2*n >= w0n && w0n > 0 );
+
+#define w5  pp					/* 2n   */
+#define w3  (pp + 2 * n)			/* 2n+1 */
+#define w0  (pp + 5 * n)			/* w0n  */
+
+  /* Interpolate with sequence:
+     W2 =(W1 - W2)>>2
+     W1 =(W1 - W5)>>1
+     W1 =(W1 - W2)>>1
+     W4 =(W3 - W4)>>1
+     W2 =(W2 - W4)/3
+     W3 = W3 - W4 - W5
+     W1 =(W1 - W3)/3
+     // Last steps are mixed with recomposition...
+     W2 = W2 - W0<<2
+     W4 = W4 - W2
+     W3 = W3 - W1
+     W2 = W2 - W0
+  */
+
+  /* W2 =(W1 - W2)>>2 */
+  if (flags & toom6_vm2_neg)
+    mpn_add_n (w2, w1, w2, 2 * n + 1);
+  else
+    mpn_sub_n (w2, w1, w2, 2 * n + 1);
+  mpn_rshift (w2, w2, 2 * n + 1, 2);
+
+  /* W1 =(W1 - W5)>>1 */
+  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+
+  /* W1 =(W1 - W2)>>1 */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
+#else
+  mpn_sub_n (w1, w1, w2, 2 * n + 1);
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+#endif
+
+  /* W4 =(W3 - W4)>>1 */
+  if (flags & toom6_vm1_neg)
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_add_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_sub_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+
+  /* W2 =(W2 - W4)/3 */
+  mpn_sub_n (w2, w2, w4, 2 * n + 1);
+  mpn_divexact_by3 (w2, w2, 2 * n + 1);
+
+  /* W3 = W3 - W4 - W5 */
+  mpn_sub_n (w3, w3, w4, 2 * n + 1);
+  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
+
+  /* W1 =(W1 - W3)/3 */
+  mpn_sub_n (w1, w1, w3, 2 * n + 1);
+  mpn_divexact_by3 (w1, w1, 2 * n + 1);
+
+  /*
+    [1 0 0 0 0 0;
+     0 1 0 0 0 0;
+     1 0 1 0 0 0;
+     0 1 0 1 0 0;
+     1 0 1 0 1 0;
+     0 0 0 0 0 1]
+
+    pp[] prior to operations:
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+
+    summation scheme for remaining operations:
+     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+				    || H w4  | L w4  |
+		    || H w2  | L w2  |
+	    || H w1  | L w1  |
+			    ||-H w1  |-L w1  |
+		     |-H w0  |-L w0 ||-H w2  |-L w2  |
+  */
+  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
+  MPN_INCR_U (pp + 3 * n + 1, n, cy);
+
+  /* W2 -= W0<<2 */
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
+#else
+  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
+#endif
+#else
+  /* {W4,2*n+1} is now free and can be overwritten. */
+  cy = mpn_lshift(w4, w0, w0n, 2);
+  cy+= mpn_sub_n(w2, w2, w4, w0n);
+#endif
+  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
+
+  /* W4L = W4L - W2L */
+  cy = mpn_sub_n (pp + n, pp + n, w2, n);
+  MPN_DECR_U (w3, 2 * n + 1, cy);
+
+  /* W3H = W3H + W2L */
+  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
+  /* W1L + W2H */
+  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
+  MPN_INCR_U (w1 + n, n + 1, cy);
+
+  /* W0 = W0 + W1H */
+  if (LIKELY (w0n > n))
+    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
+  else
+    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
+
+  /*
+    summation scheme for the next operation:
+     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
+     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
+		     ...-w0___|-w1_w2 |
+  */
+  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
+  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
+
+  /* embankment is a "dirty trick" to avoid carry/borrow propagation
+     beyond allocated memory */
+  embankment = w0[w0n - 1] - 1;
+  w0[w0n - 1] = 1;
+  if (LIKELY (w0n > n)) {
+    if (cy4 > cy6)
+      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
+    else
+      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
+    MPN_INCR_U (w0 + n, w0n - n, cy6);
+  } else {
+    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
+  }
+  w0[w0n - 1] += embankment;
+
+#undef w5
+#undef w3
+#undef w0
+
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_7pts.c b/third_party/gmp/mpn/generic/toom_interpolate_7pts.c
new file mode 100644
index 0000000..167c45b
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_7pts.c
@@ -0,0 +1,274 @@
+/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
+
+   Contributed to the GNU project by Niels Möller.
+   Improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2009, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+/* For the various mpn_divexact_byN here, fall back to using either
+   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
+   many faster if it is native.  For now, since mpn_divexact_1 is native on
+   several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
+#else
+#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
+#endif
+#endif
+
+#ifndef mpn_divexact_by15
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
+#else
+#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
+#endif
+#endif
+
+/* Interpolation for toom4, using the evaluation points 0, infinity,
+   1, -1, 2, -2, 1/2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
+   seven values
+
+     w0 = f(0),
+     w1 = f(-2),
+     w2 = f(1),
+     w3 = f(-1),
+     w4 = f(2),
+     w5 = 64 * f(1/2),
+     w6 = limit at infinity of f(x) / x^6.
+
+   The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
+   w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
+   w6n }. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-2) and f(-1) may be negative, signs
+   determined by the flag bits. Inputs are destroyed.
+
+   Needs (2*n + 1) limbs of temporary storage.
+*/
+
+void
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
+			   mp_size_t w6n, mp_ptr tp)
+{
+  mp_size_t m;
+  mp_limb_t cy;
+
+  m = 2*n + 1;
+#define w0 rp
+#define w2 (rp + 2*n)
+#define w6 (rp + 6*n)
+
+  ASSERT (w6n > 0);
+  ASSERT (w6n <= 2*n);
+
+  /* Using formulas similar to Marco Bodrato's
+
+     W5 = W5 + W4
+     W1 =(W4 - W1)/2
+     W4 = W4 - W0
+     W4 =(W4 - W1)/4 - W6*16
+     W3 =(W2 - W3)/2
+     W2 = W2 - W3
+
+     W5 = W5 - W2*65      May be negative.
+     W2 = W2 - W6 - W0
+     W5 =(W5 + W2*45)/2   Now >= 0 again.
+     W4 =(W4 - W2)/3
+     W2 = W2 - W4
+
+     W1 = W5 - W1         May be negative.
+     W5 =(W5 - W3*8)/9
+     W3 = W3 - W5
+     W1 =(W1/15 + W5)/2   Now >= 0 again.
+     W5 = W5 - W1
+
+     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
+	   W4 = f(2), W5 = f(1/2), W6 = f(oo),
+
+     Note that most intermediate results are positive; the ones that
+     may be negative are represented in two's complement. We must
+     never shift right a value that may be negative, since that would
+     invalidate the sign bit. On the other hand, divexact by odd
+     numbers work fine with two's complement.
+  */
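+
+  /* Editor's note -- worked instance, not from the GMP sources: with
+     f(x) = a6*x^6 + ... + a1*x + a0, the step W3 = (W2 - W3)/2 above
+     computes (f(1) - f(-1))/2 = a1 + a3 + a5, the odd part of f at 1;
+     the difference is always even, so the one-bit right shift is
+     exact.  */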
+
+  mpn_add_n (w5, w5, w4, m);
+  if (flags & toom7_w1_neg)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w1, w1, w4, m);
+#else
+      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w1, w4, w1, m);
+#else
+      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  mpn_sub (w4, w4, m, w0, 2*n);
+  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
+  mpn_rshift (w4, w4, m, 2); /* w4>=0 */
+
+  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
+  mpn_sub (w4, w4, m, tp, w6n+1);
+
+  if (flags & toom7_w3_neg)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w3, w3, w2, m);
+#else
+      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w3, w2, w3, m);
+#else
+      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+
+  mpn_sub_n (w2, w2, w3, m);
+
+  mpn_submul_1 (w5, w2, m, 65);
+  mpn_sub (w2, w2, m, w6, w6n);
+  mpn_sub (w2, w2, m, w0, 2*n);
+
+  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
+  mpn_rshift (w5, w5, m, 1);
+  mpn_sub_n (w4, w4, w2, m);
+
+  mpn_divexact_by3 (w4, w4, m);
+  mpn_sub_n (w2, w2, w4, m);
+
+  mpn_sub_n (w1, w5, w1, m);
+  mpn_lshift (tp, w3, m, 3);
+  mpn_sub_n (w5, w5, tp, m);
+  mpn_divexact_by9 (w5, w5, m);
+  mpn_sub_n (w3, w3, w5, m);
+
+  mpn_divexact_by15 (w1, w1, m);
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (w1, w1, w5, m);
+  w1[m - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
+  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+#endif
+
+  mpn_sub_n (w5, w5, w1, m);
+
+  /* These bounds are valid for the 4x4 polynomial product of toom44,
+   * and they are conservative for toom53 and toom62. */
+  ASSERT (w1[2*n] < 2);
+  ASSERT (w2[2*n] < 3);
+  ASSERT (w3[2*n] < 4);
+  ASSERT (w4[2*n] < 3);
+  ASSERT (w5[2*n] < 2);
+
+  /* Addition chain. Note carries and the 2n'th limbs that need to be
+   * added in.
+   *
+   * Special care is needed for w2[2n] and the corresponding carry,
+   * since the "simple" way of adding it all together would overwrite
+   * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
+   * the high half of w3 and the low half of w4.
+   *
+   *         7    6    5    4    3    2    1    0
+   *    |    |    |    |    |    |    |    |    |
+   *                  ||w3 (2n+1)|
+   *             ||w4 (2n+1)|
+   *        ||w5 (2n+1)|        ||w1 (2n+1)|
+   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
+   *  -----------------------------------------------
+   *  r |    |    |    |    |    |    |    |    |
+   *        c7   c6   c5   c4   c3                 Carries to propagate
+   */
+
+  cy = mpn_add_n (rp + n, rp + n, w1, m);
+  MPN_INCR_U (w2 + n + 1, n , cy);
+  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
+  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
+  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
+  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
+  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
+  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
+  if (w6n > n + 1)
+    {
+      cy = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+      MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, cy);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
+#if WANT_ASSERT
+      {
+	mp_size_t i;
+	for (i = w6n; i <= n; i++)
+	  ASSERT (w5[n + i] == 0);
+      }
+#endif
+    }
+}
diff --git a/third_party/gmp/mpn/generic/toom_interpolate_8pts.c b/third_party/gmp/mpn/generic/toom_interpolate_8pts.c
new file mode 100644
index 0000000..5e65fab
--- /dev/null
+++ b/third_party/gmp/mpn/generic/toom_interpolate_8pts.c
@@ -0,0 +1,211 @@
+/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
+
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by45
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by45(dst,src,size) \
+  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
+#else
+#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
+#endif
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
+#else
+#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift (ws,src,n,s);
+  return __cy + mpn_sub_n (dst,dst,ws,n);
+#endif
+}
+#endif
+
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
+#else
+/* This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
+   points: infinity (4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 7 (or 6), given the 8 (resp. 7) values:
+
+     r1 = limit at infinity of f(x) / x^7,
+     r2 = f(4),
+     r3 = f(-4),
+     r4 = f(2),
+     r5 = f(-2),
+     r6 = f(1),
+     r7 = f(-1),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must already be mixed with
+   toom_couple_handling(f(n),...,f(-n),...).
+
+   The result is stored in {pp, spt + 7*n (or 6*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r5 is stored at {pp + 3n, 3n + 1}.
+
+   The other values are 2n+... limbs each (with most significant limbs small).
+
+   All intermediate results are positive.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
+			   mp_ptr r3, mp_ptr r7,
+			   mp_size_t spt, mp_ptr ws)
+{
+  mp_limb_signed_t cy;
+  mp_ptr r5, r1;
+  r5 = (pp + 3 * n);			/* 3n+1 */
+  r1 = (pp + 7 * n);			/* spt */
+
+  /******************************* interpolation *****************************/
+
+  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
+  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
+  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
+
+  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
+  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
+  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
+
+  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
+  cy = mpn_sub_n (r7, r7, r1, spt);
+  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
+
+  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+
+  mpn_divexact_by45 (r3, r3, 3 * n + 1);
+
+  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
+
+  /* last interpolation steps... */
+  /* ... are mixed with recomposition */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
+
+    summation scheme for remaining operations:
+     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
+     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
+	  ||_H r3|_M r3|_L*r3|
+				  ||_H_r7|_M_r7|_L_r7|
+		      ||-H r3|-M r3|-L*r3|
+				  ||-H*r5|-M_r5|-L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
+  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
+  if (cy > 0) {
+    MPN_INCR_U (r7 + n, 2*n + 1, 1);
+    cy = 0;
+  }
+
+  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
+  MPN_DECR_U (r7 + 2*n, n + 1, cy);
+
+  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
+  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
+  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
+  if (UNLIKELY(0 > cy))
+    MPN_DECR_U (r5 + n + 1, 2*n, 1);
+  else
+    MPN_INCR_U (r5 + n + 1, 2*n, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
+
+  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
+  MPN_INCR_U (r3 + 2*n, n + 1, cy);
+  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+  if (LIKELY(spt != n))
+    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
+  else
+    ASSERT (r3[3*n] + cy == 0);
+}
diff --git a/third_party/gmp/mpn/generic/trialdiv.c b/third_party/gmp/mpn/generic/trialdiv.c
new file mode 100644
index 0000000..65e089f
--- /dev/null
+++ b/third_party/gmp/mpn/generic/trialdiv.c
@@ -0,0 +1,131 @@
+/* mpn_trialdiv -- find small factors of an mpn number using trial division.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+   This function finds the first (smallest) factor represented in
+   trialdivtab.h.  It does not stop the factoring effort just because it has
+   reached some sensible limit, such as the square root of the input number.
+
+   The caller can limit the factoring effort by passing NPRIMES.  The function
+   will then divide until that limit, or perhaps a few primes more.  A position
+   which only mpn_trialdiv can make sense of is returned in the WHERE
+   parameter.  It can be used for restarting the factoring effort; the first
+   call should pass 0 here.
+
+   Input:        1. A non-negative number T = {tp,tn}
+                 2. NPRIMES as described above,
+                 3. *WHERE as described above.
+   Output:       1. *WHERE updated as described above.
+                 2. Return value is non-zero if we found a factor, else zero
+                    To get the actual prime factor, compute the mod B inverse
+                    of the return value.
+*/
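+
+/* Editor's note -- hypothetical usage sketch, not from the GMP sources
+   (binvert is the Newton-iteration helper sketched in
+   toom_interpolate_16pts.c, not a GMP function):
+
+     int where = 0;
+     mp_limb_t p, inv = mpn_trialdiv (tp, tn, 1000, &where);
+     if (inv != 0)
+       p = binvert (inv);   // inverting the inverse mod B recovers p
+*/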
+
+#include "gmp-impl.h"
+
+struct gmp_primes_dtab {
+  mp_limb_t binv;
+  mp_limb_t lim;
+};
+
+struct gmp_primes_ptab {
+  mp_limb_t ppp;	/* primes, multiplied together */
+  mp_limb_t cps[7];	/* ppp values pre-computed for mpn_mod_1s_4p */
+  gmp_uint_least32_t idx:24;	/* index of first prime in dtab */
+  gmp_uint_least32_t np :8;	/* number of primes related to this entry */
+};
+
+
+static const struct gmp_primes_dtab gmp_primes_dtab[] =
+{
+#define WANT_dtab
+#define P(p,inv,lim) {inv,lim}
+#include "trialdivtab.h"
+#undef WANT_dtab
+#undef P
+  {0,0}
+};
+
+static const struct gmp_primes_ptab gmp_primes_ptab[] =
+{
+#define WANT_ptab
+#include "trialdivtab.h"
+#undef WANT_ptab
+};
+
+#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
+
+/* FIXME: We could optimize out one of the outer loop conditions if we
+   had a final ptab entry with a huge np field.  */
+mp_limb_t
+mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
+{
+  mp_limb_t ppp;
+  const mp_limb_t *cps;
+  const struct gmp_primes_dtab *dp;
+  long i, j, idx, np;
+  mp_limb_t r, q;
+
+  ASSERT (tn >= 1);
+
+  for (i = *where; i < PTAB_LINES; i++)
+    {
+      ppp = gmp_primes_ptab[i].ppp;
+      cps = gmp_primes_ptab[i].cps;
+
+      r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+
+      idx = gmp_primes_ptab[i].idx;
+      np = gmp_primes_ptab[i].np;
+
+      /* Check divisibility by individual primes.  */
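+      /* Editor's note, not from the GMP sources: r = T mod ppp, so T is
+         divisible by a prime p dividing ppp exactly when p divides r,
+         i.e. when r * p^-1 mod B <= floor ((B-1) / p); binv and lim
+         below hold those two precomputed values.  */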
+      dp = &gmp_primes_dtab[idx] + np;
+      for (j = -np; j < 0; j++)
+	{
+	  q = r * dp[j].binv;
+	  if (q <= dp[j].lim)
+	    {
+	      *where = i;
+	      return dp[j].binv;
+	    }
+	}
+
+      nprimes -= np;
+      if (nprimes <= 0)
+	return 0;
+    }
+  return 0;
+}
diff --git a/third_party/gmp/mpn/generic/udiv_w_sdiv.c b/third_party/gmp/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000..7907135
--- /dev/null
+++ b/third_party/gmp/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,141 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+   division.
+
+   Contributed by Peter L. Montgomery.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY SAFE
+   TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+   GNU MP RELEASE.
+
+
+Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+{
+  mp_limb_t q, r;
+  mp_limb_t c0, c1, b1;
+
+  ASSERT (d != 0);
+  ASSERT (a1 < d);
+
+  if ((mp_limb_signed_t) d >= 0)
+    {
+      if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))
+	{
+	  /* dividend, divisor, and quotient are nonnegative */
+	  sdiv_qrnnd (q, r, a1, a0, d);
+	}
+      else
+	{
+	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+	  /* Divide (c1*2^32 + c0) by d */
+	  sdiv_qrnnd (q, r, c1, c0, d);
+	  /* Add 2^31 to quotient */
+	  q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+	}
+    }
+  else
+    {
+      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
+      c1 = a1 >> 1;			/* A/2 */
+      c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);
+
+      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
+	{
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
+	{
+	  c1 = (b1 - 1) - c1;
+	  c0 = ~c0;			/* logical NOT */
+
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  q = ~q;			/* (A/2)/b1 */
+	  r = (b1 - 1) - r;
+
+	  r = 2*r + (a0 & 1);		/* A/(2*b1) */
+
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else				/* Implies c1 = b1 */
+	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
+	  if (a0 >= -d)
+	    {
+	      q = -CNST_LIMB(1);
+	      r = a0 + d;
+	    }
+	  else
+	    {
+	      q = -CNST_LIMB(2);
+	      r = a0 + 2*d;
+	    }
+	}
+    }
+
+  *rp = r;
+  return q;
+}
diff --git a/third_party/gmp/mpn/generic/zero.c b/third_party/gmp/mpn/generic/zero.c
new file mode 100644
index 0000000..1a05453
--- /dev/null
+++ b/third_party/gmp/mpn/generic/zero.c
@@ -0,0 +1,41 @@
+/* mpn_zero
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
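+/* Editor's note, not from the GMP sources: rp is advanced past the end
+   of the region and a negative index is counted up to zero, making the
+   loop test a plain compare against zero.  */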
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+  mp_size_t i;
+
+  rp += n;
+  for (i = -n; i != 0; i++)
+    rp[i] = 0;
+}
diff --git a/third_party/gmp/mpn/generic/zero_p.c b/third_party/gmp/mpn/generic/zero_p.c
new file mode 100644
index 0000000..c92f9b8
--- /dev/null
+++ b/third_party/gmp/mpn/generic/zero_p.c
@@ -0,0 +1,33 @@
+/* mpn_zero_p (x,xsize) -- Return 1 if X is zero, 0 if it is non-zero.
+
+Copyright 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_zero_p 1
+
+#include "gmp-impl.h"