Revert "Remove gmp from AOS"

This reverts commit f37c97684f0910a3f241394549392f00145ab0f7.

We need gmp for SymEngine for symbolicmanipultion in C++

Change-Id: Ia13216d1715cf96944f7b4f422b7a799f921d4a4
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/third_party/gmp/mpn/cray/README b/third_party/gmp/mpn/cray/README
new file mode 100644
index 0000000..3a347d2
--- /dev/null
+++ b/third_party/gmp/mpn/cray/README
@@ -0,0 +1,121 @@
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+
+The code in this directory works for Cray vector systems such as C90,
+J90, T90 (both the CFP variant and the IEEE variant) and SV1.  (For
+the T3E and T3D systems, see the `alpha' subdirectory at the same
+level as the directory containing this file.)
+
+The cfp subdirectory is for systems utilizing the traditional Cray
+floating-point format, and the ieee subdirectory is for the newer
+systems that use the IEEE floating-point format.
+
+There are several issues that reduces speed on Cray systems.  For
+systems with cfp floating point, the main obstacle is the forming of
+128-bit products.  For IEEE systems, adding, and in particular
+computing carry is the main issue.  There are no vectorizing
+unsigned-less-than instructions, and the sequence that implement that
+operation is very long.
+
+Shifting is the only operation that is simple to make fast.  All Cray
+systems have a bitblt instructions (Vi Vj,Vj<Ak and Vi Vj,Vj>Ak) that
+should be really useful.
+
+For best speed for cfp systems, we need a mul_basecase, since that
+reduces the need for carry propagation to a minimum.  Depending on the
+size (vn) of the smaller of the two operands (V), we should split U and V
+in different chunk sizes:
+
+U split in 2 32-bit parts
+V split according to the table:
+parts			4	5	6	7	8
+bits/part		16	13	11	10	8
+max allowed vn		1	8	32	64	256
+number of multiplies	8	10	12	14	16
+peak cycles/limb	4	5	6	7	8
+
+U split in 3 22-bit parts
+V split according to the table:
+parts			3	4	5
+bits/part		22	16	13
+max allowed vn		16	1024	8192
+number of multiplies	9	12	15
+peak cycles/limb	4.5	6	7.5
+
+U split in 4 16-bit parts
+V split according to the table:
+parts			4
+bits/part		16
+max allowed vn		65536
+number of multiplies	16
+peak cycles/limb	8
+
+(A T90 CPU can accumulate two products per cycle.)
+
+IDEA:
+* Rewrite mpn_add_n:
+    short cy[n + 1];
+    #pragma _CRI ivdep
+      for (i = 0; i < n; i++)
+	{ s = up[i] + vp[i];
+	  rp[i] = s;
+	  cy[i + 1] = s < up[i]; }
+      more_carries = 0;
+    #pragma _CRI ivdep
+      for (i = 1; i < n; i++)
+	{ s = rp[i] + cy[i];
+	  rp[i] = s;
+	  more_carries += s < cy[i]; }
+      cys = 0;
+      if (more_carries)
+	{
+	  cys = rp[1] < cy[1];
+	  for (i = 2; i < n; i++)
+	    { rp[i] += cys;
+	      cys = rp[i] < cys; }
+	}
+      return cys + cy[n];
+
+* Write mpn_add3_n for adding three operands.  First add operands 1
+  and 2, and generate cy[].  Then add operand 3 to the partial result,
+  and accumulate carry into cy[].  Finally propagate carry just like
+  in the new mpn_add_n.
+
+IDEA:
+
+Store fewer bits, perhaps 62, per limb.  That brings mpn_add_n time
+down to 2.5 cycles/limb and mpn_addmul_1 times to 4 cycles/limb.  By
+storing even fewer bits per limb, perhaps 56, it would be possible to
+write a mul_mul_basecase that would run at effectively 1 cycle/limb.
+(Use VM here to better handle the romb-shaped multiply area, perhaps
+rounding operand sizes up to the next power of 2.)
diff --git a/third_party/gmp/mpn/cray/add_n.c b/third_party/gmp/mpn/cray/add_n.c
new file mode 100644
index 0000000..af49159
--- /dev/null
+++ b/third_party/gmp/mpn/cray/add_n.c
@@ -0,0 +1,90 @@
+/* Cray PVP mpn_add_n -- add two limb vectors and store their sum in a third
+   limb vector.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This code runs at 4 cycles/limb.  It may be possible to bring it down
+   to 3 cycles/limb.  */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  /* Main add loop.  Generate a raw output sum in rp[] and a carry vector
+     in cy[].  */
+#pragma _CRI ivdep
+  for (i = 0; i < n; i++)
+    {
+      a = up[i];
+      b = vp[i];
+      s0 = a + b;
+      rp[i] = s0;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+      cy[i] = c0;
+    }
+  /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
+     store the new sum back to rp[0].  If this generates further carry, set
+     more_carries.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
+	 These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+	{
+	  r = rp[i];
+	  c0 = (r == 0 && cy[i - 1] != 0);
+	  s0 = r + cyrec;
+	  rp[i] = s0;
+	  c1 = (r & ~s0) >> 63;
+	  cyrec = c0 | c1;
+	}
+      return cyrec | cy[n - 1];
+    }
+
+  return cy[n - 1];
+}
diff --git a/third_party/gmp/mpn/cray/cfp/addmul_1.c b/third_party/gmp/mpn/cray/cfp/addmul_1.c
new file mode 100644
index 0000000..9c7f383
--- /dev/null
+++ b/third_party/gmp/mpn/cray/cfp/addmul_1.c
@@ -0,0 +1,48 @@
+/* mpn_addmul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_limb_t p0[n], p1[n], tp[n];
+  mp_limb_t cy_limb;
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);
+  cy_limb = mpn_add_n (tp, rp, p0, n);
+  rp[0] = tp[0];
+  if (n != 1)
+    cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1);
+  cy_limb += p1[n - 1];
+
+  return cy_limb;
+}
diff --git a/third_party/gmp/mpn/cray/cfp/mul_1.c b/third_party/gmp/mpn/cray/cfp/mul_1.c
new file mode 100644
index 0000000..33a6a05
--- /dev/null
+++ b/third_party/gmp/mpn/cray/cfp/mul_1.c
@@ -0,0 +1,47 @@
+/* mpn_mul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_limb_t p0[n], p1[n];
+  mp_limb_t cy_limb;
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);
+  rp[0] = p0[0];
+  cy_limb = p1[n - 1];
+  if (n != 1)
+    cy_limb += mpn_add_n (rp + 1, p0 + 1, p1, n - 1);
+
+  return cy_limb;
+}
diff --git a/third_party/gmp/mpn/cray/cfp/mulwwc90.s b/third_party/gmp/mpn/cray/cfp/mulwwc90.s
new file mode 100644
index 0000000..71d2285
--- /dev/null
+++ b/third_party/gmp/mpn/cray/cfp/mulwwc90.s
@@ -0,0 +1,254 @@
+*    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+*    Copyright 1996, 2000 Free Software Foundation, Inc.
+*    This file is generated from mulww.f in this same directory.
+
+*  This file is part of the GNU MP Library.
+*
+*  The GNU MP Library is free software; you can redistribute it and/or modify
+*  it under the terms of either:
+*
+*    * the GNU Lesser General Public License as published by the Free
+*      Software Foundation; either version 3 of the License, or (at your
+*      option) any later version.
+*
+*  or
+*
+*    * the GNU General Public License as published by the Free Software
+*      Foundation; either version 2 of the License, or (at your option) any
+*      later version.
+*
+*  or both in parallel, as here.
+*
+*  The GNU MP Library is distributed in the hope that it will be useful, but
+*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+*  for more details.
+*
+*  You should have received copies of the GNU General Public License and the
+*  GNU Lesser General Public License along with the GNU MP Library.  If not,
+*  see https://www.gnu.org/licenses/.
+
+            IDENT           GMPN_MULWW
+**********************************************
+*      Assemble with Cal Version 2.0         *
+*                                            *
+* Generated by CFT77   6.0.4.19              *
+*           on 06/27/00 at 04:34:13          *
+*                                            *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+            EDIT            OFF
+            FORMAT          NEW
+@DATA       SECTION         DATA,CM
+@DATA       =               W.*
+            CON             O'0000000000040000000000
+            CON             O'0435152404713723252514
+            CON             O'0535270000000000000000
+            CON             O'0000000000000001200012
+            VWD             32/0,32/P.GMPN_MULWW
+            CON             O'0014003000000000001416
+            CON             O'0000000000000000000011
+            CON             O'0000000000000000000215
+            BSSZ            1
+@CODE       SECTION         CODE
+@CODE       =               P.*
+L3          =               P.*
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+            A7              P.L4
+            B00             A7
+            A6              @DATA
+            J               $STKOFEN
+GMPN_MULWW  =               P.*
+            A0              @DATA+3
+            B77             A0
+            A1              13
+            A0              B66
+            A2              B66
+            A4              B67
+            0,A0            B77,A1
+            A7              782
+            A3              A2+A7
+            A0              A4-A3
+            JAM             L3
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+L4          =               P.*
+            A7              B07
+            S7              0,A7
+            A6              B10
+            S6              0,A6
+            S5              1
+            S4              <22
+            S7              S7-S5
+            S5              #S7
+            T00             S6
+            S6              S6>22
+            S7              T00
+            S7              S7>44
+            S3              T00
+            S3              S3&S4
+            S6              S6&S4
+            S7              S7&S4
+            S3              S3<24
+            S6              S6<24
+            S7              S7<24
+            S0              S5
+            S4              S5
+            S1              S6
+            S2              S3
+            S3              S7
+            JSP             L5
+L6          =               P.*
+            S7              -S4
+            A2              S7
+            VL              A2
+            A3              B06
+            A5              B05
+            A4              B04
+            A1              VL
+            A2              S4
+L7          =               P.*
+            A0              A3
+            VL              A1
+            V7              ,A0,1
+            B11             A5
+            A7              22
+            B12             A4
+            V6              V7>A7
+            B13             A3
+            S7              <22
+            A3              B02
+            V5              S7&V6
+            A6              24
+            V4              V5<A6
+            V3              S1*FV4
+            V2              S7&V7
+            V1              V2<A6
+            V0              S3*FV1
+            V6              V0+V3
+            A5              44
+            V5              V7>A5
+            V2              S1*FV1
+            V3              S7&V5
+            A0              14
+            B77             A0
+            A4              B77
+            A0              A4+A3
+            ,A0,1           V2
+            V0              V3<A6
+            V7              S2*FV1
+            A4              142
+            A0              A4+A3
+            ,A0,1           V7
+            V5              V7>A7
+            V2              S2*FV0
+            V3              V6+V2
+            S7              <20
+            V1              S7&V3
+            A4              270
+            A0              A4+A3
+            ,A0,1           V0
+            A4              14
+            A0              A4+A3
+            V7              ,A0,1
+            V6              V1<A7
+            V2              S2*FV4
+            V0              V7+V2
+            S7              <42
+            V1              S7&V0
+            A4              398
+            A0              A4+A3
+            ,A0,1           V0
+            V7              S3*FV4
+            V2              V5+V1
+            V0              V3<A5
+            A5              526
+            A0              A5+A3
+            ,A0,1           V0
+            A5              270
+            A0              A5+A3
+            V4              ,A0,1
+            V5              V2+V6
+            A5              20
+            V1              V3>A5
+            V0              S1*FV4
+            A5              654
+            A0              A5+A3
+            ,A0,1           V1
+            V6              V7+V0
+            A5              2
+            V2              V6<A5
+            V3              S3*FV4
+            A5              142
+            A0              A5+A3
+            V1              ,A0,1
+            A5              526
+            A0              A5+A3
+            V7              ,A0,1
+            V0              V1+V7
+            V6              V3<A6
+            V4              V6+V2
+            A6              42
+            V7              V5>A6
+            A5              654
+            CPW
+            A0              A5+A3
+            V1              ,A0,1
+            A5              398
+            A0              A5+A3
+            V3              ,A0,1
+            V6              V4+V1
+            V2              V3>A6
+            V5              V6+V2
+            A6              B12
+            V4              V3<A7
+            A7              B13
+            A3              A7+A1
+            A7              B11
+            A5              A7+A1
+            A4              A6+A1
+            A7              A2+A1
+            A0              A2+A1
+            A2              128
+            B13             A0
+            V1              V0+V4
+            A0              B11
+            ,A0,1           V1
+            V6              V5+V7
+            A0              A6
+            ,A0,1           V6
+            A0              B13
+            A1              A2
+            A2              A7
+            JAN             L7
+L8          =               P.*
+L5          =               P.*
+            S1              0
+            A0              B02
+            A2              B02
+            A1              13
+            B66             A0
+            B77,A1          0,A0
+            A0              A2+A1
+            A1              1
+            T00,A1          0,A0
+            J               B00
+            EXT             $STKOFEN:p
+            ENTRY           GMPN_MULWW
+            END
diff --git a/third_party/gmp/mpn/cray/cfp/mulwwj90.s b/third_party/gmp/mpn/cray/cfp/mulwwj90.s
new file mode 100644
index 0000000..1c2c7cd
--- /dev/null
+++ b/third_party/gmp/mpn/cray/cfp/mulwwj90.s
@@ -0,0 +1,253 @@
+*    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+*    Copyright 1996, 2000 Free Software Foundation, Inc.
+*    This file is generated from mulww.f in this same directory.
+
+*  This file is part of the GNU MP Library.
+*
+*  The GNU MP Library is free software; you can redistribute it and/or modify
+*  it under the terms of either:
+*
+*    * the GNU Lesser General Public License as published by the Free
+*      Software Foundation; either version 3 of the License, or (at your
+*      option) any later version.
+*
+*  or
+*
+*    * the GNU General Public License as published by the Free Software
+*      Foundation; either version 2 of the License, or (at your option) any
+*      later version.
+*
+*  or both in parallel, as here.
+*
+*  The GNU MP Library is distributed in the hope that it will be useful, but
+*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+*  for more details.
+*
+*  You should have received copies of the GNU General Public License and the
+*  GNU Lesser General Public License along with the GNU MP Library.  If not,
+*  see https://www.gnu.org/licenses/.
+
+            IDENT           GMPN_MULWW
+**********************************************
+*      Assemble with Cal Version 2.0         *
+*                                            *
+* Generated by CFT77   6.0.4.19              *
+*           on 06/27/00 at 04:34:13          *
+*                                            *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+            EDIT            OFF
+            FORMAT          NEW
+@DATA       SECTION         DATA,CM
+@DATA       =               W.*
+            CON             O'0000000000040000000000
+            CON             O'0435152404713723252514
+            CON             O'0535270000000000000000
+            CON             O'0000000000000001200012
+            VWD             32/0,32/P.GMPN_MULWW
+            CON             O'0014003000000000001416
+            CON             O'0000000000000000000011
+            CON             O'0000000000000000000215
+            BSSZ            1
+@CODE       SECTION         CODE
+@CODE       =               P.*
+L3          =               P.*
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+            A7              P.L4
+            B00             A7
+            A6              @DATA
+            J               $STKOFEN
+GMPN_MULWW  =               P.*
+            A0              @DATA+3
+            B77             A0
+            A1              13
+            A0              B66
+            A2              B66
+            A4              B67
+            0,A0            B77,A1
+            A7              782
+            A3              A2+A7
+            A0              A4-A3
+            JAM             L3
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+L4          =               P.*
+            A7              B07
+            S7              0,A7
+            A6              B10
+            S6              0,A6
+            S5              1
+            S4              <22
+            S7              S7-S5
+            S5              #S7
+            T00             S6
+            S6              S6>22
+            S7              T00
+            S7              S7>44
+            S3              T00
+            S3              S3&S4
+            S6              S6&S4
+            S7              S7&S4
+            S3              S3<24
+            S6              S6<24
+            S7              S7<24
+            S0              S5
+            S4              S5
+            S1              S6
+            S2              S3
+            S3              S7
+            JSP             L5
+L6          =               P.*
+            S7              -S4
+            A2              S7
+            VL              A2
+            A3              B06
+            A5              B05
+            A4              B04
+            A1              VL
+            A2              S4
+L7          =               P.*
+            A0              A3
+            VL              A1
+            V7              ,A0,1
+            B11             A5
+            A7              22
+            B12             A4
+            V6              V7>A7
+            B13             A3
+            S7              <22
+            A3              B02
+            V5              S7&V6
+            A6              24
+            V4              V5<A6
+            V3              S1*FV4
+            V2              S7&V7
+            V1              V2<A6
+            V0              S3*FV1
+            V6              V0+V3
+            A5              44
+            V5              V7>A5
+            V2              S1*FV1
+            V3              S7&V5
+            A0              14
+            B77             A0
+            A4              B77
+            A0              A4+A3
+            ,A0,1           V2
+            V0              V3<A6
+            V7              S2*FV1
+            A4              142
+            A0              A4+A3
+            ,A0,1           V7
+            V5              V7>A7
+            V2              S2*FV0
+            V3              V6+V2
+            S7              <20
+            V1              S7&V3
+            A4              270
+            A0              A4+A3
+            ,A0,1           V0
+            A4              14
+            A0              A4+A3
+            V7              ,A0,1
+            V6              V1<A7
+            V2              S2*FV4
+            V0              V7+V2
+            S7              <42
+            V1              S7&V0
+            A4              398
+            A0              A4+A3
+            ,A0,1           V0
+            V7              S3*FV4
+            V2              V5+V1
+            V0              V3<A5
+            A5              526
+            A0              A5+A3
+            ,A0,1           V0
+            A5              270
+            A0              A5+A3
+            V4              ,A0,1
+            V5              V2+V6
+            A5              20
+            V1              V3>A5
+            V0              S1*FV4
+            A5              654
+            A0              A5+A3
+            ,A0,1           V1
+            V6              V7+V0
+            A5              2
+            V2              V6<A5
+            V3              S3*FV4
+            A5              142
+            A0              A5+A3
+            V1              ,A0,1
+            A5              526
+            A0              A5+A3
+            V7              ,A0,1
+            V0              V1+V7
+            V6              V3<A6
+            V4              V6+V2
+            A6              42
+            V7              V5>A6
+            A5              654
+            A0              A5+A3
+            V1              ,A0,1
+            A5              398
+            A0              A5+A3
+            V3              ,A0,1
+            V6              V4+V1
+            V2              V3>A6
+            V5              V6+V2
+            A6              B12
+            V4              V3<A7
+            A7              B13
+            A3              A7+A1
+            A7              B11
+            A5              A7+A1
+            A4              A6+A1
+            A7              A2+A1
+            A0              A2+A1
+            A2              64
+            B13             A0
+            V1              V0+V4
+            A0              B11
+            ,A0,1           V1
+            V6              V5+V7
+            A0              A6
+            ,A0,1           V6
+            A0              B13
+            A1              A2
+            A2              A7
+            JAN             L7
+L8          =               P.*
+L5          =               P.*
+            S1              0
+            A0              B02
+            A2              B02
+            A1              13
+            B66             A0
+            B77,A1          0,A0
+            A0              A2+A1
+            A1              1
+            T00,A1          0,A0
+            J               B00
+            EXT             $STKOFEN:p
+            ENTRY           GMPN_MULWW
+            END
diff --git a/third_party/gmp/mpn/cray/cfp/submul_1.c b/third_party/gmp/mpn/cray/cfp/submul_1.c
new file mode 100644
index 0000000..622c275
--- /dev/null
+++ b/third_party/gmp/mpn/cray/cfp/submul_1.c
@@ -0,0 +1,48 @@
+/* mpn_submul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_limb_t p0[n], p1[n], tp[n];
+  mp_limb_t cy_limb;
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);
+  cy_limb = mpn_sub_n (tp, rp, p0, n);
+  rp[0] = tp[0];
+  if (n != 1)
+    cy_limb += mpn_sub_n (rp + 1, tp + 1, p1, n - 1);
+  cy_limb += p1[n - 1];
+
+  return cy_limb;
+}
diff --git a/third_party/gmp/mpn/cray/gmp-mparam.h b/third_party/gmp/mpn/cray/gmp-mparam.h
new file mode 100644
index 0000000..1baed1e
--- /dev/null
+++ b/third_party/gmp/mpn/cray/gmp-mparam.h
@@ -0,0 +1,74 @@
+/* Cray T90 CFP gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* T90 Unicos 10.0.X in CFP mode */
+
+/* Generated by tuneup.c, 2004-02-07, system compiler */
+
+#define MUL_TOOM22_THRESHOLD             71
+#define MUL_TOOM33_THRESHOLD            131
+
+#define SQR_BASECASE_THRESHOLD           32
+#define SQR_TOOM2_THRESHOLD             199
+#define SQR_TOOM3_THRESHOLD             363
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* (preinv always) */
+#define DIV_DC_THRESHOLD                996
+#define POWM_THRESHOLD                  601
+
+#define HGCD_THRESHOLD                  964
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD               2874
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             26
+#define GET_STR_PRECOMPUTE_THRESHOLD     42
+#define SET_STR_THRESHOLD            145756
+
+#define MUL_FFT_TABLE  { 272, 544, 1088, 2304, 5120, 12288, 49152, 0 }
+#define MUL_FFT_MODF_THRESHOLD          200
+#define MUL_FFT_THRESHOLD              1664
+
+#define SQR_FFT_TABLE  { 1008, 2080, 3904, 7936, 17408, 45056, 0 }
+#define SQR_FFT_MODF_THRESHOLD          600
+#define SQR_FFT_THRESHOLD              2976
diff --git a/third_party/gmp/mpn/cray/hamdist.c b/third_party/gmp/mpn/cray/hamdist.c
new file mode 100644
index 0000000..e7f177a
--- /dev/null
+++ b/third_party/gmp/mpn/cray/hamdist.c
@@ -0,0 +1,42 @@
+/* Cray mpn_hamdist -- hamming distance count.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+unsigned long int
+mpn_hamdist (mp_srcptr p1, mp_srcptr p2, mp_size_t n)
+{
+  unsigned long int result = 0;
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    result += _popcnt (p1[i] ^ p2[i]);
+  return result;
+}
diff --git a/third_party/gmp/mpn/cray/ieee/addmul_1.c b/third_party/gmp/mpn/cray/ieee/addmul_1.c
new file mode 100644
index 0000000..ce7dfbb
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/addmul_1.c
@@ -0,0 +1,111 @@
+/* Cray PVP/IEEE mpn_addmul_1 -- multiply a limb vector with a limb and add the
+   result to a second limb vector.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
+   mainly due to vector register shortage in the main loop.  Assembly code
+   should bring it down to perhaps 7 cycles/limb.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+	 making a temporary copy of the source vector, then call ourselves.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_addmul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;
+  r = rp[0];
+  s0 = a + r;
+  rp[0] = s0;
+  c0 = ((a & r) | ((a | r) & ~s0)) >> 63;
+  cy[0] = c0;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+      r = rp[i];
+      s1 = s0 + r;
+      rp[i] = s1;
+      c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;
+      cy[i] = c0 + c1;
+    }
+  /* Carry add loop.  Add the carry vector cy[] to the raw result rp[] and
+     store the new result back to rp[].  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] == 0 and cy[k-1] == 1 or
+	 rp[k] == 1 and cy[k-1] == 2.
+	 These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+	{
+	  r = rp[i];
+	  c0 = r < cy[i - 1];
+	  s0 = r + cyrec;
+	  rp[i] = s0;
+	  c1 = (r & ~s0) >> 63;
+	  cyrec = c0 | c1;
+	}
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/third_party/gmp/mpn/cray/ieee/gmp-mparam.h b/third_party/gmp/mpn/cray/ieee/gmp-mparam.h
new file mode 100644
index 0000000..1fdc286
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/gmp-mparam.h
@@ -0,0 +1,73 @@
+/* Cray T90 IEEE gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* Generated by tuneup.c, 2004-02-07, system compiler */
+
+#define MUL_TOOM22_THRESHOLD            130
+#define MUL_TOOM33_THRESHOLD            260
+
+#define SQR_BASECASE_THRESHOLD            9  /* karatsuba */
+#define SQR_TOOM2_THRESHOLD               0  /* never sqr_basecase */
+#define SQR_TOOM3_THRESHOLD              34
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                390
+#define POWM_THRESHOLD                  656
+
+#define HGCD_THRESHOLD                  964
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                964
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             45
+#define GET_STR_PRECOMPUTE_THRESHOLD     77
+#define SET_STR_THRESHOLD            145756
+
+#define MUL_FFT_TABLE  { 1104, 2208, 4416, 8960, 19456, 45056, 0 }
+#define MUL_FFT_MODF_THRESHOLD         1168
+#define MUL_FFT_THRESHOLD              6528
+
+#define SQR_FFT_TABLE  { 368, 736, 1600, 2816, 7168, 12288, 0 }
+#define SQR_FFT_MODF_THRESHOLD          296
+#define SQR_FFT_THRESHOLD              1312
diff --git a/third_party/gmp/mpn/cray/ieee/invert_limb.c b/third_party/gmp/mpn/cray/ieee/invert_limb.c
new file mode 100644
index 0000000..774a27b
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/invert_limb.c
@@ -0,0 +1,127 @@
+/* mpn_invert_limb -- Invert a normalized limb.
+
+Copyright 1991, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+  This is needed to make configure define HAVE_NATIVE_mpn_invert_limb:
+  PROLOGUE(mpn_invert_limb)
+*/
+
+static const unsigned short int approx_tab[0x100] =
+{
+  /* 0x400, */
+  0x3ff,
+         0x3fc, 0x3f8, 0x3f4, 0x3f0, 0x3ec, 0x3e8, 0x3e4,
+  0x3e0, 0x3dd, 0x3d9, 0x3d5, 0x3d2, 0x3ce, 0x3ca, 0x3c7,
+  0x3c3, 0x3c0, 0x3bc, 0x3b9, 0x3b5, 0x3b2, 0x3ae, 0x3ab,
+  0x3a8, 0x3a4, 0x3a1, 0x39e, 0x39b, 0x397, 0x394, 0x391,
+  0x38e, 0x38b, 0x387, 0x384, 0x381, 0x37e, 0x37b, 0x378,
+  0x375, 0x372, 0x36f, 0x36c, 0x369, 0x366, 0x364, 0x361,
+  0x35e, 0x35b, 0x358, 0x355, 0x353, 0x350, 0x34d, 0x34a,
+  0x348, 0x345, 0x342, 0x340, 0x33d, 0x33a, 0x338, 0x335,
+  0x333, 0x330, 0x32e, 0x32b, 0x329, 0x326, 0x324, 0x321,
+  0x31f, 0x31c, 0x31a, 0x317, 0x315, 0x313, 0x310, 0x30e,
+  0x30c, 0x309, 0x307, 0x305, 0x303, 0x300, 0x2fe, 0x2fc,
+  0x2fa, 0x2f7, 0x2f5, 0x2f3, 0x2f1, 0x2ef, 0x2ec, 0x2ea,
+  0x2e8, 0x2e6, 0x2e4, 0x2e2, 0x2e0, 0x2de, 0x2dc, 0x2da,
+  0x2d8, 0x2d6, 0x2d4, 0x2d2, 0x2d0, 0x2ce, 0x2cc, 0x2ca,
+  0x2c8, 0x2c6, 0x2c4, 0x2c2, 0x2c0, 0x2be, 0x2bc, 0x2bb,
+  0x2b9, 0x2b7, 0x2b5, 0x2b3, 0x2b1, 0x2b0, 0x2ae, 0x2ac,
+  0x2aa, 0x2a8, 0x2a7, 0x2a5, 0x2a3, 0x2a1, 0x2a0, 0x29e,
+  0x29c, 0x29b, 0x299, 0x297, 0x295, 0x294, 0x292, 0x291,
+  0x28f, 0x28d, 0x28c, 0x28a, 0x288, 0x287, 0x285, 0x284,
+  0x282, 0x280, 0x27f, 0x27d, 0x27c, 0x27a, 0x279, 0x277,
+  0x276, 0x274, 0x273, 0x271, 0x270, 0x26e, 0x26d, 0x26b,
+  0x26a, 0x268, 0x267, 0x265, 0x264, 0x263, 0x261, 0x260,
+  0x25e, 0x25d, 0x25c, 0x25a, 0x259, 0x257, 0x256, 0x255,
+  0x253, 0x252, 0x251, 0x24f, 0x24e, 0x24d, 0x24b, 0x24a,
+  0x249, 0x247, 0x246, 0x245, 0x243, 0x242, 0x241, 0x240,
+  0x23e, 0x23d, 0x23c, 0x23b, 0x239, 0x238, 0x237, 0x236,
+  0x234, 0x233, 0x232, 0x231, 0x230, 0x22e, 0x22d, 0x22c,
+  0x22b, 0x22a, 0x229, 0x227, 0x226, 0x225, 0x224, 0x223,
+  0x222, 0x220, 0x21f, 0x21e, 0x21d, 0x21c, 0x21b, 0x21a,
+  0x219, 0x218, 0x216, 0x215, 0x214, 0x213, 0x212, 0x211,
+  0x210, 0x20f, 0x20e, 0x20d, 0x20c, 0x20b, 0x20a, 0x209,
+  0x208, 0x207, 0x206, 0x205, 0x204, 0x203, 0x202, 0x201,
+};
+
+/* iteration: z = 2z-(z**2)d */
+
+mp_limb_t
+mpn_invert_limb (mp_limb_t d)
+{
+  mp_limb_t z, z2l, z2h, tl, th;
+  mp_limb_t xh, xl;
+  mp_limb_t zh, zl;
+
+#if GMP_LIMB_BITS == 32
+  z = approx_tab[(d >> 23) - 0x100] << 6;	/* z < 2^16 */
+
+  z2l = z * z;					/* z2l < 2^32 */
+  umul_ppmm (th, tl, z2l, d);
+  z = (z << 17) - (th << 1);
+#endif
+#if GMP_LIMB_BITS == 64
+  z = approx_tab[(d >> 55) - 0x100] << 6;	/* z < 2^16 */
+
+  z2l = z * z;					/* z2l < 2^32 */
+  th = z2l * (d >> 32);				/* th < 2^64 */
+  z = (z << 17) - (th >> 31);			/* z < 2^32 */
+
+  z2l = z * z;
+  umul_ppmm (th, tl, z2l, d);
+  z = (z << 33) - (th << 1);
+#endif
+
+  umul_ppmm (z2h, z2l, z, z);
+  umul_ppmm (th, tl, z2h, d);
+  umul_ppmm (xh, xl, z2l, d);
+  tl += xh;
+  th += tl < xh;
+  th = (th << 2) | (tl >> GMP_LIMB_BITS - 2);
+  tl = tl << 2;
+  sub_ddmmss (zh, zl, z << 2, 0, th, tl);
+
+  umul_ppmm (xh, xl, d, zh);
+  xh += d;		/* add_ssaaaa (xh, xl, xh, xl, d, 0); */
+  if (~xh != 0)
+    {
+      add_ssaaaa (xh, xl, xh, xl, 0, d);
+      zh++;
+    }
+
+  add_ssaaaa (xh, xl, xh, xl, 0, d);
+  if (xh != 0)
+    zh++;
+
+  return zh;
+}
diff --git a/third_party/gmp/mpn/cray/ieee/mul_1.c b/third_party/gmp/mpn/cray/ieee/mul_1.c
new file mode 100644
index 0000000..40139fb
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/mul_1.c
@@ -0,0 +1,103 @@
+/* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
+   result in a second limb vector.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This code runs at 5 cycles/limb on a T90.  That would probably
+   be hard to improve upon, even with assembly code.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+	 making a temporary copy of the source vector, then call ourselves.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_mul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;
+  rp[0] = a;
+  cy[0] = 0;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+      rp[i] = s0;
+      cy[i] = c0;
+    }
+  /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
+     store the new sum back to rp[0].  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 2; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
+	 These are where we got a recurrency carry.  */
+      for (i = 2; i < n; i++)
+	{
+	  r = rp[i];
+	  c0 = (r == 0 && cy[i - 1] != 0);
+	  s0 = r + cyrec;
+	  rp[i] = s0;
+	  c1 = (r & ~s0) >> 63;
+	  cyrec = c0 | c1;
+	}
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/third_party/gmp/mpn/cray/ieee/mul_basecase.c b/third_party/gmp/mpn/cray/ieee/mul_basecase.c
new file mode 100644
index 0000000..72628f7
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/mul_basecase.c
@@ -0,0 +1,107 @@
+/* Cray PVP/IEEE mpn_mul_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* The most critical loop of this code runs at about 5 cycles/limb on a T90.
+   That is not perfect, mainly due to vector register shortage.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+void
+mpn_mul_basecase (mp_ptr rp,
+		  mp_srcptr up, mp_size_t un,
+		  mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t cy[un + vn];
+  mp_limb_t vl;
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i, j;
+  int more_carries;
+
+  for (i = 0; i < un + vn; i++)
+    {
+      rp[i] = 0;
+      cy[i] = 0;
+    }
+
+#pragma _CRI novector
+  for (j = 0; j < vn; j++)
+    {
+      vl = vp[j];
+
+      a = up[0] * vl;
+      r = rp[j];
+      s0 = a + r;
+      rp[j] = s0;
+      c0 = ((a & r) | ((a | r) & ~s0)) >> 63;
+      cy[j] += c0;
+
+#pragma _CRI ivdep
+      for (i = 1; i < un; i++)
+	{
+	  a = up[i] * vl;
+	  b = _int_mult_upper (up[i - 1], vl);
+	  s0 = a + b;
+	  c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+	  r = rp[j + i];
+	  s1 = s0 + r;
+	  rp[j + i] = s1;
+	  c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;
+	  cy[j + i] += c0 + c1;
+	}
+      rp[j + un] = _int_mult_upper (up[un - 1], vl);
+    }
+
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < un + vn; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      for (i = 1; i < un + vn; i++)
+	{
+	  r = rp[i];
+	  c0 = (r < cy[i - 1]);
+	  s0 = r + cyrec;
+	  rp[i] = s0;
+	  c1 = (r & ~s0) >> 63;
+	  cyrec = c0 | c1;
+	}
+    }
+}
diff --git a/third_party/gmp/mpn/cray/ieee/sqr_basecase.c b/third_party/gmp/mpn/cray/ieee/sqr_basecase.c
new file mode 100644
index 0000000..5bd4e56
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/sqr_basecase.c
@@ -0,0 +1,105 @@
+/* Cray PVP/IEEE mpn_sqr_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This is just mpn_mul_basecase with trivial modifications.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+void
+mpn_sqr_basecase (mp_ptr rp,
+		  mp_srcptr up, mp_size_t un)
+{
+  mp_limb_t cy[un + un];
+  mp_limb_t ul;
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i, j;
+  int more_carries;
+
+  for (i = 0; i < un + un; i++)
+    {
+      rp[i] = 0;
+      cy[i] = 0;
+    }
+
+#pragma _CRI novector
+  for (j = 0; j < un; j++)
+    {
+      ul = up[j];
+
+      a = up[0] * ul;
+      r = rp[j];
+      s0 = a + r;
+      rp[j] = s0;
+      c0 = ((a & r) | ((a | r) & ~s0)) >> 63;
+      cy[j] += c0;
+
+#pragma _CRI ivdep
+      for (i = 1; i < un; i++)
+	{
+	  a = up[i] * ul;
+	  b = _int_mult_upper (up[i - 1], ul);
+	  s0 = a + b;
+	  c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+	  r = rp[j + i];
+	  s1 = s0 + r;
+	  rp[j + i] = s1;
+	  c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;
+	  cy[j + i] += c0 + c1;
+	}
+      rp[j + un] = _int_mult_upper (up[un - 1], ul);
+    }
+
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < un + un; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      for (i = 1; i < un + un; i++)
+	{
+	  r = rp[i];
+	  c0 = (r < cy[i - 1]);
+	  s0 = r + cyrec;
+	  rp[i] = s0;
+	  c1 = (r & ~s0) >> 63;
+	  cyrec = c0 | c1;
+	}
+    }
+}
diff --git a/third_party/gmp/mpn/cray/ieee/submul_1.c b/third_party/gmp/mpn/cray/ieee/submul_1.c
new file mode 100644
index 0000000..2b3ca21
--- /dev/null
+++ b/third_party/gmp/mpn/cray/ieee/submul_1.c
@@ -0,0 +1,111 @@
+/* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
+   subtract the result from a second limb vector.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
+   mainly due to vector register shortage in the main loop.  Assembly code
+   should bring it down to perhaps 7 cycles/limb.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+	 making a temporary copy of the source vector, then call ourselves.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_submul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;
+  r = rp[0];
+  s0 = r - a;
+  rp[0] = s0;
+  c1 = ((s0 & a) | ((s0 | a) & ~r)) >> 63;
+  cy[0] = c1;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+      r = rp[i];
+      s1 = r - s0;
+      rp[i] = s1;
+      c1 = ((s1 & s0) | ((s1 | s0) & ~r)) >> 63;
+      cy[i] = c0 + c1;
+    }
+  /* Carry subtract loop.  Subtract the carry vector cy[] from the raw result
+     rp[] and store the new result back to rp[].  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r - c0;
+      rp[i] = s0;
+      c0 = (s0 & ~r) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] == ~0 and cy[k-1] == 1 or
+	 rp[k] == ~1 and cy[k-1] == 2.
+	 These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+	{
+	  r = rp[i];
+	  c0 = ~r < cy[i - 1];
+	  s0 = r - cyrec;
+	  rp[i] = s0;
+	  c1 = (s0 & ~r) >> 63;
+	  cyrec = c0 | c1;
+	}
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/third_party/gmp/mpn/cray/lshift.c b/third_party/gmp/mpn/cray/lshift.c
new file mode 100644
index 0000000..8534e93
--- /dev/null
+++ b/third_party/gmp/mpn/cray/lshift.c
@@ -0,0 +1,58 @@
+/* mpn_lshift -- Shift left low level for Cray vector processors.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  unsigned sh_1, sh_2;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  sh_1 = cnt;
+  sh_2 = GMP_LIMB_BITS - sh_1;
+  retval = up[n - 1] >> sh_2;
+
+#pragma _CRI ivdep
+  for (i = n - 1; i > 0; i--)
+    {
+#if 1
+      wp[i] = (up[i] << sh_1) | (up[i - 1] >> sh_2);
+#else
+      /* This is the recommended way, but at least on SV1 it is slower.  */
+      wp[i] = _dshiftl (up[i], up[i - 1], sh_1);
+#endif
+    }
+
+  wp[0] = up[0] << sh_1;
+  return retval;
+}
diff --git a/third_party/gmp/mpn/cray/mulww.f b/third_party/gmp/mpn/cray/mulww.f
new file mode 100644
index 0000000..6885dfc
--- /dev/null
+++ b/third_party/gmp/mpn/cray/mulww.f
@@ -0,0 +1,63 @@
+c    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+c    Copyright 1996, 2000 Free Software Foundation, Inc.
+
+c    This file is part of the GNU MP Library.
+c
+c    The GNU MP Library is free software; you can redistribute it and/or modify
+c    it under the terms of either:
+c
+c      * the GNU Lesser General Public License as published by the Free
+c        Software Foundation; either version 3 of the License, or (at your
+c        option) any later version.
+c
+c    or
+c
+c      * the GNU General Public License as published by the Free Software
+c        Foundation; either version 2 of the License, or (at your option) any
+c        later version.
+c
+c    or both in parallel, as here.
+c
+c    The GNU MP Library is distributed in the hope that it will be useful, but
+c    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+c    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+c    for more details.
+c
+c    You should have received copies of the GNU General Public License and the
+c    GNU Lesser General Public License along with the GNU MP Library.  If not,
+c    see https://www.gnu.org/licenses/.
+
+c    p1[] = hi(a[]*s); the upper limbs of each product
+c    p0[] = low(a[]*s); the corresponding lower limbs
+c    n is number of limbs in the vectors
+
+      subroutine gmpn_mulww(p1,p0,a,n,s)
+      integer*8 p1(0:*),p0(0:*),a(0:*),s
+      integer n
+
+      integer*8 a0,a1,a2,s0,s1,s2,c
+      integer*8 ai,t0,t1,t2,t3,t4
+
+      s0 = shiftl(and(s,4194303),24)
+      s1 = shiftl(and(shiftr(s,22),4194303),24)
+      s2 = shiftl(and(shiftr(s,44),4194303),24)
+
+      do i = 0,n-1
+         ai = a(i)
+         a0 = shiftl(and(ai,4194303),24)
+         a1 = shiftl(and(shiftr(ai,22),4194303),24)
+         a2 = shiftl(and(shiftr(ai,44),4194303),24)
+
+         t0 = i24mult(a0,s0)
+         t1 = i24mult(a0,s1)+i24mult(a1,s0)
+         t2 = i24mult(a0,s2)+i24mult(a1,s1)+i24mult(a2,s0)
+         t3 = i24mult(a1,s2)+i24mult(a2,s1)
+         t4 = i24mult(a2,s2)
+
+         p0(i)=shiftl(t2,44)+shiftl(t1,22)+t0
+         c=shiftr(shiftr(t0,22)+and(t1,4398046511103)+
+     $        shiftl(and(t2,1048575),22),42)
+         p1(i)=shiftl(t4,24)+shiftl(t3,2)+shiftr(t2,20)+shiftr(t1,42)+c
+      end do
+      end
diff --git a/third_party/gmp/mpn/cray/popcount.c b/third_party/gmp/mpn/cray/popcount.c
new file mode 100644
index 0000000..a79211f
--- /dev/null
+++ b/third_party/gmp/mpn/cray/popcount.c
@@ -0,0 +1,42 @@
+/* Cray mpn_popcount -- population count.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+unsigned long int
+mpn_popcount (mp_srcptr p, mp_size_t n)
+{
+  unsigned long int result = 0;
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    result += _popcnt (p[i]);
+  return result;
+}
diff --git a/third_party/gmp/mpn/cray/rshift.c b/third_party/gmp/mpn/cray/rshift.c
new file mode 100644
index 0000000..9c4aa22
--- /dev/null
+++ b/third_party/gmp/mpn/cray/rshift.c
@@ -0,0 +1,58 @@
+/* mpn_rshift -- Shift right low level for Cray vector processors.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_rshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  unsigned sh_1, sh_2;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  sh_1 = cnt;
+  sh_2 = GMP_LIMB_BITS - sh_1;
+  retval = up[0] << sh_2;
+
+#pragma _CRI ivdep
+  for (i = 0; i < n - 1; i++)
+    {
+#if 1
+      wp[i] = (up[i] >> sh_1) | (up[i + 1] << sh_2);
+#else
+      /* This is the recommended way, but at least on SV1 it is slower.  */
+      wp[i] = _dshiftr (up[i + 1], up[i], sh_1);
+#endif
+    }
+
+  wp[n - 1] = up[n - 1] >> sh_1;
+  return retval;
+}
diff --git a/third_party/gmp/mpn/cray/sub_n.c b/third_party/gmp/mpn/cray/sub_n.c
new file mode 100644
index 0000000..f518764
--- /dev/null
+++ b/third_party/gmp/mpn/cray/sub_n.c
@@ -0,0 +1,90 @@
+/* Cray PVP mpn_sub_n -- subtract two limb vectors and store their difference
+   in a third limb vector.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* This code runs at 4 cycles/limb.  It may be possible to bring it down
+   to 3 cycles/limb.  */
+
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  /* Main subtract loop.  Generate a raw output difference in rp[] and a
+     borrow vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 0; i < n; i++)
+    {
+      a = up[i];
+      b = vp[i];
+      s0 = a - b;		/* a = s0 + b */
+      rp[i] = s0;
+      c0 = ((s0 & b) | ((s0 | b) & ~a)) >> 63;
+      cy[i] = c0;
+    }
+  /* Borrow subtract loop.  Subtract the borrow vector cy[] from the raw
+     difference rp[] and store the new difference back to rp[0].  If this
+     generates further borrow, set more_carries.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r - c0;		/* r = s0 + c0 */
+      rp[i] = s0;
+      c0 = (s0 & ~r) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated borrow, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] contains just ones and cy[k-1] is
+	 non-zero.  These are where we got a recurrency borrow.  */
+      for (i = 1; i < n; i++)
+	{
+	  r = rp[i];
+	  c0 = (~r == 0 && cy[i - 1] != 0);
+	  s0 = r - cyrec;
+	  rp[i] = s0;
+	  c1 = (s0 & ~r) >> 63;
+	  cyrec = c0 | c1;
+	}
+      return cyrec | cy[n - 1];
+    }
+
+  return cy[n - 1];
+}