Revert "Remove gmp from AOS"

This reverts commit f37c97684f0910a3f241394549392f00145ab0f7.

We need gmp for SymEngine for symbolicmanipultion in C++

Change-Id: Ia13216d1715cf96944f7b4f422b7a799f921d4a4
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/third_party/gmp/mpn/generic/gcd.c b/third_party/gmp/mpn/generic/gcd.c
new file mode 100644
index 0000000..3f92cbf
--- /dev/null
+++ b/third_party/gmp/mpn/generic/gcd.c
@@ -0,0 +1,266 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     computation, Math. Comp. 77 (2008), 589-607.
+
+  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+  then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
+
+struct gcd_ctx
+{
+  mp_ptr gp;
+  mp_size_t gn;
+};
+
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	  mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
+  MPN_COPY (ctx->gp, gp, gn);
+  ctx->gn = gn;
+}
+
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+
+  struct gcd_ctx ctx;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (usize >= n);
+  ASSERT (n > 0);
+  ASSERT (vp[n-1] > 0);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  /* For initial division */
+  scratch = usize - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+#if TUNE_GCD_P
+  if (CHOOSE_P (n) > 0)
+#else
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t scratch;
+#if TUNE_GCD_P
+      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+	 is increasing */
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+      hgcd_scratch = mpn_hgcd_itch (n);
+      update_scratch = 2*(n - 1);
+#else
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+#endif
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+	talloc = scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (usize > n)
+    {
+      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+      if (mpn_zero_p (up, n))
+	{
+	  MPN_COPY (gp, vp, n);
+	  ctx.gn = n;
+	  goto done;
+	}
+    }
+
+  ctx.gp = gp;
+
+#if TUNE_GCD_P
+  while (CHOOSE_P (n) > 0)
+#else
+  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+	}
+      else
+	{
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t uh, ul, vh, vl;
+      mp_limb_t mask;
+
+      mask = up[n-1] | vp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  uh = up[n-1]; ul = up[n-2];
+	  vh = vp[n-1]; vl = vp[n-2];
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+	  MP_PTR_SWAP (up, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  ASSERT(up[n-1] | vp[n-1]);
+
+  /* Due to the calling convention for mpn_gcd, at most one can be even. */
+  if ((up[0] & 1) == 0)
+    MP_PTR_SWAP (up, vp);
+  ASSERT ((up[0] & 1) != 0);
+
+  {
+    mp_limb_t u0, u1, v0, v1;
+    mp_double_limb_t g;
+
+    u0 = up[0];
+    v0 = vp[0];
+
+    if (n == 1)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	*gp = mpn_gcd_11 (u0, v0 >> cnt);
+	ctx.gn = 1;
+	goto done;
+      }
+
+    v1 = vp[1];
+    if (UNLIKELY (v0 == 0))
+      {
+	v0 = v1;
+	v1 = 0;
+	/* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
+	   when this situation occurs internally.  */
+      }
+    if ((v0 & 1) == 0)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
+	v1 >>= cnt;
+      }
+
+    u1 = up[1];
+    g = mpn_gcd_22 (u1, u0, v1, v0);
+    gp[0] = g.d0;
+    gp[1] = g.d1;
+    ctx.gn = 1 + (g.d1 > 0);
+  }
+done:
+  TMP_FREE;
+  return ctx.gn;
+}